# Regression Model Selection

## Import Libraries

In [16]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#data split and scoring
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

#feature scaling
from sklearn.preprocessing import StandardScaler

#ML models
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

## Import Data

In [17]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

## Transform the Data

In [18]:
svr_y = y.reshape(len(y),1)

## Split Data into Training & Test set

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

#use transformed y for support vector regression
svr_X_train, svr_X_test, svr_y_train, svr_y_test = train_test_split(X, svr_y, test_size=0.2, random_state=0)

## Feature Scale the Data

In [20]:
svr_sc_X = StandardScaler()
svr_sc_y = StandardScaler()
svr_X_train = svr_sc_X.fit_transform(svr_X_train)
svr_y_train = svr_sc_y.fit_transform(svr_y_train)

## Models

### Decision Tree Regression

#### Train DT Model

In [21]:
dt_regressor = DecisionTreeRegressor(random_state=0)
dt_regressor.fit(X_train, y_train)

DecisionTreeRegressor(random_state=0)

#### Predict test result using DT model

In [22]:
dt_y_pred = dt_regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((dt_y_pred.reshape(len(dt_y_pred),1), y_test.reshape(len(y_test),1)),1))

[[431.28 431.23]
 [459.59 460.01]
 [460.06 461.14]
 ...
 [471.46 473.26]
 [437.76 438.  ]
 [462.74 463.28]]


### Multiple Linear Regression

#### Train MLR Model

In [23]:
mlr_regressor = LinearRegression()
mlr_regressor.fit(X_train, y_train)

LinearRegression()

#### Predict test result using MLR model

In [24]:
mlr_y_pred = mlr_regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((mlr_y_pred.reshape(len(mlr_y_pred),1), y_test.reshape(len(y_test),1)),1))

[[431.43 431.23]
 [458.56 460.01]
 [462.75 461.14]
 ...
 [469.52 473.26]
 [442.42 438.  ]
 [461.88 463.28]]


### Polynomial Regression

#### Train PR model

In [25]:
poly_reg = PolynomialFeatures(degree = 4)
X_poly = poly_reg.fit_transform(X_train)
pr_regressor = LinearRegression()
pr_regressor.fit(X_poly, y_train)

LinearRegression()

#### Predict test result using PR model

In [26]:
pr_y_pred = pr_regressor.predict(poly_reg.transform(X_test))
np.set_printoptions(precision=2)
print(np.concatenate((pr_y_pred.reshape(len(pr_y_pred),1), y_test.reshape(len(y_test),1)),1))

[[433.94 431.23]
 [457.9  460.01]
 [460.52 461.14]
 ...
 [469.53 473.26]
 [438.27 438.  ]
 [461.67 463.28]]


### Random Forest Regession

#### Train RFR Model

In [27]:
rfr_regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
rfr_regressor.fit(X_train, y_train)

RandomForestRegressor(n_estimators=10, random_state=0)

#### Predict test result using RFR model

In [28]:
rfr_y_pred = rfr_regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((rfr_y_pred.reshape(len(rfr_y_pred),1), y_test.reshape(len(y_test),1)),1))

[[434.05 431.23]
 [458.79 460.01]
 [463.02 461.14]
 ...
 [469.48 473.26]
 [439.57 438.  ]
 [460.38 463.28]]


### Support Vector Regression (SVR)

#### Train SVR model

In [29]:
regressor = SVR(kernel = 'rbf')
regressor.fit(svr_X_train, svr_y_train)

  y = column_or_1d(y, warn=True)


SVR()

#### Predict test result using SVR model

In [30]:
svr_y_pred = svr_sc_y.inverse_transform(regressor.predict(svr_sc_X.transform(svr_X_test)))
np.set_printoptions(precision=2)
print(np.concatenate((svr_y_pred.reshape(len(svr_y_pred),1), svr_y_test.reshape(len(svr_y_test),1)),1))

ValueError: Expected 2D array, got 1D array instead:
array=[-1.19  0.21  0.39 ...  0.96 -0.87  0.39].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

## Evaluate Model Performance

In [None]:
#dT
print("DT:")
r2_score(y_test, dt_y_pred)

DT:


0.922905874177941

In [None]:
print("MLR:")
r2_score(y_test, mlr_y_pred)

MLR:


0.9325315554761302

In [None]:
print("PR:")
r2_score(y_test, pr_y_pred)

PR:


0.9458192831080808

In [None]:
print("RFR:")
r2_score(y_test, rfr_y_pred)

RFR:


0.9615908334363876

In [None]:
print("SVR:")
r2_score(svr_y_test, svr_y_pred)

SVR:


0.9480784049986258