# Regression Model Selection

## Decision Tree Regression

### Importing the Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### Loading the dataset

In [2]:
# Read the CSV once, then split it column-wise: every column except the last
# becomes the feature matrix X, and the last column becomes the target y.
dataset = pd.read_csv('resources/Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Preview the first rows as the cell output.
dataset.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


### Splitting the dataset into Training and Test Set

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; random_state is fixed so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

### Training the Decision Tree Regression model on the Training set

In [4]:
from sklearn.tree import DecisionTreeRegressor

# Fit a single decision tree on the training split (seeded with random_state=0).
# fit() returns the estimator itself, so the bare name below shows the same repr.
regressor = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)
regressor

DecisionTreeRegressor(random_state=0)

### Predicting the Test set results

In [5]:
# Predict on the held-out rows and print predictions (left column) next to
# the true targets (right column), rounded to 3 decimals for display.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=3)
print(np.column_stack((y_pred, y_test)))

[[431.28 431.23]
 [459.59 460.01]
 [460.06 461.14]
 ...
 [471.46 473.26]
 [437.76 438.  ]
 [462.74 463.28]]


### Evaluating the Model Performance

In [6]:
from sklearn.metrics import r2_score

# R^2 of the predictions against the true test targets (shown as cell output).
r2_score(y_true=y_test, y_pred=y_pred)

0.922905874177941

## Multiple Linear Regression

### Importing the Libraries

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### Loading the dataset

In [8]:
# Reload the data so this section runs on its own: all columns but the last
# form the feature matrix X, and the last column is the target y.
dataset = pd.read_csv('resources/Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Preview the first rows as the cell output.
dataset.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


### Splitting the dataset into Training and Test Set

In [9]:
from sklearn.model_selection import train_test_split

# 80/20 train/test split with a fixed seed (random_state=0).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

### Training the Multiple Linear Regression model on the Training set

In [10]:
from sklearn.linear_model import LinearRegression

# Fit a linear model on all feature columns of the training split.
# fit() returns the estimator, so the bare name below shows the same repr.
regressor = LinearRegression().fit(X_train, y_train)
regressor

LinearRegression()

### Predicting the Test set results

In [11]:
# Predict on the test rows; print predicted vs. actual values side by side
# with 3-decimal display precision.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=3)
print(np.column_stack((y_pred, y_test)))

[[431.428 431.23 ]
 [458.561 460.01 ]
 [462.753 461.14 ]
 ...
 [469.518 473.26 ]
 [442.418 438.   ]
 [461.883 463.28 ]]


### Evaluating the Model Performance

In [12]:
from sklearn.metrics import r2_score

# R^2 on the held-out test set (shown as cell output).
r2_score(y_true=y_test, y_pred=y_pred)

0.9325315554761303

## Polynomial Regression

### Importing the Libraries

In [13]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### Loading the dataset

In [14]:
# Reload the data for this section: features are every column except the
# last, and the target is the last column.
dataset = pd.read_csv('resources/Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Preview the first rows as the cell output.
dataset.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


### Splitting the dataset into Training and Test Set

In [15]:
from sklearn.model_selection import train_test_split

# 80/20 train/test split with a fixed seed (random_state=0).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

### Training the Polynomial Regression model on the Training Set

In [16]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Expand the training features into polynomial terms up to degree 4, then
# fit an ordinary linear model on the expanded matrix.
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X_train)

# fit() returns the estimator, so the bare name below shows the same repr.
regressor = LinearRegression().fit(X_poly, y_train)
regressor

LinearRegression()

### Predicting the test set results

In [17]:
# The test features must pass through the same fitted polynomial expansion
# before prediction. Output is predicted vs. actual, 2 decimals.
y_pred = regressor.predict(poly_reg.transform(X_test))
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

[[433.94 431.23]
 [457.9  460.01]
 [460.52 461.14]
 ...
 [469.53 473.26]
 [438.27 438.  ]
 [461.67 463.28]]


### Evaluating the Model Performance

In [18]:
from sklearn.metrics import r2_score

# R^2 on the held-out test set (shown as cell output).
r2_score(y_true=y_test, y_pred=y_pred)

0.9458193637378739

## Random Forest Regression

### Importing the Libraries

In [19]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### Loading the dataset

In [20]:
# Reload the data for this section: features are every column except the
# last, and the target is the last column.
dataset = pd.read_csv('resources/Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Preview the first rows as the cell output.
dataset.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


### Splitting the dataset into Training and Test Set

In [21]:
from sklearn.model_selection import train_test_split

# 80/20 train/test split with a fixed seed (random_state=0).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

### Training the Random Forest Regression model on the Training set

In [22]:
from sklearn.ensemble import RandomForestRegressor

# Ensemble of 10 trees fitted on the training split, seeded with random_state=0.
# fit() returns the estimator, so the bare name below shows the same repr.
regressor = RandomForestRegressor(n_estimators=10, random_state=0).fit(X_train, y_train)
regressor

RandomForestRegressor(n_estimators=10, random_state=0)

### Predicting the test set results

In [23]:
# Predict on the test rows; print predicted vs. actual values side by side
# with 3-decimal display precision.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=3)
print(np.column_stack((y_pred, y_test)))

[[434.049 431.23 ]
 [458.785 460.01 ]
 [463.02  461.14 ]
 ...
 [469.479 473.26 ]
 [439.566 438.   ]
 [460.385 463.28 ]]


### Evaluating the Model Performance

In [24]:
from sklearn.metrics import r2_score

# R^2 on the held-out test set (shown as cell output).
r2_score(y_true=y_test, y_pred=y_pred)

0.9615908334363876

## Support Vector Regression

### Importing the Libraries

In [25]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### Loading the dataset

In [26]:
# Reload the data for this section: features are every column except the
# last, and the target is the last column.
dataset = pd.read_csv('resources/Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Preview the first rows as the cell output.
dataset.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


### Splitting the dataset into Training and Test Set

In [27]:
from sklearn.model_selection import train_test_split

# 80/20 train/test split with a fixed seed (random_state=0).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

### Feature Scaling

In [28]:
from sklearn.preprocessing import StandardScaler

# Separate scalers for the features and the target, both fitted on the
# training split only. The target is first turned into a single column
# because the scaler is given a 2-D array here.
# NOTE: X_train and y_train are overwritten with their scaled versions, so
# re-running this cell without re-running the split would fit the scalers
# on already-scaled data.
sc_X, sc_y = StandardScaler(), StandardScaler()
X_train = sc_X.fit_transform(X_train)
y_train = sc_y.fit_transform(y_train.reshape(-1, 1))

### Training the SVR model on the training set

In [29]:
from sklearn.svm import SVR

# RBF-kernel support vector regressor trained on the standardized training split.
# After the scaling cell, y_train is a single-column 2-D array, while SVR.fit
# expects a 1-D target of shape (n_samples,). Flattening it here avoids
# scikit-learn's DataConversionWarning and does not change the fitted model.
regressor = SVR(kernel='rbf')
regressor.fit(X_train, y_train.ravel())

  return f(**kwargs)


SVR()

### Predicting the Test Set results

In [30]:
# The model was trained on standardized data, so the test features are scaled
# with the fitted feature scaler and the predictions are mapped back to the
# original target scale with the fitted target scaler.
# predict() returns a 1-D array, but StandardScaler.inverse_transform requires
# 2-D input, so the predictions are reshaped to one column for the inverse
# transform and flattened again so y_pred stays 1-D for the cells below.
y_pred = sc_y.inverse_transform(
    regressor.predict(sc_X.transform(X_test)).reshape(-1, 1)
).ravel()
np.set_printoptions(precision=2)
# Predicted values (left column) next to the true test targets (right column).
print(np.concatenate((y_pred.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1))

[[434.05 431.23]
 [457.94 460.01]
 [461.03 461.14]
 ...
 [470.6  473.26]
 [439.42 438.  ]
 [460.92 463.28]]


### Evaluating the Model Performance

In [31]:
from sklearn.metrics import r2_score

# R^2 of the back-transformed predictions against the unscaled test targets
# (shown as cell output).
r2_score(y_true=y_test, y_pred=y_pred)

0.9480784049986258