# Regression Model Selection

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

Sample data: the Combined Cycle Power Plant dataset from the UCI Machine Learning Repository (https://archive.ics.uci.edu/ml/datasets/Combined+Cycle+Power+Plant).

In [2]:
# Read the sample CSV into a DataFrame.
dataset = pd.read_csv('Sample_Data.csv')

# Inputs are every column except the last; the target is the last column.
X, y = (
    dataset.iloc[:, :-1].values,
    dataset.iloc[:, -1].values,
)

In [3]:
# Inspect the shape of the target array.
y.shape

(9568,)

In [4]:
# Turn the 1-D target into a single-column 2-D array (one row per sample).
y_reshaped = y.reshape(-1, 1)

## Splitting the entire dataset into a training and test set

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; random_state is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_reshaped,
    test_size=0.2,
    random_state=0,
)

## Feature Scaling

In [6]:
# Standardise the training features and target; the scaled copies are the
# ones used for the SVM model.
from sklearn.preprocessing import StandardScaler

# One scaler per array so each keeps its own fitted statistics.
sc_X, sc_y = StandardScaler(), StandardScaler()
X_train_Scaled = sc_X.fit(X_train).transform(X_train)
y_train_Scaled = sc_y.fit(y_train).transform(y_train)

## Training the regression models using the training set

In [7]:
# Multiple linear regression, fitted on the unscaled training data.
from sklearn.linear_model import LinearRegression

Linear_Reg_Model = LinearRegression()
Linear_Reg_Model.fit(X=X_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [8]:
# Decision tree regression, fitted on the unscaled training data.
from sklearn.tree import DecisionTreeRegressor

# random_state is fixed so repeated runs build the tree from the same seed.
Decision_Tree_Model = DecisionTreeRegressor(random_state=0)
Decision_Tree_Model.fit(X=X_train, y=y_train)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=0, splitter='best')

In [9]:
# Polynomial regression: expand the training features into degree-4
# polynomial terms, then fit a plain linear model on the expanded matrix.
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# The fitted transformer is kept so the same expansion can be applied to
# other inputs when predicting.
Polynamial_fit = PolynomialFeatures(degree=4)
X_poly = Polynamial_fit.fit(X_train).transform(X_train)

Polynomial_Reg_Model = LinearRegression()
Polynomial_Reg_Model.fit(X=X_poly, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [10]:
# Random forest regression with 10 trees and a fixed seed.
from sklearn.ensemble import RandomForestRegressor

RandomForest_Model = RandomForestRegressor(n_estimators=10, random_state=0)
# The single-column target is flattened to 1-D for fitting.
RandomForest_Model.fit(X_train, y_train.ravel())

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=10, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

In [11]:
# Support vector regression (RBF kernel), trained on the standardised data.
from sklearn.svm import SVR

SVR_Model = SVR(kernel='rbf')
# The scaled single-column target is flattened to 1-D for fitting.
SVR_Model.fit(X_train_Scaled, y_train_Scaled.ravel())

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

## Predicting test set results and evaluating the model performance

In [12]:
from sklearn.metrics import r2_score

# Every fitted model predicts on the held-out test inputs and is scored with
# R^2 against y_test. Each model receives the inputs in the same form it was
# trained on (raw, polynomial-expanded, or standardised).

# Multiple linear regression model
y_pred1 = Linear_Reg_Model.predict(X_test)
print("Multiple Linear Regression Model R2 score: {:.2}".format(r2_score(y_test, y_pred1)))

# Decision tree regression model
y_pred2 = Decision_Tree_Model.predict(X_test)
print("Decision Tree Model R2 score: {:.2}".format(r2_score(y_test, y_pred2)))

# Polynomial regression model
# Reuse the transformer fitted on the training data to expand X_test.
y_pred3 = Polynomial_Reg_Model.predict(Polynamial_fit.transform(X_test))
print("Polynomial Regression Model R2 score: {:.2}".format(r2_score(y_test, y_pred3)))

# Random Forest regression Model
y_pred4 = RandomForest_Model.predict(X_test)
print("Random Forest Model R2 score: {:.2}".format(r2_score(y_test, y_pred4)))

# Support vector regression model
# The SVR was trained on standardised inputs and targets, so X_test is scaled
# with sc_X and the predictions are mapped back to the original target scale
# with sc_y. SVR.predict returns a 1-D array, and recent scikit-learn releases
# reject 1-D input to StandardScaler.inverse_transform ("Expected 2D array"),
# so the predictions are reshaped to one column first and flattened afterwards
# to keep y_pred5 one-dimensional.
y_pred5_scaled = SVR_Model.predict(sc_X.transform(X_test))
y_pred5 = sc_y.inverse_transform(y_pred5_scaled.reshape(-1, 1)).ravel()
print("SVM Regression Model R2 score: {:.2}".format(r2_score(y_test, y_pred5)))

Multiple Linear Regression Model R2 score: 0.93
Decision Tree Model R2 score: 0.92
Polynomial Regression Model R2 score: 0.95
Random Forest Model R2 score: 0.96
SVM Regression Model R2 score: 0.95
