# **Generic Model for Regression**

## **Importing the libraries needed**

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### ***Importing the Dataset***

In [94]:
dataset = pd.read_csv('Data.csv') # Please enter the name of your dataset here in order to apply this template
dataset.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [95]:
dataset.columns #Lists the columns from your dataset

Index(['AT', 'V', 'AP', 'RH', 'PE'], dtype='object')

In [96]:
X = dataset[['AT', 'V', 'AP', 'RH']].values # Put here only the features necessary to evaluate the model 
X

array([[  14.96,   41.76, 1024.07,   73.17],
       [  25.18,   62.96, 1020.04,   59.08],
       [   5.11,   39.4 , 1012.16,   92.14],
       ...,
       [  31.32,   74.33, 1012.92,   36.48],
       [  24.48,   69.45, 1013.86,   62.39],
       [  21.6 ,   62.52, 1017.23,   67.87]])

In [97]:
y = dataset['PE'].values # Put here only the value that you want to predict
y

array([463.26, 444.37, 488.56, ..., 429.57, 435.74, 453.28])

In [0]:
# Another way of splitting the Dataset using the slice methodology
# X = dataset.iloc[:, :-1].values
# y = dataset.iloc[:,-1].values

## ***Mutiple Linear Regression Model***

### ***Splitting the data between train and test sets***

In [0]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42)

### ***Training the Multiple Linear Regression Model on the Training set***



In [100]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

### ***Predicting the Test set results***

In [101]:
y_pred = regressor.predict(X_test)
np.set_printoptions(precision= 2)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))

[[455.63 455.27]
 [438.73 436.31]
 [434.14 440.68]
 ...
 [428.87 433.44]
 [469.78 470.46]
 [446.43 448.41]]


### ***Evaluating Multiple Linear Regression Model Performance***

In [102]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9271761736761966

## ***Polynomial Regression Model***

### ***Training the Polynomial Regression Model on the Training set***



In [103]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
poly_reg = PolynomialFeatures(degree = 4)
X_poly = poly_reg.fit_transform(X_train)
regressor = LinearRegression()
regressor.fit(X_poly, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

### ***Predicting the Test Results***

In [104]:
y_pred = regressor.predict(poly_reg.transform(X_test))
np.set_printoptions(precision= 2)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))

[[455.83 455.27]
 [437.42 436.31]
 [434.5  440.68]
 ...
 [433.23 433.44]
 [471.27 470.46]
 [445.17 448.41]]


### ***Evaluating Multiple Linear Regression Model Performance***

In [105]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9411744616859692

## ***Support Vector Regression SVR***

### ***Reshape the y data in order to attend the Model***


In [0]:
y_svr = y.reshape(len(y),1) 
X_svr = X

### ***Splliting the dataset into the Training and Test set***


In [0]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X_svr, y_svr, test_size=0.33, random_state=42)

### ***Feature Scaling***


In [0]:
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X_train = sc_X.fit_transform(X_train)
y_train = sc_y.fit_transform(y_train)

### ***Training the SVR model on the Training set***


In [109]:
from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(X_train,y_train)

  y = column_or_1d(y, warn=True)


SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

### ***Predicting the Test results***


In [110]:
y_pred = sc_y.inverse_transform(regressor.predict(sc_X.inverse_transform(X_test)))
np.set_printoptions(precision= 2)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))


[[458.15 455.27]
 [458.15 436.31]
 [458.15 440.68]
 ...
 [458.15 433.44]
 [458.15 470.46]
 [458.15 448.41]]


### ***Evaluating the Model Performance***


In [111]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

-0.04906397872861956

## ***Decision Tree Regression***

### ***Splliting the dataset into the Training and Test set***


In [0]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42)

### ***Training the Decision Tree Regression model on the Training set***


In [113]:
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state = 0)
regressor.fit(X_train, y_train)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=0, splitter='best')

### ***Predicting the Test set results***


In [114]:
y_pred = regressor.predict(X_test)
np.set_printoptions(precision= 2)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))

[[453.62 455.27]
 [436.96 436.31]
 [438.04 440.68]
 ...
 [433.34 433.44]
 [469.27 470.46]
 [449.77 448.41]]


### ***Evaluating the Model Performance***


In [115]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9248954030111136

## ***Random Forest Regression***

### ***Splliting the dataset into the Training and Test set***


In [0]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42)

### ***Training the Random Forest Regression model on the Training set***


In [117]:
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(random_state = 0)
regressor.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

### ***Predicting the Test set results***


In [118]:
y_pred = regressor.predict(X_test)
np.set_printoptions(precision= 2)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))

[[455.13 455.27]
 [436.12 436.31]
 [434.48 440.68]
 ...
 [433.24 433.44]
 [469.85 470.46]
 [447.97 448.41]]


### ***Evaluating the Model Performance***


In [119]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9607992656549409