In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
%cd /content/drive/MyDrive/Course Material/Supervised Machine Learning/Regression Analysis

/content/drive/MyDrive/Course Material/Supervised Machine Learning/Regression Analysis


In [6]:
data = pd.read_csv("Regression_Data.csv")

In [7]:
data

Unnamed: 0,Ind_Data,Dependent_Data
0,1.1,39343
1,1.3,46205
2,1.5,37731
3,2.0,43525
4,2.2,39891
5,2.9,56642
6,3.0,60150
7,3.2,54445
8,3.2,64445
9,3.7,57189


In [8]:
data.shape

(30, 2)

In [9]:
X = data.iloc[:,:-1].values
y = data.iloc[:,-1].values
X.shape

(30, 1)

# Creating test data

In [10]:
test_data = np.linspace(1,11,30).reshape(-1,1)
test_data

array([[ 1.        ],
       [ 1.34482759],
       [ 1.68965517],
       [ 2.03448276],
       [ 2.37931034],
       [ 2.72413793],
       [ 3.06896552],
       [ 3.4137931 ],
       [ 3.75862069],
       [ 4.10344828],
       [ 4.44827586],
       [ 4.79310345],
       [ 5.13793103],
       [ 5.48275862],
       [ 5.82758621],
       [ 6.17241379],
       [ 6.51724138],
       [ 6.86206897],
       [ 7.20689655],
       [ 7.55172414],
       [ 7.89655172],
       [ 8.24137931],
       [ 8.5862069 ],
       [ 8.93103448],
       [ 9.27586207],
       [ 9.62068966],
       [ 9.96551724],
       [10.31034483],
       [10.65517241],
       [11.        ]])

In [17]:
test_labels = y

# Applying Ridge Regression

In [12]:
ridge = Ridge()

In [13]:
#help(Ridge)

In [14]:
RR = ridge.fit(X, y)

In [15]:
y_pred = RR.predict(test_data)

In [18]:
print('RMSE:', np.sqrt(metrics.mean_squared_error(test_labels, y_pred)))
print('MAE:', metrics.mean_absolute_error(test_labels, y_pred))
print('R2: %0.2f' %(metrics.r2_score(test_labels, y_pred)))

RMSE: 10116.077264648284
MAE: 8366.54929243921
R2: 0.86


# Applying Grid Search Cross validation on Ridge Regression

In [19]:
ridge_reg = Ridge()
from sklearn.model_selection import GridSearchCV
params_Ridge = {'alpha': [10, 5, 3 , 2, 1.5, 1.2, 1.1, 1, 0.1, 0.01, 0.001, 0.0001 ,0] , "fit_intercept": [True, False], "solver": ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']}
Ridge_GS = GridSearchCV(ridge_reg, param_grid=params_Ridge, n_jobs=-1, cv =5)
RR_GSCV = Ridge_GS.fit(X,y)
RR_GSCV.best_params_

{'alpha': 10, 'fit_intercept': True, 'solver': 'sag'}

# Applying Grid Search Ridge Regression on test data

In [20]:
y_pred_GSCV = RR_GSCV.predict(test_data)

In [21]:
print('RMSE:', np.sqrt(metrics.mean_squared_error(test_labels, y_pred_GSCV)))
print('MAE:', metrics.mean_absolute_error(test_labels, y_pred_GSCV))
print('R2: %0.2f' %(metrics.r2_score(test_labels, y_pred_GSCV)))


RMSE: 9790.382295765916
MAE: 7872.193822219492
R2: 0.87


# Applying LASSO Regression

In [22]:
from sklearn.linear_model import Lasso

In [23]:
lasso_model = Lasso()

In [24]:
lm = lasso_model.fit(X,y)

In [25]:
y_pred_lm = lm.predict(test_data)

In [27]:
print('RMSE:', np.sqrt(metrics.mean_squared_error(test_labels, y_pred_lm)))
print('MAE:', metrics.mean_absolute_error(test_labels, y_pred_lm))
print('R2: %0.2f' %(metrics.r2_score(test_labels, y_pred_lm)))

RMSE: 10160.252045393187
MAE: 8423.71754055037
R2: 0.86


# Applying Grid Search Lasso Regression

In [28]:
lasso_reg = Lasso()
params_Lasso = {'alpha': [10, 5, 1, 0.1, 0.01, 0.001, 0.0001 ,0] , "fit_intercept": [True, False],
                "max_iter": [1000, 10000, 100000, 1000000]}
Lasso_GS = GridSearchCV(lasso_reg, param_grid = params_Lasso, n_jobs = -1, cv = 5)
lasso_GSCV = Lasso_GS.fit(X,y)
lasso_GSCV.best_params_

{'alpha': 10, 'fit_intercept': True, 'max_iter': 1000}

In [29]:
y_pred_LassoCV = lasso_GSCV.predict(test_data)

In [30]:
print('RMSE:', np.sqrt(metrics.mean_squared_error(test_labels, y_pred_LassoCV)))
print('MAE:', metrics.mean_absolute_error(test_labels, y_pred_LassoCV))
print('R2: %0.2f' %(metrics.r2_score(test_labels, y_pred_LassoCV)))

RMSE: 10158.962314938803
MAE: 8422.071915336537
R2: 0.86


# Elastic Net

In [31]:
from sklearn.linear_model import ElasticNet

In [32]:
elastic_model = ElasticNet()

In [37]:
#help(ElasticNet)

In [33]:
elastic_model = elastic_model.fit(X,y)

In [34]:
y_pred_em = elastic_model.predict(test_data)

In [35]:
print('RMSE:', np.sqrt(metrics.mean_squared_error(test_labels, y_pred_em)))
print('MAE:', metrics.mean_absolute_error(test_labels, y_pred_em))
print('R2: %0.2f' %(metrics.r2_score(test_labels, y_pred_em)))

RMSE: 9661.674956501829
MAE: 7612.00310605522
R2: 0.87


# Applying Grid Search Elastic Net

In [39]:
l1_ratio = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]

In [40]:
elastic_net = ElasticNet()
params_elasticNet = {'l1_ratio': l1_ratio , "fit_intercept": [True, False],
                "max_iter": [1,5,10,100,1000,10000, 100000], 'alpha': [1.0, 2.0, 3.0, 5.0, 10.0, 100.0]}
elasticNet_GS = GridSearchCV(elastic_net, param_grid = params_elasticNet, n_jobs = -1, cv = 5)
elastic_GSCV = elasticNet_GS.fit(X,y)
elastic_GSCV.best_params_

{'alpha': 1.0, 'fit_intercept': True, 'l1_ratio': 0.6, 'max_iter': 1}

In [41]:
y_pred_elasticCV = elastic_GSCV.predict(test_data)

In [42]:
print('RMSE:', np.sqrt(metrics.mean_squared_error(test_labels, y_pred_elasticCV)))
print('MAE:', metrics.mean_absolute_error(test_labels, y_pred_elasticCV))
print('R2: %0.2f' %(metrics.r2_score(test_labels, y_pred_elasticCV)))

RMSE: 9734.378475951635
MAE: 7766.412465690312
R2: 0.87
