# Various Regression Models


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [2]:
train = pd.read_csv('train.air.csv')

In [3]:
train

Unnamed: 0,CO,C6H6,NMHC,Nox,NO2,O3,T,RH,AH
0,2.6,11.9,1046,166,113,1268,13.6,48.9,0.7578
1,2.0,9.4,955,103,92,972,13.3,47.7,0.7255
2,2.2,9.0,939,131,114,1074,11.9,54.0,0.7502
3,1.6,6.5,836,131,116,1110,11.2,59.6,0.7888
4,1.2,3.6,690,62,77,733,11.3,56.8,0.7603
...,...,...,...,...,...,...,...,...,...
5548,3.9,13.6,1102,523,187,1583,18.2,36.3,0.7487
5549,2.4,11.4,1027,353,179,1269,24.3,23.7,0.7119
5550,2.4,12.4,1063,293,175,1092,26.9,18.3,0.6406
5551,2.1,9.5,961,235,156,770,28.3,13.5,0.5139


In [4]:
test = pd.read_csv('test.air.csv')

In [5]:
from sklearn.preprocessing import StandardScaler

In [6]:
std = StandardScaler()

In [7]:
train_arr = std.fit_transform(train)

In [8]:
test_arr = std.transform(test)

In [9]:
from sklearn.model_selection import train_test_split, GridSearchCV

In [10]:
X_train = train_arr[:, 1:]
y_train = train_arr[:, 0]

X_test = test_arr[:, 1:]
y_test = test_arr[:, 0]

In [11]:
from sklearn.linear_model import RidgeCV, LassoCV, ElasticNetCV
from sklearn.metrics import mean_squared_error

In [12]:
ridge = RidgeCV(alphas = [1e-3, 1e-2, 1e-1, 1, 10], cv = 10, fit_intercept = False).fit(X_train, y_train)
y_ridge = ridge.predict(X_test)
print("Coefficient for optimal Ridge Regression", ridge.coef_)
print("Optimal alpha", ridge.alpha_)
print("Mean Square Prediction Error", mean_squared_error(y_test, y_ridge))

Coefficient for optimal Ridge Regression [ 0.77812833  0.05127949  0.11590548  0.07914592 -0.01384698 -0.07005888
  0.02985376 -0.01095076]
Optimal alpha 10.0
Mean Square Prediction Error 0.12053749422976588


In [13]:
lasso = LassoCV(alphas = [1e-3, 1e-2, 1e-1, 1, 10], cv = 10, fit_intercept = False).fit(X_train, y_train)
y_lasso = lasso.predict(X_test)
print("Coefficient for optimal Lasso Regression", lasso.coef_)
print("Optimal alpha", lasso.alpha_)
print("Mean Square Prediction Error", mean_squared_error(y_test, y_lasso))

Coefficient for optimal Lasso Regression [ 0.80243571  0.          0.12283557  0.07516101  0.         -0.06826291
  0.01281224 -0.        ]
Optimal alpha 0.01
Mean Square Prediction Error 0.12006054875785242


In [14]:
elastic = ElasticNetCV(alphas = [1e-3, 1e-2, 1e-1, 1, 10], cv = 10, fit_intercept = False).fit(X_train, y_train)
y_elastic = elastic.predict(X_test)
print("Coefficient for optimal ElasticNet Regression", elastic.coef_)
print("Optimal alpha", elastic.alpha_)
print("Mean Square Prediction Error", mean_squared_error(y_test, y_elastic))

Coefficient for optimal ElasticNet Regression [ 0.71809334  0.09117461  0.12524573  0.06924798  0.         -0.07762137
  0.01380704 -0.        ]
Optimal alpha 0.01
Mean Square Prediction Error 0.11954105258013399


# Adaptive Lasso by Transforming input data X with weight initialization of Ridge Regression

In [18]:
X_t = np.zeros(X_train.shape)
wt = ridge.coef_
X_tt = np.zeros(X_test.shape)


for i in range(X_train.shape[1]):
    X_t[:, i]= X_train[:, i]/wt[i]
for i in range(X_test.shape[1]):
    X_tt[:, i] = X_test[:, i]/wt[i]
    


In [27]:
a_lasso = LassoCV(alphas = [1e-3, 1e-2, 1e-1, 1, 10], cv = 10, fit_intercept = False).fit(X_t, y_train)
y_alasso = a_lasso.predict(X_tt)

wt_u = np.zeros(wt.shape)
for i in range(len(wt_u)):
    wt_u[i]  = lasso.coef_[i]/wt[i]
    
print("Coefficient for optimal Adaptive Lasso Regression",wt_u)
print("Optimal alpha", lasso.alpha_)
print("Mean Square Prediction Error", mean_squared_error(y_test, y_alasso))

Coefficient for optimal Adaptive Lasso Regression [ 1.03123826  0.          1.05979083  0.9496511  -0.          0.9743649
  0.42916678  0.        ]
Optimal alpha 0.01
Mean Square Prediction Error 0.12077661517022441


In [51]:
""" Based on Mean Square Prediction Error , ElasticNet Regression performed best on this dataset"""

' Based on Mean Square Prediction Error , ElasticNet Regression performed best on this dataset'