## Ridge Regression (L2 Regularization)
- We know that, in order to avoid the overfitting we use this model 
- Here we use additional parameters in the cost function to avoid overfitting

In [1]:
# we will take the same data of house price prediction from sklearn 

from sklearn.datasets import fetch_california_housing

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline 

In [4]:
california = fetch_california_housing()

In [5]:
california.data

array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
          39.43      , -121.22      ],
       [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
          39.43      , -121.32      ],
       [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
          39.37      , -121.24      ]])

In [7]:
dataset = pd.DataFrame(california.data, columns=california.feature_names)

In [8]:
dataset.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [14]:
dataset['price'] = california.target

In [15]:
dataset.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,price
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [16]:
## independent and dependent features
X=dataset.iloc[:,:-1] ##independent features
y=dataset.iloc[:,-1] ##dependent feature

In [21]:
from sklearn.model_selection import train_test_split

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.3, random_state=1)

In [23]:
from sklearn.preprocessing import StandardScaler

In [24]:
scaler = StandardScaler()

In [25]:
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [26]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [27]:
ridge_regressor = Ridge()

In [51]:
ridge_regressor.fit(X_train_scaled, y_train)

Ridge(alpha=1.0000000000000002e-06)

In [52]:
y_predicted = ridge_regressor.predict(X_test_scaled)

In [53]:
print('MSE', mean_squared_error(y_true=y_test, y_pred=y_predicted))

MSE 0.5274297609964576


In [54]:
### We need to do hyperparameter tuning, means for the various values of hyper parameter how much model is improving
i = 1000000
print("Results for Ridge regression")
while i > 0.000001:
    ridge_regressor = Ridge(i)
    ridge_regressor.fit(X_train_scaled, y_train)
    y_predicted = ridge_regressor.predict(X_test_scaled)
    print(f'for i ={i}','R2', r2_score(y_true=y_test, y_pred=y_predicted))
    i = i/10



Results for Ridge regression
for i =1000000 R2 0.006522270596066582
for i =100000.0 R2 0.05995398621321568
for i =10000.0 R2 0.32492176325827793
for i =1000.0 R2 0.5477507728416198
for i =100.0 R2 0.600168169039337
for i =10.0 R2 0.6048195492098214
for i =1.0 R2 0.6050530102298646
for i =0.1 R2 0.6050727016435924
for i =0.01 R2 0.6050746323621016
for i =0.001 R2 0.6050748250477745
for i =0.0001 R2 0.605074844312478
for i =1e-05 R2 0.6050748462389097
for i =1.0000000000000002e-06 R2 0.6050748464315526


In [40]:
# for lambda we got best results as we decrease the value of Lambda
from sklearn.linear_model import Lasso

In [41]:
# Lets check these same results for Lasso Regression
i = 1000000
print("Results for Lasso regression")
while i > 0.000001:
    lasso_regressor = Lasso(i)
    lasso_regressor.fit(X_train, y_train)
    y_predicted = lasso_regressor.predict(X_test)
    print(f'for i ={i}','R2', r2_score(y_true=y_test, y_pred=y_predicted))
    i = i/10 

Results for Lasso regression
for i =1000000 R2 -2.6088946816216207e-06
for i =100000.0 R2 -2.6088946816216207e-06
for i =10000.0 R2 -2.6088946816216207e-06
for i =1000.0 R2 -2.6088946816216207e-06
for i =100.0 R2 -2.6088946816216207e-06
for i =10.0 R2 0.0005270666210486219
for i =1.0 R2 0.2814288096063229
for i =0.1 R2 0.5424586329599225
for i =0.01 R2 0.5973764638890007
for i =0.001 R2 0.6045211343091196
for i =0.0001 R2 0.6050216374751813
for i =1e-05 R2 0.605069547081214
for i =1.0000000000000002e-06 R2 0.6050743167543845


In [42]:
# As we reducing the value of hyperparameter accuracy is increasing 

In [43]:
# Now for elastic net 
from sklearn.linear_model import ElasticNet


In [50]:
i = 10000
print("Results for Elastic net regression")
while i > 0.000001:
    elastic_regressor = ElasticNet(alpha=i, l1_ratio=0.5,)
    elastic_regressor.fit(X_train, y_train)
    y_predicted = elastic_regressor.predict(X_test)
    print(f'for i ={i}','R2', r2_score(y_true=y_test, y_pred=y_predicted))
    i = i/10

Results for Elastic net regression
for i =10000 R2 -2.6088946816216207e-06
for i =1000.0 R2 -2.6088946816216207e-06
for i =100.0 R2 -2.6088946816216207e-06
for i =10.0 R2 0.0005248003795422784
for i =1.0 R2 0.42347750281481866
for i =0.1 R2 0.5743555777988429
for i =0.01 R2 0.6010116775914994
for i =0.001 R2 0.6047154618543444
for i =0.0001 R2 0.6050394952212483
for i =1e-05 R2 0.6050713173123643
for i =1.0000000000000002e-06 R2 0.6050744935968115
