# Day 28

### Ridge Regression

In [1]:
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score
import numpy as np
import pandas as pd

In [2]:
house = load_boston()

In [3]:
df = pd.DataFrame(house['data'], columns=house['feature_names'])
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [4]:
X = df.to_numpy()
y = house['target'].reshape((-1, 1))

In [5]:
house_ridge = Ridge().fit(X, y)
house_linear = LinearRegression().fit(X, y)

In [6]:
x_test = np.array([X[1]])

print("Predction: ")
print("--------> Ridge :", house_ridge.predict(x_test))
print("\t> Linear :",house_linear.predict(x_test))

print("Scores:")
print("--------> Ridge: ", house_ridge.score(X,y))
print("\t> Linear: ", house_linear.score(X,y))

Predction: 
--------> Ridge : [[24.80547336]]
	> Linear : [[25.02556238]]
Scores:
--------> Ridge:  0.7388703133867616
	> Linear:  0.7406426641094095


In [7]:
x_train, x_test, y_train, y_test = train_test_split(df, y, test_size=0.3)

In [8]:
line = LinearRegression()
line.fit(x_train, y_train)
print(line.score(x_test, y_test))
print(line.score(x_train, y_train))

0.7766574937366504
0.7260189248185049


In [9]:
print("On Train Data : ",cross_val_score(line, x_train, y_train, cv=5).mean())
print("On Test Data : ",cross_val_score(line, x_test, y_test,  cv=5).mean())

On Train Data :  0.6539387313133838
On Test Data :  0.6091269770842729


In [10]:
rid = Ridge()
rid.fit(x_train, y_train)

print(rid.score(x_test, y_test))
print(rid.score(x_train, y_train))

print("On Train Data : ",cross_val_score(rid, x_train, y_train).mean())
print("On Test Data : ",cross_val_score(rid, x_test, y_test).mean())

0.7786093928138997
0.7230392707256627
On Train Data :  0.6334278018329074
On Test Data :  0.7016964908818052




In [11]:
rid = Ridge(solver='sag') # SAG solver only works good when all features are in same scale
rid.fit(x_train, y_train)

print(rid.score(x_test, y_test))
print(rid.score(x_train, y_train))

print("On Train Data : ",cross_val_score(rid, x_train, y_train).mean())
print("On Test Data : ",cross_val_score(rid, x_test, y_test).mean())

0.7466923211893874
0.6834621910010154




On Train Data :  0.6277415875566491
On Test Data :  0.6612502638999131


In [12]:
rid = Ridge(solver='sag', max_iter=100) # Setting a fixed iteration limit
rid.fit(x_train, y_train)

print(rid.score(x_test, y_test))
print(rid.score(x_train, y_train))

print("On Train Data : ",cross_val_score(rid, x_train, y_train).mean())
print("On Test Data : ",cross_val_score(rid, x_test, y_test).mean())



0.7034755220035549
0.6349089590096955
On Train Data :  0.578968826351212
On Test Data :  0.6353130270636979




In [13]:
rid = Ridge(solver='sag', tol=1.0, max_iter=100) # overriding the default tolorance values
rid.fit(x_train, y_train)

print(rid.score(x_test, y_test))
print(rid.score(x_train, y_train))

print("On Train Data : ",cross_val_score(rid, x_train, y_train).mean())
print("On Test Data : ",cross_val_score(rid, x_test, y_test).mean())

0.412029778111422
0.22388274636864924
On Train Data :  0.18076460879246461
On Test Data :  0.09460837855779458




In [14]:
# Picking an alpha value from a set
rr = RidgeCV(alphas=(0.1, 0.001, 0.0001, 0.05, 0.5, 0.005))
rr.fit(x_train, y_train)

RidgeCV(alphas=array([1.e-01, 1.e-03, 1.e-04, 5.e-02, 5.e-01, 5.e-03]), cv=None,
        fit_intercept=True, gcv_mode=None, normalize=False, scoring=None,
        store_cv_values=False)

In [15]:
rr.alpha_

0.05

In [16]:
rid = Ridge(alpha=rr.alpha_) # Substituting the picked alpha value from above 
rid.fit(x_train, y_train)

print(rid.score(x_test, y_test))
print(rid.score(x_train, y_train))

print("On Train Data : ",cross_val_score(rid, x_train, y_train).mean())
print("On Test Data : ",cross_val_score(rid, x_test, y_test).mean())

0.777050064334577
0.7259957905530208
On Train Data :  0.6330302403743067
On Test Data :  0.7004606904284274




### Lasso Regression

In [17]:
from sklearn.linear_model import LassoCV, Lasso

In [18]:
ll = LassoCV(alphas=(0.1, 0.001, 0.0001, 0.05, 0.5, 0.005))
ll.fit(x_train, y_train)

  y = column_or_1d(y, warn=True)


LassoCV(alphas=(0.1, 0.001, 0.0001, 0.05, 0.5, 0.005), copy_X=True, cv='warn',
        eps=0.001, fit_intercept=True, max_iter=1000, n_alphas=100, n_jobs=None,
        normalize=False, positive=False, precompute='auto', random_state=None,
        selection='cyclic', tol=0.0001, verbose=False)

In [19]:
ll.alpha_

0.005

In [20]:
las = Lasso(alpha=ll.alpha_) # Substituting the picked alpha value from above 
las.fit(x_train, y_train)

print(las.score(x_test, y_test))
print(las.score(x_train, y_train))

print("On Train Data : ",cross_val_score(las, x_train, y_train).mean())
print("On Test Data : ",cross_val_score(las, x_test, y_test).mean())

0.7775533729842701
0.7259262673322726
On Train Data :  0.6330995004226238
On Test Data :  0.6998302792795862


