# Machine Learning for Absolute Beginners

## Lasso and Ridge Regression


## 0. Load Libraries

In [1]:
import pandas as pd

from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

## 1. Load Dataset

In [2]:
boston = load_boston()

In [3]:
features = boston.data
target = boston.target

In [4]:
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

In [5]:
df = pd.DataFrame(features_standardized, columns=boston.feature_names)

In [6]:
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.419782,0.28483,-1.287909,-0.272599,-0.144217,0.413672,-0.120013,0.140214,-0.982843,-0.666608,-1.459,0.441052,-1.075562
1,-0.417339,-0.487722,-0.593381,-0.272599,-0.740262,0.194274,0.367166,0.55716,-0.867883,-0.987329,-0.303094,0.441052,-0.492439
2,-0.417342,-0.487722,-0.593381,-0.272599,-0.740262,1.282714,-0.265812,0.55716,-0.867883,-0.987329,-0.303094,0.396427,-1.208727
3,-0.41675,-0.487722,-1.306878,-0.272599,-0.835284,1.016303,-0.809889,1.077737,-0.752922,-1.106115,0.113032,0.416163,-1.361517
4,-0.412482,-0.487722,-1.306878,-0.272599,-0.835284,1.228577,-0.51118,1.077737,-0.752922,-1.106115,0.113032,0.441052,-1.026501


In [7]:
df['MEDV'] = target

In [8]:
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,-0.419782,0.28483,-1.287909,-0.272599,-0.144217,0.413672,-0.120013,0.140214,-0.982843,-0.666608,-1.459,0.441052,-1.075562,24.0
1,-0.417339,-0.487722,-0.593381,-0.272599,-0.740262,0.194274,0.367166,0.55716,-0.867883,-0.987329,-0.303094,0.441052,-0.492439,21.6
2,-0.417342,-0.487722,-0.593381,-0.272599,-0.740262,1.282714,-0.265812,0.55716,-0.867883,-0.987329,-0.303094,0.396427,-1.208727,34.7
3,-0.41675,-0.487722,-1.306878,-0.272599,-0.835284,1.016303,-0.809889,1.077737,-0.752922,-1.106115,0.113032,0.416163,-1.361517,33.4
4,-0.412482,-0.487722,-1.306878,-0.272599,-0.835284,1.228577,-0.51118,1.077737,-0.752922,-1.106115,0.113032,0.441052,-1.026501,36.2


## 2. Apply Lasso Regression

Ridge and Lasso regression are techniques to reduce *model complexity* and prevent *over-fitting* which may result from linear regression.


In [44]:
X = df.drop(['MEDV'],axis=1)
y = df['MEDV']

In [45]:
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=0)

In [51]:
lasso = Lasso(alpha=0.5)

In [52]:
lasso.fit(features_standardized,target)

Lasso(alpha=0.5)

In [53]:
lasso.coef_

array([-0.11526463,  0.        , -0.        ,  0.39707879, -0.        ,
        2.97425861, -0.        , -0.17056942, -0.        , -0.        ,
       -1.59844856,  0.54313871, -3.66614361])

In [54]:
y_pred = lasso.predict(X_test)

In [55]:
r2_score(y_test,y_pred)

0.5982658824157939

## 3. Apply Ridge Regression

In [63]:
ridge = Ridge(alpha=0.5)

In [64]:
ridge.fit(features_standardized,target)

Ridge(alpha=0.5)

In [65]:
ridge.coef_

array([-0.92396151,  1.07393055,  0.12895159,  0.68346136, -2.0427575 ,
        2.67854971,  0.01627328, -3.09063352,  2.62636926, -2.04312573,
       -2.05646414,  0.8490591 , -3.73711409])

In [66]:
y_pred = ridge.predict(X_test)

In [67]:
r2_score(y_test,y_pred)

0.6596163569270437