In [2]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler

In [6]:
# Read the housing CSV into a DataFrame and preview its first rows.
# NOTE(review): the preview saved with this cell lists DATE and HOUSTNSA columns,
# whereas later cells index df_housing by CRIM, ZN, ..., CHAS and PRICE —
# confirm that 'housing-data.csv' is the intended input file.
df_housing = pd.read_csv('housing-data.csv')
df_housing.head()

Unnamed: 0,DATE,HOUSTNSA
0,01-01-1959,96.2
1,01-02-1959,99.0
2,01-03-1959,127.7
3,01-04-1959,150.8
4,01-05-1959,152.5


In [None]:
# Show the column labels of the loaded frame (the cells below select columns by name).
df_housing.columns

In [None]:
# To scale numeric features, select numeric feaures
df_housing_numeric = df_housing[['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']]

In [None]:
# Scale the numeric feaures and convert to DataFrame
X_scaler = StandardScaler()
num_scaled = X_scaler.fit_transform(df_housing_numeric)
df_num_scaled = pd.DataFrame(num_scaled, columns = df_housing_numeric.columns)
df_num_scaled.head()

In [None]:
X = pd.concat([df_num_scaled, df_housing['CHAS']], axis = 1)
y = df_housing['PRICE']

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=0)

In [None]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Baseline: plain linear regression fitted on the training split
lr = LinearRegression().fit(X_train, y_train)

# Predictions for the rows the model was fitted on and for the held-out rows
y_pred_train, y_pred_test = lr.predict(X_train), lr.predict(X_test)

# R2 on each split; a large gap between the two would suggest overfitting
train_r2, test_r2 = r2_score(y_train, y_pred_train), r2_score(y_test, y_pred_test)
print('Train R2 ', train_r2)
print('Test R2  ', test_r2)

## Regularization

### Ridge

In [None]:
# Ridge regression with penalty strength alpha=15, fitted on the training split
ridge_model = Ridge(alpha=15).fit(X_train, y_train)

# Predict on both splits
y_pred_train, y_pred_test = (
    ridge_model.predict(X_train),
    ridge_model.predict(X_test),
)

# Report R2 for the training and the test split
train_r2, test_r2 = (
    r2_score(y_train, y_pred_train),
    r2_score(y_test, y_pred_test),
)
print('Train R2 ', train_r2)
print('Test R2  ', test_r2)


In [None]:
# Coefficients of the fitted LinearRegression model, for comparison with the penalised models below.
lr.coef_

In [None]:
# Coefficients of the fitted Ridge model (alpha=15).
ridge_model.coef_

In [None]:
# Lasso regression with penalty strength alpha=0.5, fitted on the training split
lasso_model = Lasso(alpha=0.5).fit(X_train, y_train)

# Predict on both splits
y_pred_train, y_pred_test = (
    lasso_model.predict(X_train),
    lasso_model.predict(X_test),
)

# Report R2 for the training and the test split
train_r2, test_r2 = (
    r2_score(y_train, y_pred_train),
    r2_score(y_test, y_pred_test),
)
print('Train R2 ', train_r2)
print('Test R2  ', test_r2)


In [None]:
# Coefficients of the fitted Lasso model (alpha=0.5).
lasso_model.coef_

In [None]:
# ElasticNet regression (alpha=1, l1_ratio=0.5), fitted on the training split
en_model = ElasticNet(alpha=1, l1_ratio=0.5).fit(X_train, y_train)

# Predict on both splits
y_pred_train, y_pred_test = (
    en_model.predict(X_train),
    en_model.predict(X_test),
)

# Report R2 for the training and the test split
train_r2, test_r2 = (
    r2_score(y_train, y_pred_train),
    r2_score(y_test, y_pred_test),
)
print('Train R2 ', train_r2)
print('Test R2  ', test_r2)

## Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Grid search over the Ridge penalty strength: 5-fold CV on the training split, scored by R2
model = Ridge()
param = {
    'alpha': [0.01, 0.1, 1, 10, 20, 30],
}

grid_cv = GridSearchCV(
    estimator=model,
    param_grid=param,
    scoring='r2',
    cv=5,
).fit(X_train, y_train)

# Best alpha found by the search
grid_cv.best_params_


In [None]:
# Grid search over the Lasso penalty strength: 5-fold CV on the training split, scored by R2
model = Lasso()
param = {
    'alpha': [0.001, 0.01, 0.1, 1],
}

grid_cv = GridSearchCV(
    estimator=model,
    param_grid=param,
    scoring='r2',
    cv=5,
).fit(X_train, y_train)

# Best alpha found by the search
grid_cv.best_params_


In [None]:
# Grid search over ElasticNet's alpha and l1_ratio: 5-fold CV on the training split, scored by R2
algo_name = ElasticNet()
param = {
    'alpha': [0.01, 0.1, 0.3, 0.5],
    'l1_ratio': [0.3, 0.5, 0.7, 0.9],
}

grid_cv = GridSearchCV(
    estimator=algo_name,
    param_grid=param,
    scoring='r2',
    cv=5,
).fit(X_train, y_train)

# Best (alpha, l1_ratio) combination found by the search
grid_cv.best_params_