## Importing Necessary Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.preprocessing import PolynomialFeatures, StandardScaler,MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score

In [2]:
# Read the dataset CSV (expected next to this notebook) into a DataFrame.
# Download link: https://drive.google.com/file/d/1tX62d7cA8Vb_5zOEuhwjE5vLZ5aq7RjG/view?usp=drive_link
df = pd.read_csv(filepath_or_buffer="model_selection_data.csv")

In [4]:
# Peek at the first two rows to confirm the columns loaded as expected.
df.iloc[:2]

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6


In [5]:
# The Boston Housing Dataset
# The Boston Housing Dataset is derived from information collected by the U.S. Census Service concerning housing in the area of Boston, MA. The following describes the dataset columns:
# CRIM - per capita crime rate by town
# ZN - proportion of residential land zoned for lots over 25,000 sq.ft.
# INDUS - proportion of non-retail business acres per town.
# CHAS - Charles River dummy variable (1 if tract bounds river; 0 otherwise)
# NOX - nitric oxides concentration (parts per 10 million)
# RM - average number of rooms per dwelling
# AGE - proportion of owner-occupied units built prior to 1940
# DIS - weighted distances to five Boston employment centres
# RAD - index of accessibility to radial highways
# TAX - full-value property-tax rate per $10,000
# PTRATIO - pupil-teacher ratio by town
# B - 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
# LSTAT - % lower status of the population
# Target - Median value of owner-occupied homes in $1000's

## Missing Values

In [6]:
# Fraction of missing entries per column (0.0 means the column is complete).
df.isna().mean()

CRIM       0.0
ZN         0.0
INDUS      0.0
CHAS       0.0
NOX        0.0
RM         0.0
AGE        0.0
DIS        0.0
RAD        0.0
TAX        0.0
PTRATIO    0.0
B          0.0
LSTAT      0.0
Target     0.0
dtype: float64

## EDA

In [6]:
# skipping in this problem statement considering the major focus on modelling part
# But EDA is always a mandatory step in every modelling project

In [7]:
# Separate the response column from the predictors.
y = df["Target"]
X = df.drop(columns=["Target"])

In [8]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# First five training rows (the shuffled index shows the split was randomised).
X_train.iloc[:5]

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
477,15.0234,0.0,18.1,0.0,0.614,5.304,97.3,2.1007,24.0,666.0,20.2,349.48,24.91
15,0.62739,0.0,8.14,0.0,0.538,5.834,56.5,4.4986,4.0,307.0,21.0,395.62,8.47
332,0.03466,35.0,6.06,0.0,0.4379,6.031,23.3,6.6407,1.0,304.0,16.9,362.25,7.83
423,7.05042,0.0,18.1,0.0,0.614,6.103,85.1,2.0218,24.0,666.0,20.2,2.52,23.29
19,0.7258,0.0,8.14,0.0,0.538,5.727,69.5,3.7965,4.0,307.0,21.0,390.95,11.28


In [10]:
# Report the dimensions of each split, one per line.
print(
    X_train.shape,
    X_test.shape,
    y_train.shape,
    y_test.shape,
    sep="\n",
)

(404, 13)
(102, 13)
(404,)
(102,)


## Base Model

In [None]:
#data --> standardscaling --> polynomialfeatures --> linearregression

In [11]:
# Baseline model: standardise -> expand to degree-2 polynomial terms -> ordinary least squares.
linear_reg_pipeline = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2),
    LinearRegression(),
)
# Fit on the training split only; the fitted pipeline is shown as the cell output.
linear_reg_pipeline.fit(X_train, y_train)

In [12]:
# Predict on both splits with the fitted baseline pipeline.
y_pred_linear_reg_train, y_pred_linear_reg_test = (
    linear_reg_pipeline.predict(features) for features in (X_train, X_test)
)

In [13]:
# Coefficient of determination of the baseline on the train and test splits.
r2_linear_reg_train = r2_score(y_true=y_train, y_pred=y_pred_linear_reg_train)
r2_linear_reg_test = r2_score(y_true=y_test, y_pred=y_pred_linear_reg_test)

In [14]:
# Print train and test R-squared (two decimals) for the baseline model.
print(
    "Training R-squared:",
    f"Linear Regression: {r2_linear_reg_train:.2f}",
    "\nTesting R-squared:",
    f"Linear Regression: {r2_linear_reg_test:.2f}",
    sep="\n",
)

Training R-squared:
Linear Regression: 0.94

Testing R-squared:
Linear Regression: 0.81


In [15]:
# Sign of overfitting: train R2 (0.94) exceeds test R2 (0.81) by 13 percentage points

## Ridge Regression (Regularisation Technique)

In [40]:
# Same preprocessing as the baseline, with an L2-penalised (Ridge) regressor at alpha=85.
ridge_reg_pipeline = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2),
    Ridge(alpha=85),
)
ridge_reg_pipeline.fit(X_train, y_train)

In [41]:
# Ridge predictions for the train and test splits, in that order.
y_pred_ridge_reg_train, y_pred_ridge_reg_test = map(
    ridge_reg_pipeline.predict, (X_train, X_test)
)

In [42]:
# Score the Ridge predictions against the true targets on each split.
r2_ridge_reg_train, r2_ridge_reg_test = (
    r2_score(y_train, y_pred_ridge_reg_train),
    r2_score(y_test, y_pred_ridge_reg_test),
)

In [43]:
# Print train and test R-squared (two decimals) for the Ridge model.
ridge_report_lines = [
    "Training R-squared:",
    f"Ridge Regression: {r2_ridge_reg_train:.2f}",
    "\nTesting R-squared:",
    f"Ridge Regression: {r2_ridge_reg_test:.2f}",
]
print("\n".join(ridge_report_lines))

Training R-squared:
Ridge Regression: 0.89

Testing R-squared:
Ridge Regression: 0.82


In [44]:
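# alpha=85, Train: 89%, Test: 82%
# Goal: achieve the highest possible test R2 while keeping the gap to the train R2 as small as possible.
# NOTE(review): alpha is being tuned against the test split here; a separate validation split or
# cross-validation would keep the final test score an unbiased estimate.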

## Ridge Regression (Regularisation Technique - MinMaxScaler)

In [66]:
# Ridge variant with min-max scaling and a much smaller penalty (alpha=0.5).
# This rebinds ridge_reg_pipeline, replacing the StandardScaler-based pipeline fitted earlier.
ridge_reg_pipeline = make_pipeline(
    MinMaxScaler(),
    PolynomialFeatures(degree=2),
    Ridge(alpha=0.5),
)
ridge_reg_pipeline.fit(X_train, y_train)

In [67]:
# Ridge predictions from the currently bound ridge_reg_pipeline, one array per split.
y_pred_ridge_reg_train, y_pred_ridge_reg_test = [
    ridge_reg_pipeline.predict(split_features)
    for split_features in (X_train, X_test)
]

In [68]:
# R-squared of the Ridge predictions on the train and test splits.
r2_ridge_reg_train = r2_score(y_true=y_train, y_pred=y_pred_ridge_reg_train)
r2_ridge_reg_test = r2_score(y_true=y_test, y_pred=y_pred_ridge_reg_test)

In [69]:
# Print train and test R-squared (two decimals) for the Ridge model.
print(
    "Training R-squared:",
    f"Ridge Regression: {r2_ridge_reg_train:.2f}",
    "\nTesting R-squared:",
    f"Ridge Regression: {r2_ridge_reg_test:.2f}",
    sep="\n",
)

Training R-squared:
Ridge Regression: 0.89

Testing R-squared:
Ridge Regression: 0.84


In [65]:
# alpha=0.5, Train: 89%, Test: 84%
# Goal: achieve the highest possible test R2 while keeping the gap to the train R2 as small as possible.

In [None]:
# Example: y = 7*x1 + 8*x2 + 0*x3  (a coefficient of exactly 0 means x3 is effectively dropped from the model)

## Lasso Regression (Regularisation Technique)

In [111]:
# Standardise -> degree-2 polynomial terms -> L1-penalised (Lasso) regressor at alpha=0.45.
lasso_reg_pipeline = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2),
    Lasso(alpha=0.45),
)
lasso_reg_pipeline.fit(X_train, y_train)

In [112]:
# Lasso predictions for the train and test splits, in that order.
y_pred_lasso_reg_train, y_pred_lasso_reg_test = map(
    lasso_reg_pipeline.predict, (X_train, X_test)
)

In [113]:
# Score the Lasso predictions against the true targets on each split.
r2_lasso_reg_train, r2_lasso_reg_test = (
    r2_score(y_train, y_pred_lasso_reg_train),
    r2_score(y_test, y_pred_lasso_reg_test),
)

In [114]:
# Print train and test R-squared (two decimals) for the Lasso model.
print(
    "Training R-squared:",
    f"Lasso Regression: {r2_lasso_reg_train:.2f}",
    "\nTesting R-squared:",
    f"Lasso Regression: {r2_lasso_reg_test:.2f}",
    sep="\n",
)

Training R-squared:
Lasso Regression: 0.83

Testing R-squared:
Lasso Regression: 0.78


In [115]:
#alpha=0.45, Train:83%, Test:78%

## Lasso Regression (Regularisation Technique - MinMaxScaler)

In [128]:
# Min-max scale -> degree-2 polynomial terms -> Lasso with a small penalty (alpha=0.021).
# This rebinds lasso_reg_pipeline, replacing the StandardScaler-based pipeline fitted earlier.
#
# With the default iteration cap (max_iter=1000) this fit emitted a coordinate-descent
# warning (the `cd_fast.enet_coordinate_descent` echo in the saved output), i.e. the
# solver stopped before reaching its tolerance and the coefficients were not final.
# The cap is raised so the optimiser can converge; the dataset is small, so this stays cheap.
# NOTE(review): re-run the scoring cells after this change -- the reported R2 values
# may shift slightly once the model is fully converged.
lasso_reg_pipeline = make_pipeline(
    MinMaxScaler(),
    PolynomialFeatures(degree=2),
    Lasso(alpha=0.021, max_iter=100_000),
)
lasso_reg_pipeline.fit(X_train, y_train)

  model = cd_fast.enet_coordinate_descent(


In [129]:
# Lasso predictions from the currently bound lasso_reg_pipeline, one array per split.
y_pred_lasso_reg_train, y_pred_lasso_reg_test = [
    lasso_reg_pipeline.predict(split_features)
    for split_features in (X_train, X_test)
]

In [130]:
# R-squared of the Lasso predictions on the train and test splits.
r2_lasso_reg_train = r2_score(y_true=y_train, y_pred=y_pred_lasso_reg_train)
r2_lasso_reg_test = r2_score(y_true=y_test, y_pred=y_pred_lasso_reg_test)

In [131]:
# Print train and test R-squared (two decimals) for the Lasso model.
lasso_report_lines = [
    "Training R-squared:",
    f"Lasso Regression: {r2_lasso_reg_train:.2f}",
    "\nTesting R-squared:",
    f"Lasso Regression: {r2_lasso_reg_test:.2f}",
]
print("\n".join(lasso_report_lines))

Training R-squared:
Lasso Regression: 0.85

Testing R-squared:
Lasso Regression: 0.82


In [None]:
#alpha=0.021, Train:85%, Test:82%

In [None]:
#[0.0001,0.001,0.01,0.1,1,10,100,1000,10000]

In [None]:
# Best Results for all iterations

#alpha=85, Train:89%, Test:82% (Ridge + StandardScaler) (Rejected)
#alpha=0.5, Train:89%, Test:84% (Ridge + MinMaxScaler) (First Choice)
#alpha=0.45, Train:83%, Test:78%  (Lasso + StandardScaler) (Rejected)
#alpha=0.021, Train:85%, Test:82%  (Lasso + MinMaxScaler) (Second Choice)

### Outcome:
# MinMaxScaler works better than StandardScaler in this case
# Ridge works better than Lasso in this case