# Regularization Code

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [2]:
df=pd.read_csv("Advertising.csv")
df.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   Radio      200 non-null    float64
 2   Newspaper  200 non-null    float64
 3   Sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB


# EDA Exploratory Data Analysis

In [4]:
df.describe()

Unnamed: 0,TV,Radio,Newspaper,Sales
count,200.0,200.0,200.0,200.0
mean,147.0425,23.264,30.554,15.1305
std,85.854236,14.846809,21.778621,5.283892
min,0.7,0.0,0.3,1.6
25%,74.375,9.975,12.75,11.0
50%,149.75,22.9,25.75,16.0
75%,218.825,36.525,45.1,19.05
max,296.4,49.6,114.0,27.0


In [5]:
df.corr()

Unnamed: 0,TV,Radio,Newspaper,Sales
TV,1.0,0.054809,0.056648,0.901208
Radio,0.054809,1.0,0.354104,0.349631
Newspaper,0.056648,0.354104,1.0,0.15796
Sales,0.901208,0.349631,0.15796,1.0


In [18]:
X=df[['TV','Radio','Newspaper']]
y=df['Sales']

In [19]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=42)

# Lasso Regression

In [8]:
#Modeling with default parameters

In [17]:
from sklearn.linear_model import Lasso    #import
lasso_model=Lasso()                    #save as a model name
lasso_model.fit(X_train,y_train)    #fit on train data

In [10]:
#Prediction

In [23]:
test_predictions=lasso_model.predict(X_test)
train_predictions=lasso_model.predict(X_train)

In [12]:
#Evaluation

In [24]:
from sklearn.metrics import mean_squared_error
train_rmse=np.sqrt(mean_squared_error(y_test,test_predictions))
test_rmse=np.sqrt(mean_squared_error(y_train,train_predictions))
print("train RMSE:",train_rmse)
print("test RMSE:",test_rmse)

train RMSE: 1.5903008459656902
test RMSE: 1.6866227172174735


In [26]:
#Hyperparameter tuning

In [27]:
from sklearn.model_selection import GridSearchCV 

In [29]:
estimator=Lasso()
param_grid={'alpha':[0.1,0.2,0.3,0.4,0.5,0.6,1]}

In [30]:
model_hp=GridSearchCV(estimator,param_grid,cv=5)

model_hp.fit(X_train,y_train)

model_hp.best_params_

{'alpha': 0.1}

# Ridge Regression

In [32]:
#Modelling with default parameters

In [33]:
from sklearn.linear_model import Ridge

ridge_model=Ridge()

ridge_model.fit(X_train,y_train)

In [34]:
#Prediction

In [35]:
test_predictions=ridge_model.predict(X_test)
train_predictions=ridge_model.predict(X_train)

In [37]:
#Evaluation

In [38]:
from sklearn.metrics import mean_squared_error
train_rmse=np.sqrt(mean_squared_error(y_test,test_predictions))
test_rmse=np.sqrt(mean_squared_error(y_train,train_predictions))
print("train RMSE:",train_rmse)
print("test RMSE:",test_rmse)

train RMSE: 1.5942537764655764
test RMSE: 1.685002274040875


In [39]:
#Hyperpapameter Tuning

In [40]:
#Identifying the best alpha value for Ridge Regression

In [41]:
from sklearn.model_selection import GridSearchCV

In [42]:
estimator=Ridge()

param_grid={"alpha":list(range(1,11))}

In [43]:
model_hp=GridSearchCV(estimator,param_grid,cv=5)

model_hp.fit(X_train,y_train)

model_hp.best_params_

{'alpha': 10}

# Elasticnet Regression

In [45]:
from sklearn.linear_model import ElasticNet

enr_model=ElasticNet()

enr_model.fit(X_train,y_train)

In [46]:
#Prediction

In [47]:
test_predictions=enr_model.predict(X_test)
train_predictions=enr_model.predict(X_train)

In [48]:
#Evalutions

In [49]:
from sklearn.metrics import mean_squared_error
train_rmse=np.sqrt(mean_squared_error(y_test,test_predictions))
test_rmse=np.sqrt(mean_squared_error(y_train,train_predictions))
print("train RMSE:",train_rmse)
print("test RMSE:",test_rmse)

train RMSE: 1.5922933855390244
test RMSE: 1.685473159626713


In [50]:
#Hyperparameter tuning

In [53]:
from sklearn.model_selection import GridSearchCV

In [61]:
estimator=ElasticNet()

param_grid = {"alpha":[0.1,0.2,1,2,3,5,10],
              "l1_ratio":[0.1,0.5,0.75,0.9,0.95,1]}

In [63]:
enr_hp=GridSearchCV(estimator,param_grid,cv=5)

enr_hp.fit(X_train,y_train)

enr_hp.best_params_

{'alpha': 2, 'l1_ratio': 0.1}