# Regularization with scikit-learn

### Imports

In [131]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [132]:
# Read the advertising data set from a CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='Advertising.csv')

In [133]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


### Data and Setup

In [134]:
# Features are every column except the target; the target is the 'sales' column.
X = df.drop(columns='sales')
y = df.loc[:, 'sales']

### Polynomial Conversion

In [135]:
from sklearn.preprocessing import PolynomialFeatures

In [136]:
# Generates all polynomial and interaction terms up to degree 3;
# the constant (bias) column is left out.
polynomial_converter = PolynomialFeatures(
    include_bias=False,
    degree=3,
)

In [137]:
# Learn the term combinations from X, then build the expanded feature matrix.
polynomial_converter.fit(X)
poly_feature = polynomial_converter.transform(X)

In [138]:
# (rows, generated feature columns) of the expanded matrix.
np.shape(poly_feature)

(200, 19)

### Train | Test Split

In [139]:
from sklearn.model_selection import train_test_split

In [140]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    poly_feature,
    y,
    test_size=0.3,
    random_state=42,
)

### Scaling the Data

In [141]:
from sklearn.preprocessing import StandardScaler

In [142]:
# Standardising scaler (centre on the mean, scale to unit variance);
# both options are the library defaults, spelled out here for clarity.
scaler = StandardScaler(with_mean=True, with_std=True)

In [143]:
# Fit on the training rows only, so the test rows do not influence the
# scaling statistics.
scaler.fit(X=X_train)

In [144]:
# Replace the training features with their scaled version.
# NOTE: this overwrites X_train, so re-running this cell without re-running
# the split would scale already-scaled data.
X_train = scaler.transform(X=X_train)

In [145]:
# Apply the scaler fitted on the training rows to the test features.
# NOTE: this overwrites X_test, so re-running this cell without re-running
# the split would scale already-scaled data.
X_test = scaler.transform(X=X_test)

### Ridge Regression

In [146]:
from sklearn.linear_model import Ridge

In [147]:
# Ridge regression with a fixed regularisation strength of alpha = 10.
ridge_model = Ridge(
    alpha=10,
)

In [148]:
# Train the fixed-alpha ridge model on the scaled training data.
ridge_model.fit(X=X_train, y=y_train)

In [149]:
# Predictions of the fixed-alpha ridge model for the held-out test rows.
y_prediction = ridge_model.predict(X=X_test)

In [150]:
from sklearn.metrics import mean_absolute_error,mean_squared_error

In [151]:
# Mean absolute error of the current predictions on the test set.
# Stored as `MAE`, matching the name used for the same metric later in the
# notebook.
MAE = mean_absolute_error(y_true=y_test, y_pred=y_prediction)
# Backward-compatible alias: this value was previously stored under the
# misleading name `RAE`; kept so any cell that still reads it keeps working.
RAE = MAE
MAE

0.6296591346758602

In [152]:
# Root mean squared error: square root of the test-set mean squared error.
RMSE = np.sqrt(
    mean_squared_error(y_true=y_test, y_pred=y_prediction)
)
RMSE

0.8916327541710874

### Choosing an alpha value with Cross-Validation

In [153]:
from sklearn.linear_model import RidgeCV

In [154]:
# Ridge regression that picks alpha by cross-validation: three candidate
# alphas, cv=10, scored with negated mean absolute error.
ridge_cv_model = RidgeCV(
    alphas=(0.1, 1.0, 10.0),
    scoring='neg_mean_absolute_error',
    cv=10,
)

In [155]:
# Run the cross-validated search and fit on the scaled training data.
ridge_cv_model.fit(X=X_train, y=y_train)

In [156]:
# Regularisation strength selected by the cross-validated fit.
# NOTE(review): the recorded output (0.1) is the smallest candidate in the
# `alphas` grid, so the best value may lie below it; consider adding smaller
# candidates.
ridge_cv_model.alpha_

0.1

In [157]:
# `sklearn.metrics.SCORERS` was removed in scikit-learn 1.3, so a plain import
# fails on current releases. When it is unavailable, rebuild the same
# name -> scorer mapping from the supported public helpers so that
# `SCORERS.keys()` still lists every valid `scoring` string.
try:
    from sklearn.metrics import SCORERS
except ImportError:
    from sklearn.metrics import get_scorer, get_scorer_names
    SCORERS = {name: get_scorer(name) for name in get_scorer_names()}

In [158]:
# Uncomment to list every name accepted by the `scoring` parameter:
# SCORERS.keys()

In [159]:
# Test-set predictions of the cross-validated model; this replaces the
# predictions previously stored in y_prediction.
y_prediction = ridge_cv_model.predict(X=X_test)

In [160]:
# Mean absolute error of the current predictions on the test set.
MAE = mean_absolute_error(y_true=y_test, y_pred=y_prediction)
MAE

0.46671241131138985

In [161]:
# Root mean squared error of the current predictions on the test set.
RMSE = np.sqrt(
    mean_squared_error(y_true=y_test, y_pred=y_prediction)
)
RMSE

0.5945136671793954

In [162]:
# Fitted coefficients of the cross-validated ridge model. The model was
# trained on scaled polynomial features, so each value refers to one scaled
# feature column, not to a raw input column.
ridge_cv_model.coef_

array([ 5.90523815,  0.46316396,  0.68028713, -6.17743395,  3.73671928,
       -1.40708382,  0.00624704,  0.11128917, -0.2617823 ,  2.17135744,
       -0.51480159,  0.70587211,  0.60311504, -0.53271216,  0.5716495 ,
       -0.34685826,  0.36744388, -0.03938079, -0.12192939])