## Regularization - Deep Neuron AI

In [1]:
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sb

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso,Ridge

from sklearn.metrics import mean_absolute_error,mean_squared_error

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this would also hide any pandas chained-assignment warnings and
# scikit-learn deprecation/convergence warnings raised by later cells —
# consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [2]:
# Plot styling: apply the seaborn dark grid first, then layer the matplotlib
# defaults (font size, figure size, figure background) on top of it.
sb.set_style("darkgrid")
plt.rcParams.update({
    "font.size": 16,
    "figure.figsize": (14, 7),
    "figure.facecolor": "#FFF",
})

In [3]:
# Load the raw dataset from a path relative to the working directory.
# The cells below rely on it containing the columns listed in `features`
# plus a `price` column.
data = pd.read_csv("car_prediction.csv")

In [4]:
# Column-name buckets; they are filled later by inspecting each feature's dtype.
numerical, category = [], []

# Shared preprocessing objects: an integer label encoder and a min-max scaler.
lblenc = LabelEncoder()
minmax = MinMaxScaler()

### Feature Selection

In [5]:
# Predictor columns used for the price model.
features = [
    "CarName",
    "horsepower",
    "enginesize",
    "peakrpm",
    "highwaympg",
    "doornumber",
    "carlength",
]

In [6]:
# Take an explicit copy of the feature columns: later cells assign into X, and
# without the copy those writes target a slice of `data` (chained assignment,
# which pandas warns about and which may or may not propagate to `data`).
X = data[features].copy()
# Regression target.
Y = data["price"]

In [7]:
# Bucket the feature columns by dtype.
# Both lists are rebuilt from scratch, so re-running this cell cannot append
# duplicate column names. select_dtypes treats every non-numeric dtype
# (object as well as pandas string dtypes) as categorical, and column order
# is preserved.
numerical = X.select_dtypes(include="number").columns.tolist()
category = X.select_dtypes(exclude="number").columns.tolist()

In [8]:
numerical

['horsepower', 'enginesize', 'peakrpm', 'highwaympg', 'carlength']

In [9]:
# Rescale the numeric columns in place, using min/max computed over every row of X.
# NOTE(review): the scaler is fitted before the train/test split further down,
# so rows that end up in the test set influence the scaling of the training
# features — consider fitting on the training split only.
X[numerical] = minmax.fit_transform(X[numerical])

In [10]:
# Integer-encode each non-numeric column.
# A fresh encoder is fitted per column and kept in `label_encoders`, so every
# column's category -> code mapping stays available for inverse lookups or for
# transforming new data. Refitting one shared encoder would retain only the
# mapping of the last column. `lblenc` still ends up bound to the encoder of
# the last column, as before.
# NOTE(review): the codes follow the sorted order of the category values, which
# a linear model reads as an ordered numeric scale — confirm that is acceptable
# for a column such as CarName.
label_encoders = {}
for cat_col in category:
    lblenc = LabelEncoder()
    X[cat_col] = lblenc.fit_transform(X[cat_col])
    label_encoders[cat_col] = lblenc

In [11]:
X

Unnamed: 0,CarName,horsepower,enginesize,peakrpm,highwaympg,doornumber,carlength
0,2,0.262500,0.260377,0.346939,0.289474,1,0.413433
1,3,0.262500,0.260377,0.346939,0.289474,1,0.413433
2,1,0.441667,0.343396,0.346939,0.263158,1,0.449254
3,4,0.225000,0.181132,0.551020,0.368421,0,0.529851
4,5,0.279167,0.283019,0.551020,0.157895,0,0.529851
...,...,...,...,...,...,...,...
200,139,0.275000,0.301887,0.510204,0.315789,0,0.711940
201,138,0.466667,0.301887,0.469388,0.236842,0,0.711940
202,140,0.358333,0.422642,0.551020,0.184211,0,0.711940
203,142,0.241667,0.316981,0.265306,0.289474,0,0.711940


In [12]:
Y

0      13495.0
1      16500.0
2      16500.0
3      13950.0
4      17450.0
        ...   
200    16845.0
201    19045.0
202    21485.0
203    22470.0
204    22625.0
Name: price, Length: 205, dtype: float64

In [13]:
# Hold out 23% of the rows for testing.
# A fixed random_state makes the split, and every metric computed from it,
# reproducible across kernel restarts. The outputs recorded in this notebook
# came from an unseeded run, so the scores will differ after re-execution.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.23, random_state=42
)

In [14]:
x_train.shape

(157, 7)

In [15]:
x_test.shape

(48, 7)

In [16]:
y_train.shape

(157,)

In [17]:
# Baseline: unregularised linear regression fitted on the training split.
model = LinearRegression()
model.fit(x_train,y_train)

LinearRegression()

In [18]:
# Baseline model's predictions for the held-out rows.
car_predict = model.predict(x_test)

In [19]:
# Training-split score of the baseline (R^2 for scikit-learn regressors), as a percentage.
model.score(x_train,y_train) * 100

84.45158909499732

In [20]:
# Test-split score of the baseline (R^2 for scikit-learn regressors), as a percentage.
model.score(x_test,y_test) *100

77.36343253210481

In [21]:
# Mean absolute error of the baseline on the test split, in the units of `price`.
abs_error = mean_absolute_error(y_test,car_predict)

In [22]:
abs_error

3173.0233798835775

In [23]:
# Root-mean-squared error of the baseline on the test split.
# Despite the variable name this is RMSE, not MSE: the original call passed
# squared=False. That keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, so the square root is taken explicitly to work on every version.
# The name `mse` is kept because a later cell displays it.
mse = mean_squared_error(y_test, car_predict) ** 0.5

In [24]:
mse

4165.966518263472

In [25]:
# Preview the first ten held-out feature rows.
x_test.head(10)

Unnamed: 0,CarName,horsepower,enginesize,peakrpm,highwaympg,doornumber,carlength
151,120,0.058333,0.116981,0.265306,0.578947,1,0.262687
80,66,0.283333,0.184906,0.55102,0.368421,1,0.476119
113,85,0.195833,0.222642,0.346939,0.210526,0,0.862687
198,143,0.475,0.260377,0.387755,0.157895,0,0.71194
169,126,0.283333,0.320755,0.265306,0.368421,1,0.523881
3,4,0.225,0.181132,0.55102,0.368421,0,0.529851
41,39,0.220833,0.184906,0.673469,0.315789,0,0.51194
55,53,0.220833,0.033962,0.755102,0.184211,1,0.416418
31,43,0.116667,0.116981,0.755102,0.578947,1,0.052239
167,122,0.283333,0.320755,0.265306,0.368421,1,0.523881


### Ridge Regularization (L2)

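Ridge regression adds an L2 penalty to the least-squares loss: the sum of the squared regression coefficients, scaled by `alpha`. This shrinks the coefficients toward zero without eliminating any of them.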

In [26]:
# Ridge (L2-penalised) regression fitted on the same training split.
# NOTE(review): with alpha=0.01 the recorded scores are almost identical to the
# unregularised model's — a larger alpha may be needed to see any effect.
ridge_model = Ridge(alpha = 0.01)
ridge_model.fit(x_train, y_train)

Ridge(alpha=0.01)

In [27]:
# Training-split R^2 of the ridge model, as a fraction (the linear-model cells show percentages).
ridge_model.score(x_train,y_train)

0.8445125408183769

In [28]:
# Test-split R^2 of the ridge model, as a fraction (the linear-model cells show percentages).
ridge_model.score(x_test,y_test)

0.7734440195594472

### Lasso Regularization (L1)

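Lasso regression (Least Absolute Shrinkage and Selection Operator) adds an L1 penalty to the loss function: the sum of the absolute values of the coefficients, scaled by `alpha`. Unlike ridge, it can shrink some coefficients exactly to zero, which effectively performs feature selection.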

In [29]:
# Lasso (L1-penalised) regression.
# NOTE(review): tol=0.01 is looser than scikit-learn's default of 1e-4, so the
# solver may stop before fully converging; any convergence warning would be
# hidden by the global warnings filter set in the first cell.
lasso_model = Lasso(alpha = 0.02, tol = 0.01)

In [30]:
# Fit the lasso model on the training split.
lasso_model.fit(x_train, y_train)

Lasso(alpha=0.02, tol=0.01)

In [31]:
# Training-split R^2 of the lasso model, as a fraction (the linear-model cells show percentages).
lasso_model.score(x_train,y_train) 

0.84451588726031

In [32]:
# Test-split R^2 of the lasso model, as a fraction (the linear-model cells show percentages).
lasso_model.score(x_test,y_test)

0.7736321040449055