In [41]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error

In [3]:
df = pd.read_excel('Xiaomi Raw Data.xlsx')

In [4]:
df.head()

Unnamed: 0,Customer,Day,Browser,No. of Clicks,Pages Viewed,Time (min),Amount Spent (Rs.)
0,1,Monday,Chrome,13,4,8.5,2668.0
1,2,Saturday,Chrome,15,5,16.9,3469.0
2,3,Monday,Internet Explorer,20,6,7.3,5215.0
3,4,Monday,Chrome,42,6,30.1,10423.0
4,5,Saturday,Internet Explorer,20,6,13.3,5404.0


In [9]:
le = LabelEncoder()
df['Day'] = le.fit_transform(df['Day'])
df['Browser'] = le.fit_transform(df['Browser'])

In [10]:
df.head()

Unnamed: 0,Customer,Day,Browser,No. of Clicks,Pages Viewed,Time (min),Amount Spent (Rs.)
0,1,1,0,13,4,8.5,2668.0
1,2,2,0,15,5,16.9,3469.0
2,3,1,2,20,6,7.3,5215.0
3,4,1,0,42,6,30.1,10423.0
4,5,2,2,20,6,13.3,5404.0


In [11]:
x = df.drop(['Customer','Amount Spent (Rs.)'], axis=1)

In [12]:
y = df['Amount Spent (Rs.)']

In [14]:
x_train, x_test, y_train, y_test = train_test_split (x,y, test_size =0.3, random_state = 42)

In [16]:
lr = LinearRegression()

In [17]:
lr.fit(x_train,y_train)

LinearRegression()

In [18]:
lr.score(x_test,y_test)

0.8869885601945395

In [20]:
y_pred = lr.predict(x_test)

In [21]:
mean_squared_error(y_test,y_pred)

1159618.3697958344

In [22]:
np.sqrt(mean_squared_error(y_test,y_pred))

1076.855779478308

# Lasso Regression

In [48]:
ls = Lasso(alpha = 0.1)

In [49]:
ls.fit(x_train, y_train)

Lasso(alpha=0.1)

In [50]:
ls.score(x_test, y_test)

0.88698872154046

In [51]:
y_pred = ls.predict(x_test)

In [52]:
np.sqrt(mean_squared_error(y_test,y_pred))

1076.8550107669753

# Ridge Regression

In [30]:
rd = Ridge()

In [31]:
rd.fit(x_train, y_train)

Ridge()

In [32]:
y_pred = rd.predict(x_test)

In [34]:
rd.score(x_test, y_test)

0.8869885661424748

In [35]:
np.sqrt(mean_squared_error(y_test,y_pred))

1076.855751140166

# Manual Hyperparameter Tuning

In [40]:
for i in range(1,10):
    ls = Lasso(alpha= i)
    ls.fit(x_train, y_train)    
    y_pred = ls.predict(x_test)
    print(i, np.sqrt(mean_squared_error(y_test,y_pred)))   
    

1 1076.8490476991183
2 1076.8442770873987
3 1076.8413879038508
4 1076.8405400123618
5 1076.8414950705567
6 1076.8443730461834
7 1076.8491739238254
8 1076.8562115200243
9 1076.8648595288516


# GridSearchCV Hyper Parameter Tuning

In [42]:
#Creating Dictionary for parameters
param = {'alpha': [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]}

In [44]:
gcv = GridSearchCV(ls, param, cv = 10)

In [45]:
#Fitting into GridSearchCV
gcv.fit(x,y)

GridSearchCV(cv=10, estimator=Lasso(alpha=9),
             param_grid={'alpha': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
                                   1]})

In [46]:
#to get the best score
gcv.best_score_

0.8869221901249815

In [47]:
#to get the best parameter value
gcv.best_params_

{'alpha': 0.1}