# <font color = "purple">Linear regression, Ridge regression </font>

In [27]:
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.model_selection import train_test_split,RandomizedSearchCV
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer, make_column_transformer, TransformedTargetRegressor
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV
from sklearn.metrics import mean_squared_error, r2_score, make_scorer
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
from sklearn.neighbors import KNeighborsRegressor
%matplotlib inline

In [28]:
customers = pd.read_csv("Ecommerce Customers")

In [29]:
customers.head()

Unnamed: 0,Email,Address,Avatar,Avg. Session Length,Time on App,Time on Website,Length of Membership,Yearly Amount Spent
0,mstephenson@fernandez.com,"835 Frank Tunnel\nWrightmouth, MI 82180-9605",Violet,34.497268,12.655651,39.577668,4.082621,587.951054
1,hduke@hotmail.com,"4547 Archer Common\nDiazchester, CA 06566-8576",DarkGreen,31.926272,11.109461,37.268959,2.664034,392.204933
2,pallen@yahoo.com,"24645 Valerie Unions Suite 582\nCobbborough, D...",Bisque,33.000915,11.330278,37.110597,4.104543,487.547505
3,riverarebecca@gmail.com,"1414 David Throughway\nPort Jason, OH 22070-1220",SaddleBrown,34.305557,13.717514,36.721283,3.120179,581.852344
4,mstephens@davidson-herman.com,"14023 Rodriguez Passage\nPort Jacobville, PR 3...",MediumAquaMarine,33.330673,12.795189,37.536653,4.446308,599.406092


In [30]:
customers.describe()

Unnamed: 0,Avg. Session Length,Time on App,Time on Website,Length of Membership,Yearly Amount Spent
count,500.0,500.0,500.0,500.0,500.0
mean,33.053194,12.052488,37.060445,3.533462,499.314038
std,0.992563,0.994216,1.010489,0.999278,79.314782
min,29.532429,8.508152,33.913847,0.269901,256.670582
25%,32.341822,11.388153,36.349257,2.93045,445.038277
50%,33.082008,11.983231,37.069367,3.533975,498.887875
75%,33.711985,12.75385,37.716432,4.126502,549.313828
max,36.139662,15.126994,40.005182,6.922689,765.518462


In [31]:
customers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Email                 500 non-null    object 
 1   Address               500 non-null    object 
 2   Avatar                500 non-null    object 
 3   Avg. Session Length   500 non-null    float64
 4   Time on App           500 non-null    float64
 5   Time on Website       500 non-null    float64
 6   Length of Membership  500 non-null    float64
 7   Yearly Amount Spent   500 non-null    float64
dtypes: float64(5), object(3)
memory usage: 31.4+ KB


In [32]:
y = customers['Yearly Amount Spent']
X = customers[['Avg. Session Length', 'Time on App','Time on Website', 'Length of Membership']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [33]:
numeric_features  = ["Avg. Session Length" ,"Time on App" , "Time on Website" , "Length of Membership"]


In [34]:
numeric_preprocessing = make_pipeline(SimpleImputer(strategy='median'), 
                                      StandardScaler())
numeric_preprocessing

Pipeline(steps=[('simpleimputer', SimpleImputer(strategy='median')),
                ('standardscaler', StandardScaler())])

In [35]:
preprocessing = ColumnTransformer([
    ('numeric', numeric_preprocessing, numeric_features)
])


In [36]:
preprocessing.fit(X_train);

In [37]:
new_columns = numeric_features 
X_train_enc = pd.DataFrame(preprocessing.transform(X_train), index=X_train.index, columns=new_columns)
X_train_enc

Unnamed: 0,Avg. Session Length,Time on App,Time on Website,Length of Membership
202,-1.499941,-0.730834,-0.042984,0.274430
428,-1.163743,2.110040,-0.060729,0.200101
392,0.228480,-0.546783,0.048110,1.141258
86,0.846567,0.508317,0.072683,-0.887289
443,-0.004191,0.494168,0.580927,0.518859
...,...,...,...,...
63,-0.238887,-0.383563,0.336960,-0.129222
326,0.187526,-1.088966,1.400968,0.714731
337,-1.198424,0.448846,0.357796,-0.577043
11,0.848145,-0.473301,0.006833,0.174638


In [38]:
lin_rg = make_pipeline(preprocessing, LinearRegression())
lin_rg.fit(X_train_enc, y_train);
predictions= lin_rg.predict(X_test)

In [39]:
print('MAE:', metrics.mean_absolute_error(y_test, predictions))
print('MSE:', metrics.mean_squared_error(y_test, predictions))
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predictions)))

MAE: 1522.2502419256443
MSE: 2317334.860754925
RMSE: 1522.2794949531853


# <font color = "purple"> Ridge regression</font>

In [40]:
ridge = Ridge(max_iter=200)
pipe = Pipeline([("preprocessing" , preprocessing), ("ridge" , ridge)])
param_choices = {
    "ridge__alpha" : np.arange(0,500)
}
random_search = RandomizedSearchCV(pipe, param_choices,
                                   n_iter = 50, 
                                   verbose = 1,
                                   random_state = 123)

In [41]:
random_search.fit(X_train, y_train);

Fitting 5 folds for each of 50 candidates, totalling 250 fits


In [42]:
random_search.best_params_

{'ridge__alpha': 5}

In [43]:
random_search.best_score_

0.9797291636667446

# <font color = "purple"> KNeighbor regression</font>

In [81]:
knnreg = KNeighborsRegressor()
pipe = Pipeline([("preprocessing" , preprocessing), ("knnreg" , knnreg)])
param_choices = {
    "knnreg__n_neighbors" : np.arange(1,500)
}
random_search2 = RandomizedSearchCV(pipe, param_choices,
                                   n_iter = 50, 
                                   verbose = 1,
                                   random_state = 123)

In [84]:
random_search2.fit(X_train, y_train);

Fitting 5 folds for each of 50 candidates, totalling 250 fits


Traceback (most recent call last):
  File "E:\anacondajadid\lib\site-packages\sklearn\model_selection\_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "E:\anacondajadid\lib\site-packages\sklearn\metrics\_scorer.py", line 397, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\utils\metaestimators.py", line 120, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\pipeline.py", line 622, in score
    return self.steps[-1][-1].score(Xt, y, **score_params)
  File "E:\anacondajadid\lib\site-packages\sklearn\base.py", line 553, in score
    y_pred = self.predict(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_regression.py", line 208, in predict
    neigh_dist, neigh_ind = self.kneighbors(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_base.py", line 683, in kneighbors
    (n_s

Traceback (most recent call last):
  File "E:\anacondajadid\lib\site-packages\sklearn\model_selection\_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "E:\anacondajadid\lib\site-packages\sklearn\metrics\_scorer.py", line 397, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\utils\metaestimators.py", line 120, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\pipeline.py", line 622, in score
    return self.steps[-1][-1].score(Xt, y, **score_params)
  File "E:\anacondajadid\lib\site-packages\sklearn\base.py", line 553, in score
    y_pred = self.predict(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_regression.py", line 208, in predict
    neigh_dist, neigh_ind = self.kneighbors(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_base.py", line 683, in kneighbors
    (n_s

Traceback (most recent call last):
  File "E:\anacondajadid\lib\site-packages\sklearn\model_selection\_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "E:\anacondajadid\lib\site-packages\sklearn\metrics\_scorer.py", line 397, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\utils\metaestimators.py", line 120, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\pipeline.py", line 622, in score
    return self.steps[-1][-1].score(Xt, y, **score_params)
  File "E:\anacondajadid\lib\site-packages\sklearn\base.py", line 553, in score
    y_pred = self.predict(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_regression.py", line 208, in predict
    neigh_dist, neigh_ind = self.kneighbors(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_base.py", line 683, in kneighbors
    (n_s

Traceback (most recent call last):
  File "E:\anacondajadid\lib\site-packages\sklearn\model_selection\_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "E:\anacondajadid\lib\site-packages\sklearn\metrics\_scorer.py", line 397, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\utils\metaestimators.py", line 120, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\pipeline.py", line 622, in score
    return self.steps[-1][-1].score(Xt, y, **score_params)
  File "E:\anacondajadid\lib\site-packages\sklearn\base.py", line 553, in score
    y_pred = self.predict(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_regression.py", line 208, in predict
    neigh_dist, neigh_ind = self.kneighbors(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_base.py", line 683, in kneighbors
    (n_s

Traceback (most recent call last):
  File "E:\anacondajadid\lib\site-packages\sklearn\model_selection\_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "E:\anacondajadid\lib\site-packages\sklearn\metrics\_scorer.py", line 397, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\utils\metaestimators.py", line 120, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\pipeline.py", line 622, in score
    return self.steps[-1][-1].score(Xt, y, **score_params)
  File "E:\anacondajadid\lib\site-packages\sklearn\base.py", line 553, in score
    y_pred = self.predict(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_regression.py", line 208, in predict
    neigh_dist, neigh_ind = self.kneighbors(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_base.py", line 683, in kneighbors
    (n_s

Traceback (most recent call last):
  File "E:\anacondajadid\lib\site-packages\sklearn\model_selection\_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "E:\anacondajadid\lib\site-packages\sklearn\metrics\_scorer.py", line 397, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\utils\metaestimators.py", line 120, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\pipeline.py", line 622, in score
    return self.steps[-1][-1].score(Xt, y, **score_params)
  File "E:\anacondajadid\lib\site-packages\sklearn\base.py", line 553, in score
    y_pred = self.predict(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_regression.py", line 208, in predict
    neigh_dist, neigh_ind = self.kneighbors(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_base.py", line 683, in kneighbors
    (n_s

Traceback (most recent call last):
  File "E:\anacondajadid\lib\site-packages\sklearn\model_selection\_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "E:\anacondajadid\lib\site-packages\sklearn\metrics\_scorer.py", line 397, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\utils\metaestimators.py", line 120, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\pipeline.py", line 622, in score
    return self.steps[-1][-1].score(Xt, y, **score_params)
  File "E:\anacondajadid\lib\site-packages\sklearn\base.py", line 553, in score
    y_pred = self.predict(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_regression.py", line 208, in predict
    neigh_dist, neigh_ind = self.kneighbors(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_base.py", line 683, in kneighbors
    (n_s

Traceback (most recent call last):
  File "E:\anacondajadid\lib\site-packages\sklearn\model_selection\_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "E:\anacondajadid\lib\site-packages\sklearn\metrics\_scorer.py", line 397, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\utils\metaestimators.py", line 120, in <lambda>
    out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
  File "E:\anacondajadid\lib\site-packages\sklearn\pipeline.py", line 622, in score
    return self.steps[-1][-1].score(Xt, y, **score_params)
  File "E:\anacondajadid\lib\site-packages\sklearn\base.py", line 553, in score
    y_pred = self.predict(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_regression.py", line 208, in predict
    neigh_dist, neigh_ind = self.kneighbors(X)
  File "E:\anacondajadid\lib\site-packages\sklearn\neighbors\_base.py", line 683, in kneighbors
    (n_s

In [83]:
random_search2.best_params_

{'knnreg__n_neighbors': 6}