In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso, RidgeCV, LassoCV, ElasticNet, ElasticNetCV, LinearRegression
from sklearn.model_selection import train_test_split

import statsmodels.api as sm

import matplotlib.pyplot as plt
import seaborn as sns
import os

from pandas_profiling import ProfileReport

In [2]:
%autosave 300

Autosaving every 300 seconds


In [3]:
os.listdir()

['.ipynb_checkpoints',
 'admission_prediction.csv',
 'advertising.csv',
 'in1__linear regression.ipynb',
 'in2__feature selection and regularization.ipynb',
 'in3__regularization norms.ipynb']

In [4]:
df = pd.read_csv("Admission_Prediction.csv")
df

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337.0,118.0,4.0,4.5,4.5,9.65,1,0.92
1,2,324.0,107.0,4.0,4.0,4.5,8.87,1,0.76
2,3,,104.0,3.0,3.0,3.5,8.00,1,0.72
3,4,322.0,110.0,3.0,3.5,2.5,8.67,1,0.80
4,5,314.0,103.0,2.0,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...,...
495,496,332.0,108.0,5.0,4.5,4.0,9.02,1,0.87
496,497,337.0,117.0,5.0,5.0,5.0,9.87,1,0.96
497,498,330.0,120.0,5.0,4.5,5.0,9.56,1,0.93
498,499,312.0,103.0,4.0,4.0,5.0,8.43,0,0.73


In [5]:
df['TOEFL Score'].unique()

array([118., 107., 104., 110., 103., 115., 109., 101., 102., 108., 106.,
       111., 112., 105., 114., 116., 119., 120.,  98.,  93.,  99.,  97.,
       117., 113., 100.,  nan,  95.,  96.,  94.,  92.])

#### *We're gonna regress against the 'Chance of Admit'*

In [7]:
ProfileReport(df).to_widgets()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render widgets:   0%|          | 0/1 [00:00<?, ?it/s]

VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…

In [8]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337.0,118.0,4.0,4.5,4.5,9.65,1,0.92
1,2,324.0,107.0,4.0,4.0,4.5,8.87,1,0.76
2,3,,104.0,3.0,3.0,3.5,8.0,1,0.72
3,4,322.0,110.0,3.0,3.5,2.5,8.67,1,0.8
4,5,314.0,103.0,2.0,2.0,3.0,8.21,0,0.65


In [9]:
### It was emphasized in the lecture that this ain't the best way of imputation.
## Going forward we'll learn new and better ways..

# Imputing missing GRE values

df['GRE Score'] = df['GRE Score'].fillna(df['GRE Score'].mean())

In [10]:
df[df['GRE Score'].isnull()]

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit


In [11]:
# Imputing missing TOEFL values

df['TOEFL Score'] = df['TOEFL Score'].fillna(df['TOEFL Score'].mean())

In [12]:
df[df['TOEFL Score'].isnull()]

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit


In [13]:
## Imputing missing TOEFL values

df['University Rating'] = df['University Rating'].fillna(df['University Rating'].mean())

In [14]:
df[df['University Rating'].isnull()]

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit


In [15]:
df

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337.000000,118.0,4.0,4.5,4.5,9.65,1,0.92
1,2,324.000000,107.0,4.0,4.0,4.5,8.87,1,0.76
2,3,316.558763,104.0,3.0,3.0,3.5,8.00,1,0.72
3,4,322.000000,110.0,3.0,3.5,2.5,8.67,1,0.80
4,5,314.000000,103.0,2.0,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...,...
495,496,332.000000,108.0,5.0,4.5,4.0,9.02,1,0.87
496,497,337.000000,117.0,5.0,5.0,5.0,9.87,1,0.96
497,498,330.000000,120.0,5.0,4.5,5.0,9.56,1,0.93
498,499,312.000000,103.0,4.0,4.0,5.0,8.43,0,0.73


In [16]:
## We don't need serial no. column

try:
    df.drop(['Serial No.'], axis=1, inplace=True)
except Exception as e:
    print(e)

In [17]:
df.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,337.0,118.0,4.0,4.5,4.5,9.65,1,0.92
1,324.0,107.0,4.0,4.0,4.5,8.87,1,0.76
2,316.558763,104.0,3.0,3.0,3.5,8.0,1,0.72
3,322.0,110.0,3.0,3.5,2.5,8.67,1,0.8
4,314.0,103.0,2.0,2.0,3.0,8.21,0,0.65


In [18]:
y = df[['Chance of Admit']]
y

Unnamed: 0,Chance of Admit
0,0.92
1,0.76
2,0.72
3,0.80
4,0.65
...,...
495,0.87
496,0.96
497,0.93
498,0.73


In [19]:
x = df.drop(['Chance of Admit'], axis=1)
x

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,337.000000,118.0,4.0,4.5,4.5,9.65,1
1,324.000000,107.0,4.0,4.0,4.5,8.87,1
2,316.558763,104.0,3.0,3.0,3.5,8.00,1
3,322.000000,110.0,3.0,3.5,2.5,8.67,1
4,314.000000,103.0,2.0,2.0,3.0,8.21,0
...,...,...,...,...,...,...,...
495,332.000000,108.0,5.0,4.5,4.0,9.02,1
496,337.000000,117.0,5.0,5.0,5.0,9.87,1
497,330.000000,120.0,5.0,4.5,5.0,9.56,1
498,312.000000,103.0,4.0,4.0,5.0,8.43,0


## Why Standardization (StandardScalar()) ?!

If my dataset is varying a lot, probably it might become difficult for my model to be find or recognize concrete relations between the features and features or features and label. So what if we could standardize the data **without changing its meaning per se**, then our model optimization is sure to increase.<br>
Behind the scenes, it ain't doing nothing but calculating the z-scores and converting the **Normal distribution** into **Standard normal** and for those who are not Normal distribution it will just standardize their statistics.
<br><br>
Moreover, Standardization of a dataset is a common requirement for many
machine learning estimators: they might behave badly if the
individual features do not more or less look like standard normally
distributed data (e.g. Gaussian with 0 mean and unit variance).

In [20]:
scaler = StandardScaler()
df1_x = scaler.fit_transform(x)

df1_x = pd.DataFrame(df1_x)
df1_x

Unnamed: 0,0,1,2,3,4,5,6
0,1.842741e+00,1.788542,0.778906,1.137360,1.098944,1.776806,0.886405
1,6.708143e-01,-0.031058,0.778906,0.632315,1.098944,0.485859,0.886405
2,5.124333e-15,-0.527313,-0.107877,-0.377773,0.017306,-0.954043,0.886405
3,4.905178e-01,0.465197,-0.107877,0.127271,-1.064332,0.154847,0.886405
4,-2.306679e-01,-0.692731,-0.994659,-1.387862,-0.523513,-0.606480,-1.128152
...,...,...,...,...,...,...,...
495,1.392000e+00,0.134360,1.665688,1.137360,0.558125,0.734118,0.886405
496,1.842741e+00,1.623124,1.665688,1.642404,1.639763,2.140919,0.886405
497,1.211704e+00,2.119379,1.665688,1.137360,1.639763,1.627851,0.886405
498,-4.109644e-01,-0.692731,0.778906,0.632315,1.639763,-0.242367,-1.128152


In [21]:
df1_x.columns = x.columns

In [22]:
df1_x.columns

Index(['GRE Score', 'TOEFL Score', 'University Rating', 'SOP', 'LOR', 'CGPA',
       'Research'],
      dtype='object')

In [23]:
ProfileReport(df1_x).to_widgets()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render widgets:   0%|          | 0/1 [00:00<?, ?it/s]

VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…

### Now we oughta deal with Multicollinearity, lets get on it..

In [24]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [25]:
df1_vifs = [variance_inflation_factor(df1_x, i) for i in range(len(df1_x.columns))]
df1_vifs  # variance inflation factors

[4.1532675722258245,
 3.792866110594645,
 2.5087682422787614,
 2.7757495092534947,
 2.0373076624897517,
 4.651669561154733,
 1.4593106786827281]

In [26]:
vif_df = pd.DataFrame()
vif_df['Features'] = x.columns
vif_df['VIF'] = df1_vifs

In [27]:
vif_df

Unnamed: 0,Features,VIF
0,GRE Score,4.153268
1,TOEFL Score,3.792866
2,University Rating,2.508768
3,SOP,2.77575
4,LOR,2.037308
5,CGPA,4.65167
6,Research,1.459311


**=>** Since each of 'em are less than 10, we needn't drop any columns from our features.

In [28]:
df1_x.describe()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,4.35052e-15,9.419132e-16,5.608847e-16,2.926548e-16,-1.3322680000000001e-17,3.091971e-15,-2.202682e-16
std,1.001002,1.001002,1.001002,1.001002,1.001002,1.001002,1.001002
min,-2.394225,-2.512331,-1.881441,-2.39795,-2.686789,-2.940115,-1.128152
25%,-0.681409,-0.692731,-0.9946589,-0.8828175,-0.5235128,-0.7430227,-1.128152
50%,5.124333e-15,-0.03105811,-0.1078766,0.1272712,0.01730621,-0.02720919,0.8864053
75%,0.6708143,0.796033,0.7789057,0.6323155,0.5581253,0.7672196,0.8864053
max,2.113186,2.119379,1.665688,1.642404,1.639763,2.223672,0.8864053


- Since my model would've already adapted to the data I've provided it to train so it goes w/o saying that it's not a good idea to test my model accuracy w/ the same dataset.
<br><br>
- So as to avoid this hurdle, from now on we beforehand will be splitting our dataset into two parts viz. 
    - Train dataset 
    - Test dataset

In [29]:
x_train, x_test, y_train, y_test = train_test_split(df1_x, y, test_size=0.25, random_state = 100)

## random_state: similar to np.random.seed(int_val) to fixate a certain dataset

In [30]:
x_train

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
219,-0.410964,-0.527313,-0.107877,0.127271,0.017306,-0.258918,-1.128152
391,0.129925,-0.196476,-0.107877,-1.387862,-0.523513,0.121746,-1.128152
375,-1.132150,-1.023567,-0.994659,-1.387862,-1.064332,-1.516763,-1.128152
19,-1.222298,-0.858149,-0.107877,0.127271,-0.523513,-0.126513,-1.128152
229,0.670814,0.630615,0.778906,-0.377773,-0.523513,0.717568,0.886405
...,...,...,...,...,...,...,...
343,-1.042002,-0.692731,-0.994659,-0.882817,0.017306,-0.738885,-1.128152
359,0.400370,-0.031058,-0.994659,-1.387862,-2.145970,-0.225816,-1.128152
323,-1.042002,-0.858149,-0.994659,-1.387862,-1.064332,-0.656132,-1.128152
280,-0.501113,-0.858149,-0.107877,1.137360,0.558125,0.105196,0.886405


In [31]:
x_test

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
69,1.031407,1.292288,0.778906,1.137360,0.558125,0.965827,0.886405
29,-0.591261,-1.354404,-0.994659,-1.892906,-1.605151,-2.112585,-1.128152
471,-0.501113,-0.692731,-0.107877,-1.387862,0.558125,-0.805087,-1.128152
344,-1.943484,-1.850658,-0.994659,-1.892906,-1.605151,-2.046382,-1.128152
54,0.490518,0.465197,-0.107877,-0.377773,0.017306,-0.954043,-1.128152
...,...,...,...,...,...,...,...
46,1.121555,1.126869,1.665688,0.632315,1.639763,1.197535,0.886405
50,-0.320816,-1.519822,-0.107877,-0.882817,1.098944,-0.457525,0.886405
78,-1.853336,-2.016077,-0.994659,-0.377773,-1.605151,-1.715370,0.886405
304,-0.320816,-0.196476,-0.994659,-0.882817,-1.605151,-0.242367,-1.128152


In [32]:
y_train

Unnamed: 0,Chance of Admit
219,0.74
391,0.71
375,0.38
19,0.62
229,0.82
...,...
343,0.59
359,0.81
323,0.62
280,0.68


In [33]:
y_test

Unnamed: 0,Chance of Admit
69,0.78
29,0.54
471,0.64
344,0.47
54,0.70
...,...
46,0.86
50,0.76
78,0.44
304,0.62


In [34]:
lm = LinearRegression()
lm

LinearRegression()

In [35]:
lm.fit(x_train, y_train)

LinearRegression()

In [36]:
import pickle

pickle.dump(lm, open("linear_mod.sav", "wb"))

In [37]:
os.listdir()

['.ipynb_checkpoints',
 'admission_prediction.csv',
 'admission_prediction_model.sav',
 'advertising.csv',
 'in1__linear regression.ipynb',
 'in2__feature selection and regularization.ipynb',
 'in3__regularization norms.ipynb']

In [38]:
x_test

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
69,1.031407,1.292288,0.778906,1.137360,0.558125,0.965827,0.886405
29,-0.591261,-1.354404,-0.994659,-1.892906,-1.605151,-2.112585,-1.128152
471,-0.501113,-0.692731,-0.107877,-1.387862,0.558125,-0.805087,-1.128152
344,-1.943484,-1.850658,-0.994659,-1.892906,-1.605151,-2.046382,-1.128152
54,0.490518,0.465197,-0.107877,-0.377773,0.017306,-0.954043,-1.128152
...,...,...,...,...,...,...,...
46,1.121555,1.126869,1.665688,0.632315,1.639763,1.197535,0.886405
50,-0.320816,-1.519822,-0.107877,-0.882817,1.098944,-0.457525,0.886405
78,-1.853336,-2.016077,-0.994659,-0.377773,-1.605151,-1.715370,0.886405
304,-0.320816,-0.196476,-0.994659,-0.882817,-1.605151,-0.242367,-1.128152


In [39]:
lm.predict([[-0.320816, 0.299778, -0.107877, 0.632315, 0.017306, 0.701017, -1.128152]])



array([[0.75839084]])

In [40]:
df

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,337.000000,118.0,4.0,4.5,4.5,9.65,1,0.92
1,324.000000,107.0,4.0,4.0,4.5,8.87,1,0.76
2,316.558763,104.0,3.0,3.0,3.5,8.00,1,0.72
3,322.000000,110.0,3.0,3.5,2.5,8.67,1,0.80
4,314.000000,103.0,2.0,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...
495,332.000000,108.0,5.0,4.5,4.0,9.02,1,0.87
496,337.000000,117.0,5.0,5.0,5.0,9.87,1,0.96
497,330.000000,120.0,5.0,4.5,5.0,9.56,1,0.93
498,312.000000,103.0,4.0,4.0,5.0,8.43,0,0.73


In [41]:
lm.predict([[324.000000, 107.0, 4.0, 4.0, 4., 8.87, 1]])



array([[8.55367947]])

- But if you remember our **'Chances of Admit'** are supposed to be in 0 to 1 range, the heck we're getting this unprecedented outcome for ?!
<br><br>
- Well it's not that unprecedented, it's just that whatever the hell you did with train data, you gotta do the same with the testing data. Ain't no biggie! 

**Note:** Now you must have got the idea why creating the object of StandardScaler() was important as you aren't supposed to create a new object here.

In [42]:
## Now try the very exact data after transformation

scaler.transform([[324.000000, 107.0, 4.0, 4.0, 4., 8.87, 1]])



array([[ 0.67081429, -0.03105811,  0.77890565,  0.63231549,  0.55812525,
         0.48585943,  0.88640526]])

In [43]:
lm.predict([[ 0.67081429, -0.03105811,  0.77890565,  0.63231549,  0.55812525,
         0.48585943,  0.88640526]])



array([[0.79167681]])

**=> Must have been clarified now!**

In [44]:
## Let's compare the accuracies for testing as well as training dataset:

print("Accuracy w testing data: ", lm.score(x_test, y_test)*100)
print("Accuracy w training data: ", lm.score(x_train, y_train)*100)

Accuracy w testing data:  82.62844735686966
Accuracy w training data:  81.68409566831644


In [45]:
## now lets compare the adjusted r-squares for both..

def adj_r2(x, y):
    r2 = lm.score(x, y)
    
    n = len(x.index)
    p = len(x.columns)
    
    return 1-(1-r2)*(n-1)/(n-p-1)

In [46]:
print("adjusted-Accuracy w testing data: ", adj_r2(x_test, y_test)*100)
# print("adjusted-Accuracy w training data: ", adj_r2(x_train, y_train)*100)

adjusted-Accuracy w testing data:  81.58912369446014


### # Let's build our actual model equation:

In [48]:
coefs = lm.coef_
cols = list(x.columns)
coefs, cols

(array([[0.015458  , 0.01908417, 0.00381077, 0.00315846, 0.01678637,
         0.07622763, 0.01400522]]),
 ['GRE Score',
  'TOEFL Score',
  'University Rating',
  'SOP',
  'LOR',
  'CGPA',
  'Research'])

In [49]:
lm.intercept_

array([0.7181156])

In [50]:
## Model equation:

f"{coefs[0][0]}*{cols[0]} + {coefs[0][1]}*{cols[1]} + {coefs[0][2]}*{cols[2]} + {coefs[0][3]}*{cols[3]} +\
{coefs[0][4]}*{cols[4]} + {coefs[0][5]}*{cols[5]} + {coefs[0][6]}*{cols[6]} + {lm.intercept_[0]}"

'0.015457995513533156*GRE Score + 0.019084173755603083*TOEFL Score + 0.003810774038518875*University Rating + 0.003158457873203794*SOP +0.016786367976654195*LOR + 0.07622763419234045*CGPA + 0.01400522000866257*Research + 0.7181156002659718'

## # Regularization: 

### Cross Validation

The idea behing the **cross validation** is that our whole dataset will be divided into the desired number of subsets and according to the number of iterations passed as parameter, any single of the divided subset will be used as test dataset (and rest of 'em, goes w/o saying, as training datasets).
<br><br>
    **=>** and by doing so, the **best possible value of lambda (or alpha)** will be calculated and by substituting it in the equation behind the scenes, eventually our model will be **regularized**.

### i. LASSO (L1 Norm)

In [51]:
lassocv = LassoCV(alphas=None, cv=5, max_iter=500000, normalize=True)
lassocv.fit(x_train, y_train)

  y = column_or_1d(y, warn=True)
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Lasso())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 


LassoCV(cv=5, max_iter=500000, normalize=True)

In [52]:
lassocv.alpha_ # found the best possible val of alpha by cross-validation

2.7745719225519405e-05

In [53]:
## Lasso Linear model

lasso = Lasso(alpha = lassocv.alpha_)
lasso.fit(x_train, y_train)

Lasso(alpha=2.7745719225519405e-05)

In [54]:
lasso.score(x_test, y_test)*100

82.6321706823379

### ii. Ridge (L2 Norm)

In [55]:
np.random.uniform(0, 10, 50)

array([5.39121848, 2.51403095, 7.17858607, 7.67365185, 4.51646341,
       3.9869272 , 7.6509048 , 6.62032992, 6.96662157, 1.93285776,
       8.16753746, 9.58425455, 2.9491882 , 3.28640843, 4.39804621,
       9.24180165, 0.21141819, 1.83348359, 0.09681168, 8.02443489,
       8.40745959, 7.10434875, 5.025082  , 0.92358616, 1.33366902,
       0.5312049 , 8.33329663, 0.85507784, 8.63129014, 0.81913924,
       0.80429481, 9.96492583, 3.02724436, 2.42950548, 7.16685976,
       4.12188674, 9.08177092, 8.55044624, 6.81119178, 8.92278543,
       8.12854049, 9.88253726, 9.04814803, 9.86289899, 6.51057542,
       1.99061482, 7.25183732, 7.87393809, 3.56651921, 7.77534415])

In [56]:
ridgecv = RidgeCV(alphas= np.random.uniform(0, 10, 50), cv=10, normalize=True)
ridgecv.fit(x_train, y_train)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

RidgeCV(alphas=array([5.63799564, 0.30670632, 8.56292802, 6.10773021, 4.1375653 ,
       7.02161414, 8.65318133, 0.10264626, 3.66544615, 8.81677062,
       4.46411238, 7.18539692, 6.5832426 , 0.38416278, 3.44166527,
       9.49734029, 2.18321662, 6.13956342, 9.08906584, 2.18368502,
       2.16192513, 7.20084211, 1.94377282, 6.70033306, 2.23086817,
       7.30252572, 5.65302966, 3.81651992, 2.00784109, 3.2345024 ,
       9.20917149, 7.98362149, 6.72859861, 0.75519554, 2.78579323,
       0.33853258, 8.22305477, 7.83424109, 2.06546957, 3.39776579,
       9.67045661, 8.53630562, 2.95279532, 2.13351494, 2.86595005,
       5.47392654, 5.20155531, 0.89280948, 1.31016884, 2.75208813]),
        cv=10, normalize=True)

In [57]:
ridgecv.alpha_

0.10264625671992711

In [58]:
ridge_lr = Ridge(ridgecv.alpha_)
ridge_lr.fit(x_train, y_train)

Ridge(alpha=0.10264625671992711)

In [59]:
ridge_lr.score(x_test, y_test)*100

82.63193575371903

### iii. ElasticNet (L1 + L2 Norm)

In [60]:
elastic_cv = ElasticNetCV(alphas=None, cv=10)
elastic_cv.fit(x_train, y_train)

  y = column_or_1d(y, warn=True)


ElasticNetCV(cv=10)

In [61]:
elastic_cv.alpha_

0.00032049828688228085

In [62]:
elastic_cv.l1_ratio_

0.5

In [63]:
elastic_lr = ElasticNet(alpha=elastic_cv.alpha_, l1_ratio=elastic_cv.l1_ratio_)
elastic_lr.fit(x_train, y_train)

ElasticNet(alpha=0.00032049828688228085)

In [64]:
elastic_lr.score(x_test, y_test)*100

82.65118379982933