In [1]:
from sklearn import datasets
import pandas as pd
# Model, metric and search utilities used in the model-tuning sections
from sklearn.linear_model import ElasticNet
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV
from scipy import stats
import matplotlib.pyplot as plt

# Regularisation
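Note: the features returned by `load_diabetes` are already mean-centred and scaled, so no extra scaling step is needed before applying regularisation.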

In [3]:
# Load the diabetes regression data as pandas objects (feature frame X, target series y).
X, y = datasets.load_diabetes(as_frame=True, return_X_y=True)
# Preview the first five feature rows.
X.head(5)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641


In [4]:
# Preview the first five target values.
y.head(5)

0    151.0
1     75.0
2    141.0
3    206.0
4    135.0
Name: target, dtype: float64

In [5]:
from sklearn.linear_model import Ridge, Lasso, LinearRegression

# Fit an unregularised baseline plus a Ridge and a Lasso model on the full
# dataset, using the same penalty strength (alpha=0.2) for both penalised models.
linreg = LinearRegression().fit(X, y)
ridge = Ridge(alpha=0.2).fit(X, y)
lasso = Lasso(alpha=0.2).fit(X, y)

# One column of coefficients per model, one row per feature.
coefs = pd.DataFrame(
    {
        "coef_linreg": linreg.coef_,
        "coef_ridge": ridge.coef_,
        "coef_lasso": lasso.coef_,
    },
    index=X.columns,
)

# Truncate to whole numbers for readability and colour the cells that show 0 in red.
# `astype(int)` and the column-wise `Styler.apply` replace the two `applymap`
# calls, which pandas deprecated in version 2.1.
(
    coefs
    .astype(int)
    .style.apply(lambda col: ['color: red' if v == 0 else 'color: black' for v in col])
)

Unnamed: 0,coef_linreg,coef_ridge,coef_lasso
age,-10,7,0
sex,-239,-182,-75
bmi,519,457,511
bp,324,284,234
s1,-792,-48,0
s2,476,-78,0
s3,101,-189,-170
s4,177,119,0
s5,751,400,450
s6,67,97,0


**Let's check the p-values of our features before regularization**

In [6]:
import statsmodels.api as sm

# Ordinary least squares on every feature; `add_constant` supplies the `const`
# (intercept) column that appears in the summary.
ols = sm.OLS(endog=y, exog=sm.add_constant(X)).fit()
# Full regression report: coefficients, standard errors, t-statistics and p-values.
ols.summary()

0,1,2,3
Dep. Variable:,target,R-squared:,0.518
Model:,OLS,Adj. R-squared:,0.507
Method:,Least Squares,F-statistic:,46.27
Date:,"Tue, 31 May 2022",Prob (F-statistic):,3.8299999999999998e-62
Time:,18:49:44,Log-Likelihood:,-2386.0
No. Observations:,442,AIC:,4794.0
Df Residuals:,431,BIC:,4839.0
Df Model:,10,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,152.1335,2.576,59.061,0.000,147.071,157.196
age,-10.0122,59.749,-0.168,0.867,-127.448,107.424
sex,-239.8191,61.222,-3.917,0.000,-360.151,-119.488
bmi,519.8398,66.534,7.813,0.000,389.069,650.610
bp,324.3904,65.422,4.958,0.000,195.805,452.976
s1,-792.1842,416.684,-1.901,0.058,-1611.169,26.801
s2,476.7458,339.035,1.406,0.160,-189.621,1143.113
s3,101.0446,212.533,0.475,0.635,-316.685,518.774
s4,177.0642,161.476,1.097,0.273,-140.313,494.442

0,1,2,3
Omnibus:,1.506,Durbin-Watson:,2.029
Prob(Omnibus):,0.471,Jarque-Bera (JB):,1.404
Skew:,0.017,Prob(JB):,0.496
Kurtosis:,2.726,Cond. No.,227.0


**Let's compare the regularized coefficients with the p-values of the unregularized model**

In [7]:
# Refit the three models so this cell does not depend on earlier fitted state.
linreg = LinearRegression().fit(X, y)
ridge = Ridge(alpha=0.2).fit(X, y)
lasso = Lasso(alpha=0.2).fit(X, y)

# Coefficients per model next to the OLS p-value of each feature, in percent.
# Reindexing on the feature names drops the intercept (`const`) p-value.
coefs = pd.DataFrame(
    {
        "coef_linreg": linreg.coef_,
        "coef_ridge": ridge.coef_,
        "coef_lasso": lasso.coef_,
        "p-values (%)": ols.pvalues.reindex(X.columns) * 100,
    },
    index=X.columns,
)

# Truncate to whole numbers and colour the cells that show 0 in red. Note that
# p-values below 1% are truncated to 0 as well, so they are also rendered red.
# `astype(int)` and the column-wise `Styler.apply` replace the two `applymap`
# calls, which pandas deprecated in version 2.1.
(
    coefs
    .astype(int)
    .style.apply(lambda col: ['color: red' if v == 0 else 'color: black' for v in col])
)

Unnamed: 0,coef_linreg,coef_ridge,coef_lasso,p-values (%)
age,-10,7,0,86
sex,-239,-182,-75,0
bmi,519,457,511,0
bp,324,284,234,0
s1,-792,-48,0,5
s2,476,-78,0,16
s3,101,-189,-170,63
s4,177,119,0,27
s5,751,400,450,0
s6,67,97,0,30


In [8]:
# Unrounded p-value of every OLS term (the intercept `const` plus each feature).
ols.pvalues

const    1.009679e-208
age       8.669998e-01
sex       1.041429e-04
bmi       4.299558e-14
bp        1.023819e-06
s1        5.794735e-02
s2        1.603892e-01
s3        6.347207e-01
s4        2.734557e-01
s5        1.556021e-05
s6        3.059983e-01
dtype: float64

# Model Tuning

## Model Tuning the hard way

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed seed keeps the split stable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, test_size=0.20
)

In [11]:
# Candidate hyperparameter values to try
import itertools

alphas = [0.01, 0.1, 1]  # overall penalty strength (L1 + L2)
l1_ratios = [0.2, 0.5, 0.8]  # share of the penalty that is L1

# All combinations: [(0.01, 0.2), (0.01, 0.5), (...)].
# Materialised as a list because `itertools.product` returns a one-shot
# iterator: once a loop has consumed it, re-running that loop would silently
# iterate over nothing.
hyperparams = list(itertools.product(alphas, l1_ratios))

In [12]:
# Cross-validate one ElasticNet per (alpha, l1_ratio) pair on the training set
# and report the mean score over the 5 folds.
for hyperparam in hyperparams:
    alpha, l1_ratio = hyperparam
    model = ElasticNet(l1_ratio=l1_ratio, alpha=alpha)
    r2 = cross_val_score(estimator=model, X=X_train, y=y_train, cv=5).mean()
    print(f"alpha: {alpha}, l1_ratio: {l1_ratio},   r2: {r2}")

alpha: 0.01, l1_ratio: 0.2,   r2: 0.30970941644612154
alpha: 0.01, l1_ratio: 0.5,   r2: 0.3655340015692019
alpha: 0.01, l1_ratio: 0.8,   r2: 0.441695946812292
alpha: 0.1, l1_ratio: 0.2,   r2: 0.04607457091738236
alpha: 0.1, l1_ratio: 0.5,   r2: 0.0802909185027088
alpha: 0.1, l1_ratio: 0.8,   r2: 0.17781850644643044
alpha: 1, l1_ratio: 0.2,   r2: -0.02142017085463834
alpha: 1, l1_ratio: 0.5,   r2: -0.019482178997087242
alpha: 1, l1_ratio: 0.8,   r2: -0.011426671381456721


## Model Tuning the easy way - GridSearchCV

In [13]:
from sklearn.model_selection import GridSearchCV

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, test_size=0.20
)

# Instantiate the model
model = ElasticNet()

# Hyperparameter grid: every alpha is combined with every l1_ratio
grid = {
    'alpha': [0.01, 0.1, 1],
    'l1_ratio': [0.2, 0.5, 0.8],
}

# Instantiate the grid search
search = GridSearchCV(
    estimator=model,
    param_grid=grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,  # parallelize computation
)

# Fit the grid search on the training data
search.fit(X_train, y_train);

In [14]:
# A cell only renders its last expression, so the score and the parameters are
# printed explicitly; the estimator is left as the cell's displayed result.

# Best score
print(f"Best CV r2: {search.best_score_}")

# Best params
print(f"Best params: {search.best_params_}")

# Best estimator
search.best_estimator_

ElasticNet(alpha=0.01, l1_ratio=0.8)

## Model Tuning the easy way - RandomizedSearchCV

In [None]:

# Instantiate the model
model = ElasticNet()

# Search space: l1_ratio is drawn from a continuous uniform distribution on
# [0, 1]; alpha is picked from a list of candidate values
grid = {'l1_ratio': stats.uniform(0, 1), 'alpha': [0.001, 0.01, 0.1, 1]}

# Instantiate the randomized search
search = RandomizedSearchCV(
    model, grid,
    scoring='r2',
    n_iter=100,  # number of draws
    cv=5,
    n_jobs=-1,
    random_state=1,  # fixed seed so the same candidates are drawn on every run
)

# Fit the randomized search on the training data
search.fit(X_train, y_train)
search.best_estimator_

**Choosing the probability distribution a hyperparameter is sampled from**

In [None]:
# Visualise the distribution a hyperparameter would be sampled from.
dist = stats.norm(10, 2)  # if you have a best guess (say: 10)
r = dist.rvs(size=10000, random_state=1)  # random draws, seeded so the histogram is reproducible
plt.hist(r)
plt.xlabel("sampled value")
plt.ylabel("count")
plt.title("Random draws from the candidate distribution");


# Alternative distributions to try instead of the normal above:
# dist = stats.randint(1, 100)  # if you have no idea: integers from 1 to 99
# dist = stats.uniform(1, 99)  # continuous analogue: uniform(loc, scale) spans [loc, loc + scale]

# dist = stats.loguniform(0.01, 1)  # coarse-grained search

# r = dist.rvs(size=10000)  # random draws
# plt.hist(r);

# Support Vector Machines

In [2]:
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier

# Linear-kernel support vector classifier with C = 10.
svc = SVC(C=10, kernel='linear')

# Same kind of model (hinge loss with an L2 penalty) but fitted with the SGD solver.
# NOTE(review): SGDClassifier's objective presumably averages the loss over the
# samples, in which case the alpha matching C would be 1 / (C * n_samples) rather
# than 1 / C -- confirm against the scikit-learn SGD documentation before
# relying on the two models being equivalent.
svc_bis = SGDClassifier(alpha=1/10, penalty='l2', loss='hinge')