In [4]:
from datasets import load_diabetes
from tools import *
from sklearn.linear_model import Lasso
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
import plotly.express as px

In [5]:
# Load the diabetes data as three frames: the full set plus a train and a test split.
# NOTE(review): `load_diabetes` comes from the imported `datasets` module, not shown here —
# presumably df_train/df_test are a partition of df_original; confirm against that helper.
df_original, df_train, df_test = load_diabetes()

In [6]:
# Separate every frame into its feature matrix (all columns except 'target')
# and the 'target' column itself.
train_X, train_y = df_train.drop(columns=['target']), df_train['target']
test_X, test_y = df_test.drop(columns=['target']), df_test['target']
original_X, original_y = df_original.drop(columns=['target']), df_original['target']

# Column labels of the base features, reused when labelling coefficient plots.
features = train_X.columns

## Alpha selection

In [14]:
# Candidate regularisation strengths: 80 values spaced evenly on a log scale
# from 10**-2 up to 10**1 (both endpoints included).
n_alphas = 80
alphas = np.logspace(start=-2, stop=1, num=n_alphas)

In [15]:
# Mean cross-validated score of a Lasso model for every candidate alpha
# (cross_val_score with its default CV splitter and the estimator's default scorer).
# NOTE(review): CV runs on original_X/original_y, which presumably also contains the
# test split — confirm this is intended before using the curve to pick alpha.
scores = [
    cross_val_score(Lasso(alpha=alpha), original_X, original_y).mean()
    for alpha in alphas
]

# One row per alpha; plotted as score versus regularisation strength.
result = pd.DataFrame({'alpha': alphas, 'R^2 (mean)': scores})
px.line(result, x='alpha', y='R^2 (mean)')

In [16]:
# Coefficient path: refit Lasso on the training split for every alpha and
# record the fitted coefficient vector each time.
coefs = []
for alpha in alphas:
    lasso = Lasso(alpha=alpha).fit(train_X, train_y)  # fit() returns the estimator itself
    coefs.append(lasso.coef_)

In [17]:
# One row per alpha, one column per base feature, plus the alpha value itself;
# each line in the plot traces how a feature's coefficient changes as alpha grows.
# Author's observation (translated from Czech): "economy" stays in the model the
# longest, which makes sense with regard to feature selection.
result2 = pd.DataFrame(coefs, columns=features).assign(alphas=alphas)
px.line(result2, x='alphas', y=features)

In [18]:
# Final Lasso model on the training split, using the hand-picked alpha of 0.06.
model = Lasso(alpha=0.06)
model.fit(train_X, train_y)

In [19]:
# Report the fitted model's score on the train and the test split
# (the recorded output below shows a "Train score" and a "Test score").
# NOTE(review): `model_performance` is not defined in this notebook — presumably it
# comes from the star import of `tools`; confirm which metric it reports.
model_performance(model, train_X, train_y, test_X, test_y)

Train score
0.5054127039264911
Test score
0.5220612284011737


## Polynomial Lasso

In [20]:
# Expand each feature matrix with polynomial_features(..., 3); the calls run in the
# order train, original, test.
train_X_3, original_X_3, test_X_3 = (
    polynomial_features(matrix, 3) for matrix in (train_X, original_X, test_X)
)
# Display the shape of the expanded training matrix as a sanity check.
train_X_3.shape

(309, 30)

In [21]:
# Column labels of the base (non-polynomial) training features — the same value as
# the `features` assignment made right after the train/test split.
features = train_X.columns

In [23]:
# Mean cross-validated score of a Lasso model on the polynomial feature matrix,
# one value per candidate alpha (default CV splitter and default scorer).
scores = [
    cross_val_score(Lasso(alpha=alpha), original_X_3, original_y).mean()
    for alpha in alphas
]

# One row per alpha; plotted as score versus regularisation strength.
result = pd.DataFrame({'alpha': alphas, 'R^2 (mean)': scores})
px.line(result, x='alpha', y='R^2 (mean)')

In [24]:
coefs = []
for a in alphas:
    ridge = Lasso(alpha=a)
    ridge.fit(test_X, test_y)
    coefs.append(ridge.coef_)

In [25]:
result2 = pd.DataFrame(coefs, columns=features)
result2['alphas'] = alphas
px.line(result2, x='alphas', y=features)