# Diabetes Example

## Setup

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV, KFold

## Data

In [3]:
# Load the scikit-learn diabetes dataset; `return_X_y=True` yields the
# (features, target) pair directly, unpacked here into X and y.
from sklearn import datasets
X, y = datasets.load_diabetes(return_X_y=True)

## Lasso Grid Search

In [None]:
# Standardize the features, then fit an L1-penalized linear model.
p = make_pipeline(StandardScaler(), Lasso())

# Search 100 log-spaced penalties between 1e-4 and 10, scoring each with
# negated RMSE under a reproducible, shuffled 10-fold split.
gs = GridSearchCV(
    estimator=p,
    param_grid={"lasso__alpha": np.logspace(-4, 1, num=100)},
    scoring='neg_root_mean_squared_error',
    cv=KFold(n_splits=10, shuffle=True, random_state=12345),
)
# `fit` returns the search object itself, so `gs` ends up as the fitted search.
gs = gs.fit(X, y)

In [None]:
# Hyperparameter setting selected by the grid search (the winning `lasso__alpha`).
gs.best_params_

In [None]:
# Position of the selected candidate within the `cv_results_` arrays.
gs.best_index_

In [None]:
# Mean cross-validated score of the selected candidate. The scorer is
# negated RMSE, so negate this value to read it as an RMSE.
gs.best_score_

In [None]:
# Pipeline carrying the selected alpha (refit on all of X, y under
# GridSearchCV's default `refit=True`).
gs.best_estimator_

## Uncertainty

In [None]:
# Pull the per-candidate cross-validation summaries out of the fitted search.
n_folds = gs.cv.get_n_splits()
alpha = np.array(gs.cv_results_["param_lasso__alpha"], dtype="float64")
# The scorer is negated RMSE (higher is better); flip the sign to get RMSE back.
score = np.negative(gs.cv_results_["mean_test_score"])
# Spread of the per-fold scores; a standard deviation is unaffected by the sign flip.
score_std = gs.cv_results_["std_test_score"]

In [None]:
# Penalty values tried by the grid search, as a float64 array.
alpha

In [None]:
# Standard deviation of the test score across folds, one entry per alpha.
score_std

In [None]:
# Mean CV RMSE across the penalty grid with an approximate 95% band:
# mean +/- 1.96 standard errors, where standard error = fold std / sqrt(n_folds).
ci_half_width = 1.96 * score_std / np.sqrt(n_folds)

ax = plt.figure(layout="constrained").add_subplot()
sns.lineplot(x=alpha, y=score, ax=ax)
ax.fill_between(alpha, score + ci_half_width, score - ci_half_width, alpha=0.2)

ax.set_xscale("log")
# NOTE(review): the searched grid runs up to alpha = 10, so this range hides
# its last decade -- confirm that the zoom is intended.
ax.set_xlim(1e-5, 1)
# Optional y-axis zoom:
# ax.set_ylim(54.4, 54.6)

plt.show()

## Traceplot

In [None]:
# NOTE: this rebinds the notebook-level `alpha` that the Uncertainty cells
# plot against; re-run the cell that extracts `alpha`/`score` from
# `gs.cv_results_` before re-drawing the CV curve.
alpha = np.logspace(-4, 2, num=100)

# Refit the pipeline once per penalty and record the fitted Lasso coefficients.
betas = []
for a in alpha:
    p.set_params(lasso__alpha=a).fit(X, y)
    betas.append(p["lasso"].coef_)

# One row per alpha and one column per feature (names are taken from the
# pipeline steps ahead of the Lasso), plus the penalty itself.
res = pd.DataFrame(betas, columns=p[:-1].get_feature_names_out())
res["alpha"] = alpha

res

In [None]:
# Reshape to long form: one row per (alpha, feature) pair.
coef_long = res.melt(id_vars="alpha", var_name="feature", value_name="coef values")

# Coefficient paths: one line per feature against the penalty on a log axis.
g = sns.relplot(
    data=coef_long,
    x="alpha",
    y="coef values",
    hue="feature",
    kind="line",
    aspect=2,
)
g.set(xscale="log")

# Mark the penalty selected by the cross-validated grid search.
g.ax.axvline(x=gs.best_params_["lasso__alpha"], color="k", linestyle="--")
plt.show()