In [1]:
from sklearn.datasets import make_regression, make_friedman2, fetch_california_housing
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
import sklearn
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# Render through the PGF backend so figures can be written as .pgf files
# for inclusion in a LaTeX document.
matplotlib.use("pgf")

# Global rc overrides; each assignment goes through rcParams validation.
matplotlib.rcParams["pgf.texsystem"] = "pdflatex"  # LaTeX engine used by the PGF backend
matplotlib.rcParams["font.family"] = "serif"
matplotlib.rcParams["text.usetex"] = True          # typeset all figure text with LaTeX
matplotlib.rcParams["pgf.rcfonts"] = False         # do not configure fonts from rc settings


In [2]:
# Synthetic regression problem: 3000 samples, 800 features, of which only
# 4 are informative; seeded so the data is reproducible.
X, y = make_regression(
    n_samples=3000,
    n_features=800,
    n_informative=4,
    noise=1,
    random_state=42,
)

In [3]:
# Candidate regularisation strengths: 30 evenly (linearly) spaced values
# from 1e-4 to 0.4, both endpoints included.
# NOTE(review): the grid is linear while the plot below uses a log x-axis;
# consider a geometric grid if denser coverage of small values is wanted.
lambdas = np.linspace(start=0.0001, stop=0.4, num=30)

In [4]:
# 10-fold cross-validated grid search over the Lasso penalty `alpha`.
# The scorer is the *negated* MSE, so larger scores mean smaller error.
gs = GridSearchCV(
    estimator=Lasso(),
    param_grid={"alpha": lambdas},
    cv=10,
    scoring='neg_mean_squared_error',
)

In [5]:
# Run the cross-validated grid search over every candidate alpha.
gs.fit(X, y)

# The scorer reports negated MSE, so flip the sign to recover the mean MSE
# per candidate alpha.
scores = -gs.cv_results_['mean_test_score']

# Spread of the per-fold scores. A standard deviation is already
# non-negative, so unlike the mean it must NOT be negated.
# NOTE(review): this is the fold-to-fold standard deviation, not divided by
# sqrt(number of folds) -- confirm whether a true standard error is intended.
std_error = gs.cv_results_['std_test_score']

In [6]:
# Plot the mean cross-validated MSE against the Lasso penalty and export
# the figure as a PGF file.
fig, ax = plt.subplots()

# Mean CV error (log-scaled x-axis) with a shaded band of +/- `std_error`.
ax.semilogx(lambdas, scores, color='blue')
ax.fill_between(lambdas, scores - std_error, scores + std_error, alpha=0.1)

# Dashed vertical line at the penalty selected by the grid search.
ax.axvline(gs.best_params_['alpha'], color='r', linestyle='--')

# Raw string so the LaTeX backslash is not parsed as a (invalid) Python
# escape sequence. Labels describe what is actually drawn: lambda on a
# log-scaled axis, and MSE on a linear axis.
ax.set_xlabel(r"$\lambda$")
ax.set_ylabel("MSE")
ax.set_xlim([lambdas[0], lambdas[-1]])

# Shrink factors applied to the base figure size (in inches).
# NOTE(review): 5.73 x 3.5 is presumably the target LaTeX text width and a
# chosen height -- confirm against the document.
ws = 1.45
hs = 1.25
fig.set_size_inches(w=5.73/ws, h=3.5/hs)

fig.tight_layout()
fig.savefig('lasso_cv.pgf', bbox_inches='tight', pad_inches=0)