In [70]:
import pandas as pd
import numpy as np
import sklearn
import os
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier
from joblib import Memory
from sklearn.preprocessing import StandardScaler
from matplotlib.pyplot import scatter
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

In [71]:
# Load the scikit-learn diabetes dataset and keep the returned object around as
# `data`; `x` is its feature matrix and `y` its regression target.
data = load_diabetes()
x = data.data
y = data.target

In [72]:
# Hold out a test split (default proportions). The split is seeded so that a
# Restart Kernel -> Run All reproduces the same train/test rows; without a
# random_state every run shuffles differently.
X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=0)

In [73]:
# Standardise the features. The scaler is fitted on the training split only and
# the same fitted transform is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

In [74]:
# Demonstrate a shuffled, seeded 3-fold split of the full dataset.
# Reference: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html
kf = KFold(n_splits=3, shuffle=True, random_state=1)
for train_index, test_index in kf.split(x):
    print("TRAIN: \n", train_index, "\n", "TEST: \n", test_index)
    # Fold-local names: reusing X_train / X_test / y_train / y_test here would
    # overwrite the hold-out split made earlier and leave X_train_std /
    # X_test_std out of sync with those variables.
    X_fold_train, X_fold_test = x[train_index], x[test_index]
    y_fold_train, y_fold_test = y[train_index], y[test_index]

TRAIN: 
 [  0   1   2   3   7   8   9  10  12  14  15  16  19  20  21  22  24  25
  26  28  30  32  33  34  35  36  37  38  42  43  44  45  48  49  50  51
  52  53  54  55  56  57  59  60  63  64  68  70  71  72  74  75  76  77
  79  83  84  86  87  88  91  94  96  97  99 100 103 104 105 108 109 110
 111 112 113 114 115 116 118 120 121 123 124 125 126 129 130 131 133 134
 135 136 137 138 140 141 142 143 144 145 147 148 149 150 151 152 153 154
 155 156 157 158 160 161 163 166 167 169 170 172 175 176 177 178 181 182
 183 184 187 188 190 193 194 195 196 198 199 200 201 202 203 204 205 209
 210 211 212 215 216 217 219 220 222 223 225 226 227 229 231 234 235 236
 237 238 239 240 241 243 244 245 247 248 250 252 253 254 255 259 261 262
 263 264 265 266 268 269 270 272 274 276 278 279 280 281 282 287 288 291
 295 296 297 298 300 301 302 303 305 306 308 309 312 313 316 317 319 321
 322 323 325 329 330 333 334 335 336 337 341 342 343 344 345 346 348 349
 350 352 354 355 356 357 358 360 361 362 3

In [75]:
def score_by_fold():
    """Fit LassoCV on each of 10 KFold splits of the notebook-level `x`, `y`.

    For every fold this prints three values on one line: the fold number
    (starting at 0), the alpha selected by LassoCV on the fold's training rows,
    and the value of the model's `score` method on the fold's held-out rows.

    Reads the globals `x` and `y`; returns None.
    """
    folds = KFold(10)

    # Candidate regularisation strengths: 30 points, log-spaced from 1e-4 to 10**-0.5.
    alpha_grid = np.logspace(-4, -0.5, 30)
    model = LassoCV(alphas=alpha_grid, random_state=0, max_iter=10000)

    fold_no = 0
    for train_idx, test_idx in folds.split(x, y):
        # The same estimator object is refitted on each fold's training rows.
        model.fit(x[train_idx], y[train_idx])
        chosen_alpha = model.alpha_
        held_out_score = model.score(x[test_idx], y[test_idx])
        print(fold_no, chosen_alpha, held_out_score)
        fold_no += 1

In [76]:
# Time one run of score_by_fold() with the IPython %time line magic.
# Reference: https://jakevdp.github.io/PythonDataScienceHandbook/01.07-timing-and-profiling.html
%time score_by_fold()

0 0.07880462815669913 0.5230435282533477
1 0.05968456995122311 0.24481536685622396
2 0.0001 0.35382092959998723
3 0.04520353656360245 0.6050679857248777
4 0.05968456995122311 0.27436767129003514
5 0.0037065129109221566 0.6218191590431197
6 0.0001 0.418309774275902
7 0.03423597957607583 0.43149099777307875
8 0.04520353656360245 0.42585403749603024
9 0.008531678524172814 0.6827010716027995
Wall time: 367 ms


In [77]:
#https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html
def my_cross_val():
    """Cross-validate a default Lasso on the notebook-level `x`, `y`.

    Runs `cross_val_score` once for each of cv = 3, 5 and 10. For every setting
    it prints the array of per-fold scores, followed by a "Max: " line with the
    largest of those scores and a blank line.

    Reads the globals `x` and `y`; returns None.
    """
    fold_counts = (3, 5, 10)  # cv = 3 ; cv = 5; cv = 10
    lasso = Lasso()

    for n in fold_counts:
        # Score once and reuse the result: calling cross_val_score a second
        # time just to take the max refits every fold for identical numbers.
        scores = cross_val_score(lasso, x, y, cv=n)
        print(scores)
        print("Max: ", max(scores), "\n")

In [78]:
# Time one run of my_cross_val() with the IPython %time line magic.
# Reference: https://jakevdp.github.io/PythonDataScienceHandbook/01.07-timing-and-profiling.html
%time my_cross_val()

[0.34542985 0.34712314 0.36884726]
Max:  0.36884725706191346 

[0.28349047 0.35157959 0.3533813  0.33481474 0.36453281]
Max:  0.36453281428665385 

[0.37566102 0.10727689 0.30374655 0.38050755 0.22515853 0.41506179
 0.30380439 0.34876534 0.32262628 0.42874276]
Max:  0.4287427630907267 

Wall time: 59 ms


In [79]:
# Print a horizontal rule to visually separate the experiments above from the
# results summary that follows.
print("---------------------------------------------------------------------------")

---------------------------------------------------------------------------


In [80]:
# Results summary, one line per model / setting.
# NOTE(review): every figure below is a hard-coded string literal, not a value
# computed in this cell, so it will not update if the data split or the models
# change. The three "Cross val score" figures equal the "Max: " values printed
# by my_cross_val(), i.e. the best single fold rather than an average.
print("\n".join((
    "Dummy regression for R^2 0.0",
    "Dummy regression for RMSE 74.83065148804492",
    "Linear regression for R^2 0.5006299763691243",
    "Linear regression for RMSE 52.576105773603004",
    "LassoCV = 0.40050426939292",
    "RidgeCV = 0.40457524692110",
    "Cross val score cv equal 3 = 0.36884725706191346",
    "Cross val score cv equal 5 = 0.36453281428665385",
    "Cross val score cv equal 10 = 0.4287427630907267",
)))

Dummy regression for R^2 0.0
Dummy regression for RMSE 74.83065148804492
Linear regression for R^2 0.5006299763691243
Linear regression for RMSE 52.576105773603004
LassoCV = 0.40050426939292
RidgeCV = 0.40457524692110
Cross val score cv equal 3 = 0.36884725706191346
Cross val score cv equal 5 = 0.36453281428665385
Cross val score cv equal 10 = 0.4287427630907267
