# Cross-Validation

這邊要提一下，以下介紹的 `*CV` 估計器，效果跟用 GridSearchCV 搜尋 `alpha` 一樣，但是這邊介紹的是直接使用內建交叉驗證的方法，
有更客製化的需求請改用 GridSearchCV。 <br>

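我們先看 Ridge 的 Cross-Validation（`RidgeCV` 在 `cv=None` 時預設使用高效率的 leave-one-out Cross-Validation；以下範例指定 `cv=5`，也就是 5-fold Cross-Validation），有關更詳細的 Cross-Validation 說明請參考 Cross-Validation 的專門介紹。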

In [2]:
# RidgeCV: ridge regression whose alpha is selected by cross-validation
from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the diabetes regression data and hold out 20% as a test set (fixed seed).
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=87
)

# fit() compares the candidate alphas with 5-fold cross-validation and
# returns the fitted estimator itself, so construction and fitting can be chained.
regression = RidgeCV(alphas=[0.1, 1.0, 2.0, 3.0, 10.0], cv=5).fit(X_train, y_train)
y_pred = regression.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)

# Report the learned weights, the intercept, and the held-out error.
print('w 係數：', regression.coef_)
print('w_0 截距：', regression.intercept_)
print("Mean squared error: %.2f" % test_mse)



w 係數： [  10.45038725 -224.93565861  490.77513259  244.23047743  -55.07711743
  -63.80868593 -230.26779614   78.85533454  459.55154482   51.0316344 ]
w_0 截距： 153.28049940624166
Mean squared error: 2835.40


In [3]:
# RidgeClassifierCV: ridge classifier whose alpha is selected by cross-validation
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeClassifierCV
# Not used in this cell; the import is kept in case other cells rely on the name.
from sklearn.preprocessing import LabelBinarizer


# The labels are passed to the classifier as-is (no manual binarization).
X, y = load_iris(return_X_y=True)

# Hold out 20% of the samples as a test set (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=87)

# The candidate alphas are compared with 5-fold cross-validation during fit().
classifier = RidgeClassifierCV(alphas=[0.1, 1.0, 2.0, 3.0, 10.0], cv=5)
classifier.fit(X_train, y_train)

# score() returns the mean accuracy on the given data and labels.
print('Training accuracy: ', classifier.score(X_train, y_train))
print('Testing accuracy: ', classifier.score(X_test, y_test))


Training accuracy:  0.8666666666666667
Testing accuracy:  0.8666666666666667


In [1]:
# LassoCV: lasso regression whose alpha is selected by cross-validation
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the diabetes regression data and hold out 20% as a test set (fixed seed).
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=87
)

# fit() compares the candidate alphas with 5-fold cross-validation and
# returns the fitted estimator itself, so construction and fitting can be chained.
regression = LassoCV(alphas=[0.1, 1.0, 2.0, 3.0, 10.0], cv=5).fit(X_train, y_train)
y_pred = regression.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)

# Report the learned weights, the intercept, and the held-out error.
print('w 係數：', regression.coef_)
print('w_0 截距：', regression.intercept_)
print("Mean squared error: %.2f" % test_mse)


w 係數： [   0.         -181.44098627  537.1813375   209.16300741  -38.73795149
   -0.         -229.73372399    0.          506.55762896    0.        ]
w_0 截距： 153.28164489672588
Mean squared error: 2928.64


In [6]:
# MultiTaskLassoCV: multi-output lasso whose alpha is selected by cross-validation
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import MultiTaskLassoCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Build a three-column target: the real diabetes target plus two shuffled
# copies of it (seeds 1 and 2), stacked so that each column is one task.
X, y1 = load_diabetes(return_X_y=True)
y2, y3 = (shuffle(y1, random_state=seed) for seed in (1, 2))
y = np.vstack((y1, y2, y3)).T

# Hold out 20% of the samples as a test set (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=87
)

# fit() compares the candidate alphas with 5-fold cross-validation and
# returns the fitted estimator itself, so construction and fitting can be chained.
regression = MultiTaskLassoCV(alphas=[0.1, 1.0, 2.0, 3.0, 10.0], cv=5).fit(X_train, y_train)
y_pred = regression.predict(X_test)

# With a 2-D target, mean_squared_error averages uniformly over the outputs by default.
test_mse = mean_squared_error(y_test, y_pred)

# Report the per-task weights, the per-task intercepts, and the held-out error.
print('w 係數：', regression.coef_)
print('w_0 截距：', regression.intercept_)
print("Mean squared error: %.2f" % test_mse)



w 係數： [[   0.         -192.42185781  533.66870888  216.69004044  -50.23983911
   -19.53850471 -236.71986476    0.          512.22495343    8.57237899]
 [   0.          131.5338771   -98.48692344  -12.23382425   75.70668859
    49.60017772   41.85166385   -0.          -14.99760369    7.33958332]
 [  -0.           31.71609646  -51.16202318   89.61543053  -44.10199773
    -1.74762899  -97.42160267   -0.           44.52862199   -1.72804783]]
w_0 截距： [153.2758319  151.50961102 153.44141271]
Mean squared error: 4660.86


In [7]:
# MultiTaskElasticNetCV: multi-output elastic net whose alpha is selected by cross-validation
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import MultiTaskElasticNetCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Build a three-column target: the real diabetes target plus two shuffled
# copies of it (seeds 1 and 2), stacked so that each column is one task.
X, y1 = load_diabetes(return_X_y=True)
y2, y3 = (shuffle(y1, random_state=seed) for seed in (1, 2))
y = np.vstack((y1, y2, y3)).T

# Hold out 20% of the samples as a test set (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=87
)

# fit() compares the candidate alphas with 5-fold cross-validation and
# returns the fitted estimator itself, so construction and fitting can be chained.
regression = MultiTaskElasticNetCV(alphas=[0.1, 1.0, 2.0, 3.0, 10.0], cv=5).fit(X_train, y_train)
y_pred = regression.predict(X_test)

# With a 2-D target, mean_squared_error averages uniformly over the outputs by default.
test_mse = mean_squared_error(y_test, y_pred)

# Report the per-task weights, the per-task intercepts, and the held-out error.
print('w 係數：', regression.coef_)
print('w_0 截距：', regression.intercept_)
print("Mean squared error: %.2f" % test_mse)


w 係數： [[ 11.69709521  -0.20325121  37.9332106   25.23227265  10.05742679
    7.63492093 -25.8588858   25.41068962  35.49092446  20.56736076]
 [  1.25371508   5.40274335  -4.16510184  -0.32451228   5.03241965
    5.43299369   1.64389384   0.89227352  -0.30968029   1.74425426]
 [ -0.61273316   3.5580441    1.10759718   4.65793449  -1.45600068
    0.0846052   -5.97781311   2.70267112   3.18730893   1.70292226]]
w_0 截距： [152.54825598 151.64012658 153.272735  ]
Mean squared error: 5383.04
