# Cross Validation

In [26]:
import numpy as np
from sklearn.model_selection import KFold


In [27]:
data = np.linspace(1,100,10)

In [28]:
data

array([  1.,  12.,  23.,  34.,  45.,  56.,  67.,  78.,  89., 100.])

In [29]:
kfold = KFold(10, True, 1)



In [30]:
i=1
for train, test in kfold.split(data):
    print("Set - ",i)
    print("Train : ", data[train])
    print("Test :",data[test])
    i+=1

Set -  1
Train :  [  1.  12.  34.  45.  56.  67.  78.  89. 100.]
Test : [23.]
Set -  2
Train :  [ 1. 12. 23. 34. 45. 56. 67. 78. 89.]
Test : [100.]
Set -  3
Train :  [  1.  12.  23.  34.  45.  56.  78.  89. 100.]
Test : [67.]
Set -  4
Train :  [  1.  12.  23.  34.  56.  67.  78.  89. 100.]
Test : [45.]
Set -  5
Train :  [ 12.  23.  34.  45.  56.  67.  78.  89. 100.]
Test : [1.]
Set -  6
Train :  [  1.  12.  23.  45.  56.  67.  78.  89. 100.]
Test : [34.]
Set -  7
Train :  [  1.  23.  34.  45.  56.  67.  78.  89. 100.]
Test : [12.]
Set -  8
Train :  [  1.  12.  23.  34.  45.  56.  67.  89. 100.]
Test : [78.]
Set -  9
Train :  [  1.  12.  23.  34.  45.  56.  67.  78. 100.]
Test : [89.]
Set -  10
Train :  [  1.  12.  23.  34.  45.  67.  78.  89. 100.]
Test : [56.]


# GRID Search

In [31]:
from pandas import read_csv
from sklearn.linear_model import Ridge
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV


In [32]:
# load dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/auto-insurance.csv'
dataframe = read_csv(url, header=None)


In [33]:
dataframe.head()

Unnamed: 0,0,1
0,108,392.5
1,19,46.2
2,13,15.7
3,124,422.2
4,40,119.4


In [34]:
# split into input and output elements
data = dataframe.values
X, y = data[:, :-1], data[:, -1]


In [35]:
# define model
model = Ridge()


In [36]:
# define evaluation
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)


In [37]:
# define search space
space = dict()
space['solver'] = ['svd', 'cholesky', 'lsqr', 'sag']
space['alpha'] = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]
space['fit_intercept'] = [True, False]
space['normalize'] = [True, False]


In [38]:
# define search
search = GridSearchCV(model, space, scoring='r2', n_jobs=-1, cv=cv)


In [39]:
# execute search
result = search.fit(X, y)


In [40]:
# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

Best Score: 0.6080635521495938
Best Hyperparameters: {'alpha': 1, 'fit_intercept': True, 'normalize': False, 'solver': 'sag'}


# Randomized Search 

In [41]:
from scipy.stats import loguniform
from pandas import read_csv
from sklearn.linear_model import Ridge
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import RandomizedSearchCV


In [42]:
# load dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/auto-insurance.csv'
dataframe = read_csv(url, header=None)


In [43]:
dataframe.head()

Unnamed: 0,0,1
0,108,392.5
1,19,46.2
2,13,15.7
3,124,422.2
4,40,119.4


In [44]:
# split into input and output elements
data = dataframe.values
X, y = data[:, :-1], data[:, -1]


In [45]:
# define model
model = Ridge()


In [46]:
# define evaluation
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)


In [None]:
# define search space
space = dict()
space['solver'] = ['svd', 'cholesky', 'lsqr', 'sag']
space['alpha'] = loguniform(1e-5, 100)
space['fit_intercept'] = [True, False]
space['normalize'] = [True, False]


In [47]:
# define search
search = RandomizedSearchCV(model, space, n_iter=500, scoring='r2', n_jobs=-1, cv=cv, random_state=1)


In [48]:
# execute search
result = search.fit(X, y)




In [49]:
# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

Best Score: 0.6084341670696581
Best Hyperparameters: {'solver': 'sag', 'normalize': False, 'fit_intercept': True, 'alpha': 1e-05}
