# Parameter Tuning

In [1]:
import pandas
import numpy
from sklearn import cross_validation
from scipy.stats import uniform
from sklearn.linear_model import Ridge
from sklearn.grid_search import GridSearchCV
from sklearn.grid_search import RandomizedSearchCV



In [2]:
# Every example in this notebook works on the Pima Indian diabetes dataset.
# The CSV is read with the column names listed below, then exposed as a
# plain array for the later cells.
url = "pima-indians-diabetes.csv"
names = 'preg plas pres skin test mass pedi age class'.split()
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values

In [3]:
# Column-wise split of the array: columns 0-7 become the features (X),
# column 8 becomes the label (y).
X, y = array[:, :8], array[:, 8]

## Grid Search Parameter Tuning
Builds and evaluates a model for each combination of algorithm parameters specified in a grid. In this example, a one-dimensional Grid Search is performed to tune the alpha parameter value for the Ridge Regression algorithm.

In [4]:
# Candidate alpha values to try, one model fit per value.
alphas = numpy.array([1, 1e-1, 1e-2, 1e-3, 1e-4, 0])
param_grid = {'alpha': alphas}

# Run the one-dimensional grid search over alpha on a Ridge model.
model = Ridge()
grid = GridSearchCV(estimator=model, param_grid=param_grid).fit(X, y)

# Best score found by the search, followed by the alpha of the best estimator.
print(grid.best_score_)
print(grid.best_estimator_.alpha)

0.27961755931297233
1.0


In this case, the Grid Search found that an alpha value of 1.0 returned the best result.

## Random Search Parameter Tuning
Samples algorithm parameters from a random distribution for a fixed number of iterations. A model is constructed and evaluated for each combination of parameters chosen. In this example, different random values between 0 and 1 are used to tune the alpha parameter value for the Ridge Regression algorithm.

In [5]:
# Search settings: alpha is sampled from scipy's uniform distribution
# (default arguments), with a fixed seed and a fixed number of draws.
param_grid = dict(alpha=uniform())
seed = 8
iterations = 100

# Randomized search over alpha on a fresh Ridge model.
model = Ridge()
rsearch = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=iterations,
    random_state=seed
)
rsearch.fit(X, y)

# Best score found by the search, followed by the alpha of the best estimator.
print(rsearch.best_score_)
print(rsearch.best_estimator_.alpha)

0.2796174544841304
0.9946444284805842


An optimal value close to 1.0 was returned.