In [None]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegressionCV
X, y = load_iris(return_X_y=True)
clf = LogisticRegressionCV(cv=5, random_state=0,
                           multi_class='multinomial').fit(X, y)
clf.predict(X[:2, :])

clf.predict_proba(X[:2, :]).shape

clf.score(X, y) 

In [None]:
X,y

## Grid Search CV 

In [2]:
# Grid Search for Algorithm Tuning
import numpy as np
from sklearn import datasets
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
# load the diabetes datasets
dataset = datasets.load_diabetes()
# prepare a range of alpha values to test
alphas = np.array([1,0.1,0.01,0.001,0.0001,0,0.005,0.006,0.045])
# create and fit a ridge regression model, testing each alpha
model = Ridge()
grid = GridSearchCV(estimator=model, param_grid=dict(alpha=alphas))
grid.fit(dataset.data, dataset.target)
print(grid)
# summarize the results of the grid search
print(grid.best_score_)

print(grid.best_estimator_.alpha)

GridSearchCV(estimator=Ridge(),
             param_grid={'alpha': array([1.0e+00, 1.0e-01, 1.0e-02, 1.0e-03, 1.0e-04, 0.0e+00, 5.0e-03,
       6.0e-03, 4.5e-02])})
0.4823231384163484
0.0001


### GridSearch Example

In [31]:
from sklearn import svm
from sklearn import datasets
from sklearn.model_selection import GridSearchCV
iris = datasets.load_iris()
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
svc = svm.SVC()
clf = GridSearchCV(svc, parameters)
clf.fit(iris.data, iris.target)
print(clf)
# summarize the results of the grid search
print('Best Score:',clf.best_score_)

print('Best Estimator:',clf.best_estimator_)

GridSearchCV(estimator=SVC(),
             param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')})
Best Score: 0.9800000000000001
Best Estimator: SVC(C=1, kernel='linear')


In [33]:
from sklearn import svm
from sklearn import datasets
from sklearn.model_selection import GridSearchCV
iris = datasets.load_iris()
parameters = {'kernel':('linear', 'poly', 'rbf', 'sigmoid'), 'C':[0.1,1, 10,100,1000],'gamma':[1,0.1,0.001,0.0001,0.00001,0.000001]}
svc = svm.SVC()
clf = GridSearchCV(svc, parameters)
clf.fit(iris.data, iris.target)
print(clf)
# summarize the results of the grid search
print('Best Score:',clf.best_score_)

print('Best Estimator:',clf.best_estimator_)

GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.001, 0.0001, 1e-05, 1e-06],
                         'kernel': ('linear', 'poly', 'rbf', 'sigmoid')})
Best Score: 0.9866666666666667
Best Estimator: SVC(C=1000, gamma=0.001, kernel='sigmoid')


## Randomized Search CV

In [4]:
# Randomized Search for Algorithm Tuning
import numpy as np
from scipy.stats import uniform 
from sklearn import datasets
from sklearn.linear_model import Ridge
from sklearn.model_selection import RandomizedSearchCV
# load the diabetes datasets
dataset = datasets.load_diabetes()
# prepare a uniform distribution to sample for the alpha parameter
param_grid = {'alpha': uniform()}
# create and fit a ridge regression model, testing random alpha values
model = Ridge()
rsearch = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=10)
rsearch.fit(dataset.data, dataset.target)
print(rsearch)
# summarize the results of the random parameter search
print(rsearch.best_score_)
print(rsearch.best_estimator_.alpha)

RandomizedSearchCV(estimator=Ridge(),
                   param_distributions={'alpha': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000025FC4023460>})
0.48068193427300565
0.06267326831206854


In [3]:
# Randomized Search for Algorithm Tuning
import numpy as np
from scipy.stats import uniform as sp_rand
from sklearn import datasets
from sklearn.linear_model import Ridge
from sklearn.model_selection import RandomizedSearchCV
# load the diabetes datasets
dataset = datasets.load_diabetes()
# prepare a uniform distribution to sample for the alpha parameter
param_grid = {'alpha': sp_rand()}
# create and fit a ridge regression model, testing random alpha values
model = Ridge()
rsearch = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=10)
rsearch.fit(dataset.data, dataset.target)
print(rsearch)
# summarize the results of the random parameter search
print(rsearch.best_score_)
print(rsearch.best_estimator_.alpha)

RandomizedSearchCV(cv=None, error_score='raise',
          estimator=Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001),
          fit_params=None, iid=True, n_iter=10, n_jobs=1,
          param_distributions={'alpha': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000000D3A8FC1240>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score='warn', scoring=None, verbose=0)
0.4891095321395234
0.04545764086321924


### Randomized Search Example

In [1]:
from sklearn.datasets import load_iris
>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.model_selection import RandomizedSearchCV
>>> from scipy.stats import uniform
>>> iris = load_iris()
>>> logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200,
...                               random_state=0)
>>> distributions = dict(C=uniform(loc=0, scale=4),
...                      penalty=['l2', 'l1'])
>>> clf = RandomizedSearchCV(logistic, distributions, random_state=0,n_iter=15)
>>> clf.fit(iris.data, iris.target)
print(clf)
# summarize the results of the grid search
print('Best Score:',clf.best_score_)

print('Best Estimator:',clf.best_estimator_)
>>> print(clf.best_params_)

RandomizedSearchCV(estimator=LogisticRegression(max_iter=200, random_state=0,
                                                solver='saga', tol=0.01),
                   n_iter=15,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001BE0F36C160>,
                                        'penalty': ['l2', 'l1']},
                   random_state=0)
Best Score: 0.9800000000000001
Best Estimator: LogisticRegression(C=2.195254015709299, max_iter=200, penalty='l1',
                   random_state=0, solver='saga', tol=0.01)
{'C': 2.195254015709299, 'penalty': 'l1'}


# More Examples

### https://github.com/justmarkham/scikit-learn-videos/blob/master/08_grid_search.ipynb

### https://github.com/codebasics/py/blob/master/ML/15_gridsearch/15_grid_search.ipynb

### https://github.com/codebasics/py/blob/master/ML/15_gridsearch/Exercise/15_grid_search_cv_exercise.ipynb

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1000, n_features=4,
                           n_informative=2, n_redundant=0,
                           random_state=0, shuffle=False)
clf = RandomForestClassifier(n_estimators=100, max_depth=8,
                             random_state=0)
clf.fit(X, y)

print(clf.feature_importances_)

print(clf.predict([[0, 0, 0, 0]]))

[0.07929254 0.85576344 0.03174793 0.03319609]
[1]


In [8]:
make_classification

<function sklearn.datasets.samples_generator.make_classification(n_samples=100, n_features=20, n_informative=2, n_redundant=2, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)>