In [8]:
# exhaustive search

import numpy as np
from sklearn import linear_model, datasets
from sklearn.model_selection import GridSearchCV

iris = datasets.load_iris()
features, target = iris.data, iris.target

logistic = linear_model.LogisticRegression

penalty = ['l1', 'l2']

C = np.logspace(0, 4, 10)

hyperparameters = {'penalty': ['l1', 'l2'], 'C':np.logspace(0, 4, 10)}

gridsearch = GridSearchCV(logistic,
                         hyperparameters,
                         cv=5,
                         verbose=0)

# best_model = gridsearch.fit(features, target)

In [11]:
# randomized search

from scipy.stats import uniform
from sklearn import linear_model, datasets
from sklearn.model_selection import RandomizedSearchCV

penalty = ['l1', 'l2']
C = uniform(loc=0, scale=4)

params = dict(C=C, penalty=penalty)

randomizedsearch = RandomizedSearchCV(
    logistic, params, random_state=1, n_iter=100, cv=5,
    verbose=0, n_jobs=-1)

# best_model = randomizedsearch.fit(features, target)

In [13]:
# selecting from multiple algos

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

np.random.seed(0)

pipe = Pipeline([('classifier', RandomForestClassifier())])

search_space = [{'classifier': [LogisticRegression()],
                'classifier__penalty': ['l1', 'l2'],
                'classifier__C': np.logspace(0, 4, 10)},
               {'classifier': [RandomForestClassifier()],
               'classifier__n_estimators': [10, 100, 1000],
               'classifier__max_features': [1, 2, 3]}]

gridsearch = GridSearchCV(pipe, search_space, cv=5, verbose=0)

best_model = gridsearch.fit(features, target)

ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_i

In [14]:
best_model

GridSearchCV(cv=5, error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('classifier',
                                        RandomForestClassifier(bootstrap=True,
                                                               ccp_alpha=0.0,
                                                               class_weight=None,
                                                               criterion='gini',
                                                               max_depth=None,
                                                               max_features='auto',
                                                               max_leaf_nodes=None,
                                                               max_samples=None,
                                                               min_impurity_decrease=0.0,
                                                               min_impurity_split=None,
                                       

In [15]:
best_model.best_estimator_.get_params()['classifier']

LogisticRegression(C=7.742636826811269, class_weight=None, dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=100, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [16]:
best_model.predict(features)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [22]:
# selecting best model when preprocessing

import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

np.random.seed(0)

iris = datasets.load_iris()
features, target = iris.data, iris.target

preprocess = FeatureUnion([('std', StandardScaler()), ('pca', PCA())])

pipe = Pipeline([('preprocess', preprocess),
                ('classifier', LogisticRegression())])

search_space = [{'preprocess__pca__n_components': [1, 2, 3],
                'classifier__penalty': ['l1', 'l2'],
                'classifier__C': np.logspace(0, 4, 10)}]

clf = GridSearchCV(pipe, search_space, cv=5, verbose=0, n_jobs=-1)

best_model = clf.fit(features, target)

In [23]:
best_model

GridSearchCV(cv=5, error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('preprocess',
                                        FeatureUnion(n_jobs=None,
                                                     transformer_list=[('std',
                                                                        StandardScaler(copy=True,
                                                                                       with_mean=True,
                                                                                       with_std=True)),
                                                                       ('pca',
                                                                        PCA(copy=True,
                                                                            iterated_power='auto',
                                                                            n_components=None,
                                                       

In [24]:
best_model.best_estimator_.get_params()['preprocess__pca__n_components']

2

In [25]:
# speeding up with parallelization

from sklearn import linear_model

logistic = linear_model.LogisticRegression()

penalty = ['l1', 'l2']

C = np.logspace(0, 4, 1000)

hyperparameters = dict(C=C, penalty=penalty)

gridsearch = GridSearchCV(logistic, hyperparameters, cv=5, n_jobs=-1, verbose=1)

best_model = gridsearch.fit(features, target)

Fitting 5 folds for each of 2000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-1)]: Done 1232 tasks      | elapsed:   14.7s
[Parallel(n_jobs=-1)]: Done 3012 tasks      | elapsed:   36.1s
[Parallel(n_jobs=-1)]: Done 5168 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 6968 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 9168 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  2.0min finished
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [26]:
best_model

GridSearchCV(cv=5, error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': array([1.00000000e+00, 1.009...
       8.39312950e+03, 8.47086827e+03, 8.54932707e+03, 8.62851257e+03,
       8.70843150e+03, 8.78909065e+03, 8.87049689e+03, 8.95265713e+03,
       9.03557835e+03, 9.11926760e+03, 9.20373200e+03, 9.28897872e+03,
       9.37501502e+03, 9.46184819e+03, 9.54948564e+03, 9.63793480e+03,
       9

In [27]:
# speeding up model selection

features, target = iris.data, iris.target

logit = linear_model.LogisticRegressionCV(Cs=100)

logit.fit(features, target)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

LogisticRegressionCV(Cs=100, class_weight=None, cv=None, dual=False,
                     fit_intercept=True, intercept_scaling=1.0, l1_ratios=None,
                     max_iter=100, multi_class='auto', n_jobs=None,
                     penalty='l2', random_state=None, refit=True, scoring=None,
                     solver='lbfgs', tol=0.0001, verbose=0)

In [29]:
# evaluating performance after selection

from sklearn.model_selection import cross_val_score

logistic = linear_model.LogisticRegression()

C = np.logspace(0, 4, 20)

hyperparameters = dict(C=C)

gridsearch = GridSearchCV(logistic, hyperparameters, cv=5, n_jobs=-1,verbose=0)
                        
cross_val_score(gridsearch, features, target).mean()

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

0.9800000000000001