# Speeding Up Model Selection with Parallelization

You can speed up model selection by using all the cores on your machine: pass `n_jobs=-1` to the search.

In [31]:
# Load libraries

import numpy as np
from sklearn import linear_model, datasets
from sklearn.model_selection import GridSearchCV

In [32]:
# Fetch the Iris dataset that ships with scikit-learn

iris = datasets.load_iris()

# Pull out the feature matrix and the target vector in one step

features, target = iris.data, iris.target

In [33]:
# Base learning algorithm for the search: logistic regression fitted with
# the liblinear solver and an iteration cap of 1000.

logistic = linear_model.LogisticRegression(
    solver="liblinear",
    max_iter=1000,
)
logistic

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

In [34]:
# Candidate values for the regularization penalty hyperparameter:
# the search will try both L1 and L2.

penalty = [
    "l1",
    "l2",
]

In [35]:
# Candidate values for the regularization strength C:
# 1000 points spaced evenly on a log scale from 10**0 to 10**4.

C = np.logspace(start=0, stop=4, num=1000)

In [36]:
# Search space handed to the grid search: one entry per hyperparameter
# (C and penalty), each mapping to its list of candidate values.

hyperparameters = {"C": C, "penalty": penalty}

### Selecting best model

For each combination of C and regularization penalty values, we train a model and evaluate it using k-fold cross-validation.

1000 possible values of C * 2 penalty values = 2000 candidate models. Each candidate is fitted once per fold (5 folds), for a total of 10000 model fits, from which the best candidate is selected.

In [37]:
# Set up the exhaustive grid search over the candidate hyperparameters.
#
#   cv=5       -> evaluate every candidate with 5-fold cross-validation
#   n_jobs=-1  -> run the fits in parallel on all available cores
#                 (n_jobs=1 would run everything on a single core)
#   verbose=1  -> print progress messages while the search runs

gridsearch = GridSearchCV(
    estimator=logistic,
    param_grid=hyperparameters,
    cv=5,
    n_jobs=-1,
    verbose=1,
)


In [38]:
# Run the search on the full dataset. fit() hands back the fitted
# GridSearchCV object itself, which is what gets displayed below.

best_model = gridsearch.fit(X=features, y=target)
best_model

Fitting 5 folds for each of 2000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 4984 tasks      | elapsed:   11.1s
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:   25.6s finished


GridSearchCV(cv=5, error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=1000, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='liblinear',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': array([1.00000000e+00,...
       8.39312950e+03, 8.47086827e+03, 8.54932707e+03, 8.62851257e+03,
       8.70843150e+03, 8.78909065e+03, 8.87049689e+03, 8.95265713e+03,
       9.03557835e+03, 9.11926760e+03, 9.20373200e+03, 9.28897872e+03,
       9.37501502e+03, 9.46184819e+03, 9.54948564e+03, 9.63793480e+03,
       9.

# Speeding Up Model Selection Using Algorithm Specific Methods


In [41]:
# Re-display the LogisticRegression estimator created earlier
# (liblinear solver, max_iter=1000) for comparison with the CV variant below.
logistic

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

### Logistic Regression Cross Validated 

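`LogisticRegressionCV` can use cross-validation to identify and use the best value of the hyperparameter C. When `Cs` is an integer (the default is 10), that many candidate values are drawn on a logarithmic scale from a range of reasonable values (0.0001 to 10,000).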

In [44]:
# Build a cross-validated logistic regression with all default settings
# and display it so the defaults can be inspected in the output.
# NOTE(review): the estimator is only constructed here; no fit() call is
# visible in this part of the notebook.
logisticCV = linear_model.LogisticRegressionCV()
logisticCV

LogisticRegressionCV(Cs=10, class_weight=None, cv=None, dual=False,
                     fit_intercept=True, intercept_scaling=1.0, l1_ratios=None,
                     max_iter=100, multi_class='auto', n_jobs=None,
                     penalty='l2', random_state=None, refit=True, scoring=None,
                     solver='lbfgs', tol=0.0001, verbose=0)