In [10]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

import warnings
import sklearn.exceptions
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)

In [11]:
df = pd.read_csv('employee.csv')
X = df.drop(['left'], axis=1)
y = df['left']
df

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,department,salary,left
0,0.38,0.53,2,157,3,0,0,sales,low,1
1,0.80,0.86,5,262,6,0,0,sales,medium,1
2,0.11,0.88,7,272,4,0,0,sales,medium,1
3,0.72,0.87,5,223,5,0,0,sales,low,1
4,0.37,0.52,2,159,3,0,0,sales,low,1
...,...,...,...,...,...,...,...,...,...,...
14994,0.40,0.57,2,151,3,0,0,support,low,1
14995,0.37,0.48,2,160,3,0,0,support,low,1
14996,0.37,0.53,2,143,3,0,0,support,low,1
14997,0.11,0.96,6,280,4,0,0,support,low,1


# PERCENT POSITIVE

Finding percentage of positive label in the dataset

In [12]:
percent_pos = ((df['left']== 1).sum() / len(df['left'])) * 100
percent_pos

23.80825388359224

In [13]:
# Transform X values into One Hot Encoding for categorical variables and Standardizing for numerical variables
# https://stackoverflow.com/questions/43798377/one-hot-encode-categorical-variables-and-scale-continuous-ones-simultaneouely

# Get categorical columns
cat = list(X.select_dtypes(['object']).columns)
# Get numerical columns
cont = list(X.select_dtypes(['int64']).columns)

# Scale numerical values
cont_transform = Pipeline(steps=[('scaler', StandardScaler())])
# Encode categorical values
cat_transform = Pipeline(steps=[('categories', OneHotEncoder(sparse=False, handle_unknown='ignore'))])
# Transform the dataset into the scaled and encoded version
preprocessor = ColumnTransformer(transformers=[('cont', cont_transform, cont),
                                               ('cat', cat_transform, cat)])
X = pd.DataFrame(preprocessor.fit_transform(X))
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,-1.462863,-0.882040,-0.341235,-0.411165,-0.147412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1,0.971113,1.220423,1.713436,-0.411165,-0.147412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,2.593763,1.420657,0.343655,-0.411165,-0.147412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
3,0.971113,0.439508,1.028546,-0.411165,-0.147412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
4,-1.462863,-0.841993,-0.341235,-0.411165,-0.147412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14994,-1.462863,-1.002181,-0.341235,-0.411165,-0.147412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
14995,-1.462863,-0.821970,-0.341235,-0.411165,-0.147412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
14996,-1.462863,-1.162368,-0.341235,-0.411165,-0.147412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
14997,1.782438,1.580845,0.343655,-0.411165,-0.147412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0


# LOGISTIC REGRESSION

In [1]:
def logisticRegression(X_train, X_test, y_train, y_test):
    train_metrics_log = pd.DataFrame(columns=['LG: Accuracy','LG: Precision','LG: AUC','LG: F1'])
    test_metrics_log = pd.DataFrame(columns=['LG: Accuracy','LG: Precision','LG: AUC','LG: F1'])
    
    pipe = Pipeline(steps=[('classifier', LogisticRegression())])

    # Setting Parameters to L1 and L2 regularized + unregularized model
    parameters = [{'classifier': [LogisticRegression(max_iter=5000, n_jobs=-1, class_weight='balanced')],
                   'classifier__solver': ['saga'],
                   'classifier__penalty': ['l1'],
                   'classifier__C': np.logspace(-8,4,13)},
                  {'classifier': [LogisticRegression(max_iter=5000, n_jobs=-1, class_weight='balanced')],
                   'classifier__solver': ['sag', 'saga'],
                   'classifier__penalty': ['none']},
                  {'classifier': [LogisticRegression(max_iter=5000, n_jobs=-1, class_weight='balanced')],
                   'classifier__solver': ['sag', 'saga'],
                   'classifier__penalty': ['l2'],
                   'classifier__C': np.logspace(-8,4,13)}]

    # Perform 5-fold cross-validation using grid search
    clf = GridSearchCV(pipe, parameters, cv=StratifiedKFold(n_splits=5), 
                       scoring=['accuracy', 'precision', 'roc_auc', 'f1'], refit=False, verbose=1)

    # Fitting training set on the cross validation
    hyperparams = clf.fit(X_train, y_train)
    # Storing best parameters for each metric model
    results = hyperparams.cv_results_['params']
    solution_log = pd.DataFrame(results)

    # ACCURACY MODEL
    solution_log['Accuracy'] = hyperparams.cv_results_['mean_test_accuracy']
    best_accuracy = results[np.argmin(hyperparams.cv_results_['rank_test_accuracy'])]
    # creating new model with optimal hyperparameters
    if 'classifier__C' in best_accuracy:
        accuracy_model = LogisticRegression(penalty = best_accuracy['classifier__penalty'],
                                            C = best_accuracy['classifier__C'],
                                            solver = best_accuracy['classifier__solver'],
                                            max_iter = 5000,
                                            n_jobs = -1,
                                            class_weight='balanced')
    else:
        accuracy_model = LogisticRegression(penalty = best_accuracy['classifier__penalty'],
                                            solver = best_accuracy['classifier__solver'],
                                            max_iter = 5000,
                                            n_jobs = -1,
                                            class_weight='balanced')
    # Training on the new model
    accuracy_model.fit(X_train, y_train)
    y_acc_train = accuracy_model.predict(X_train)
    acc_train_score = accuracy_score(y_train, y_acc_train)
    
    # Testing and scoring the model
    y_acc_test = accuracy_model.predict(X_test)
    acc_test_score = accuracy_score(y_test, y_acc_test)

    # PRECISION MODEL
    solution_log['Precision'] = hyperparams.cv_results_['mean_test_precision']
    best_precision = results[np.argmin(hyperparams.cv_results_['rank_test_precision'])]
    
    # creating new model with optimal hyperparameters
    if 'classifier__C' in best_precision:
        precision_model = LogisticRegression(penalty = best_precision['classifier__penalty'],
                                             C = best_precision['classifier__C'],
                                             solver = best_precision['classifier__solver'],
                                             max_iter = 5000,
                                             n_jobs = -1,
                                             class_weight='balanced')
    else:
        precision_model = LogisticRegression(penalty = best_precision['classifier__penalty'],
                                             solver = best_precision['classifier__solver'],
                                             max_iter = 5000,
                                             n_jobs = -1,
                                             class_weight='balanced')
    # Training on the new model
    precision_model.fit(X_train, y_train)
    y_prec_train = precision_model.predict(X_train)
    prec_train_score = precision_score(y_train, y_prec_train)

    # Testing and scoring the model
    y_prec_test = precision_model.predict(X_test)
    prec_test_score = precision_score(y_test, y_prec_test)

    # ROC AUC MODEL
    solution_log['ROC AUC'] = hyperparams.cv_results_['mean_test_roc_auc']
    best_roc_auc = results[np.argmin(hyperparams.cv_results_['rank_test_roc_auc'])]
    
    # creating new model with optimal hyperparameters
    if 'classifier__C' in best_roc_auc:
        roc_model = LogisticRegression(penalty = best_roc_auc['classifier__penalty'],
                                       C = best_roc_auc['classifier__C'],
                                       solver = best_roc_auc['classifier__solver'],
                                       max_iter = 5000,
                                       n_jobs = -1,
                                       class_weight='balanced')
    else:
        roc_model = LogisticRegression(penalty = best_roc_auc['classifier__penalty'],
                                       solver = best_roc_auc['classifier__solver'],
                                       max_iter = 5000,
                                       n_jobs = -1,
                                       class_weight='balanced')
    # Training on the new model
    roc_model.fit(X_train, y_train)
    y_roc_train = roc_model.predict(X_train)
    roc_train_score = roc_auc_score(y_train, y_roc_train)
    
    # Testing and scoring the model
    y_roc_test = roc_model.predict(X_test)
    roc_test_score = roc_auc_score(y_test, y_roc_test)

    # F1 MODEL
    solution_log['F1'] = hyperparams.cv_results_['mean_test_f1']
    best_f1 = results[np.argmin(hyperparams.cv_results_['rank_test_f1'])]
    
    
    # creating new model with optimal hyperparameters
    if 'classifier__C' in best_f1:
        f1_model = LogisticRegression(penalty = best_f1['classifier__penalty'],
                                      C = best_f1['classifier__C'],
                                      solver = best_f1['classifier__solver'],
                                      max_iter = 5000,
                                      n_jobs = -1,
                                      class_weight='balanced')
    else:
        f1_model = LogisticRegression(penalty = best_f1['classifier__penalty'],
                                      solver = best_f1['classifier__solver'],
                                      max_iter = 5000,
                                      n_jobs = -1,
                                      class_weight='balanced')
    
    # Training on the new model
    f1_model.fit(X_train, y_train)
    y_f1_train = f1_model.predict(X_train)
    f1_train_score = f1_score(y_train, y_f1_train)
    
    # Testing and scoring the model
    y_f1_test = f1_model.predict(X_test)
    f1_test_score = f1_score(y_test, y_f1_test)

    train_metrics_log = train_metrics_log.append({'LG: Accuracy': acc_train_score, 'LG: Precision': prec_train_score, 
                                          'LG: AUC': roc_train_score, 'LG: F1': f1_train_score}, ignore_index=True)

    test_metrics_log = test_metrics_log.append({'LG: Accuracy': acc_test_score, 'LG: Precision': prec_test_score, 
                                          'LG: AUC': roc_test_score, 'LG: F1': f1_test_score}, ignore_index=True)
    
    return train_metrics_log, test_metrics_log, solution_log

# RANDOM FOREST CLASSIFIER

In [15]:
 def randomForest(X_train, X_test, y_train, y_test):
    train_metrics_rf = pd.DataFrame(columns=['RF: Accuracy','RF: Precision','RF: AUC','RF: F1'])
    test_metrics_rf = pd.DataFrame(columns=['RF: Accuracy','RF: Precision','RF: AUC','RF: F1'])

    randomForest = RandomForestClassifier()

    # Setting parameters according to CNM06
    param_grid = {
        'n_estimators': [1024],
        'criterion': ['gini', 'entropy'],
        'max_features': [1,2,4,6,8,12,16],
        'n_jobs': [-1],
        'class_weight': ['balanced']}

    # Perform 5-fold cross-validation using grid search
    clf = GridSearchCV(estimator=randomForest, param_grid=param_grid, cv=StratifiedKFold(n_splits=5), 
                       scoring=['accuracy', 'precision', 'roc_auc', 'f1'], refit=False, verbose=1)

    # Fitting training set on the cross validation
    hyperparams = clf.fit(X_train, y_train)
    # Storing best parameters for each metric model
    results = hyperparams.cv_results_['params']
    solution_rf = pd.DataFrame(results)

    # ACCURACY MODEL
    solution_rf['Accuracy'] = hyperparams.cv_results_['mean_test_accuracy']
    best_accuracy = results[np.argmin(hyperparams.cv_results_['rank_test_accuracy'])]
    # creating new model with optimal hyperparameters
    accuracy_model = RandomForestClassifier(n_estimators = best_accuracy['n_estimators'],
                                            criterion = best_accuracy['criterion'],
                                            max_features = best_accuracy['max_features'],
                                            n_jobs = -1,
                                            class_weight='balanced')
    # Training on the new model
    accuracy_model.fit(X_train, y_train)
    y_acc_train = accuracy_model.predict(X_train)
    acc_train_score = accuracy_score(y_train, y_acc_train)

    # Testing and scoring the model
    y_acc_test = accuracy_model.predict(X_test)
    acc_test_score = accuracy_score(y_test, y_acc_test)

    # PRECISION MODEL
    solution_rf['Precision'] = hyperparams.cv_results_['mean_test_precision']
    best_precision = results[np.argmin(hyperparams.cv_results_['rank_test_precision'])]
    # creating new model with optimal hyperparameters
    precision_model = RandomForestClassifier(n_estimators = best_precision['n_estimators'],
                                             criterion = best_precision['criterion'],
                                             max_features = best_precision['max_features'],
                                             n_jobs = -1,
                                             class_weight='balanced')
    # Training on the new model
    precision_model.fit(X_train, y_train)
    y_prec_train = precision_model.predict(X_train)
    prec_train_score = precision_score(y_train, y_prec_train)

    # Testing and scoring the model
    y_prec_test = precision_model.predict(X_test)
    prec_test_score = precision_score(y_test, y_prec_test)

    # ROC AUC MODEL
    solution_rf['ROC AUC'] = hyperparams.cv_results_['mean_test_roc_auc']
    best_roc_auc = results[np.argmin(hyperparams.cv_results_['rank_test_roc_auc'])]
    # creating new model with optimal hyperparameters
    roc_model = RandomForestClassifier(n_estimators = best_roc_auc['n_estimators'],
                                       criterion = best_roc_auc['criterion'],
                                       max_features = best_roc_auc['max_features'],
                                       n_jobs = -1,
                                       class_weight='balanced')
    # Training on the new model
    roc_model.fit(X_train, y_train)
    y_roc_train = roc_model.predict(X_train)
    roc_train_score = roc_auc_score(y_train, y_roc_train)

    # Testing and scoring the model
    y_roc_test = roc_model.predict(X_test)
    roc_test_score = roc_auc_score(y_test, y_roc_test)

    # F1 MODEL
    solution_rf['F1'] = hyperparams.cv_results_['mean_test_f1']
    best_f1 = results[np.argmin(hyperparams.cv_results_['rank_test_f1'])]
    # creating new model with optimal hyperparameters
    f1_model = RandomForestClassifier(n_estimators = best_f1['n_estimators'],
                                      criterion = best_f1['criterion'],
                                      max_features = best_f1['max_features'],
                                      n_jobs = -1,
                                      class_weight='balanced')
    # Training on the new model
    f1_model.fit(X_train, y_train)
    y_f1_train = f1_model.predict(X_train)
    f1_train_score = f1_score(y_train, y_f1_train)

    # Testing and scoring the model
    y_f1_test = f1_model.predict(X_test)
    f1_test_score = f1_score(y_test, y_f1_test)

    train_metrics_rf = train_metrics_rf.append({'RF: Accuracy': acc_train_score, 'RF: Precision': prec_train_score,
                                                'RF: AUC': roc_train_score, 'RF: F1': f1_train_score}, ignore_index=True)

    test_metrics_rf = test_metrics_rf.append({'RF: Accuracy': acc_test_score, 'RF: Precision': prec_test_score,
                                              'RF: AUC': roc_test_score, 'RF: F1': f1_test_score}, ignore_index=True)
    
    return train_metrics_rf, test_metrics_rf, solution_rf

# DECISION TREE CLASSIFIER

In [16]:
def decisionTrees(X_train, X_test, y_train, y_test):
    train_metrics_dt = pd.DataFrame(columns=['DT: Accuracy','DT: Precision','DT: AUC','DT: F1'])
    test_metrics_dt = pd.DataFrame(columns=['DT: Accuracy','DT: Precision','DT: AUC','DT: F1'])

    pipe = Pipeline(steps=[('classifier', DecisionTreeClassifier())])

    # Setting parameters according to CNM06 + passing a list of min_samples_leaf
    parameters = [{'classifier': [DecisionTreeClassifier(class_weight='balanced')],
                   'classifier__criterion': ['gini', 'entropy'],
                   'classifier__splitter': ['best'],
                   'classifier__min_samples_leaf': [1,2,4,6,8,10,12,14,16,18]}]

    # Perform 5-fold cross-validation using grid search
    clf = GridSearchCV(estimator=pipe, param_grid=parameters, cv=StratifiedKFold(n_splits=5), 
                       scoring=['accuracy', 'precision', 'roc_auc', 'f1'], refit=False, verbose=1)
    
    # Fitting training set on the cross validation
    hyperparams = clf.fit(X_train, y_train)
    # Storing best parameters for each metric model
    results = hyperparams.cv_results_['params']
    solution_dt = pd.DataFrame(results)

    # ACCURACY
    solution_dt['Accuracy'] = hyperparams.cv_results_['mean_test_accuracy']
    best_accuracy = results[np.argmin(hyperparams.cv_results_['rank_test_accuracy'])]
    # creating new model with optimal hyperparameters
    accuracy_model = DecisionTreeClassifier(criterion = best_accuracy['classifier__criterion'],
                                            splitter = best_accuracy['classifier__splitter'],
                                            min_samples_leaf = best_accuracy['classifier__min_samples_leaf'],
                                            class_weight = 'balanced')

    # Training on the new model
    accuracy_model.fit(X_train, y_train)
    y_acc_train = accuracy_model.predict(X_train)
    acc_train_score = accuracy_score(y_train, y_acc_train)

    # Testing and scoring the model
    y_acc_test = accuracy_model.predict(X_test)
    acc_test_score = accuracy_score(y_test, y_acc_test)

    # PRECISION
    solution_dt['Precision'] = hyperparams.cv_results_['mean_test_precision']
    best_precision = results[np.argmin(hyperparams.cv_results_['rank_test_precision'])]
    # creating new model with optimal hyperparameters
    precision_model = DecisionTreeClassifier(criterion = best_precision['classifier__criterion'],
                                             splitter = best_precision['classifier__splitter'],
                                             min_samples_leaf = best_precision['classifier__min_samples_leaf'],
                                             class_weight = 'balanced')
    # Training on the new model
    precision_model.fit(X_train, y_train)
    y_prec_train = precision_model.predict(X_train)
    prec_train_score = precision_score(y_train, y_prec_train)

    # Testing and scoring the model
    y_prec_test = precision_model.predict(X_test)
    prec_test_score = precision_score(y_test, y_prec_test)

    # ROC AUC
    solution_dt['ROC AUC'] = hyperparams.cv_results_['mean_test_roc_auc']
    best_roc_auc = results[np.argmin(hyperparams.cv_results_['rank_test_roc_auc'])]
    # creating new model with optimal hyperparameters
    roc_model = DecisionTreeClassifier(criterion = best_roc_auc['classifier__criterion'],
                                       splitter = best_roc_auc['classifier__splitter'],
                                       min_samples_leaf = best_roc_auc['classifier__min_samples_leaf'],
                                       class_weight = 'balanced')
    # Training on the new model
    roc_model.fit(X_train, y_train)
    y_roc_train = roc_model.predict(X_train)
    roc_train_score = roc_auc_score(y_train, y_roc_train)

    # Testing and scoring the model
    y_roc_test = roc_model.predict(X_test)
    roc_test_score = roc_auc_score(y_test, y_roc_test)

    # F1
    solution_dt['F1'] = hyperparams.cv_results_['mean_test_f1']
    best_f1 = results[np.argmin(hyperparams.cv_results_['rank_test_f1'])]
    # creating new model with optimal hyperparameters
    f1_model = DecisionTreeClassifier(criterion = best_f1['classifier__criterion'],
                                      splitter = best_f1['classifier__splitter'],
                                      min_samples_leaf = best_f1['classifier__min_samples_leaf'],
                                      class_weight = 'balanced')
    # Training on the new model
    f1_model.fit(X_train, y_train)
    y_f1_train = f1_model.predict(X_train)
    f1_train_score = f1_score(y_train, y_f1_train)

    # Testing and scoring the model
    y_f1_test = f1_model.predict(X_test)
    f1_test_score = f1_score(y_test, y_f1_test)

    train_metrics_dt = train_metrics_dt.append({'DT: Accuracy': acc_train_score, 'DT: Precision': prec_train_score,
                                                'DT: AUC': roc_train_score, 'DT: F1': f1_train_score}, ignore_index=True)

    test_metrics_dt = test_metrics_dt.append({'DT: Accuracy': acc_test_score, 'DT: Precision': prec_test_score,
                                              'DT: AUC': roc_test_score, 'DT: F1': f1_test_score}, ignore_index=True)
    
    return train_metrics_dt, test_metrics_dt, solution_dt

In [17]:
trials = 5
train_metrics = pd.DataFrame()
test_metrics = pd.DataFrame()
solution_metrics = pd.DataFrame()

# Running the trial five times
for i in range(trials):
    # Splitting data into train size = 5000
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=5000, shuffle=True)
    
    train_log, test_log, solution_log = logisticRegression(X_train, X_test, y_train, y_test)
    train_rf, test_rf, solution_rf = randomForest(X_train, X_test, y_train, y_test)
    train_dt, test_dt, solution_dt = decisionTrees(X_train, X_test, y_train, y_test)

    train_metrics = train_metrics.append(pd.concat([train_log, train_rf, train_dt], axis=1))
    test_metrics = test_metrics.append(pd.concat([test_log, test_rf, test_dt], axis=1))
    solution_metrics = solution_metrics.append(pd.concat([solution_log, solution_rf, solution_dt], axis=1))

# storing data into CSV file
train_metrics.to_csv('employee_train.csv')
test_metrics.to_csv('employee_test.csv')
solution_metrics.to_csv('employee_solution.csv')

Fitting 5 folds for each of 41 candidates, totalling 205 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 205 out of 205 | elapsed:   58.9s finished


Fitting 5 folds for each of 14 candidates, totalling 70 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  70 out of  70 | elapsed: 10.1min finished


Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    9.6s finished


Fitting 5 folds for each of 41 candidates, totalling 205 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 205 out of 205 | elapsed:   56.8s finished


Fitting 5 folds for each of 14 candidates, totalling 70 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  70 out of  70 | elapsed: 12.2min finished


Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   11.5s finished


Fitting 5 folds for each of 41 candidates, totalling 205 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 205 out of 205 | elapsed:   59.2s finished


Fitting 5 folds for each of 14 candidates, totalling 70 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  70 out of  70 | elapsed: 13.2min finished


Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    7.2s finished


Fitting 5 folds for each of 41 candidates, totalling 205 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 205 out of 205 | elapsed:   47.7s finished


Fitting 5 folds for each of 14 candidates, totalling 70 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  70 out of  70 | elapsed: 14.6min finished


Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   11.7s finished


Fitting 5 folds for each of 41 candidates, totalling 205 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 205 out of 205 | elapsed:  1.4min finished


Fitting 5 folds for each of 14 candidates, totalling 70 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  70 out of  70 | elapsed:  9.5min finished


Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    7.5s finished
