In [1]:
from sklearn_genetic import GASearchCV
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
import scipy.stats as stats
from sklearn.utils.fixes import loguniform
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
import numpy as np
import itertools
import warnings
warnings.filterwarnings("ignore")

In [2]:
data = load_digits() 

In [3]:
label_names = data['target_names'] 
y = data['target']
X = data['data'] 

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [6]:
clf = SGDClassifier(loss='hinge',fit_intercept=True)

## 1. Random Search

In [7]:
param_dist = {'average': [True, False],
              'l1_ratio': stats.uniform(0, 1),
              'alpha': loguniform(1e-4, 1e0)}

In [8]:
n_iter_search = 30
random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                   n_iter=n_iter_search,n_jobs=-1)

In [9]:
random_search.fit(X_train,y_train)

RandomizedSearchCV(cv=None, error_score=nan,
                   estimator=SGDClassifier(alpha=0.0001, average=False,
                                           class_weight=None,
                                           early_stopping=False, epsilon=0.1,
                                           eta0=0.0, fit_intercept=True,
                                           l1_ratio=0.15,
                                           learning_rate='optimal',
                                           loss='hinge', max_iter=1000,
                                           n_iter_no_change=5, n_jobs=None,
                                           penalty='l2', power_t=0.5,
                                           random_state=None, shuffle=True,
                                           tol=0.001, validation_frac...
                                           verbose=0, warm_start=False),
                   iid='deprecated', n_iter=30, n_jobs=-1,
                   param_distributions={'alph

In [10]:
accuracy_score(y_test, random_search.predict(X_test))

0.9562289562289562

In [11]:
random_search.best_params_

{'alpha': 0.00210435125867271, 'average': True, 'l1_ratio': 0.5916704284933427}

## 2. Grid Search

In [12]:
param_grid = {'average': [True, False],
              'l1_ratio': np.linspace(0, 1, num=10),
              'alpha': np.power(10, np.arange(-4, 1, dtype=float))}

In [13]:
grid_search = GridSearchCV(clf, param_grid=param_grid,n_jobs=-1)

In [14]:
grid_search.fit(X_train,y_train)

GridSearchCV(cv=None, error_score=nan,
             estimator=SGDClassifier(alpha=0.0001, average=False,
                                     class_weight=None, early_stopping=False,
                                     epsilon=0.1, eta0=0.0, fit_intercept=True,
                                     l1_ratio=0.15, learning_rate='optimal',
                                     loss='hinge', max_iter=1000,
                                     n_iter_no_change=5, n_jobs=None,
                                     penalty='l2', power_t=0.5,
                                     random_state=None, shuffle=True, tol=0.001,
                                     validation_fraction=0.1, verbose=0,
                                     warm_start=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'alpha': array([1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00]),
                         'average': [True, False],
                         'l1_ratio': array([0.        , 0.11111111, 0.2222

In [15]:
accuracy_score(y_test, grid_search.predict(X_test))

0.9494949494949495

In [16]:
grid_search.best_params_

{'alpha': 0.01, 'average': True, 'l1_ratio': 0.1111111111111111}

## 3. Genetich Algorithm

In [17]:
evolved_estimator = GASearchCV(clf,
                    cv=3,
                    scoring='accuracy',
                    pop_size=20,
                    generations=8,
                    tournament_size=3,
                    elitism=True,
                    continuous_parameters = {'l1_ratio':(0,1), 'alpha':(1e-4,1)},
                    categorical_parameters = {'average': [True, False]},
                    int_parameters = {},
                    encoding_len=10)

In [18]:
evolved_estimator.fit(X_train,y_train)

n_gen: 0 {'n_chrom': 0, 'params': {'l1_ratio': 0.0625610944, 'alpha': 0.5709129296, 'average': True}, 'fitness': 0.946, 'fitness_std': 0.0082}

n_gen: 1 {'n_chrom': 16, 'params': {'l1_ratio': 0.7751710603, 'alpha': 0.2327258172, 'average': False}, 'fitness': 0.9493, 'fitness_std': 0.0031}

n_gen: 2 {'n_chrom': 6, 'params': {'l1_ratio': 0.9511241383, 'alpha': 0.6070774474, 'average': False}, 'fitness': 0.9476, 'fitness_std': 0.0113}

n_gen: 3 {'n_chrom': 7, 'params': {'l1_ratio': 0.9002932491, 'alpha': 0.6041451892, 'average': False}, 'fitness': 0.9468, 'fitness_std': 0.0024}

n_gen: 4 {'n_chrom': 9, 'params': {'l1_ratio': 0.2971651984, 'alpha': 0.6588806756, 'average': False}, 'fitness': 0.9476, 'fitness_std': 0.0035}

n_gen: 5 {'n_chrom': 1, 'params': {'l1_ratio': 0.7908113339, 'alpha': 0.5142226044, 'average': False}, 'fitness': 0.9501, 'fitness_std': 0.0035}

n_gen: 6 {'n_chrom': 9, 'params': {'l1_ratio': 0.7869012655, 'alpha': 0.6197838996, 'average': False}, 'fitness': 0.9485, 'fi

{'n_chrom': 12,
 'params': {'l1_ratio': 0.78201368, 'alpha': 0.53768067, 'average': False},
 'fitness': 0.9468,
 'fitness_std': 0.0024}

In [19]:
y_predicy_ga = evolved_estimator.predict(X_test)

In [20]:
accuracy_score(y_test,y_predicy_ga)

0.9579124579124579

In [21]:
evolved_estimator.best_params_

{'l1_ratio': 0.78201368, 'alpha': 0.53768067, 'average': False}