# Scikit-learn Comparison

In [1]:
from sklearn_genetic import GASearchCV
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn_genetic.space import Categorical, Continuous
import scipy.stats as stats
from scipy.stats import loguniform
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
data = load_digits() 

In [3]:
label_names = data['target_names'] 
y = data['target']
X = data['data'] 

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [5]:
clf = SGDClassifier(loss='hinge',fit_intercept=True)

### 1. Random Search

In [6]:
param_dist = {'average': [True, False],
              'l1_ratio': stats.uniform(0, 1),
              'alpha': loguniform(1e-4, 1e0)}

In [7]:
n_iter_search = 30
random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                   n_iter=n_iter_search,n_jobs=-1)

In [8]:
random_search.fit(X_train,y_train)

RandomizedSearchCV(estimator=SGDClassifier(), n_iter=30, n_jobs=-1,
                   param_distributions={'alpha': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001A62568BD60>,
                                        'average': [True, False],
                                        'l1_ratio': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001A61065B400>})

In [9]:
accuracy_score(y_test, random_search.predict(X_test))

0.9629629629629629

In [10]:
random_search.best_params_

{'alpha': 0.020380435883006108,
 'average': True,
 'l1_ratio': 0.01937382409973476}

### 2. Grid Search

In [11]:
param_grid = {'average': [True, False],
              'l1_ratio': np.linspace(0, 1, num=10),
              'alpha': np.power(10, np.arange(-4, 1, dtype=float))}

In [12]:
grid_search = GridSearchCV(clf, param_grid=param_grid,n_jobs=-1)

In [13]:
grid_search.fit(X_train,y_train)

GridSearchCV(estimator=SGDClassifier(), n_jobs=-1,
             param_grid={'alpha': array([1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00]),
                         'average': [True, False],
                         'l1_ratio': array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
       0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ])})

In [14]:
accuracy_score(y_test, grid_search.predict(X_test))

0.9528619528619529

In [15]:
grid_search.best_params_

{'alpha': 0.001, 'average': True, 'l1_ratio': 0.4444444444444444}

### 3. Genetic Algorithm

In [16]:
param_grid = {'l1_ratio': Continuous(0,1),
              'alpha': Continuous(1e-4,1),
              'average': Categorical([True, False])}

evolved_estimator = GASearchCV(clf,
                    cv=3,
                    scoring='accuracy',
                    param_grid=param_grid,
                    population_size=10,
                    generations=8,
                    tournament_size=3,
                    elitism=True,
                    verbose=True)

In [17]:
evolved_estimator.fit(X_train,y_train)

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	10    	0.939817	0.00313682 	0.945137   	0.934331   
1  	10    	0.940482	0.00433848 	0.9468     	0.932668   
2  	10    	0.940482	0.00226736 	0.943475   	0.935162   
3  	10    	0.942228	0.00244479 	0.945137   	0.938487   
4  	10    	0.939734	0.00420996 	0.945137   	0.934331   
5  	10    	0.937323	0.00362717 	0.944306   	0.931837   
6  	10    	0.943475	0.00313241 	0.949293   	0.939318   
7  	10    	0.940399	0.0042394  	0.950125   	0.934331   
8  	10    	0.943724	0.00257689 	0.948462   	0.938487   


<sklearn_genetic_opt.GASearchCV at 0x1a625628ee0>

In [18]:
y_predict_ga = evolved_estimator.predict(X_test)

In [19]:
accuracy_score(y_test,y_predict_ga)

0.968013468013468

In [20]:
evolved_estimator.best_params_

{'l1_ratio': 0.9918490625641972, 'alpha': 0.5633014570910942, 'average': False}