# Iris Multi-metric

In [1]:
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Categorical, Integer, Continuous
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.metrics import make_scorer
from sklearn.metrics import balanced_accuracy_score

### Import the data and split it into train and test sets

In [2]:
# Load the Iris dataset and pull out the feature matrix and the class labels.
data = load_iris()
X = data["data"]
y = data["target"]

# Hold out 33% of the samples as a test set; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0,
)

### Define the classifier, the hyperparameter search space and the multi-metric scoring


In [3]:
# Base estimator whose hyperparameters the genetic search will tune.
clf = DecisionTreeClassifier()

# Search space: one sklearn_genetic dimension per tuned hyperparameter.
params_grid = dict(
    min_weight_fraction_leaf=Continuous(0, 0.5),
    criterion=Categorical(["gini", "entropy"]),
    max_depth=Integer(2, 20),
    max_leaf_nodes=Integer(2, 30),
)

# Multi-metric scoring: each key names a metric; the value is either a
# scikit-learn scorer string or a scorer built with make_scorer.
scoring = {
    "accuracy": "accuracy",
    "balanced_accuracy": make_scorer(balanced_accuracy_score),
}

### Define the GASearchCV options

In [4]:
# The population size and number of generations are deliberately tiny: the goal
# here is only to show how multi-metric scoring shows up in the logbook and in
# cv_results_, not to find the best possible tree.
evolved_estimator = GASearchCV(
    clf,
    # What to search over and how to score it.
    param_grid=params_grid,
    scoring=scoring,
    refit="accuracy",  # one of the keys of `scoring`
    # Genetic algorithm settings.
    algorithm="eaSimple",
    population_size=3,
    generations=2,
    crossover_probability=0.9,
    mutation_probability=0.05,
    # Execution and reporting.
    n_jobs=-1,
    verbose=True,
    error_score="raise",
)

### Fit the model and see some results

In [5]:
# Run the genetic search on the training split; because verbose=True was set,
# a per-generation fitness table is printed while it runs.
evolved_estimator.fit(X_train, y_train)
# Predict the classes of the held-out test split with the fitted search object.
y_predict_ga = evolved_estimator.predict(X_test)

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	3     	0.856902	0.117921   	0.940285   	0.690137   
1  	2     	0.940285	0          	0.940285   	0.940285   
2  	2     	0.940285	0          	0.940285   	0.940285   


In [6]:
# Inspect the cross-validation results. With multi-metric scoring the keys carry
# the metric name, e.g. 'split0_test_accuracy' and 'mean_test_accuracy'.
evolved_estimator.cv_results_


{'param_min_weight_fraction_leaf': [0.22963955365985156,
  0.11807874354582698,
  0.4566955700628974,
  0.11807874354582698,
  0.22963955365985156,
  0.22963955365985156,
  0.22963955365985156],
 'param_criterion': ['gini',
  'entropy',
  'entropy',
  'gini',
  'entropy',
  'entropy',
  'gini'],
 'param_max_depth': [2, 9, 10, 2, 9, 2, 9],
 'param_max_leaf_nodes': [13, 7, 3, 7, 13, 13, 13],
 'split0_test_accuracy': [0.9117647058823529,
  0.9117647058823529,
  0.6764705882352942,
  0.9117647058823529,
  0.9117647058823529,
  0.9117647058823529,
  0.9117647058823529],
 'split1_test_accuracy': [0.9696969696969697,
  0.9696969696969697,
  0.696969696969697,
  0.9696969696969697,
  0.9696969696969697,
  0.9696969696969697,
  0.9696969696969697],
 'split2_test_accuracy': [0.9393939393939394,
  0.9393939393939394,
  0.696969696969697,
  0.9393939393939394,
  0.9393939393939394,
  0.9393939393939394,
  0.9393939393939394],
 'mean_test_accuracy': [0.9402852049910874,
  0.9402852049910874,
  0.69

In [7]:
# The "parameters" chapter of the logbook holds one entry per evaluated candidate:
# its hyperparameters, its score, fit/score timings, and the train/test arrays
# for every metric in `scoring`.
evolved_estimator.logbook.chapters["parameters"]



[{'index': 0,
  'min_weight_fraction_leaf': 0.22963955365985156,
  'criterion': 'gini',
  'max_depth': 2,
  'max_leaf_nodes': 13,
  'score': 0.9402852049910874,
  'cv_scores': array([0.91176471, 0.96969697, 0.93939394]),
  'fit_time': array([0.00199986, 0.00199795, 0.00199938]),
  'score_time': array([0.00197005, 0.00199533, 0.00199914]),
  'test_accuracy': array([0.91176471, 0.96969697, 0.93939394]),
  'train_accuracy': array([0.96969697, 0.95522388, 0.97014925]),
  'test_balanced_accuracy': array([0.90909091, 0.97222222, 0.93333333]),
  'train_balanced_accuracy': array([0.97222222, 0.95514148, 0.97101449])},
 {'index': 1,
  'min_weight_fraction_leaf': 0.11807874354582698,
  'criterion': 'entropy',
  'max_depth': 9,
  'max_leaf_nodes': 7,
  'score': 0.9402852049910874,
  'cv_scores': array([0.91176471, 0.96969697, 0.93939394]),
  'fit_time': array([0.00200057, 0.0019865 , 0.00097251]),
  'score_time': array([0.00200391, 0.00196981, 0.00400376]),
  'test_accuracy': array([0.91176471, 0