# Algoritmos genéticos para otimização de hiper parâmetros

In [1]:

import random
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from scipy import stats
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Continuous, Categorical, Integer


## Dataset

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("diabetes.csv")

## Dataset pré-processamento

In [3]:
limit = 3

z_scores = stats.zscore(df['Pregnancies'])
outliers_p = df['Pregnancies'][abs(z_scores) > limit]
print(outliers_p)

z_scores = stats.zscore(df['Glucose'])
outliers_g = df['Glucose'][abs(z_scores) > limit]
print(outliers_g)

z_scores = stats.zscore(df['BloodPressure'])
outliers_b = df['BloodPressure'][abs(z_scores) > limit]
print(outliers_b)

# body mass index (18.5 - 24.9 [Obesity > 30])
z_scores = stats.zscore(df['BMI'])
outliers_bmi = df['BMI'][abs(z_scores) > limit]
print(outliers_bmi)

# Probability of diabetes based on family history
z_scores = stats.zscore(df['DiabetesPedigreeFunction'])
outliers_dpf = df['DiabetesPedigreeFunction'][abs(z_scores) > limit]
print(outliers_dpf)

z_scores = stats.zscore(df['Age'])
outliers_a = df['Age'][abs(z_scores) > limit]
print(outliers_a)

unique_outliers = list(set(outliers_g.index) | set(outliers_b.index) | set([9,49,60,81,145,371,426,494,522,684,706]))
print(unique_outliers)
df.drop(unique_outliers, inplace=True)

X = df.drop("Outcome", axis=1)
y = df["Outcome"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_scaled = scaler.transform(X)

88     15
159    17
298    14
455    14
Name: Pregnancies, dtype: int64
75     0
182    0
342    0
349    0
502    0
Name: Glucose, dtype: int64
7      0
15     0
49     0
60     0
78     0
81     0
172    0
193    0
222    0
261    0
266    0
269    0
300    0
332    0
336    0
347    0
357    0
426    0
430    0
435    0
453    0
468    0
484    0
494    0
522    0
533    0
535    0
589    0
601    0
604    0
619    0
643    0
697    0
703    0
706    0
Name: BloodPressure, dtype: int64
9       0.0
49      0.0
60      0.0
81      0.0
145     0.0
177    67.1
371     0.0
426     0.0
445    59.4
494     0.0
522     0.0
673    57.3
684     0.0
706     0.0
Name: BMI, dtype: float64
4      2.288
45     1.893
58     1.781
228    2.329
330    1.476
370    2.137
371    1.731
395    1.600
445    2.420
593    1.699
621    1.698
Name: DiabetesPedigreeFunction, dtype: float64
123    69
453    72
459    81
666    70
684    69
Name: Age, dtype: int64
[643, 261, 7, 9, 266, 522, 269, 15, 145, 533, 53

## Aplicação algoritmo genético

In [4]:
# Defines the possible values to search
param_grid = {'penalty': Categorical(['l1', 'l2']),
              'solver': Categorical(['liblinear', 'saga'])}


evolved_estimator = GASearchCV(estimator=LogisticRegression(),
                               cv=None,
                               scoring="f1",
                               population_size=100,
                               generations=1000,
                               crossover_probability=0.2,
                               mutation_probability=0.7,
                               param_grid=param_grid)

evolved_estimator.fit(X_scaled, y)

print(evolved_estimator.best_params_)
print(evolved_estimator.best_score_)

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	100   	0.630391	0.0017027  	0.632945   	0.62805    
1  	171   	0.631433	0.00141084 	0.632945   	0.62805    
2  	183   	0.63244 	0.000921122	0.632945   	0.629498   
3  	180   	0.632886	0.000338515	0.632945   	0.630961   
4  	182   	0.632906	0.000277817	0.632945   	0.630961   
5  	183   	0.632837	0.00058824 	0.632945   	0.62805    
6  	179   	0.632906	0.000277817	0.632945   	0.630961   
7  	176   	0.632945	1.44329e-15	0.632945   	0.632945   
8  	172   	0.632945	1.44329e-15	0.632945   	0.632945   
9  	179   	0.632925	0.000197446	0.632945   	0.630961   
10 	182   	0.632876	0.000523741	0.632945   	0.62805    
11 	183   	0.632896	0.000487096	0.632945   	0.62805    
12 	190   	0.632906	0.000277817	0.632945   	0.630961   
13 	180   	0.632906	0.000277817	0.632945   	0.630961   
14 	182   	0.632925	0.000197446	0.632945   	0.630961   
15 	186   	0.632837	0.00058824 	0.632945   	0.62805    
16 	177   	0.632906	0.000277817	0.632945   	0.63