# Imports

In [None]:
import pandas as pd
import numpy as np
import random
import pickle
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from math import sqrt
import math
from google.colab import drive

In [None]:
# Mount Google Drive inside the Colab runtime; every file path used later in
# this notebook lives under '/content/drive'.
drive.mount('/content/drive')

Mounted at /content/drive


# Scripts

In [None]:
def population(ind_size,pop_size,models):
  """Build a random initial population for the genetic algorithm.

  Each individual is a list of length ``ind_size``. Position 0 holds an
  integer index into ``models`` (the "model gene"); every other position
  holds a random boolean.

  Parameters
  ----------
  ind_size : int
      Length of each individual (model gene plus boolean genes). Must be
      at least 1, otherwise writing the model gene raises ``IndexError``.
  pop_size : int
      Number of individuals to generate.
  models : sequence
      Candidate models. Only its length is used here.

  Returns
  -------
  list of list
      ``pop_size`` freshly created individuals.
  """
  pop = []

  for _ in range(pop_size):
    ind = [random.choice([True,False]) for _ in range(ind_size)]
    # randrange excludes its upper bound, so the gene is always a valid
    # index into `models`. The previous range(0, len(models)+1) could also
    # yield len(models), which is one past the end of the list.
    ind[0] = random.randrange(len(models))
    pop.append(ind)

  return pop

In [None]:
def fitness(population,df,models):
  """Score every individual by training its model on its selected features.

  The data is split once (70/30, ``random_state=42``) and reused for every
  individual. For each individual ``p``, ``p[0]`` selects the model from
  ``models`` and ``p[1:]`` is a boolean mask over the feature columns
  (all columns of ``df`` except ``'fetal_health'``, in the order returned
  by ``Index.difference``). The score is ``20 * RMSE + number of selected
  features``, so lower values are better.

  Parameters
  ----------
  population : iterable of list
      Individuals of the form ``[model_index, bool, bool, ...]``.
  df : pandas.DataFrame
      Data containing the feature columns and a ``'fetal_health'`` target.
  models : sequence
      Estimators exposing ``fit`` and ``predict``. They are refit in place.

  Returns
  -------
  list of [fit_value, individual]
      One entry per individual, in input order. An individual that selects
      no feature at all gets ``float('inf')`` and is not trained.
  """
  result = []

  x = df[df.columns.difference(['fetal_health'])]
  y = df[['fetal_health']]

  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

  x_train = np.array(x_train)
  x_test = np.array(x_test)
  y_train = np.ravel(y_train)
  y_test = np.ravel(y_test)

  for p in population:
    mask = np.array([gene == 1 for gene in p[1:]], dtype=bool)

    if not mask.any():
      # No feature selected: the estimator cannot be fit on zero columns,
      # so give the individual the worst possible score instead of crashing.
      result.append([float('inf'), p])
      continue

    # The model gene lives at position 0. The previous code indexed with
    # p[1], a boolean feature gene, so only models[0] or models[1] was ever
    # trained. The modulo wraps an out-of-range gene produced upstream onto
    # a valid model instead of raising IndexError.
    model = models[p[0] % len(models)]
    model.fit(x_train[:,mask],y_train)
    y_pred = model.predict(x_test[:,mask])
    rmse = sqrt(mean_squared_error(y_test, y_pred))
    # Prediction error dominates; each selected feature adds a penalty of 1.
    fit_value = 20*rmse + np.sum(p[1:])

    result.append([fit_value,p])
  return result

In [None]:
def tour(fitness, n_select=100):
  """Binary tournament selection over scored individuals.

  Two entries are drawn at random (with replacement) for every slot and the
  one with the lower fitness value is kept. Lower is better in this
  notebook: the score is an error plus a feature-count penalty, and both
  ``best_of_generation`` and ``survivors`` keep the smallest values.

  Parameters
  ----------
  fitness : sequence of [fit_value, individual]
      Scored population, as returned by the ``fitness`` function.
  n_select : int, optional
      Number of tournaments to run, i.e. the size of the returned list.
      Defaults to 100, the value that used to be hard-coded.

  Returns
  -------
  list of [fit_value, individual]
      The tournament winners (the same entry objects, not copies).
  """
  new_pop = []

  for _ in range(n_select):
    indiv1 = random.choice(fitness)
    indiv2 = random.choice(fitness)

    # Keep the better (smaller) score. The previous comparison used `>`
    # and therefore promoted the worse of the two contestants.
    if indiv1[0] < indiv2[0]:
      new_pop.append(indiv1)
    else:
      new_pop.append(indiv2)

  return new_pop

In [None]:
def mutation_and_crossover(population, prob_mutation=0.1, n_models=4):
  """Produce offspring from a selected population by mutation or crossover.

  One iteration is run per entry of ``population``. In each iteration two
  parents are drawn at random (with replacement) and a fresh random number
  decides the operator:

  * mutation (probability ``prob_mutation``): a copy of the first parent
    gets a new random model gene and one flipped boolean gene; it yields
    one child.
  * crossover (otherwise): single-point crossover of the boolean genes,
    with the model genes swapped between the two children; it yields two
    children.

  Parameters
  ----------
  population : sequence of [fit_value, individual]
      Scored individuals; only the individual (index 1) is used.
  prob_mutation : float, optional
      Probability of applying mutation in one iteration. Default 0.1.
  n_models : int, optional
      Mutation draws the new model gene from ``range(n_models)``. The
      default of 4 is the value that used to be hard-coded; pass
      ``len(models)`` to let mutation reach every candidate model.

  Returns
  -------
  list of list
      The children. Parents are never modified.
  """
  children = []

  pop = [entry[1] for entry in population]

  for _ in population:
    parent1 = random.choice(pop)
    parent2 = random.choice(pop)

    # Draw the operator per iteration. Drawing once before the loop made
    # an entire generation either all-mutation or all-crossover.
    if random.random() < prob_mutation:
      # Mutate a copy: the parent list is shared with the scored population,
      # so editing it in place would silently change individuals whose
      # fitness has already been computed.
      child = list(parent1)
      gene = random.randrange(1, len(child)) # boolean gene to flip
      child[0] = random.randrange(n_models) # new model gene
      child[gene] = not child[gene]

      children.append(child)

    else:
      # Single-point crossover over positions 1..len-1; position 0 (the
      # model gene) is taken from the opposite parent.
      crossover_point = random.randint(1, len(parent1)-1)
      child1 = [parent2[0]] + parent1[1:crossover_point] + parent2[crossover_point:]
      child2 = [parent1[0]] + parent2[1:crossover_point] + parent1[crossover_point:]

      children.append(child1)
      children.append(child2)

  return children

In [None]:
def survivors(children,parents):
  """Pick the individuals that form the next generation.

  Both inputs are lists of ``[fit_value, individual]`` entries. Each list is
  ordered ascending (plain list comparison, so fitness first), then the
  first 95 children and the first 5 parents are kept.

  Returns the kept individuals only (fitness values stripped), children
  first and parents last.
  """
  ranked_children = list(children)
  ranked_children.sort()

  ranked_parents = list(parents)
  ranked_parents.sort()

  kept = ranked_children[:95] + ranked_parents[:5]

  return [entry[1] for entry in kept]

In [None]:
def best_of_generation(generation):
  """Return the ``[fit_value, individual]`` entry with the smallest fitness.

  The first entry wins on ties. An empty list is returned when
  ``generation`` is empty or no entry has a fitness below infinity.
  """
  champion = []
  lowest = float('inf')

  for entry in generation:
    score = entry[0]
    if not score < lowest:
      continue
    lowest, champion = score, entry

  return champion

# Dataset

In [None]:
# Location of the dataset on the mounted Drive (despite the name `url`, this
# is a local file path).
url = '/content/drive/My Drive/Data Files/fetal_health.csv'
# `df` is the module-level frame that loop() and the fitness() calls below read.
df = pd.read_csv(url)
# Preview the first rows.
df.head()

Unnamed: 0,baseline value,accelerations,fetal_movement,uterine_contractions,light_decelerations,severe_decelerations,prolongued_decelerations,abnormal_short_term_variability,mean_value_of_short_term_variability,percentage_of_time_with_abnormal_long_term_variability,...,histogram_min,histogram_max,histogram_number_of_peaks,histogram_number_of_zeroes,histogram_mode,histogram_mean,histogram_median,histogram_variance,histogram_tendency,fetal_health
0,120.0,0.0,0.0,0.0,0.0,0.0,0.0,73.0,0.5,43.0,...,62.0,126.0,2.0,0.0,120.0,137.0,121.0,73.0,1.0,2.0
1,132.0,0.006,0.0,0.006,0.003,0.0,0.0,17.0,2.1,0.0,...,68.0,198.0,6.0,1.0,141.0,136.0,140.0,12.0,0.0,1.0
2,133.0,0.003,0.0,0.008,0.003,0.0,0.0,16.0,2.1,0.0,...,68.0,198.0,5.0,1.0,141.0,135.0,138.0,13.0,0.0,1.0
3,134.0,0.003,0.0,0.008,0.003,0.0,0.0,16.0,2.4,0.0,...,53.0,170.0,11.0,0.0,137.0,134.0,137.0,13.0,1.0,1.0
4,132.0,0.007,0.0,0.008,0.0,0.0,0.0,16.0,2.4,0.0,...,53.0,170.0,9.0,0.0,137.0,136.0,138.0,11.0,1.0,1.0


In [None]:
# Column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2126 entries, 0 to 2125
Data columns (total 22 columns):
 #   Column                                                  Non-Null Count  Dtype  
---  ------                                                  --------------  -----  
 0   baseline value                                          2126 non-null   float64
 1   accelerations                                           2126 non-null   float64
 2   fetal_movement                                          2126 non-null   float64
 3   uterine_contractions                                    2126 non-null   float64
 4   light_decelerations                                     2126 non-null   float64
 5   severe_decelerations                                    2126 non-null   float64
 6   prolongued_decelerations                                2126 non-null   float64
 7   abnormal_short_term_variability                         2126 non-null   float64
 8   mean_value_of_short_term_variability  

# Default Parameters

In [None]:
# Candidate classifiers for the genetic algorithm: three decision trees and
# three random forests of increasing max_depth. population() draws the model
# gene (position 0 of an individual) from the length of this list.
# No random_state is set on the forests, so their scores can vary between runs.
models = [DecisionTreeClassifier(max_depth=4),
          DecisionTreeClassifier(max_depth=6),
          DecisionTreeClassifier(max_depth=8),
          RandomForestClassifier(max_depth=14),
          RandomForestClassifier(max_depth=16),
          RandomForestClassifier(max_depth=20)]

In [None]:
# Length of one individual: 1 model gene + one boolean per feature column.
# df.info() reports 22 columns including 'fetal_health', i.e. 21 features.
individual_size = 22
# Size of the initial population. tour() draws 100 winners and survivors()
# keeps 95 + 5 individuals, so later generations are sized by those constants.
population_size = 100
# Number of generations executed by loop().
loops = 50

# Loop

In [None]:
def loop():
  """Run the genetic algorithm for ``loops`` generations.

  Reads the module-level ``individual_size``, ``population_size``,
  ``models``, ``df`` and ``loops``. The initial population is pickled to
  Drive before the first generation is evaluated.

  Returns
  -------
  tuple
      ``(generation, best_history)``: the individuals that survived the
      last iteration, and the best ``[fit_value, individual]`` entry of the
      parent population recorded at the start of every iteration.
  """
  best_history = []

  # Step 1: random starting population
  generation = population(individual_size,population_size,models)

  # Persist the starting population so it can be re-scored later
  with open("/content/drive/My Drive/Data Files/initial_generation.txt", "wb") as fp:
    pickle.dump(generation, fp)

  for iteration in range(1, loops + 1):
    # Step 2: score the current generation
    scored_parents = fitness(generation,df,models)

    # Step 3: remember the best individual of this generation
    best_history.append(best_of_generation(scored_parents))

    # Step 4: tournament selection
    selected = tour(scored_parents)

    # Step 5: create offspring
    offspring = mutation_and_crossover(selected)

    # Step 6: score the offspring
    scored_offspring = fitness(offspring,df,models)

    # Step 7: best offspring plus a few parents form the next generation
    generation = survivors(scored_offspring,scored_parents)

    print('\n Loops realizados: {}'.format(iteration))

  return generation, best_history

In [None]:
# Run the full genetic algorithm: `genetic_alg` is the final generation of
# individuals, `best_indivs` the best scored entry recorded in each iteration.
genetic_alg, best_indivs = loop()

In [None]:
# Score the final generation; yields a list of [fit_value, individual].
result_genetic_alg = fitness(genetic_alg,df,models)

In [None]:
# Reload the initial population that loop() pickled before evolving it.
# NOTE: pickle.load can execute arbitrary code; only load files written by
# this notebook.
with open("/content/drive/My Drive/Data Files/initial_generation.txt", "rb") as fp:
  initial_generation = pickle.load(fp)

In [None]:
# Score the initial population with the same fitness function, as a baseline
# for comparison with the evolved generation.
result_initial_generation = fitness(initial_generation,df,models)

In [None]:
# Exports: pickle the results to Drive (binary pickle data despite the .txt
# file extension).

# Scored final generation
with open("/content/drive/My Drive/Data Files/result_genetic_alg.txt", "wb") as fp:
  pickle.dump(result_genetic_alg, fp)

# Scored initial generation
with open("/content/drive/My Drive/Data Files/result_initial_generation.txt", "wb") as fp:
  pickle.dump(result_initial_generation, fp)

# Best entry recorded in each iteration of loop()
with open("/content/drive/My Drive/Data Files/best_indivs.txt", "wb") as fp:
  pickle.dump(best_indivs, fp)