In [1]:
# !pip install deap
# !pip install category_encoders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting deap
  Downloading deap-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.9/139.9 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: deap
Successfully installed deap-1.3.3
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting category_encoders
  Downloading category_encoders-2.6.1-py2.py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/81.9 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: category_encoders
Successfully installed category_encoders-2.6.1


**Dataset:** https://archive.ics.uci.edu/ml/datasets/Car+Evaluation

**Algorithm:** ........

In [2]:
import numpy as np
from pandas import read_csv

url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data'

data = read_csv(url, header=None, usecols=range(0, 7))
col_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety','class']
data.columns = col_names
X = data.iloc[:, 0:6]
y = data.iloc[:, 6:7]

X

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,vhigh,vhigh,2,2,small,low
1,vhigh,vhigh,2,2,small,med
2,vhigh,vhigh,2,2,small,high
3,vhigh,vhigh,2,2,med,low
4,vhigh,vhigh,2,2,med,med
...,...,...,...,...,...,...
1723,low,low,5more,more,med,med
1724,low,low,5more,more,med,high
1725,low,low,5more,more,big,low
1726,low,low,5more,more,big,med


In [None]:
import category_encoders as ce
from sklearn.preprocessing import LabelEncoder

encoder = ce.OrdinalEncoder(cols=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'])
X = encoder.fit_transform(X)

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)


  y = column_or_1d(y, warn=True)


In [None]:
y

array([2, 2, 2, ..., 2, 1, 3])

In [None]:
hparams = {
'n_estimators': [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0],
'learning_rate': np.logspace(-2, 0, num=10, base=10),
'algorithm': ['SAMME', 'SAMME.R'],
}

In [None]:
from deap import base
from deap import creator
from deap import tools
import random

# define a single objective, maximizing fitness strategy:
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# create the Individual class based on list:
creator.create("Individual", list, fitness=creator.FitnessMax)


In [None]:
toolbox = base.Toolbox()
# fill in the with random floats for each type of hyper parameter
toolbox.register("h1", random.uniform,1,100)
toolbox.register("h2", random.uniform,0.01,1.0)
toolbox.register("h3", random.uniform,0,1)

toolbox.register("IndividualCreator",tools.initCycle,creator.Individual,
                 (toolbox.h1,toolbox.h2,toolbox.h3),n=1)

In [None]:
# Test IndividualCreator
for i in range(10):
    print(toolbox.IndividualCreator())

[88.27803344228506, 0.7195382081330456, 0.20018832029540623]
[46.48197400389379, 0.7902696666167557, 0.5466867311816009]
[50.81427002388519, 0.12269902226167373, 0.5452265563702894]
[92.78602855827157, 0.6169338667341124, 0.3310693709051684]
[92.92956814505452, 0.4357221494380934, 0.24708992178004296]
[94.23319641230421, 0.24703868095215586, 0.23958029146383653]
[70.82398642571573, 0.7305736629058429, 0.4854343090645634]
[25.75766962457751, 0.4219122694596569, 0.7014611063676897]
[11.052224895220247, 0.38522258699461004, 0.039124024122078604]
[94.15448490107376, 0.4211914161621162, 0.24780514935596587]


In [None]:
def getParams(individual):
    n_est = round(individual[0])
    lrate = individual[1]
    algo = ['SAMME', 'SAMME.R'][round(individual[2])] 
    return n_est, lrate, algo

In [None]:
from sklearn import model_selection

kfold = model_selection.KFold(n_splits=10, random_state=42, shuffle=True)

In [None]:
from sklearn.ensemble import RandomForestClassifier

def getAccuracy(individual):
    n_estimators, learning_rate, algorithm = getParams(individual)
    classifier = RandomForestClassifier(random_state=42,
                                         n_estimators=n_estimators
                                         )

    cv_results = model_selection.cross_val_score(classifier,
                                                 X,
                                                 y,
                                                 cv=kfold,
                                                 scoring='accuracy')
    return cv_results.mean()

In [None]:
# create the population operator to generate a list of individuals:
toolbox.register("populationCreator", tools.initRepeat, list, 
                 toolbox.IndividualCreator)
# fitness calculation
def classificationAccuracy(individual):
    return getAccuracy(individual),

toolbox.register("evaluate", classificationAccuracy)

In [None]:
# genetic operators:
from deap import gp
# YOUR TASK fill in your own operators

toolbox.register("mate", tools.cxOnePoint)
toolbox.register("mutate", tools.mutPolynomialBounded, eta=1.0, low=[1, 0.01, 0], up=[100, 1.0, 1], indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)

In [None]:
individual = [1.3177909433075996, 0.09159345360220497, 0.16128206481329388]
toolbox.mutate(individual)

([1.3177909433075996, 0.09159345360220497, 0.21955852361100792],)

In [None]:
from deap import algorithms

# Genetic Algorithm constants:
POPULATION_SIZE = 20
P_CROSSOVER = 0.9  # probability for crossover
P_MUTATION = 0.5   # probability for mutating an individual
MAX_GENERATIONS = 5
HALL_OF_FAME_SIZE = 5

# create initial population (generation 0):
population = toolbox.populationCreator(n=POPULATION_SIZE)
#print(population)
# prepare the statistics object:
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("max", np.max)
stats.register("avg", np.mean)

# define the hall-of-fame object:
hof = tools.HallOfFame(HALL_OF_FAME_SIZE)

# perform the Genetic Algorithm flow with hof feature added:
population, logbook = algorithms.eaSimple(population,
                                          toolbox,
                                          cxpb=P_CROSSOVER,
                                          mutpb=P_MUTATION,
                                          ngen=MAX_GENERATIONS,
                                          stats=stats,
                                          halloffame=hof,
                                          verbose=True)

# print best solution found:
print("- Best solution is: ")
print("params = ", hof.items[0])
print("Accuracy = %1.5f" % hof.items[0].fitness.values[0])

gen	nevals	max     	avg     
0  	20    	0.983788	0.977742
1  	19    	0.983788	0.98107 
2  	17    	0.983788	0.981359
3  	20    	0.983788	0.981618
4  	20    	0.983788	0.983152
5  	19    	0.983788	0.983268
- Best solution is: 
params =  [59.27814614645226, 0.9864352165244169, 0.3133794798008386]
Accuracy = 0.98379
