In [None]:
!pip install deap
!pip install category_encoders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


**Dataset:** https://archive.ics.uci.edu/ml/datasets/Car+Evaluation

**Algorithm:** AdaBoostClassifier

In [None]:
import numpy as np
from pandas import read_csv

# UCI Car Evaluation data set, fetched over HTTPS.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data'

col_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
# The file has no header row, so the column names are supplied at read time.
data = read_csv(url, header=None, usecols=range(0, 7), names=col_names)

# Features: the first six columns.
X = data.iloc[:, 0:6]
# Target: the last column as a 1-D Series. Selecting it with a slice (6:7)
# would yield a one-column DataFrame, which makes scikit-learn's LabelEncoder
# emit a DataConversionWarning when it is fitted on it.
y = data.iloc[:, 6]

X

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,vhigh,vhigh,2,2,small,low
1,vhigh,vhigh,2,2,small,med
2,vhigh,vhigh,2,2,small,high
3,vhigh,vhigh,2,2,med,low
4,vhigh,vhigh,2,2,med,med
...,...,...,...,...,...,...
1723,low,low,5more,more,med,med
1724,low,low,5more,more,med,high
1725,low,low,5more,more,big,low
1726,low,low,5more,more,big,med


In [None]:
import category_encoders as ce
from sklearn.preprocessing import LabelEncoder

# Replace the string categories of all six feature columns with integer codes.
categorical_cols = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
encoder = ce.OrdinalEncoder(cols=categorical_cols)
encoder.fit(X)
X = encoder.transform(X)

# Replace the class labels with integer ids.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)


  y = column_or_1d(y, warn=True)


In [None]:
# Display the integer-encoded class labels produced by label_encoder.
y

array([2, 2, 2, ..., 2, 1, 3])

In [None]:
# Candidate values for each AdaBoost hyper-parameter.
hparams = dict(
    # 10.0, 20.0, ..., 100.0 (kept as floats)
    n_estimators=[float(count) for count in range(10, 101, 10)],
    # ten values spaced evenly on a log scale between 0.01 and 1
    learning_rate=np.logspace(-2, 0, num=10, base=10),
    algorithm=['SAMME', 'SAMME.R'],
)

In [None]:
from deap import base
from deap import creator
from deap import tools
import random

# Single-objective fitness; the positive weight selects a maximizing strategy.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))

# An individual is a plain list of genes carrying a FitnessMax fitness.
creator.create(
    "Individual",
    list,
    fitness=creator.FitnessMax,
)




In [None]:
toolbox = base.Toolbox()

# One uniform random-float generator per hyper-parameter gene:
#   h1 in [1, 100], h2 in [0.01, 1.0], h3 in [0, 1]
for gene_name, (lower, upper) in (
    ("h1", (1, 100)),
    ("h2", (0.01, 1.0)),
    ("h3", (0, 1)),
):
    toolbox.register(gene_name, random.uniform, lower, upper)

# An individual is built by calling each gene generator once, in order.
gene_generators = (toolbox.h1, toolbox.h2, toolbox.h3)
toolbox.register(
    "IndividualCreator",
    tools.initCycle,
    creator.Individual,
    gene_generators,
    n=1,
)

In [None]:
# Smoke-test IndividualCreator: draw ten random individuals, one per line.
print(*(toolbox.IndividualCreator() for _ in range(10)), sep="\n")

[67.08138863560598, 0.39440861741965244, 0.2269975077027686]
[85.2420586963402, 0.49651723618302557, 0.05496980867659795]
[92.26925180359241, 0.014948287042396613, 0.8146946113629387]
[22.050109183255604, 0.37239171471653304, 0.0932161824795984]
[93.10128698159133, 0.3475774222209567, 0.8833567265159428]
[51.95895346819355, 0.5089339827504537, 0.7279777200949911]
[77.50887486966174, 0.6054641091425975, 0.8800529645492041]
[28.411001042087545, 0.804285395203299, 0.6034958080717847]
[65.35040241555939, 0.17378136504715253, 0.17474435043602565]
[50.18387085252724, 0.6750059458789942, 0.7268920447394271]


In [None]:
def getParams(individual):
    """Decode a GA individual into AdaBoost hyper-parameters.

    Args:
        individual: sequence of three numbers, in order: the n_estimators
            gene, the learning-rate gene and the algorithm-selector gene.

    Returns:
        A tuple ``(n_est, lrate, algo)`` where ``n_est`` is an int >= 1,
        ``lrate`` is the second gene unchanged, and ``algo`` is either
        ``'SAMME'`` or ``'SAMME.R'``.
    """
    algorithm_names = ['SAMME', 'SAMME.R']
    # NOTE(review): recent scikit-learn releases appear to deprecate 'SAMME.R';
    # confirm against the installed version.

    # Never decode to zero or a negative number of estimators.
    n_est = max(1, round(individual[0]))
    lrate = individual[1]
    # Clamp the rounded selector into the valid index range: a value above 1
    # would raise IndexError and a negative one would silently wrap around.
    algo_index = min(max(round(individual[2]), 0), len(algorithm_names) - 1)
    algo = algorithm_names[algo_index]
    return n_est, lrate, algo

In [None]:
from sklearn import model_selection
from sklearn.ensemble import AdaBoostClassifier

# Shuffled 10-fold split with a fixed seed, shared by every fitness evaluation.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=42)


def getAccuracy(individual):
    """Return the mean cross-validated accuracy for one individual.

    The individual is decoded with getParams, an AdaBoostClassifier is built
    from the decoded hyper-parameters, and it is scored on the module-level
    X / y using the shared kfold splitter.
    """
    n_estimators, learning_rate, algorithm = getParams(individual)
    model = AdaBoostClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        algorithm=algorithm,
        random_state=42,
    )
    fold_scores = model_selection.cross_val_score(
        model, X, y, scoring='accuracy', cv=kfold
    )
    return fold_scores.mean()

In [None]:
# Population factory: a list filled by repeated calls to IndividualCreator.
toolbox.register(
    "populationCreator",
    tools.initRepeat,
    list,
    toolbox.IndividualCreator,
)


def classificationAccuracy(individual):
    """Fitness function: the accuracy of the individual as a one-element tuple."""
    accuracy = getAccuracy(individual)
    return (accuracy,)


# Use the accuracy-based fitness as the toolbox's evaluation operator.
toolbox.register("evaluate", classificationAccuracy)

In [None]:
# genetic operators:
from deap import gp
# Per-gene bounds, in gene order: n_estimators, learning rate, algorithm selector.
gene_lower_bounds = [1, 0.01, 0]
gene_upper_bounds = [100, 1.0, 1]

# Crossover: one-point crossover between two parents.
toolbox.register("mate", tools.cxOnePoint)
# Mutation: bounded polynomial mutation that keeps every gene within its bounds.
toolbox.register(
    "mutate",
    tools.mutPolynomialBounded,
    low=gene_lower_bounds,
    up=gene_upper_bounds,
    eta=1.0,
    indpb=0.2,
)
# Selection: tournament selection with tournaments of size three.
toolbox.register("select", tools.selTournament, tournsize=3)

In [None]:
# Smoke-test the mutation operator on a hand-written individual; the cell
# output is the one-element tuple returned by the operator.
individual = [
    1.3177909433075996,
    0.09159345360220497,
    0.16128206481329388,
]
toolbox.mutate(individual)

([1.3177909433075996, 0.17656459506101713, 0.16128206481329388],)

In [None]:
from deap import algorithms

# Genetic Algorithm constants:
POPULATION_SIZE = 20
P_CROSSOVER = 0.9  # probability for crossover
P_MUTATION = 0.5   # probability for mutating an individual
MAX_GENERATIONS = 5
HALL_OF_FAME_SIZE = 5
RANDOM_SEED = 42   # seed for Python's random module

# Seed the RNG right before the population is created so that the search is
# reproducible no matter how many random numbers earlier cells consumed
# (the gene generators registered on the toolbox use random.uniform).
random.seed(RANDOM_SEED)

# create initial population (generation 0):
population = toolbox.populationCreator(n=POPULATION_SIZE)

# prepare the statistics object (max and mean fitness per generation):
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("max", np.max)
stats.register("avg", np.mean)

# define the hall-of-fame object that keeps the best individuals seen so far:
hof = tools.HallOfFame(HALL_OF_FAME_SIZE)

# perform the Genetic Algorithm flow with hof feature added:
population, logbook = algorithms.eaSimple(population,
                                          toolbox,
                                          cxpb=P_CROSSOVER,
                                          mutpb=P_MUTATION,
                                          ngen=MAX_GENERATIONS,
                                          stats=stats,
                                          halloffame=hof,
                                          verbose=True)

# print best solution found (hof.items[0] is the raw, undecoded gene list):
print("- Best solution is: ")
print("params = ", hof.items[0])
print("Accuracy = %1.5f" % hof.items[0].fitness.values[0])

gen	nevals	max     	avg    
0  	20    	0.854688	0.80913
1  	16    	0.854688	0.838406
2  	15    	0.855851	0.846388
3  	19    	0.857037	0.848186
4  	17    	0.85818 	0.853594
5  	19    	0.85818 	0.854145
- Best solution is: 
params =  [81.77321290480441, 0.9404625328677314, 0.30883478793089303]
Accuracy = 0.85818
