In [None]:
!pip install deap
!pip install category_encoders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting deap
  Downloading deap-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.9/139.9 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: deap
Successfully installed deap-1.3.3
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting category_encoders
  Downloading category_encoders-2.6.1-py2.py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/81.9 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: category_encoders
Successfully installed category_encoders-2.6.1


**Dataset:** https://archive.ics.uci.edu/ml/datasets/Car+Evaluation

**Algorithm:** SVM 

In [None]:
import numpy as np
from pandas import read_csv

# Source file for the UCI Car Evaluation data: six categorical attributes
# followed by the class label, with no header row.
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data'
col_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety','class']

data = read_csv(url, header=None, usecols=range(7))
data.columns = col_names

# The first six columns are the features; the remaining column is the target
# (kept as a one-column frame).
X = data.iloc[:, :6]
y = data.iloc[:, 6:]

X

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,vhigh,vhigh,2,2,small,low
1,vhigh,vhigh,2,2,small,med
2,vhigh,vhigh,2,2,small,high
3,vhigh,vhigh,2,2,med,low
4,vhigh,vhigh,2,2,med,med
...,...,...,...,...,...,...
1723,low,low,5more,more,med,med
1724,low,low,5more,more,med,high
1725,low,low,5more,more,big,low
1726,low,low,5more,more,big,med


In [None]:
import category_encoders as ce
from sklearn.preprocessing import LabelEncoder

# Replace every categorical feature value with an integer code.
encoder = ce.OrdinalEncoder(cols=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'])
X = encoder.fit_transform(X)

# LabelEncoder expects a 1-D target. `y` was sliced as a one-column frame, so
# flatten it first; passing the column vector directly triggers sklearn's
# DataConversionWarning (column_or_1d).
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(np.ravel(y))


  y = column_or_1d(y, warn=True)


In [None]:
X

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,1,1,1,1,1,1
1,1,1,1,1,1,2
2,1,1,1,1,1,3
3,1,1,1,1,2,1
4,1,1,1,1,2,2
...,...,...,...,...,...,...
1723,4,4,4,3,2,2
1724,4,4,4,3,2,3
1725,4,4,4,3,3,1
1726,4,4,4,3,3,2


In [None]:
# Candidate values for the SVM (svm.SVC) hyper-parameters explored by the search:
# regularisation strength C, kernel type, kernel coefficient gamma and the
# polynomial degree.
params = {
    'C': [0.1, 1.0, 10.0],
    'kernel': ['linear', 'poly', 'rbf'],
    'gamma': ['scale', 'auto'],
    'degree': [2, 3, 4]
}

In [None]:
from deap import base
from deap import creator
from deap import tools
import random

# Single-objective fitness; the positive weight means the GA maximizes it.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# An individual is a plain list that carries a FitnessMax `fitness` attribute.
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# One random gene generator per hyper-parameter. Each gene is later rounded
# and used as an index into that hyper-parameter's list of candidate values.
toolbox.register("h1", random.randint, 0, 2)   # gene 0: integer in 0..2
toolbox.register("h2", random.randint, 0, 2)   # gene 1: integer in 0..2
toolbox.register("h3", random.uniform, 0, 1)   # gene 2: float in [0, 1]
toolbox.register("h4", random.randint, 0, 2)   # gene 3: integer in 0..2

# Build one individual by calling the four generators once each, in order.
toolbox.register(
    "IndividualCreator",
    tools.initCycle,
    creator.Individual,
    (toolbox.h1, toolbox.h2, toolbox.h3, toolbox.h4),
    n=1,
)

In [None]:
def getParams(individual):
    """Decode a GA individual into SVC hyper-parameters.

    Each gene is rounded to the nearest integer and used as an index into the
    list of candidate values for its hyper-parameter. The index is clamped to
    the valid range, so an out-of-range gene maps to the nearest end of the
    list instead of raising IndexError (too large) or silently wrapping around
    to the end of the list (negative).

    Args:
        individual: sequence of four numbers, in the order
            [C gene, kernel gene, gamma gene, degree gene].

    Returns:
        Tuple ``(C, kernel, gamma, degree)``.
    """
    def _pick(options, gene):
        # Clamp the rounded gene into [0, len(options) - 1] before indexing.
        index = min(max(round(gene), 0), len(options) - 1)
        return options[index]

    C = _pick([0.1, 1.0, 10.0], individual[0])
    kernel = _pick(['linear', 'poly', 'rbf'], individual[1])
    gamma = _pick(['scale', 'auto'], individual[2])
    degree = _pick([2, 3, 4], individual[3])
    return C, kernel, gamma, degree

In [None]:
# Sanity check: sample ten individuals and show each next to its decoded
# hyper-parameters.
for _ in range(10):
    candidate = toolbox.IndividualCreator()
    print(candidate, getParams(candidate))

[0, 1, 0.6042249694034572, 1] (0.1, 'poly', 'auto', 3)
[0, 1, 0.973079223105162, 2] (0.1, 'poly', 'auto', 4)
[2, 2, 0.9811830445140468, 0] (10.0, 'rbf', 'auto', 2)
[0, 1, 0.10160878170128862, 0] (0.1, 'poly', 'scale', 2)
[0, 1, 0.9077939718951871, 2] (0.1, 'poly', 'auto', 4)
[0, 2, 0.43262038382126944, 1] (0.1, 'rbf', 'scale', 3)
[1, 1, 0.3757373064485461, 0] (1.0, 'poly', 'scale', 2)
[0, 1, 0.5223381074617297, 1] (0.1, 'poly', 'auto', 3)
[0, 0, 0.3226771927564126, 2] (0.1, 'linear', 'scale', 4)
[0, 0, 0.5790301880549379, 1] (0.1, 'linear', 'auto', 3)


In [None]:
from sklearn import model_selection
from sklearn import svm

# Shuffled 10-fold split with a fixed seed, shared by every evaluation.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=42)


def getAccuracy(individual):
    """Return the mean cross-validated accuracy of the SVC encoded by `individual`.

    The individual is decoded with `getParams`, an `svm.SVC` is built from the
    resulting hyper-parameters, and it is scored on the module-level `X` / `y`
    using the module-level `kfold` splitter.
    """
    C, kernel, gamma, degree = getParams(individual)
    model = svm.SVC(C=C, kernel=kernel, gamma=gamma, degree=degree)
    fold_scores = model_selection.cross_val_score(
        model, X, y, cv=kfold, scoring='accuracy'
    )
    return fold_scores.mean()

In [None]:
# Baseline: mean cross-validated accuracy of an SVC constructed with no
# arguments, i.e. with the library's default hyper-parameters.
kfold = model_selection.KFold(n_splits=10, random_state=42, shuffle=True)
classifier = svm.SVC()
cv_results = model_selection.cross_val_score(classifier,
                                                 X,
                                                 y,
                                                 cv=kfold,
                                                 scoring='accuracy')

print('mean: ',cv_results.mean())

mean:  0.9617993009813146


In [None]:
# Population factory: a list of individuals produced by IndividualCreator.
toolbox.register(
    "populationCreator",
    tools.initRepeat,
    list,
    toolbox.IndividualCreator,
)


def classificationAccuracy(individual):
    """Fitness function: the individual's accuracy wrapped in a 1-tuple."""
    return (getAccuracy(individual),)


toolbox.register("evaluate", classificationAccuracy)

# Genetic operators: one-point crossover, bounded polynomial mutation (one
# low/up bound per gene) and tournament selection.
toolbox.register("mate", tools.cxOnePoint)
toolbox.register(
    "mutate",
    tools.mutPolynomialBounded,
    eta=1.0,
    low=[0, 0, 0, 0],
    up=[2, 2, 1, 2],
    indpb=0.3,
)
toolbox.register("select", tools.selTournament, tournsize=3)

In [None]:
# Smoke-test the registered mutation operator on a hand-written individual.
# NOTE(review): the last gene (3) is above the upper bound of 2 that the
# "mutate" registration declares for that position — presumably a scratch
# value; confirm.
individual = [2, 0, 0.41245413088983973, 3]
toolbox.mutate(individual)

([2, 0, 0.41245413088983973, 3],)

In [None]:
from deap import algorithms

# Genetic Algorithm constants:
POPULATION_SIZE = 20
P_CROSSOVER = 0.9  # probability for crossover
P_MUTATION = 0.5   # probability for mutating an individual
MAX_GENERATIONS = 5
HALL_OF_FAME_SIZE = 5
RANDOM_SEED = 42   # fixes the GA's random choices so the run is reproducible

# Seed Python's RNG before any individual is sampled. Gene creation uses
# `random` directly, so without a seed every run explores a different
# population and the logged results cannot be reproduced.
random.seed(RANDOM_SEED)

# create initial population (generation 0):
population = toolbox.populationCreator(n=POPULATION_SIZE)

# prepare the statistics object (max / mean fitness per generation):
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("max", np.max)
stats.register("avg", np.mean)

# define the hall-of-fame object, keeping the best individuals seen so far:
hof = tools.HallOfFame(HALL_OF_FAME_SIZE)

# perform the Genetic Algorithm flow with hof feature added:
population, logbook = algorithms.eaSimple(population,
                                          toolbox,
                                          cxpb=P_CROSSOVER,
                                          mutpb=P_MUTATION,
                                          ngen=MAX_GENERATIONS,
                                          stats=stats,
                                          halloffame=hof,
                                          verbose=True)

# print best solution found (first hall-of-fame entry):
best = hof.items[0]
print("- Best solution is: ")
print("params = ", best, getParams(best))
print("Accuracy = %1.5f" % best.fitness.values[0])

gen	nevals	max     	avg     
0  	20    	0.980327	0.894753
1  	18    	0.980327	0.948112
2  	20    	0.982646	0.971265
3  	18    	0.981486	0.978385
4  	19    	0.981486	0.971383
5  	19    	0.982646	0.972798
- Best solution is: 
params =  [2, 1.6225309722282375, 0.25465031478034084, 1.3172288718571343] (10.0, 'rbf', 'scale', 3)
Accuracy = 0.98265
