In [1]:
!pip install deap
!pip install category_encoders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


**Dataset:** https://archive.ics.uci.edu/ml/datasets/Car+Evaluation

**Algorithm:** MLP 

In [2]:
import numpy as np
from pandas import read_csv

url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data'

data = read_csv(url, header=None, usecols=range(0, 7))
col_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety','class']
data.columns = col_names
X = data.iloc[:, 0:6]
y = data.iloc[:, 6:7]

X

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,vhigh,vhigh,2,2,small,low
1,vhigh,vhigh,2,2,small,med
2,vhigh,vhigh,2,2,small,high
3,vhigh,vhigh,2,2,med,low
4,vhigh,vhigh,2,2,med,med
...,...,...,...,...,...,...
1723,low,low,5more,more,med,med
1724,low,low,5more,more,med,high
1725,low,low,5more,more,big,low
1726,low,low,5more,more,big,med


In [3]:
import category_encoders as ce
from sklearn.preprocessing import LabelEncoder

encoder = ce.OrdinalEncoder(cols=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'])
X = encoder.fit_transform(X)

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)


  y = column_or_1d(y, warn=True)


In [4]:
X

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,1,1,1,1,1,1
1,1,1,1,1,1,2
2,1,1,1,1,1,3
3,1,1,1,1,2,1
4,1,1,1,1,2,2
...,...,...,...,...,...,...
1723,4,4,4,3,2,2
1724,4,4,4,3,2,3
1725,4,4,4,3,3,1
1726,4,4,4,3,3,2


In [5]:
# Define the XGBoost parameters
params = {
    'alpha': [0.0001, 0.001, 0.01, 0.1],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd','adam'],
    'max_iter': [200, 400, 600, 800]
}

In [6]:
from deap import base
from deap import creator
from deap import tools
import random

# define a single objective, maximizing fitness strategy:
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# create the Individual class based on list:
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
# fill in the with random floats for each type of hyper parameter
toolbox.register("h1", random.randint,0,3)
toolbox.register("h2", random.randint,0,3)
toolbox.register("h3", random.randint,0,2)
toolbox.register("h4", random.randint,0,3)

toolbox.register("IndividualCreator",tools.initCycle,creator.Individual,
                 (toolbox.h1,toolbox.h2,toolbox.h3,toolbox.h4),n=1)

In [7]:
def getParams(individual):
    alpha = [0.0001, 0.001, 0.01, 0.1][round(individual[0])]
    activation =  ['identity', 'logistic', 'tanh', 'relu'][round(individual[1])] 
    solver = ['lbfgs', 'sgd','adam'][round(individual[2])]
    max_iter = [200, 400, 600, 800][round(individual[3])]
    return alpha, activation, solver,max_iter

In [8]:
# Test IndividualCreator
for i in range(10):
    a = toolbox.IndividualCreator()
    b = getParams(a)
    print(a, b)

[0, 3, 0, 2] (0.0001, 'relu', 'lbfgs', 600)
[0, 3, 1, 0] (0.0001, 'relu', 'sgd', 200)
[0, 3, 1, 0] (0.0001, 'relu', 'sgd', 200)
[2, 1, 2, 1] (0.01, 'logistic', 'adam', 400)
[2, 1, 1, 3] (0.01, 'logistic', 'sgd', 800)
[2, 1, 2, 2] (0.01, 'logistic', 'adam', 600)
[3, 2, 0, 2] (0.1, 'tanh', 'lbfgs', 600)
[1, 3, 0, 1] (0.001, 'relu', 'lbfgs', 400)
[0, 1, 1, 2] (0.0001, 'logistic', 'sgd', 600)
[3, 1, 1, 1] (0.1, 'logistic', 'sgd', 400)


In [9]:
from sklearn import model_selection
from sklearn.neural_network import MLPClassifier

kfold = model_selection.KFold(n_splits=10, random_state=42, shuffle=True)

def getAccuracy(individual):
    alpha, activation, solver,max_iter = getParams(individual)
    classifier = MLPClassifier(alpha=alpha,
                                  activation=activation,
                                  solver=solver,
                                  max_iter=max_iter)

    cv_results = model_selection.cross_val_score(classifier,
                                                 X,
                                                 y,
                                                 cv=kfold,
                                                 scoring='accuracy')
    return cv_results.mean()

In [10]:
import warnings
from sklearn.exceptions import ConvergenceWarning

# Suppress the ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [11]:
from sklearn import model_selection
from sklearn.neural_network import MLPClassifier
# Get the accuracy from default hyper-parameters setup

# objective = 'binary:logistic'
# eval_metric = None
# eta = 0.1
# max_depth = 3
kfold = model_selection.KFold(n_splits=10, random_state=42, shuffle=True)
classifier = MLPClassifier()
cv_results = model_selection.cross_val_score(classifier,
                                                 X,
                                                 y,
                                                 cv=kfold,
                                                 scoring='accuracy')

print('mean: ',cv_results.mean())

mean:  0.9403851324102701


In [12]:
# create the population operator to generate a list of individuals:
toolbox.register("populationCreator", tools.initRepeat, list, 
                 toolbox.IndividualCreator)
# fitness calculation
def classificationAccuracy(individual):
    return getAccuracy(individual),

toolbox.register("evaluate", classificationAccuracy)

# genetic operators:
toolbox.register("mate", tools.cxOnePoint)
toolbox.register("mutate", tools.mutPolynomialBounded, eta=1.0, low=[0, 0, 0, 0], up=[3, 3, 2, 3], indpb=0.3)
#toolbox.register("mutate", tools.mutFlipBit, indpb=0.3)
toolbox.register("select", tools.selTournament, tournsize=3)

In [14]:
individual = [2, 0, 0.41245413088983973, 3]
toolbox.mutate(individual)

([2, 0.0, 0.41245413088983973, 3],)

In [15]:
from deap import algorithms

# Genetic Algorithm constants:
POPULATION_SIZE = 20
P_CROSSOVER = 0.9  # probability for crossover
P_MUTATION = 0.5   # probability for mutating an individual
MAX_GENERATIONS = 5
HALL_OF_FAME_SIZE = 5

# create initial population (generation 0):
population = toolbox.populationCreator(n=POPULATION_SIZE)

# prepare the statistics object:
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("max", np.max)
stats.register("avg", np.mean)

# define the hall-of-fame object:
hof = tools.HallOfFame(HALL_OF_FAME_SIZE)

# perform the Genetic Algorithm flow with hof feature added:
population, logbook = algorithms.eaSimple(population,
                                          toolbox,
                                          cxpb=P_CROSSOVER,
                                          mutpb=P_MUTATION,
                                          ngen=MAX_GENERATIONS,
                                          stats=stats,
                                          halloffame=hof,
                                          verbose=True)

# print best solution found:
print("- Best solution is: ")
print("params = ", hof.items[0], getParams(hof.items[0]))
print("Accuracy = %1.5f" % hof.items[0].fitness.values[0])

gen	nevals	max     	avg     
0  	20    	0.994784	0.871799
1  	15    	0.994784	0.915791
2  	20    	0.997103	0.964954
3  	18    	0.998841	0.968976
4  	19    	0.999419	0.987989
5  	18    	0.999419	0.988944
- Best solution is: 
params =  [3.0, 2, 0.12528477442082253, 1] (0.1, 'tanh', 'lbfgs', 400)
Accuracy = 0.99942
