In [259]:
import random
import operator
import itertools

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp

In [260]:
# Load the cleaned training data, using the passenger id as the row index.
df = pd.read_csv("clean_train_data.csv", index_col="PassengerId")

# Target labels are kept apart from the feature matrix.
truth = df["Survived"]

# Features: every column except the target, cast to float.
X = df.drop(columns="Survived").astype(float)
X.head()

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Deck
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,3.0,0.0,22.0,1.0,0.0,7.25,2.0,3.0
2,1.0,1.0,38.0,1.0,0.0,71.2833,0.0,0.0
3,3.0,1.0,26.0,0.0,0.0,7.925,2.0,3.0
4,1.0,1.0,35.0,1.0,0.0,53.1,2.0,0.0
5,3.0,0.0,35.0,0.0,0.0,8.05,2.0,3.0


In [261]:
# create fitness for MO: FP and FN
# Two objectives (false positives, false negatives as returned by eval_function);
# both weights are -1.0, so DEAP treats lower values as better for each.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,-1.0))
# An individual is a GP primitive tree that carries the fitness class above.
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

In [262]:
# Helper functions intended for use as GP primitives
def divide_by_zero(a, b):
    """Protected division: return ``a / b``, or 0 when ``b`` equals zero."""
    return 0 if b == 0 else a / b

In [263]:
def if_then_else(input, output1, output2):
    """Return ``output1`` when ``input`` is truthy, otherwise ``output2``."""
    return output1 if input else output2

In [264]:
# Convert a float to a bool (True only for strictly positive values)
def float_to_bool(f):
    """Map a number to a boolean: True only when it is strictly positive."""
    return 0 < f

In [278]:
# Seed Python's RNG so tree generation below is repeatable.
random.seed(25)

# Strongly typed primitive set: 8 float inputs (ARG0..ARG7) -> one bool output.
pset = gp.PrimitiveSetTyped("main", itertools.repeat(float, 8), bool)

# Arithmetic: float x float -> float.
pset.addPrimitive(np.add, [float, float], float)
pset.addPrimitive(np.subtract, [float, float], float)
pset.addPrimitive(np.multiply, [float, float], float)
# pset.addPrimitive(divide_by_zero, [float, float], float)

# Boolean logic: bool -> bool.
pset.addPrimitive(operator.and_, [bool, bool], bool)
pset.addPrimitive(operator.or_, [bool, bool], bool)
pset.addPrimitive(operator.not_, [bool], bool)
# pset.addPrimitive(operator.xor, [bool, bool], bool)
# pset.addPrimitive(float_to_bool, [float], bool)

# Bridges between the two types: comparisons (float -> bool) and a
# conditional (bool selects between two floats).
pset.addPrimitive(operator.lt, [float, float], bool)
pset.addPrimitive(operator.eq, [float, float], bool)
pset.addPrimitive(if_then_else, [bool, float, float], float)

# Boolean constants. These are declared with return type ``bool``, so register
# real bools rather than the ints 0/1: that keeps the typed set consistent and
# lets and_/or_ (bitwise operators) yield bools instead of ints.
pset.addTerminal(False, bool)
pset.addTerminal(True, bool)

In [279]:
# create toolbox
toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=3, max_=3)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)

In [294]:
def eval_function(individual, points, pset):
    """Score a GP individual by its false positives and false negatives.

    Args:
        individual: GP tree/expression to compile with ``gp.compile``.
        points: Sequence of feature rows (e.g. ``X.values``); the first 8
            values of each row are passed positionally to the compiled function.
        pset: Primitive set used to compile ``individual``.

    Returns:
        Tuple ``(fp, fn)`` counted against the module-level ``truth`` labels.
    """
    func = gp.compile(expr=individual, pset=pset)
    # Normalise each prediction to a plain 0/1 int so Python bools, numpy
    # bools and integer constants never end up mixed in one prediction list.
    predictions = [int(bool(func(*row[:8]))) for row in points]
    # Pin the label order: without ``labels`` the matrix collapses to 1x1 when
    # only one class occurs in both truth and predictions, and the four-way
    # unpacking below would raise.
    _, fp, fn, _ = confusion_matrix(truth, predictions, labels=[0, 1]).ravel()
    return fp, fn

In [295]:
# "evaluate": eval_function with the feature rows and primitive set pre-bound,
# so callers only pass the individual.
toolbox.register("evaluate", eval_function, points=X.values, pset=pset)
# "select": tournament selection with 3 contestants per tournament.
# NOTE(review): the fitness has two objectives; selTournament presumably ranks
# by plain fitness comparison rather than Pareto dominance - confirm this is intended.
toolbox.register("select", tools.selTournament, tournsize=3)
# "mate": one-point crossover between two trees.
toolbox.register("mate", gp.cxOnePoint)
# "expr_mut": generator (genFull, min_=0, max_=2) for the replacement subtrees used by mutation.
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
# "mutate": uniform mutation, using expr_mut to build the new subtree.
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

# Bloat control: apply a static height limit of 17 to the results of mate and mutate.
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))

In [304]:
# Sanity check: generate one random expression, then show it and its (fp, fn) score.
expr = gp.genHalfAndHalf(pset=pset, min_=3, max_=3)
tree = gp.PrimitiveTree(expr)
print(tree)
print(eval_function(tree, X.values, pset))

lt(multiply(multiply(ARG0, ARG0), multiply(ARG7, ARG4)), multiply(if_then_else(0, ARG1, ARG1), subtract(ARG3, ARG5)))
(0, 342)


In [305]:
def pareto_dominance(ind1, ind2):
    """Return True when ``ind1`` Pareto-dominates ``ind2`` (lower is better).

    ``ind1`` dominates when none of its fitness values is greater than the
    matching value of ``ind2`` and at least one is strictly smaller.
    """
    pairs = list(zip(ind1.fitness.values, ind2.fitness.values))
    # Any objective where ind1 is worse rules out dominance.
    if any(mine > theirs for mine, theirs in pairs):
        return False
    # Otherwise ind1 must be strictly better on at least one objective.
    return any(mine < theirs for mine, theirs in pairs)

In [306]:
# Build the initial population of 300 individuals.
pop = toolbox.population(n=300)

# Evaluate everyone first, then attach each score to its individual.
fitnesses = [toolbox.evaluate(candidate) for candidate in pop]
for ind, fit in zip(pop, fitnesses):
    ind.fitness.values = fit

# One extra, separately generated and evaluated individual.
a_given_individual = toolbox.individual()
a_given_individual.fitness.values = toolbox.evaluate(a_given_individual)