In [2]:
from ponyge2_adapted_files import Grammar, ge
import algorithms
from os import path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from deap import creator, base, tools
from sklearn.model_selection import train_test_split
import math
import textwrap
import csv
import random
import json
import spacy

import warnings
warnings.filterwarnings("ignore")

In [3]:
# Name of the BNF grammar file to load.
GRAMMAR_FILE = "basic_english_grammar.bnf"

# NOTE(review): the directory below is an absolute, machine-specific path, so
# this cell only runs where that mount exists — consider making it configurable.
_grammar_path = path.join("/mnt/d/college_notes/internship/grape/grammars", GRAMMAR_FILE)

# Grammar object handed to population initialisation and to the evolutionary
# loop via their `bnf_grammar` argument.
BNF_GRAMMAR = Grammar(_grammar_path)

         Unit productions consume GE codons.


In [26]:
nlp = spacy.load("en_core_web_lg")

# Sentence every candidate phenotype is compared against.
TARGET_SENTENCE = "The monkey ate all my cookies"

# Parsed target Docs keyed by sentence text, so the (constant) target is pushed
# through the spaCy pipeline once instead of once per fitness evaluation.
_TARGET_DOC_CACHE = {}


def _target_doc(sentence=TARGET_SENTENCE):
    """Return the parsed Doc for ``sentence``, running ``nlp`` on first use only."""
    doc = _TARGET_DOC_CACHE.get(sentence)
    if doc is None:
        doc = _TARGET_DOC_CACHE[sentence] = nlp(sentence)
    return doc


def fitness_eval(individual, points=None):
    """Score an individual by how similar its phenotype is to the target sentence.

    Parameters
    ----------
    individual
        Object exposing ``phenotype`` (text, or ``None`` when no phenotype was
        produced) and an ``invalid`` flag.
    points
        Unused; presumably kept so the signature matches what the evolutionary
        loop passes to ``evaluate`` — TODO confirm against the caller.

    Returns
    -------
    tuple
        One-element tuple. ``(nan,)`` when the phenotype is ``None`` or the
        individual is flagged invalid; otherwise ``(-similarity,)``, the negated
        result of ``Doc.similarity`` between the target and the phenotype, so
        that minimising the fitness maximises similarity.
    """
    if individual.phenotype is None or individual.invalid:
        # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
        return np.nan,

    candidate = nlp(individual.phenotype)
    return -_target_doc().similarity(candidate),

In [27]:
# Single-objective fitness with weight -1.0, i.e. DEAP treats lower values as
# better. fitness_eval returns the negated similarity, so minimising it
# maximises similarity to the target sentence.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", ge.Individual, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# Population initialiser, bound to the Individual class created above.
toolbox.register("populationCreator", ge.initialisation_PI_Grow, creator.Individual)

# Evolutionary operators as (toolbox alias, callable, bound keyword arguments):
# fitness evaluation, tournament selection (size 3), one-point crossover and
# per-codon int-flip mutation.
_OPERATORS = (
    ("evaluate", fitness_eval, {}),
    ("select", ge.selTournament, {"tournsize": 3}),
    ("mate", ge.crossover_onepoint, {}),
    ("mutate", ge.mutation_int_flip_per_codon, {}),
)
for _alias, _operator, _bound_kwargs in _OPERATORS:
    toolbox.register(_alias, _operator, **_bound_kwargs)

In [33]:
# --- Experiment scale --------------------------------------------------------
N_RUNS = 32                # number of iterations of the main run loop
POPULATION_SIZE = 200      # `size` passed to the population initialiser
MAX_GENERATIONS = 50       # `ngen` passed to the evolutionary loop

# --- Variation probabilities -------------------------------------------------
P_CROSSOVER = 0.9          # forwarded as `cxpb`
P_MUTATION = 0.01          # forwarded as `mutpb`

# --- Elitism / archive -------------------------------------------------------
ELITE_SIZE = round(POPULATION_SIZE * 0.01)   # 1% of the population, forwarded as `elite_size`
HALL_OF_FAME_SIZE = 1      # capacity of the tools.HallOfFame kept per run

# --- Derivation-tree limits --------------------------------------------------
MIN_INIT_TREE_DEPTH = 6    # forwarded as `min_init_tree_depth`
MAX_INIT_TREE_DEPTH = 8    # forwarded as `max_init_tree_depth`
MAX_TREE_DEPTH = 8         # forwarded as `max_tree_depth`

# --- Genome mapping ----------------------------------------------------------
MAX_WRAPS = 0              # forwarded as `max_wraps`
CODON_SIZE = 255           # forwarded as `codon_size`

In [34]:
# Cross-run accumulators: the main run loop appends one per-generation series
# to each of these per run. They are created once here, so re-running only the
# run-loop cell keeps growing them.

# Fitness statistics (max / avg / min / std), one series per run.
maxListFitness, avgListFitness, minListFitness, stdListFitness = [], [], [], []

# Genome-length statistics, one series per run. In the visible part of the
# notebook nothing is ever appended to minListSize or stdListSize.
maxListSize, avgListSize, minListSize, stdListSize, bestListSize = [], [], [], [], []

In [35]:
# population = toolbox.populationCreator(size=POPULATION_SIZE, 
#                                             bnf_grammar=BNF_GRAMMAR, 
#                                             min_init_tree_depth=MIN_INIT_TREE_DEPTH,
#                                             max_init_tree_depth=MAX_INIT_TREE_DEPTH,
#                                             max_tree_depth=MAX_TREE_DEPTH, 
#                                             max_wraps=MAX_WRAPS,
#                                             codon_size=CODON_SIZE)

In [36]:
# for ind in population:
#     print(ind.phenotype)
#     print("depth", ind.depth)
#     print("\n")

In [37]:
for r in range(N_RUNS):
    # Fresh generation-0 population for this run.
    population = toolbox.populationCreator(
        size=POPULATION_SIZE,
        bnf_grammar=BNF_GRAMMAR,
        min_init_tree_depth=MIN_INIT_TREE_DEPTH,
        max_init_tree_depth=MAX_INIT_TREE_DEPTH,
        max_tree_depth=MAX_TREE_DEPTH,
        max_wraps=MAX_WRAPS,
        codon_size=CODON_SIZE,
    )

    # Archive of the best individual(s) seen during this run.
    hof = tools.HallOfFame(HALL_OF_FAME_SIZE)

    # Per-generation fitness statistics. The nan-aware reducers skip the NaN
    # fitness that fitness_eval assigns to invalid individuals.
    stats = tools.Statistics(key=lambda individual: individual.fitness.values)
    for stat_name, reducer in (
        ("avg", np.nanmean),
        ("std", np.nanstd),
        ("min", np.nanmin),
        ("max", np.nanmax),
    ):
        stats.register(stat_name, reducer)

    # Progress marker (r is zero-based).
    print("\n\nCurrently on run", r, "of",N_RUNS)

    # Run the Grammatical Evolution loop with elitism.
    population, logbook = algorithms.ge_eaSimpleWithElitism(
        population, toolbox,
        cxpb=P_CROSSOVER, mutpb=P_MUTATION,
        ngen=MAX_GENERATIONS, elite_size=ELITE_SIZE,
        bnf_grammar=BNF_GRAMMAR, codon_size=CODON_SIZE,
        max_tree_depth=MAX_TREE_DEPTH, max_wraps=MAX_WRAPS,
        points_train=[None, None],
        points_test=[None, None],
        stats=stats, halloffame=hof, verbose=True,
    )

    # Pull the per-generation series out of the logbook.
    (max_fitness_values, mean_fitness_values,
     min_fitness_values, std_fitness_values) = logbook.select("max", "avg", "min", "std")
    best_ind_length, avg_length, max_length = logbook.select(
        "best_ind_length", "avg_length", "max_length"
    )

    # Record this run's series in the cross-run accumulators.
    maxListFitness.append(max_fitness_values)
    avgListFitness.append(mean_fitness_values)
    minListFitness.append(min_fitness_values)
    stdListFitness.append(std_fitness_values)

    maxListSize.append(max_length)
    avgListSize.append(avg_length)
    bestListSize.append(best_ind_length)

    # Report the best individual of this run.
    champion = hof.items[0]
    best = champion.phenotype
    genome_length = len(champion.genome)
    print("Best individual: \n","\n".join(textwrap.wrap(best,80)))
    print("\nTraining Fitness: ", champion.fitness.values[0])
    print("Depth: ", champion.depth)
    print("Length of the genome: ", genome_length)
    print(f'Used portion of the genome: {champion.used_codons/genome_length:.2f}')



Currently on run 0 of 32
gen	invalid	avg      	std      	min      	max      	fitness_test	best_ind_length	avg_length	max_length	selection_time	generation_time
0  	0      	-0.679758	0.0616199	-0.834228	-0.514908	-0.834228   	16             	19.855    	34        	0             	3.47662        
1  	51     	-0.703381	0.0674516	-0.854207	-0.523409	-0.854207   	21             	19        	43        	0.0027163     	2.70903        
2  	35     	-0.743906	0.0623868	-0.858605	-0.563876	-0.858605   	15             	20.865    	47        	0.00137234    	2.14543        
3  	22     	-0.779914	0.0491618	-0.886138	-0.601595	-0.886138   	20             	21.83     	58        	0.00136209    	2.27289        
4  	25     	-0.789456	0.0600592	-0.891873	-0.54614 	-0.891873   	15             	23.36     	63        	0.00106931    	2.34212        
5  	25     	-0.799962	0.0561908	-0.891873	-0.6345  	-0.891873   	15             	24.4      	57        	0.00122809    	2.28532        
6  	24     	-0.818138	0.0503474	-0.