In [1]:
import numpy as np
import pandas as pd
import random
import time

from deap import base, creator, tools, algorithms
from scipy.stats import bernoulli
from bitstring import BitArray

import parameters
import ner
import sentence_tagger

Using cuDNN version 6021 on context None
Mapped name None to device cuda: GeForce GTX 1070 (0000:01:00.0)


In [2]:
def replace(individual, indexes, value):
    """Overwrite a bit field of ``individual`` with the binary form of ``value``.

    ``indexes`` is unpacked into ``range``; for a ``(start, stop)`` pair the
    field is ``stop - start`` bits wide and ``value`` is written zero-padded,
    most significant bit first. ``individual`` is modified in place and
    nothing is returned.

    Raises ``AssertionError`` when the binary representation of ``value`` does
    not have exactly one digit per position in the interval (for instance when
    ``value`` needs more bits than the field can hold).
    """
    field_width = indexes[1] - indexes[0]
    # Nested replacement field builds the same spec as '0<width>b'.
    bit_string = '{0:0{1}b}'.format(value, field_width)
    positions = list(range(*indexes))
    assert len(positions) == len(bit_string), 'Binary representation must be the same size as the indexes interval'
    for offset, position in enumerate(positions):
        individual[position] = int(bit_string[offset])

In [3]:
def check_and_replace_if_needed(individual, indexes, min_value, max_value):
    """Clamp the unsigned integer held in a bit field of ``individual``.

    The bits selected by ``slice(*indexes)`` are read as an unsigned integer.
    If that integer is below ``min_value`` the field is rewritten with
    ``min_value``; if it is above ``max_value`` it is rewritten with
    ``max_value``. Every adjustment is reported with a print. Values already
    inside the bounds leave the individual untouched.
    """
    current = BitArray(individual[slice(*indexes)]).uint
    if current < min_value:
        message, bound = 'adjusting individual to lower bound, switching', min_value
    elif current > max_value:
        message, bound = 'adjusting individual to upper bound, switching', max_value
    else:
        # Within range: nothing to repair.
        return
    print(message, current, 'at position', indexes, 'for', bound)
    replace(indexes=indexes, individual=individual, value=bound)

In [4]:
def adjust_individual(individual, min_char_lstm_dim=10, max_char_lstm_dim=50, min_word_lstm_dim=50, max_word_lstm_dim=200):
    """Force both LSTM-dimension bit fields of ``individual`` into their bounds.

    Bits 2-7 (the character LSTM dimension) are clamped to
    ``[min_char_lstm_dim, max_char_lstm_dim]`` and bits 9-16 (the word LSTM
    dimension) to ``[min_word_lstm_dim, max_word_lstm_dim]``. The individual is
    repaired in place.
    """
    # (bit range, lower bound, upper bound) for every bounded field, in the
    # order in which they are checked.
    bounded_fields = (
        ((2, 8), min_char_lstm_dim, max_char_lstm_dim),
        ((9, 17), min_word_lstm_dim, max_word_lstm_dim),
    )
    for bit_range, lower_bound, upper_bound in bounded_fields:
        check_and_replace_if_needed(individual=individual,
                                    indexes=bit_range,
                                    min_value=lower_bound,
                                    max_value=upper_bound)

In [5]:
def check_individual():
    """Return a decorator that repairs whatever individual a callable produces.

    The decorated callable is invoked with the caller's arguments; its result
    is passed through ``adjust_individual`` (which edits it in place) and then
    handed back to the caller.
    """
    def decorator(generator):
        def repaired_generator(*args, **kargs):
            fresh = generator(*args, **kargs)
            adjust_individual(individual=fresh)
            return fresh
        return repaired_generator
    return decorator

In [6]:
def check_mate():
    """Return a decorator that repairs both children produced by a crossover.

    The decorated operator must return exactly two children (anything else
    fails on unpacking). Each child is passed through ``adjust_individual`` in
    order, and the operator's original return value is given back unchanged.
    """
    def decorator(crossover):
        def repaired_crossover(*args, **kargs):
            offspring = crossover(*args, **kargs)
            first_child, second_child = offspring
            for child in (first_child, second_child):
                adjust_individual(child)
            return offspring
        return repaired_crossover
    return decorator

In [7]:
def check_mutation():
    """Return a decorator that repairs the mutant produced by a mutation operator.

    The first element of the decorated operator's return value is passed
    through ``adjust_individual``; the return value itself is handed back
    unchanged.
    """
    def decorator(mutation):
        def repaired_mutation(*args, **kargs):
            outcome = mutation(*args, **kargs)
            adjust_individual(outcome[0])
            return outcome
        return repaired_mutation
    return decorator

In [8]:
def train_evaluate(ga_individual_solution):
    """Fitness function: train a NER model configured by one GA individual.

    The individual is turned into a training-parameter mapping by
    ``parameters.get_parameters_from_individual`` together with the fixed
    settings below, the decoded hyper-parameters are printed, and a ``ner.NER``
    model is trained for 5 epochs.

    Returns a one-element tuple holding the value returned by ``model.train``.
    """
    # Settings that are not part of the search and are identical for every individual.
    fixed_settings = dict(
        train='dataset/train.txt',
        dev='dataset/dev.txt',
        test='dataset/test.txt',
        tag_scheme='iob',
        char_dim=25,
        word_dim=100,
        pre_emb='embeddings/glove_s100.txt',
        all_emb=True,
        crf=True,
        dropout=0.5,
        lr_method='sgd-lr_.005',
        reload=False,
    )
    config = parameters.get_parameters_from_individual(
        ga_individual_solution=ga_individual_solution, **fixed_settings)
    print('Lower text?', config['lower'], 
          'Replace digits with zero?', config['zeros'], 
          'Dimension of LSTM for chars:', config['char_lstm_dim'],
          'Use bidirectional LSTM for chars?', config['char_bidirect'],
          'Dimension of LSTM for words:', config['word_lstm_dim'],
          'Use bidirectional LSTM for words?', config['word_bidirect'],
          'Dimension of capitalization features:', config['cap_dim'])
    tagger = ner.NER(parameters=config)
    score = tagger.train(n_epochs=5, verbose=False)
    # Wrapped in a tuple: the trailing comma of the original return is kept.
    return (score,)

In [9]:
# Genetic-algorithm run configuration: individuals per generation and number
# of generations to evolve.
population_size, num_generations = 16, 16
# Chromosome width in bits, written as the sum of the fields decoded later in
# this notebook: lower (1) + zeros (1) + char_lstm_dim (6) + char_bidirect (1)
# + word_lstm_dim (8) + word_bidirect (1) + cap_dim (1).
gene_length = 1 + 1 + 6 + 1 + 8 + 1 + 1

In [None]:
# Single-objective fitness to be maximised (weight +1.0); train_evaluate
# supplies the value as a one-element tuple.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# One gene = one Bernoulli(0.5) draw, i.e. a fair random bit.
toolbox.register('binary', bernoulli.rvs, 0.5)
toolbox.register('individual', tools.initRepeat, creator.Individual, toolbox.binary, n = gene_length)
# Freshly generated individuals are repaired so both LSTM-dimension fields
# start inside their allowed ranges.
toolbox.decorate('individual', check_individual())
toolbox.register('population', tools.initRepeat, list , toolbox.individual)

# Two-point crossover swaps a contiguous bit segment between the parents, which
# is well defined for a positional bit string. The previously registered
# cxOrdered is a permutation operator: it uses the gene values as list indices
# and DEAP documents its result as unpredictable for anything but sequences of
# indices, so on this 0/1 chromosome it scrambled the encoded fields.
toolbox.register('mate', tools.cxTwoPoint)
toolbox.decorate('mate', check_mate())
# NOTE(review): mutShuffleIndexes only moves existing bits between positions
# (DEAP describes it as usually applied to vectors of indices); consider
# tools.mutFlipBit with a small indpb for this binary encoding.
toolbox.register('mutate', tools.mutShuffleIndexes, indpb = 0.6)
toolbox.decorate('mutate', check_mutation())
toolbox.register('select', tools.selTournament, tournsize=2)
toolbox.register('evaluate', train_evaluate)

In [None]:
# Seed both random sources so the initial population and the GA's selection /
# crossover / mutation decisions repeat between runs: DEAP's operators draw
# from Python's `random` module, while the gene generator (scipy's
# bernoulli.rvs) draws from NumPy's global generator.
# NOTE(review): the model training inside train_evaluate may still be
# non-deterministic, so fitness values can differ between runs.
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)

population = toolbox.population(n = population_size)
# eaSimple returns the final population together with a logbook of the run.
r = algorithms.eaSimple(population, toolbox, cxpb = 0.4, mutpb = 0.1, ngen = num_generations, verbose = True)
print(time.ctime())

('adjusting individual to upper bound, switching', 240, 'at position', (9, 17), 'for', 200)
('adjusting individual to lower bound, switching', 30, 'at position', (9, 17), 'for', 50)
('adjusting individual to lower bound, switching', 11, 'at position', (9, 17), 'for', 50)
('adjusting individual to lower bound, switching', 30, 'at position', (9, 17), 'for', 50)
('adjusting individual to lower bound, switching', 32, 'at position', (9, 17), 'for', 50)
('adjusting individual to upper bound, switching', 62, 'at position', (2, 8), 'for', 50)
('adjusting individual to lower bound, switching', 42, 'at position', (9, 17), 'for', 50)
('adjusting individual to upper bound, switching', 54, 'at position', (2, 8), 'for', 50)
('adjusting individual to upper bound, switching', 54, 'at position', (2, 8), 'for', 50)
('adjusting individual to lower bound, switching', 48, 'at position', (9, 17), 'for', 50)
('adjusting individual to upper bound, switching', 63, 'at position', (2, 8), 'for', 50)
('adjusting 

In [None]:
# Select the single fittest individual of the population and decode its bits
# into the hyper-parameters they stand for.
best_individuals = tools.selBest(population,k = 1)

# Decoded values; they remain None if no individual was selected.
lower = zeros = None
char_lstm_dim = char_bidirect = None
word_lstm_dim = word_bidirect = None
cap_dim = None

for bi in best_individuals:
    # Single-bit fields.
    lower_bit, zeros_bit = bi[0], bi[1]
    char_bidirect_bit, word_bidirect_bit = bi[8], bi[17]
    cap_dim_bit = bi[18]
    # Multi-bit fields, read as unsigned integers below.
    char_lstm_dim_bits = BitArray(bi[2:8])
    word_lstm_dim_bits = BitArray(bi[9:17])

    lower, zeros = lower_bit == 1, zeros_bit == 1
    char_lstm_dim, word_lstm_dim = char_lstm_dim_bits.uint, word_lstm_dim_bits.uint
    char_bidirect, word_bidirect = char_bidirect_bit == 1, word_bidirect_bit == 1
    cap_dim = cap_dim_bit

    print('Lower text?', lower, 
          'Replace digits with zero?', zeros, 
          'Dimension of LSTM for chars:', char_lstm_dim,
          'Use bidirectional LSTM for chars?', char_bidirect,
          'Dimension of LSTM for words:', word_lstm_dim,
          'Use bidirectional LSTM for words?', word_bidirect,
          'Dimension of capitalization features:', cap_dim,
          'F1 score:', bi.fitness.values)

In [None]:
# Tag a sample sentence with the model stored under the given path. The path
# is hard-coded, so it has to be updated by hand to match the model to use.
model_path = 'models/tag_scheme=iob,lower=True,zeros=True,char_dim=25,char_lstm_dim=34,char_bidirect=True,word_dim=100,word_lstm_dim=200,word_bidirect=True,pre_emb=glove_s100.txt,all_emb=True,cap_dim=1,crf=True,dropout=0.5,lr_method=sgd-lr_.005'
sample_sentence = 'Este trabalho avalia o uso de Algoritmos Geneticos, pelo aluno Pedro Vitor Quinta de Castro, da Universidade Federal de Goias'
sentence_tagger.tag(model_path, sample_sentence)