In [15]:
import numpy as np
import pandas as pd
import random
import time

from deap import base, creator, tools, algorithms
from scipy.stats import bernoulli
from bitstring import BitArray

import parameters
import ner
import sentence_tagger

In [2]:
def replace(individual, indexes, value):
    binary_format = '{0:0' + str(indexes[1] - indexes[0])  + 'b}'
    binary_value = binary_format.format(value)
    index_list = list(range(*indexes))
    assert len(index_list) == len(binary_value), 'Binary representation must be the same size as the indexes interval'
    for (index, replacement) in zip(index_list, binary_value):
        individual[index] = int(replacement)

In [3]:
def check_and_replace_if_needed(individual, indexes, min_value, max_value):
    value_bits = BitArray(individual[slice(*indexes)])
    int_value = value_bits.uint
    if int_value < min_value:
        print('adjusting individual to lower bound, switching', int_value, 'at position', indexes, 'for', min_value)
        replace(indexes=indexes, individual=individual, value=min_value)
    elif int_value > max_value:
        print('adjusting individual to upper bound, switching', int_value, 'at position', indexes, 'for', max_value)
        replace(indexes=indexes, individual=individual, value=max_value)

In [4]:
def adjust_individual(individual, min_char_lstm_dim=10, max_char_lstm_dim=50, min_word_lstm_dim=50, max_word_lstm_dim=200):
    check_and_replace_if_needed(individual=individual, 
                                indexes=(2,8), 
                                min_value=min_char_lstm_dim, 
                                max_value=max_char_lstm_dim)
    check_and_replace_if_needed(individual=individual, 
                                indexes=(9,17), 
                                min_value=min_word_lstm_dim, 
                                max_value=max_word_lstm_dim)

In [5]:
def check_individual():
    def decorator(func):
        def wrapper(*args, **kargs):
            individual = func(*args, **kargs)
            adjust_individual(individual=individual)
            return individual
        return wrapper
    return decorator

In [6]:
def check_mate():
    def decorator(func):
        def wrapper(*args, **kargs):
            children = func(*args, **kargs)
            child1, child2 = children
            adjust_individual(child1)
            adjust_individual(child2)
            return children
        return wrapper
    return decorator

In [7]:
def check_mutation():
    def decorator(func):
        def wrapper(*args, **kargs):
            mutant = func(*args, **kargs)
            adjust_individual(mutant[0])
            return mutant
        return wrapper
    return decorator

In [8]:
def train_evaluate(ga_individual_solution):
    training_parameters = parameters.get_parameters_from_individual(ga_individual_solution=ga_individual_solution, 
                                                                    train='dataset/train.txt', 
                                                                    dev='dataset/dev.txt', 
                                                                    test='dataset/test.txt', 
                                                                    tag_scheme='iob', 
                                                                    char_dim=25, 
                                                                    word_dim=100, 
                                                                    pre_emb='embeddings/glove_s100.txt', 
                                                                    all_emb=True, 
                                                                    crf=True, 
                                                                    dropout=0.5, 
                                                                    lr_method='sgd-lr_.005',
                                                                    reload=False)
    print('Lower text?', training_parameters['lower'], 
          'Replace digits with zero?', training_parameters['zeros'], 
          'Dimension of LSTM for chars:', training_parameters['char_lstm_dim'],
          'Use bidirectional LSTM for chars?', training_parameters['char_bidirect'],
          'Dimension of LSTM for words:', training_parameters['word_lstm_dim'],
          'Use bidirectional LSTM for words?', training_parameters['word_bidirect'],
          'Dimension of capitalization features:', training_parameters['cap_dim'])
    model = ner.NER(parameters=training_parameters)
    f1_score = model.train(n_epochs=5, verbose=False)
    return f1_score,

In [9]:
population_size = 4
num_generations = 4
gene_length = 19

In [10]:
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register('binary', bernoulli.rvs, 0.5)
toolbox.register('individual', tools.initRepeat, creator.Individual, toolbox.binary, n = gene_length)
toolbox.decorate('individual', check_individual())
toolbox.register('population', tools.initRepeat, list , toolbox.individual)

toolbox.register('mate', tools.cxOrdered)
toolbox.decorate('mate', check_mate())
toolbox.register('mutate', tools.mutShuffleIndexes, indpb = 0.6)
toolbox.decorate('mutate', check_mutation())
toolbox.register('select', tools.selTournament, tournsize=2)
toolbox.register('evaluate', train_evaluate)

In [12]:
population = toolbox.population(n = population_size)
r = algorithms.eaSimple(population, toolbox, cxpb = 0.4, mutpb = 0.1, ngen = num_generations, verbose = False)
print(time.ctime())

('adjusting individual to lower bound, switching', 0, 'at position', (9, 17), 'for', 50)
('adjusting individual to upper bound, switching', 255, 'at position', (9, 17), 'for', 200)
('Lower text?', False, 'Replace digits with zero?', True, 'Dimension of LSTM for chars:', 37, 'Use bidirectional LSTM for chars?', False, 'Dimension of LSTM for words:', 50, 'Use bidirectional LSTM for words?', False, 'Dimension of capitalization features:', 0)
Model location: ./models/tag_scheme=iob,lower=False,zeros=True,char_dim=25,char_lstm_dim=37,char_bidirect=False,word_dim=100,word_lstm_dim=50,word_bidirect=False,pre_emb=glove_s100.txt,all_emb=True,cap_dim=0,crf=True,dropout=0.5,lr_method=sgd-lr_.005
Found 13639 unique words (62480 in total)
Loading pretrained embeddings from embeddings/glove_s100.txt...
Found 114 unique characters
Found 21 unique named entity tags
2955 / 529 / 854 sentences in train / dev / test.
Saving the mappings to disk...
Loading pretrained embeddings from embeddings/glove_s100.

Epoch 2 done. Average cost: 6.339011
('Starting epoch 3 at...', 'Fri Jan 26 09:39:57 2018')
processed 8831 tokens with 709 phrases; found: 664 phrases; correct: 298.
accuracy:  90.13%; precision:  44.88%; recall:  42.03%; FB1:  43.41
       ABSTRACCAO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
    ACONTECIMENTO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            COISA: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            LOCAL: precision:  38.06%; recall:  73.91%; FB1:  50.25  268
             OBRA: precision:  10.00%; recall:   1.92%; FB1:   3.23  10
      ORGANIZACAO: precision:  62.50%; recall:  12.50%; FB1:  20.83  8
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  47.40%; recall:  70.19%; FB1:  56.59  308
            TEMPO: precision:  67.86%; recall:  31.15%; FB1:  42.70  56
            VALOR: precision:  42.86%; recall:  14.63%; FB1:  21.82  14
7959/8831 (90.12569%)
Score on dev: 43.41000
proc

Epoch 0 done. Average cost: 12.230147
('Starting epoch 1 at...', 'Fri Jan 26 09:45:18 2018')
processed 8831 tokens with 709 phrases; found: 466 phrases; correct: 257.
accuracy:  89.48%; precision:  55.15%; recall:  36.25%; FB1:  43.74
       ABSTRACCAO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
    ACONTECIMENTO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            COISA: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            LOCAL: precision:  53.24%; recall:  53.62%; FB1:  53.43  139
             OBRA: precision:  33.33%; recall:   1.92%; FB1:   3.64  3
      ORGANIZACAO: precision:  77.78%; recall:  17.50%; FB1:  28.57  9
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  56.05%; recall:  66.83%; FB1:  60.96  248
            TEMPO: precision:  52.31%; recall:  27.87%; FB1:  36.36  65
            VALOR: precision: 100.00%; recall:   4.88%; FB1:   9.30  2
7902/8831 (89.48024%)
Score on dev: 43.74000
New b

Epoch 3 done. Average cost: 4.431551
('Starting epoch 4 at...', 'Fri Jan 26 09:50:14 2018')
processed 8831 tokens with 709 phrases; found: 680 phrases; correct: 411.
accuracy:  92.18%; precision:  60.44%; recall:  57.97%; FB1:  59.18
       ABSTRACCAO: precision:  51.85%; recall:  34.15%; FB1:  41.18  27
    ACONTECIMENTO: precision:  75.00%; recall:  24.00%; FB1:  36.36  8
            COISA: precision:  52.00%; recall:  35.14%; FB1:  41.94  25
            LOCAL: precision:  63.10%; recall:  76.81%; FB1:  69.28  168
             OBRA: precision:  28.57%; recall:   7.69%; FB1:  12.12  14
      ORGANIZACAO: precision:  29.63%; recall:  60.00%; FB1:  39.67  81
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  78.82%; recall:  76.92%; FB1:  77.86  203
            TEMPO: precision:  50.39%; recall:  52.46%; FB1:  51.41  127
            VALOR: precision:  74.07%; recall:  48.78%; FB1:  58.82  27
8140/8831 (92.17529%)
Score on dev: 59.18000


Epoch 1 done. Average cost: 8.785760
('Starting epoch 2 at...', 'Fri Jan 26 09:56:15 2018')
processed 8831 tokens with 709 phrases; found: 566 phrases; correct: 290.
accuracy:  89.68%; precision:  51.24%; recall:  40.90%; FB1:  45.49
       ABSTRACCAO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
    ACONTECIMENTO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            COISA: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            LOCAL: precision:  40.55%; recall:  74.64%; FB1:  52.55  254
             OBRA: precision:  16.67%; recall:   1.92%; FB1:   3.45  6
      ORGANIZACAO: precision:  29.31%; recall:  42.50%; FB1:  34.69  58
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  66.22%; recall:  70.67%; FB1:  68.37  222
            TEMPO: precision:  84.62%; recall:  18.03%; FB1:  29.73  26
            VALOR: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
7920/8831 (89.68407%)
Score on dev: 45.49000
New b

Epoch 4 done. Average cost: 4.496271
('Lower text?', True, 'Replace digits with zero?', False, 'Dimension of LSTM for chars:', 17, 'Use bidirectional LSTM for chars?', False, 'Dimension of LSTM for words:', 163, 'Use bidirectional LSTM for words?', False, 'Dimension of capitalization features:', 1)
Model location: ./models/tag_scheme=iob,lower=True,zeros=False,char_dim=25,char_lstm_dim=17,char_bidirect=False,word_dim=100,word_lstm_dim=163,word_bidirect=False,pre_emb=glove_s100.txt,all_emb=True,cap_dim=1,crf=True,dropout=0.5,lr_method=sgd-lr_.005
Found 12769 unique words (62480 in total)
Loading pretrained embeddings from embeddings/glove_s100.txt...
Found 123 unique characters
Found 21 unique named entity tags
2955 / 529 / 854 sentences in train / dev / test.
Saving the mappings to disk...
Loading pretrained embeddings from embeddings/glove_s100.txt...
Loaded 931381 pretrained embeddings.
931866 / 932886 (99.8907%) words have been initialized with pretrained embeddings.
931381 found di

Epoch 2 done. Average cost: 7.155009
('Starting epoch 3 at...', 'Fri Jan 26 10:05:26 2018')
processed 8831 tokens with 709 phrases; found: 524 phrases; correct: 296.
accuracy:  90.05%; precision:  56.49%; recall:  41.75%; FB1:  48.01
       ABSTRACCAO: precision: 100.00%; recall:   2.44%; FB1:   4.76  1
    ACONTECIMENTO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            COISA: precision:   0.00%; recall:   0.00%; FB1:   0.00  1
            LOCAL: precision:  54.65%; recall:  68.12%; FB1:  60.65  172
             OBRA: precision:   0.00%; recall:   0.00%; FB1:   0.00  4
      ORGANIZACAO: precision:  33.90%; recall:  50.00%; FB1:  40.40  59
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  63.05%; recall:  75.48%; FB1:  68.71  249
            TEMPO: precision:  63.16%; recall:  19.67%; FB1:  30.00  38
            VALOR: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
7952/8831 (90.04643%)
Score on dev: 48.01000
New b

Epoch 0 done. Average cost: 13.182392
('Starting epoch 1 at...', 'Fri Jan 26 10:10:38 2018')
processed 8831 tokens with 709 phrases; found: 277 phrases; correct: 176.
accuracy:  87.91%; precision:  63.54%; recall:  24.82%; FB1:  35.70
       ABSTRACCAO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
    ACONTECIMENTO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            COISA: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            LOCAL: precision:  61.26%; recall:  49.28%; FB1:  54.62  111
             OBRA: precision: 100.00%; recall:   1.92%; FB1:   3.77  1
      ORGANIZACAO: precision:  14.29%; recall:   2.50%; FB1:   4.26  7
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  67.09%; recall:  50.96%; FB1:  57.92  158
            TEMPO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            VALOR: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
7763/8831 (87.90624%)
Score on dev: 35.70000
New be

Epoch 3 done. Average cost: 5.000547
('Starting epoch 4 at...', 'Fri Jan 26 10:14:47 2018')
processed 8831 tokens with 709 phrases; found: 642 phrases; correct: 368.
accuracy:  91.65%; precision:  57.32%; recall:  51.90%; FB1:  54.48
       ABSTRACCAO: precision:  50.00%; recall:   7.32%; FB1:  12.77  6
    ACONTECIMENTO: precision:  50.00%; recall:   8.00%; FB1:  13.79  4
            COISA: precision:  50.00%; recall:  13.51%; FB1:  21.28  10
            LOCAL: precision:  67.59%; recall:  71.01%; FB1:  69.26  145
             OBRA: precision:  10.00%; recall:   3.85%; FB1:   5.56  20
      ORGANIZACAO: precision:  42.86%; recall:  52.50%; FB1:  47.19  49
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  56.09%; recall:  84.13%; FB1:  67.31  312
            TEMPO: precision:  70.67%; recall:  43.44%; FB1:  53.81  75
            VALOR: precision:  42.86%; recall:  21.95%; FB1:  29.03  21
8094/8831 (91.65440%)
Score on dev: 54.48000
Ne

Epoch 1 done. Average cost: 8.291795
('Starting epoch 2 at...', 'Fri Jan 26 10:19:50 2018')
processed 8831 tokens with 709 phrases; found: 473 phrases; correct: 281.
accuracy:  89.98%; precision:  59.41%; recall:  39.63%; FB1:  47.55
       ABSTRACCAO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
    ACONTECIMENTO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            COISA: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            LOCAL: precision:  50.00%; recall:  65.22%; FB1:  56.60  180
             OBRA: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
      ORGANIZACAO: precision:  40.00%; recall:  35.00%; FB1:  37.33  35
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  71.20%; recall:  65.38%; FB1:  68.17  191
            TEMPO: precision:  60.00%; recall:  31.97%; FB1:  41.71  65
            VALOR: precision: 100.00%; recall:   4.88%; FB1:   9.30  2
7946/8831 (89.97848%)
Score on dev: 47.55000
New b

Epoch 4 done. Average cost: 4.309516
('Lower text?', True, 'Replace digits with zero?', True, 'Dimension of LSTM for chars:', 34, 'Use bidirectional LSTM for chars?', True, 'Dimension of LSTM for words:', 200, 'Use bidirectional LSTM for words?', True, 'Dimension of capitalization features:', 1)
Model location: ./models/tag_scheme=iob,lower=True,zeros=True,char_dim=25,char_lstm_dim=34,char_bidirect=True,word_dim=100,word_lstm_dim=200,word_bidirect=True,pre_emb=glove_s100.txt,all_emb=True,cap_dim=1,crf=True,dropout=0.5,lr_method=sgd-lr_.005
Found 12383 unique words (62480 in total)
Loading pretrained embeddings from embeddings/glove_s100.txt...
Found 114 unique characters
Found 21 unique named entity tags
2955 / 529 / 854 sentences in train / dev / test.
Saving the mappings to disk...
Loading pretrained embeddings from embeddings/glove_s100.txt...
Loaded 931381 pretrained embeddings.
931389 / 932409 (99.8906%) words have been initialized with pretrained embeddings.
931381 found directly

Epoch 2 done. Average cost: 5.149817
('Starting epoch 3 at...', 'Fri Jan 26 10:30:13 2018')
processed 8831 tokens with 709 phrases; found: 597 phrases; correct: 375.
accuracy:  91.82%; precision:  62.81%; recall:  52.89%; FB1:  57.43
       ABSTRACCAO: precision:  53.85%; recall:  17.07%; FB1:  25.93  13
    ACONTECIMENTO: precision:  85.71%; recall:  24.00%; FB1:  37.50  7
            COISA: precision:  35.29%; recall:  32.43%; FB1:  33.80  34
            LOCAL: precision:  62.35%; recall:  73.19%; FB1:  67.33  162
             OBRA: precision:  44.44%; recall:   7.69%; FB1:  13.11  9
      ORGANIZACAO: precision:  40.82%; recall:  50.00%; FB1:  44.94  49
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  73.06%; recall:  76.92%; FB1:  74.94  219
            TEMPO: precision:  75.44%; recall:  35.25%; FB1:  48.04  57
            VALOR: precision:  46.81%; recall:  53.66%; FB1:  50.00  47
8109/8831 (91.82426%)
Score on dev: 57.43000
Ne

Epoch 0 done. Average cost: 11.946121
('Starting epoch 1 at...', 'Fri Jan 26 10:36:55 2018')
processed 8831 tokens with 709 phrases; found: 584 phrases; correct: 257.
accuracy:  88.91%; precision:  44.01%; recall:  36.25%; FB1:  39.75
       ABSTRACCAO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
    ACONTECIMENTO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            COISA: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            LOCAL: precision:  30.39%; recall:  67.39%; FB1:  41.89  306
             OBRA: precision:  25.00%; recall:   3.85%; FB1:   6.67  8
      ORGANIZACAO: precision:  53.85%; recall:  17.50%; FB1:  26.42  13
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  77.33%; recall:  55.77%; FB1:  64.80  150
            TEMPO: precision:  36.45%; recall:  31.97%; FB1:  34.06  107
            VALOR: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
7852/8831 (88.91405%)
Score on dev: 39.75000
New

Epoch 3 done. Average cost: 4.448172
('Starting epoch 4 at...', 'Fri Jan 26 10:42:03 2018')
processed 8831 tokens with 709 phrases; found: 647 phrases; correct: 420.
accuracy:  93.01%; precision:  64.91%; recall:  59.24%; FB1:  61.95
       ABSTRACCAO: precision:  83.33%; recall:  12.20%; FB1:  21.28  6
    ACONTECIMENTO: precision:  88.89%; recall:  32.00%; FB1:  47.06  9
            COISA: precision:  43.18%; recall:  51.35%; FB1:  46.91  44
            LOCAL: precision:  61.49%; recall:  77.54%; FB1:  68.59  174
             OBRA: precision:  18.18%; recall:   7.69%; FB1:  10.81  22
      ORGANIZACAO: precision:  41.67%; recall:  50.00%; FB1:  45.45  48
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  72.73%; recall:  80.77%; FB1:  76.54  231
            TEMPO: precision:  79.49%; recall:  50.82%; FB1:  62.00  78
            VALOR: precision:  77.14%; recall:  65.85%; FB1:  71.05  35
8214/8831 (93.01325%)
Score on dev: 61.95000
Ne

Epoch 1 done. Average cost: 6.931495
('Starting epoch 2 at...', 'Fri Jan 26 10:48:39 2018')
processed 8831 tokens with 709 phrases; found: 654 phrases; correct: 363.
accuracy:  91.50%; precision:  55.50%; recall:  51.20%; FB1:  53.26
       ABSTRACCAO: precision:  30.77%; recall:  19.51%; FB1:  23.88  26
    ACONTECIMENTO: precision:   0.00%; recall:   0.00%; FB1:   0.00  1
            COISA: precision:  25.00%; recall:   2.70%; FB1:   4.88  4
            LOCAL: precision:  56.18%; recall:  72.46%; FB1:  63.29  178
             OBRA: precision:   0.00%; recall:   0.00%; FB1:   0.00  6
      ORGANIZACAO: precision:  38.89%; recall:  52.50%; FB1:  44.68  54
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  58.33%; recall:  80.77%; FB1:  67.74  288
            TEMPO: precision:  69.57%; recall:  39.34%; FB1:  50.26  69
            VALOR: precision:  60.71%; recall:  41.46%; FB1:  49.28  28
8080/8831 (91.49587%)
Score on dev: 53.26000
New

Epoch 4 done. Average cost: 3.982195
('Lower text?', True, 'Replace digits with zero?', True, 'Dimension of LSTM for chars:', 34, 'Use bidirectional LSTM for chars?', True, 'Dimension of LSTM for words:', 200, 'Use bidirectional LSTM for words?', True, 'Dimension of capitalization features:', 1)
Model location: ./models/tag_scheme=iob,lower=True,zeros=True,char_dim=25,char_lstm_dim=34,char_bidirect=True,word_dim=100,word_lstm_dim=200,word_bidirect=True,pre_emb=glove_s100.txt,all_emb=True,cap_dim=1,crf=True,dropout=0.5,lr_method=sgd-lr_.005
Found 12383 unique words (62480 in total)
Loading pretrained embeddings from embeddings/glove_s100.txt...
Found 114 unique characters
Found 21 unique named entity tags
2955 / 529 / 854 sentences in train / dev / test.
Saving the mappings to disk...
Loading pretrained embeddings from embeddings/glove_s100.txt...
Loaded 931381 pretrained embeddings.
931389 / 932409 (99.8906%) words have been initialized with pretrained embeddings.
931381 found directly

Epoch 2 done. Average cost: 5.095902
('Starting epoch 3 at...', 'Fri Jan 26 11:00:43 2018')
processed 8831 tokens with 709 phrases; found: 716 phrases; correct: 412.
accuracy:  92.32%; precision:  57.54%; recall:  58.11%; FB1:  57.82
       ABSTRACCAO: precision:  51.72%; recall:  36.59%; FB1:  42.86  29
    ACONTECIMENTO: precision:  66.67%; recall:  24.00%; FB1:  35.29  9
            COISA: precision:  38.89%; recall:  18.92%; FB1:  25.45  18
            LOCAL: precision:  58.29%; recall:  73.91%; FB1:  65.18  175
             OBRA: precision:  21.43%; recall:  11.54%; FB1:  15.00  28
      ORGANIZACAO: precision:  42.50%; recall:  42.50%; FB1:  42.50  40
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  61.59%; recall:  85.58%; FB1:  71.63  289
            TEMPO: precision:  65.91%; recall:  47.54%; FB1:  55.24  88
            VALOR: precision:  57.50%; recall:  56.10%; FB1:  56.79  40
8153/8831 (92.32250%)
Score on dev: 57.82000
N

Epoch 0 done. Average cost: 11.972345
('Starting epoch 1 at...', 'Fri Jan 26 11:07:12 2018')
processed 8831 tokens with 709 phrases; found: 374 phrases; correct: 228.
accuracy:  89.25%; precision:  60.96%; recall:  32.16%; FB1:  42.11
       ABSTRACCAO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
    ACONTECIMENTO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            COISA: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            LOCAL: precision:  46.88%; recall:  54.35%; FB1:  50.34  160
             OBRA: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
      ORGANIZACAO: precision:  75.00%; recall:   7.50%; FB1:  13.64  4
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  71.70%; recall:  54.81%; FB1:  62.13  159
            TEMPO: precision:  68.75%; recall:  27.05%; FB1:  38.82  48
            VALOR: precision: 100.00%; recall:   7.32%; FB1:  13.64  3
7882/8831 (89.25377%)
Score on dev: 42.11000
New b

Epoch 3 done. Average cost: 4.433994
('Starting epoch 4 at...', 'Fri Jan 26 11:12:20 2018')
processed 8831 tokens with 709 phrases; found: 686 phrases; correct: 424.
accuracy:  92.78%; precision:  61.81%; recall:  59.80%; FB1:  60.79
       ABSTRACCAO: precision:  51.85%; recall:  34.15%; FB1:  41.18  27
    ACONTECIMENTO: precision:  70.00%; recall:  28.00%; FB1:  40.00  10
            COISA: precision:  51.85%; recall:  37.84%; FB1:  43.75  27
            LOCAL: precision:  52.29%; recall:  82.61%; FB1:  64.04  218
             OBRA: precision:  23.08%; recall:   5.77%; FB1:   9.23  13
      ORGANIZACAO: precision:  47.37%; recall:  45.00%; FB1:  46.15  38
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  72.05%; recall:  79.33%; FB1:  75.51  229
            TEMPO: precision:  72.09%; recall:  50.82%; FB1:  59.62  86
            VALOR: precision:  71.05%; recall:  65.85%; FB1:  68.35  38
8193/8831 (92.77545%)
Score on dev: 60.79000


Epoch 1 done. Average cost: 6.958063
('Starting epoch 2 at...', 'Fri Jan 26 11:18:53 2018')
processed 8831 tokens with 709 phrases; found: 605 phrases; correct: 346.
accuracy:  91.28%; precision:  57.19%; recall:  48.80%; FB1:  52.66
       ABSTRACCAO: precision:  50.00%; recall:   4.88%; FB1:   8.89  4
    ACONTECIMENTO: precision:   0.00%; recall:   0.00%; FB1:   0.00  1
            COISA: precision:  50.00%; recall:   8.11%; FB1:  13.95  6
            LOCAL: precision:  50.48%; recall:  76.09%; FB1:  60.69  208
             OBRA: precision:  19.05%; recall:   7.69%; FB1:  10.96  21
      ORGANIZACAO: precision:  38.18%; recall:  52.50%; FB1:  44.21  55
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  72.73%; recall:  69.23%; FB1:  70.94  198
            TEMPO: precision:  61.63%; recall:  43.44%; FB1:  50.96  86
            VALOR: precision:  53.85%; recall:  34.15%; FB1:  41.79  26
8061/8831 (91.28072%)
Score on dev: 52.66000
New

Epoch 4 done. Average cost: 3.915641
('Lower text?', True, 'Replace digits with zero?', False, 'Dimension of LSTM for chars:', 46, 'Use bidirectional LSTM for chars?', True, 'Dimension of LSTM for words:', 149, 'Use bidirectional LSTM for words?', False, 'Dimension of capitalization features:', 0)
Model location: ./models/tag_scheme=iob,lower=True,zeros=False,char_dim=25,char_lstm_dim=46,char_bidirect=True,word_dim=100,word_lstm_dim=149,word_bidirect=False,pre_emb=glove_s100.txt,all_emb=True,cap_dim=0,crf=True,dropout=0.5,lr_method=sgd-lr_.005
Found 12769 unique words (62480 in total)
Loading pretrained embeddings from embeddings/glove_s100.txt...
Found 123 unique characters
Found 21 unique named entity tags
2955 / 529 / 854 sentences in train / dev / test.
Saving the mappings to disk...
Loading pretrained embeddings from embeddings/glove_s100.txt...
Loaded 931381 pretrained embeddings.
931866 / 932886 (99.8907%) words have been initialized with pretrained embeddings.
931381 found dire

Epoch 2 done. Average cost: 5.700254
('Starting epoch 3 at...', 'Fri Jan 26 11:29:41 2018')
processed 8831 tokens with 709 phrases; found: 598 phrases; correct: 328.
accuracy:  90.94%; precision:  54.85%; recall:  46.26%; FB1:  50.19
       ABSTRACCAO: precision:  33.33%; recall:   2.44%; FB1:   4.55  3
    ACONTECIMENTO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            COISA: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
            LOCAL: precision:  53.59%; recall:  70.29%; FB1:  60.82  181
             OBRA: precision:   5.56%; recall:   1.92%; FB1:   2.86  18
      ORGANIZACAO: precision:  50.00%; recall:  42.50%; FB1:  45.95  34
            OUTRO: precision:   0.00%; recall:   0.00%; FB1:   0.00  0
           PESSOA: precision:  55.52%; recall:  82.21%; FB1:  66.28  308
            TEMPO: precision:  75.00%; recall:  31.97%; FB1:  44.83  52
            VALOR: precision: 100.00%; recall:   4.88%; FB1:   9.30  2
8031/8831 (90.94100%)
Score on dev: 50.19000
New 

In [13]:
best_individuals = tools.selBest(population,k = 1)

lower = None
zeros = None
char_lstm_dim = None
char_bidirect = None
word_lstm_dim = None
word_bidirect = None
cap_dim = None

for bi in best_individuals:
    lower_bit = bi[0]
    zeros_bit = bi[1]
    char_lstm_dim_bits = BitArray(bi[2:8])
    char_bidirect_bit = bi[8]
    word_lstm_dim_bits = BitArray(bi[9:17])
    word_bidirect_bit = bi[17]
    cap_dim_bit = bi[18]
    
    lower = lower_bit == 1
    zeros = zeros_bit == 1
    char_lstm_dim = char_lstm_dim_bits.uint
    char_bidirect = char_bidirect_bit == 1
    word_lstm_dim = word_lstm_dim_bits.uint
    word_bidirect = word_bidirect_bit == 1
    cap_dim = cap_dim_bit
    
    print('Lower text?', lower, 
          'Replace digits with zero?', zeros, 
          'Dimension of LSTM for chars:', char_lstm_dim,
          'Use bidirectional LSTM for chars?', char_bidirect,
          'Dimension of LSTM for words:', word_lstm_dim,
          'Use bidirectional LSTM for words?', word_bidirect,
          'Dimension of capitalization features:', cap_dim,
          'F1 score:', bi.fitness.values)

('Lower text?', True, 'Replace digits with zero?', True, 'Dimension of LSTM for chars:', 34, 'Use bidirectional LSTM for chars?', True, 'Dimension of LSTM for words:', 200, 'Use bidirectional LSTM for words?', True, 'Dimension of capitalization features:', 1, 'F1 score:', (63.07,))


In [18]:
sentence_tagger.tag('models/tag_scheme=iob,lower=True,zeros=True,char_dim=25,char_lstm_dim=34,char_bidirect=True,word_dim=100,word_lstm_dim=200,word_bidirect=True,pre_emb=glove_s100.txt,all_emb=True,cap_dim=1,crf=True,dropout=0.5,lr_method=sgd-lr_.005',
                    'Este trabalho avalia o uso de Algoritmos Geneticos, pelo aluno Pedro Vitor Quinta de Castro, da Universidade Federal de Goias')

Loading model...
Compiling...
Tagging...
---- sentence tagged in 0.0068s ----


u'Este__O trabalho__O avalia__O o__O uso__O de__O Algoritmos__B-PESSOA Geneticos,__I-PESSOA pelo__O aluno__O Pedro__B-PESSOA Vitor__I-PESSOA Quinta__I-PESSOA de__I-PESSOA Castro,__I-PESSOA da__I-PESSOA Universidade__I-PESSOA Federal__I-PESSOA de__O Goias__B-LOCAL\n'