This is based on this code: https://github.com/codekansas/keras-language-modeling/blob/master/keras_models.py

In [1]:
%load_ext autoreload
%autoreload 2

## Note - To Get this working:

* Install CUDA and associated libraries, setup path
* Install bleeding edge theano (from src)
* Make sure the THEANO_FLAGS are set correctly via the environment var, or via the ~/.theanorc file
* Install and compile bleeding edge Keras (from src)
* `export KERAS_BACKEND=theano`
* `export KERAS_IMAGE_DIM_ORDERING='th'`
* `sh <project_root>/shell_scipts/setup_environment.sh` to install additional dependencies
* **DO NOT SET UNROLL=True** when creating RNNs - it causes a maximum recursion depth error

## Trouble-Shooting

* You may need to clean the theano cache. To do so thoroughly, run this command from the shell:
 * `theano-cache purge`

In [2]:
import numpy as np
from collections import defaultdict
from joblib import Parallel, delayed

import keras
from keras.preprocessing import sequence
from keras.optimizers import SGD, RMSprop, Adagrad
from keras.utils import np_utils
from keras.layers import Bidirectional
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, TimeDistributedDense
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM, GRU
from Metrics import rpf1
from load_data import load_process_essays

#from gensim.models import Word2Vec
from window_based_tagger_config import get_config
from IdGenerator import IdGenerator as idGen
from results_procesor import ResultsProcessor, __MICRO_F1__
from Rpfa import micro_rpfa
from collections import defaultdict

import Settings
import logging

import datetime

Using TensorFlow backend.


## Load and Pre-Process Essays

In [3]:
import pickle
from CrossValidation import cross_validation
from BrattEssay import load_bratt_essays
from load_data import load_process_essays
from collections import defaultdict
from IterableFP import flatten
from Settings import Settings
from Settings import Settings

# Number of cross-validation folds, and the fraction of each fold's training essays
# that is held out as a dev set (see the train / dev split cell).
CV_FOLDS = 5
DEV_SPLIT = 0.1

# All data paths are derived from the project Settings object.
settings = Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
training_folder = root_folder + "Training" + "/"
# Pickled, pre-tagged essays; lives in the same directory as root_folder.
training_pickled = settings.data_directory + "CoralBleaching/Thesis_Dataset/training.pl"
models_folder = root_folder + "Models/Bi-LSTM/"

Results Dir: /Users/simon.hughes/Google Drive/Phd/Results/
Data Dir:    /Users/simon.hughes/Google Drive/Phd/Data/
Root Dir:    /Users/simon.hughes/GitHub/NlpResearch/
Public Data: /Users/simon.hughes/GitHub/NlpResearch/Data/PublicDatasets/


In [4]:
# Load the pre-tagged essays. The file is only read, so it is opened read-only ("rb");
# the previous "rb+" mode also requested write access, which fails on read-only data
# directories and risks accidental modification of the dataset.
# NOTE: pickle.load can execute arbitrary code - only load pickles from a trusted source.
with open(training_pickled, "rb") as f:
    tagged_essays = pickle.load(f)
len(tagged_essays)

902

In [5]:
# NOTE: a later cell runs `from datetime import datetime`, which rebinds the name
# `datetime` to the class. Re-importing the module here keeps this cell re-runnable.
import datetime, logging
# Record when this run started.
print("Started at: " + str(datetime.datetime.now()))
# Configure the root logger: INFO level, timestamped messages.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()

Started at: 2017-03-19 21:59:32.236300


In [6]:
from numpy.random import shuffle
# Shuffle the essays in place before building the folds.
# NOTE(review): no random seed is set in the visible cells, so the ordering (and therefore
# fold membership) differs between runs.
shuffle(tagged_essays)

## Prepare Tags

In [7]:
# Single pass over every (word, tags) pair: collect the vocabulary and count tag occurrences.
tag_freq = defaultdict(int)
unique_words = set()
tagged_words = (pair
                for essay in tagged_essays
                for sentence in essay.sentences
                for pair in sentence)
for word, word_tags in tagged_words:
    unique_words.add(word)
    for tag in word_tags:
        tag_freq[tag] += 1

# Tag used for words that carry none of the regular tags.
EMPTY_TAG = "Empty"
# Only tags whose first character is a digit are kept as prediction targets.
regular_tags = [t for t in tag_freq.keys() if t[0].isdigit()]
vtags = set(regular_tags)
vtags.add(EMPTY_TAG)

len(unique_words)

1641

In [8]:
# Display the tag vocabulary (digit-prefixed tags plus EMPTY_TAG) in sorted order.
sorted(vtags)

['1',
 '11',
 '12',
 '13',
 '14',
 '2',
 '3',
 '4',
 '5',
 '50',
 '5b',
 '6',
 '7',
 'Empty']

# Transform Essays into Training Data (Word Ids)

* Computes `xs`, `ys`, `ys_bytag` and `seq_lens`
* `ys_bytag` includes **all tags** and does **not** focus only on the most common tag
* `ys` only includes the most common tag (so we can use cross entropy)
* `seq_lens` holds each sentence's length *excluding* the START and END tokens (so 2 has to be added back when aligning with the padded sequences)
* `ys_bytag` also excludes the START and END tokens

## Get Max Sequence Length, Generate All Ids

In [9]:
# Map each label index to its tag. The index order is the iteration order of `vtags`,
# which is the same order get_training_data uses when it builds the one-hot labels.
ix2tag = dict(enumerate(vtags))

generator = idGen(seed=1) # important as we zero pad sequences

# Register every word with the id generator and find the longest sentence.
# The return value of get_id is deliberately not bound to a name: the previous
# `id = generator.get_id(word)` rebound the builtin `id` for the whole notebook.
maxlen = 0
for essay in tagged_essays:
    for sentence in essay.sentences:
        for word, tags in sentence:
            generator.get_id(word) # 0 is used to pad sequences
        # +2 leaves room for the START and END tokens added in get_training_data
        maxlen = max(maxlen, len(sentence) + 2)

def ids2tags(ids):
    """Return `generator.get_key(j)` for every id `j` in `ids`, as a list."""
    keys = []
    for word_id in ids:
        keys.append(generator.get_key(word_id))
    return keys

def lbls2tags(ixs):
    """Translate label indexes into tag names using the `ix2tag` lookup."""
    tag_names = []
    for label_ix in ixs:
        tag_names.append(ix2tag[label_ix])
    return tag_names
        
# Display the longest sentence length plus 2 (room for the START / END tokens).
maxlen

93

In [10]:
START = "<start>"
END   = "<end>"

def get_training_data(tessays):
    """Encode tagged essays as padded id / label sequences plus per-tag evaluation labels.

    Each sentence is wrapped in START / END tokens before it is encoded.

    Returns a tuple (xs, ys, ys_bytag, seq_lens):
      * xs       - word ids per sentence, passed through pad_sequences(maxlen=maxlen)
      * ys       - per-token one-hot labels over `vtags` (in `vtags` iteration order). When a
                   word has several valid tags only the one with the highest `tag_freq` is
                   kept; a word with no valid tag is labelled EMPTY_TAG. Padded like xs.
      * ys_bytag - dict: tag -> flat list of 0/1 flags, one per real word (START / END are
                   skipped), for every tag except EMPTY_TAG. Keeps *all* tags of a word.
      * seq_lens - sentence lengths excluding the START / END tokens.
    """
    xs, ys, seq_lens = [], [], []
    ys_bytag = defaultdict(list)
    # tags that are evaluated individually (everything except the filler tag)
    scored_tags = vtags - set([EMPTY_TAG])

    for essay in tessays:
        for sentence in essay.sentences:
            word_ids = []
            labels = []
            wrapped = [(START, set())] + sentence + [(END, set())]
            for word, tags in wrapped:
                # 0 is used to pad sequences
                word_ids.append(generator.get_id(word))

                # drop any tag we are not modelling
                kept = vtags.intersection(tags)

                # evaluation labels keep every tag, but skip the START / END tokens
                if word != START and word != END:
                    for t in scored_tags:
                        ys_bytag[t].append(1 if t in kept else 0)

                # training labels: reduce to the single most frequent tag
                if len(kept) > 1:
                    kept = set([max(kept, key=lambda t: tag_freq[t])])
                if len(kept) == 0:
                    kept.add(EMPTY_TAG)

                labels.append([1 if t in kept else 0 for t in vtags])

            seq_lens.append(len(word_ids) - 2)
            ys.append(labels)
            xs.append(word_ids)

    xs = sequence.pad_sequences(xs, maxlen=maxlen)
    ys = sequence.pad_sequences(ys, maxlen=maxlen)
    assert xs.shape[0] == ys.shape[0], "Sequences should have the same number of rows"
    assert xs.shape[1] == ys.shape[1] == maxlen, "Sequences should have the same lengths"
    return xs, ys, ys_bytag, seq_lens

## Create Train - Test Split

In [11]:
#Helper Functions
def collapse_results(seq_lens, preds):
    """Turn a matrix of predicted label indexes into per-tag 0/1 prediction lists.

    seq_lens -- sentence lengths without the start / end labels, one per row of `preds`
    preds    -- 2D array of predicted label indexes (rows are left-padded sequences)

    Returns a defaultdict: tag -> list with one 0/1 entry per real word, for every tag
    in `vtags` except EMPTY_TAG.
    """
    assert len(seq_lens) == preds.shape[0], "Axis 1 size does not align"
    collapsed = defaultdict(list)
    for row_ixs, n_words in zip(preds, seq_lens):
        span = n_words + 2
        # padding is on the left, so the real predictions are the last `span` entries;
        # the [1:-1] slice then drops the start and end labels
        labels = [ix2tag[j] for j in row_ixs[-span:]][1:-1]
        for predicted in labels:
            collapsed[predicted].append(1)
            # every other real tag gets a 0 for this word
            for other in (vtags - set([EMPTY_TAG, predicted])):
                collapsed[other].append(0)
    # the filler tag is not evaluated
    collapsed.pop(EMPTY_TAG, None)
    return collapsed

In [12]:
def train_dev_split(lst, dev_split):
    """Randomly split `lst` into a training part and a dev part.

    lst       -- sequence of items to split; it is NOT modified (a copy is shuffled, whereas
                 the previous version shuffled the caller's list in place)
    dev_split -- fraction of the items (0.0 - 1.0) to place in the dev part

    Returns (training_items, dev_items) as two lists; the training part holds
    int((1.0 - dev_split) * len(lst)) items and the dev part holds the remainder.
    """
    # shuffle a copy so the caller's ordering is left untouched
    shuffled = list(lst)
    shuffle(shuffled)
    num_training = int((1.0 - dev_split) * len(shuffled))
    return shuffled[:num_training], shuffled[num_training:]

In [13]:
%%time

folds = cross_validation(tagged_essays, CV_FOLDS)
fold2training_data, fold2dev_data, fold2test_data = {}, {}, {}

for i, (essays_TD, essays_VD) in enumerate(folds):
    # carve a dev set out of this fold's training essays
    essays_train, essays_dev = train_dev_split(essays_TD, DEV_SPLIT)
    # encode in the same order as before: train, dev, then the held-out test essays
    fold2training_data[i], fold2dev_data[i], fold2test_data[i] = (
        get_training_data(essays_train),
        get_training_data(essays_dev),
        get_training_data(essays_VD),
    )

CPU times: user 7.45 s, sys: 486 ms, total: 7.94 s
Wall time: 7.95 s


## Load Glove 100 Dim Embeddings

In [14]:
# see /Users/simon.hughes/GitHub/NlpResearch/PythonNlpResearch/DeepLearning/WordVectors/pickle_glove_embedding.py
# for creating pre-filtered embeddings file
import pickle, os
from numpy.linalg import norm

embeddings_file = "/Users/simon.hughes/data/word_embeddings/glove.6B/cb_dict_glove.6B.100d.txt"
# Despite the .txt extension this file is unpickled, so it must be opened in binary mode.
# It is only read, hence "rb" rather than the previous "rb+" (which also requested write access).
# NOTE: pickle.load can execute arbitrary code - only load pickles from a trusted source.
with open(embeddings_file, "rb") as f:
    cb_emb_index = pickle.load(f)

In [15]:
# Words from the essays that have no pre-trained embedding.
missed = {wd for wd in unique_words if wd not in cb_emb_index}
# Scale to a percentage first and round afterwards: multiplying an already rounded ratio
# by 100.0 can re-introduce float noise (e.g. 7.010000000000001) in the printed value.
pct_missed = round(100.0 * len(missed) / len(unique_words), 2)
print(len(missed), len(unique_words), pct_missed, "%")

41 1641 2.5 %


### Construct Embedding Matrix

In [16]:
# Embedding width, read off the first vector stored in the embedding index.
EMBEDDING_DIM = list(cb_emb_index.values())[0].shape[0]

def get_embedding_matrix(words, idgenerator, max_features, init='uniform', unit_length=False,
                         mean=0.0, sd=0.05):
    """Build a (max_features, embedding_dim) matrix seeded with the vectors in `cb_emb_index`.

    words        -- iterable of words to look up
    idgenerator  -- object whose get_id(word) returns the row index for a word
    max_features -- number of rows in the matrix
    init         -- how rows are initialised before known vectors are copied in:
                    'uniform' (U(-0.05, 0.05)), 'zeros', or 'normal' (N(mean, sd))
    unit_length  -- if True, scale every non-zero row to unit L2 norm
    mean, sd     -- parameters of the 'normal' initialiser. These were previously read from
                    undefined names, so init='normal' raised a NameError.

    Rows of words missing from `cb_emb_index` keep their initial values.
    Raises Exception for an unknown `init`.
    """
    embedding_dim = list(cb_emb_index.values())[0].shape[0]
    shape = (max_features, embedding_dim)
    if init == 'uniform':
        # NOTE: the max norms for these is quite low relative to the embeddings
        embedding_matrix = np.random.uniform(low=-0.05, high=0.05, size=shape)
    elif init == 'zeros':
        embedding_matrix = np.zeros(shape=shape, dtype=np.float32)
    elif init == 'normal':
        embedding_matrix = np.random.normal(loc=mean, scale=sd, size=shape)
    else:
        raise Exception("Unknown init type")
    for word in words:
        # get_id is called for every word (as before), whether or not it has a vector
        i = idgenerator.get_id(word)
        embedding_vector = cb_emb_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    if unit_length:
        norms = np.linalg.norm(embedding_matrix, axis=1, keepdims=True)
        # remove 0 norms to prevent divide by zero
        norms[norms == 0.0] = 1.0
        embedding_matrix = embedding_matrix / norms
    return embedding_matrix

In [17]:
def score_predictions(model, xs, ys_by_tag, seq_len):
    """Predict classes for `xs` and score them against `ys_by_tag`.

    Uses the notebook-level `batch_size` for prediction, collapses the predictions with
    collapse_results, and aggregates the per-class metrics with micro_rpfa.

    Returns (micro_metrics, predictions_by_tag).
    """
    predicted_classes = model.predict_classes(xs, batch_size=batch_size)
    predictions_by_tag = collapse_results(seq_len, predicted_classes)
    per_class_metrics = ResultsProcessor.compute_metrics(ys_by_tag, predictions_by_tag)
    return micro_rpfa(per_class_metrics.values()), predictions_by_tag

In [18]:
from keras.layers import Bidirectional
from datetime import datetime

def get_ts():
    """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS.ffffff'."""
    # Import locally: other cells run `import datetime` (the module), which rebinds the
    # global name and would make a bare `datetime.now()` fail depending on cell order.
    from datetime import datetime as _datetime
    return _datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')

def get_file_ts():
    """Return the current local time as a file-name friendly stamp 'YYYYMMDD_HHMMSS_ffffff'."""
    # Import locally: other cells run `import datetime` (the module), which rebinds the
    # global name and would make a bare `datetime.now()` fail depending on cell order.
    from datetime import datetime as _datetime
    return _datetime.now().strftime('%Y%m%d_%H%M%S_%f')

# Hyper-parameters shared by the model-building cells.
embedding_size = EMBEDDING_DIM  # embedding width used when the layer is NOT initialised from the pre-trained matrix
hidden_size    = 128
out_size = len(vtags)           # one output unit per tag in vtags (EMPTY_TAG included)
batch_size = 128                # read as a global by score_predictions and passed to model.fit

# Quick check of the two timestamp helpers.
get_ts(), get_file_ts()

('2017-03-19 21:59:40.893754', '20170319_215940_893783')

## Train Bi-Directional LSTM With Glove Embeddings

In [20]:
#%%time
# Single-fold training run with early stopping on dev-set micro F1.
# The names bound here (model, best_weights, X_test, test_ys_by_tag, seq_len_test) are
# read by the evaluation cell that follows.
max_features=len(generator.get_ids())+2 # rows in the embedding matrix: number of ids + 2 (original note: "need plus one maybe due to masking of sequences")
embedding_matrix = get_embedding_matrix(unique_words, generator, max_features, init='uniform', unit_length=False)
embedding_layer = Embedding(max_features,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=maxlen,
                            trainable=True,
                            mask_zero=True) # treat id 0 (the padding value) as masked. NOTE(review): the earlier comment here described embedding initialisation, not masking

model = Sequential()
#model.add(Embedding(max_features, embedding_size, input_length=maxlen, mask_zero=True))
model.add(embedding_layer)
model.add(Bidirectional(GRU(hidden_size, return_sequences=True, consume_less="cpu"), merge_mode="sum"))

# per-timestep projection onto the tag set, followed by a softmax over tags
model.add(TimeDistributedDense(out_size))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', 
              #metrics=['fmeasure'], 
              sample_weight_mode="temporal")

# only the first fold is used in this cell
fold_ix = 0
X_train, y_train, train_ys_by_tag, seq_len_train = fold2training_data[fold_ix]
X_dev,   y_dev,   dev_ys_by_tag,   seq_len_dev   = fold2dev_data[fold_ix]
X_test,  y_test,  test_ys_by_tag,  seq_len_test  = fold2test_data[fold_ix]

# early-stopping state: f1_scores starts with a -1 sentinel so the first epoch always
# counts as an improvement; training stops after `patience` epochs without a new best
f1_scores = [-1]
num_since_best_score = 0
patience = 3
best_weights = None

for i in range(30):
    print("{ts}: Epoch={epoch}".format(ts=get_ts(), epoch=i))
    epochs = 1 # epochs per training instance
    results = model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=epochs, validation_split=0.0, verbose=0)
    # score on the dev set after every epoch
    micro_metrics, _ = score_predictions(model, X_dev, dev_ys_by_tag, seq_len_dev)

    print(micro_metrics)
    print()
    
    f1_score = micro_metrics.f1_score
    best_f1_score = max(f1_scores)
    if f1_score <= best_f1_score:
        num_since_best_score += 1
    else: # score improved
        num_since_best_score = 0
        # snapshot the weights of the best epoch so far
        best_weights = model.get_weights()

    f1_scores.append(f1_score)
    if num_since_best_score >= patience:
        print("Too long since an improvement, stopping")
        break

ResourceExhaustedError: OOM when allocating tensor with shape[1645,100]
	 [[Node: random_uniform_14/RandomUniform = RandomUniform[T=DT_INT32, dtype=DT_FLOAT, seed=87654321, seed2=768030624, _device="/job:localhost/replica:0/task:0/gpu:0"](random_uniform_14/shape)]]

Caused by op 'random_uniform_14/RandomUniform', defined at:
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-20-b41a9e7a7d89>", line 13, in <module>
    model.add(embedding_layer)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/keras/models.py", line 299, in add
    layer.create_input_layer(batch_input_shape, input_dtype)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/keras/engine/topology.py", line 401, in create_input_layer
    self(x)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/keras/engine/topology.py", line 546, in __call__
    self.build(input_shapes[0])
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/keras/layers/embeddings.py", line 99, in build
    constraint=self.W_constraint)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/keras/engine/topology.py", line 418, in add_weight
    weight = initializer(shape, name=name)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/keras/initializations.py", line 33, in uniform
    return K.random_uniform_variable(shape, -scale, scale, name=name)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 634, in random_uniform_variable
    low, high, dtype=tf_dtype, seed=seed)(shape)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/tensorflow/python/ops/init_ops.py", line 180, in __call__
    dtype, seed=self.seed)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/tensorflow/python/ops/random_ops.py", line 245, in random_uniform
    seed2=seed2)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/tensorflow/python/ops/gen_random_ops.py", line 220, in _random_uniform
    seed=seed, seed2=seed2, name=name)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
    op_def=op_def)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2395, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/simon.hughes/anaconda3/envs/tensorflow_gpu/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1264, in __init__
    self._traceback = _extract_stack()

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[1645,100]
	 [[Node: random_uniform_14/RandomUniform = RandomUniform[T=DT_INT32, dtype=DT_FLOAT, seed=87654321, seed2=768030624, _device="/job:localhost/replica:0/task:0/gpu:0"](random_uniform_14/shape)]]


## Load Best Set of Weights and Evaluate

In [21]:
# Restore the weights snapshotted at the best dev-F1 epoch and score the held-out test fold.
# NOTE: best_weights is still None if the training cell did not complete at least one epoch.
model.set_weights(best_weights)
micro_metrics, _ = score_predictions(model, X_test, test_ys_by_tag, seq_len_test)
micro_metrics



Recall: 0.8388, Precision: 0.8474, F1: 0.8431, Accuracy: 0.9941, Codes:  6546

In [34]:
# Make sure lbls and predictions line up
# The evaluation cell discards the per-tag predictions, so recompute them here; previously
# this cell relied on a `test_pred_ys_by_tag` variable that no visible cell defines.
_, test_pred_ys_by_tag = score_predictions(model, X_test, test_ys_by_tag, seq_len_test)
# Check every real tag (the loop used to ignore `tag` and re-check "50" each time).
for tag in vtags - set([EMPTY_TAG]):
    # .get avoids inserting empty lists into the defaultdicts as a side effect
    assert len(test_ys_by_tag.get(tag, [])) == len(test_pred_ys_by_tag.get(tag, [])), \
        "Label / prediction length mismatch for tag " + tag

### TODO
* Use early stopping criteria
* Embeddings:
 * Don't remove low frequency words
 * Normalize all vector lengths

In [19]:
max_features=len(generator.get_ids())+2 # rows in the embedding matrix: number of ids + 2 (id 0 is the padding value)

# merge_mode only applies when bi_directional is True
def evaluate_fold(fold_ix, use_pretrained_embedding, bi_directional, merge_mode, hidden_size,
                  max_epochs=30, patience=3):
    """Train a GRU tagger on one CV fold with early stopping and return its predictions.

    fold_ix                  -- key into fold2training_data / fold2dev_data / fold2test_data
    use_pretrained_embedding -- initialise the embedding layer from get_embedding_matrix
                                instead of the layer's default initialisation
    bi_directional           -- wrap the GRU in a Bidirectional layer
    merge_mode               -- Bidirectional merge mode (ignored when not bi_directional)
    hidden_size              -- number of GRU units
    max_epochs               -- upper bound on training epochs (default 30, as before)
    patience                 -- stop after this many consecutive epochs without a new best
                                dev micro F1 (default 3, as before)

    Returns (train_predictions_by_tag, test_predictions_by_tag, train_ys_by_tag,
    test_ys_by_tag), with predictions made using the weights of the best dev-F1 epoch.
    """
    if use_pretrained_embedding:
        embedding_matrix = get_embedding_matrix(unique_words, generator, max_features, init='uniform', unit_length=False)
        embedding_layer = Embedding(max_features,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=maxlen,
                                trainable=True,
                                mask_zero=True)
    else:
        embedding_layer = Embedding(max_features, embedding_size, input_length=maxlen, trainable=True, mask_zero=True)

    if bi_directional:
        rnn_layer = Bidirectional(GRU(hidden_size, return_sequences=True, consume_less="cpu"), merge_mode=merge_mode)
    else:
        rnn_layer = GRU(hidden_size, return_sequences=True, consume_less="cpu")

    model = Sequential()
    model.add(embedding_layer)
    model.add(rnn_layer)

    # per-timestep projection onto the tag set, followed by a softmax over tags
    model.add(TimeDistributedDense(out_size))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', sample_weight_mode="temporal")

    X_train, y_train, train_ys_by_tag, seq_len_train = fold2training_data[fold_ix]
    X_dev,   y_dev,   dev_ys_by_tag,   seq_len_dev   = fold2dev_data[fold_ix]
    X_test,  y_test,  test_ys_by_tag,  seq_len_test  = fold2test_data[fold_ix]

    # early-stopping state; -1 guarantees the first valid score counts as an improvement
    best_f1_score = -1
    best_weights = None
    num_since_best_score = 0

    for epoch in range(max_epochs):
        print("{ts}: Epoch={epoch}".format(ts=get_ts(), epoch=epoch))
        # one epoch per fit call so the dev set can be scored after every epoch
        model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1, validation_split=0.0, verbose=0)
        micro_metrics, _ = score_predictions(model, X_dev, dev_ys_by_tag, seq_len_dev)

        print(micro_metrics)
        print()

        f1_score = micro_metrics.f1_score
        # Strict '>' so that a NaN score is never treated as an improvement (the previous
        # `<=` test sent NaN to the "improved" branch and overwrote best_weights).
        if f1_score > best_f1_score:
            best_f1_score = f1_score
            num_since_best_score = 0
            best_weights = model.get_weights()
        else:
            num_since_best_score += 1

        if num_since_best_score >= patience:
            print("Too long since an improvement, stopping")
            break

    # load best weights (None only if no epoch produced a valid improvement)
    if best_weights is not None:
        model.set_weights(best_weights)
    _, train_predictions_by_tag = score_predictions(model, X_train, train_ys_by_tag, seq_len_train)
    _, test_predictions_by_tag  = score_predictions(model, X_test,  test_ys_by_tag,  seq_len_test)
    return train_predictions_by_tag, test_predictions_by_tag, train_ys_by_tag, test_ys_by_tag

In [21]:
use_pretrained_embedding = True
bi_directional = True
merge_mode = "concat"
hidden_size = 128

# Evaluate the folds one after another in this process. `results` has to be assigned here:
# the merge cell below iterates over it, and with both joblib Parallel(...) calls commented
# out it was undefined on a fresh kernel. A plain loop is the sequential equivalent of the
# Parallel(n_jobs=1) variant.
results = [evaluate_fold(i, use_pretrained_embedding, bi_directional, merge_mode, hidden_size)
           for i in range(CV_FOLDS)]



2017-03-19 18:44:27.085587: Epoch=0
Recall: 0.3405, Precision: 0.7943, F1: 0.4767, Accuracy: 0.9860, Codes:  2790

2017-03-19 18:45:26.231829: Epoch=1
Recall: 0.5674, Precision: 0.8552, F1: 0.6822, Accuracy: 0.9901, Codes:  2790

2017-03-19 18:46:05.555499: Epoch=2
Recall: 0.6710, Precision: 0.8785, F1: 0.7608, Accuracy: 0.9921, Codes:  2790

2017-03-19 18:46:45.120897: Epoch=3
Recall: 0.7065, Precision: 0.8919, F1: 0.7884, Accuracy: 0.9929, Codes:  2790

2017-03-19 18:47:24.659390: Epoch=4
Recall: 0.7595, Precision: 0.8796, F1: 0.8152, Accuracy: 0.9936, Codes:  2790

2017-03-19 18:48:04.377607: Epoch=5
Recall: 0.7932, Precision: 0.8716, F1: 0.8305, Accuracy: 0.9940, Codes:  2790

2017-03-19 18:48:44.020312: Epoch=6
Recall: 0.7523, Precision: 0.9016, F1: 0.8202, Accuracy: 0.9938, Codes:  2790

2017-03-19 18:49:23.687688: Epoch=7
Recall: 0.7993, Precision: 0.8793, F1: 0.8374, Accuracy: 0.9942, Codes:  2790

2017-03-19 18:50:03.556279: Epoch=8
Recall: 0.8186, Precision: 0.8535, F1: 0.835



2017-03-19 18:52:14.978408: Epoch=0
Recall: 0.3317, Precision: 0.7331, F1: 0.4567, Accuracy: 0.9861, Codes:  2807

2017-03-19 18:53:08.487026: Epoch=1
Recall: 0.5486, Precision: 0.7955, F1: 0.6494, Accuracy: 0.9896, Codes:  2807

2017-03-19 18:53:47.176084: Epoch=2
Recall: 0.6413, Precision: 0.8511, F1: 0.7314, Accuracy: 0.9917, Codes:  2807

2017-03-19 18:54:25.877594: Epoch=3
Recall: 0.6715, Precision: 0.8522, F1: 0.7511, Accuracy: 0.9922, Codes:  2807

2017-03-19 18:55:05.377401: Epoch=4
Recall: 0.7239, Precision: 0.8307, F1: 0.7737, Accuracy: 0.9926, Codes:  2807

2017-03-19 18:55:44.284625: Epoch=5
Recall: 0.7285, Precision: 0.8517, F1: 0.7853, Accuracy: 0.9930, Codes:  2807

2017-03-19 18:56:23.290469: Epoch=6
Recall: 0.7378, Precision: 0.8463, F1: 0.7884, Accuracy: 0.9930, Codes:  2807

2017-03-19 18:57:02.323641: Epoch=7
Recall: 0.7959, Precision: 0.7877, F1: 0.7918, Accuracy: 0.9926, Codes:  2807

2017-03-19 18:57:41.585873: Epoch=8
Recall: 0.7873, Precision: 0.8140, F1: 0.800



2017-03-19 19:03:47.323321: Epoch=0
Recall: 0.3300, Precision: 0.7856, F1: 0.4648, Accuracy: 0.9858, Codes:  2421

2017-03-19 19:04:42.843740: Epoch=1
Recall: 0.5708, Precision: 0.8422, F1: 0.6805, Accuracy: 0.9900, Codes:  2421

2017-03-19 19:05:23.375064: Epoch=2
Recall: 0.7005, Precision: 0.8561, F1: 0.7706, Accuracy: 0.9922, Codes:  2421

2017-03-19 19:06:04.016237: Epoch=3
Recall: 0.6708, Precision: 0.9118, F1: 0.7730, Accuracy: 0.9926, Codes:  2421

2017-03-19 19:06:44.690444: Epoch=4
Recall: 0.7728, Precision: 0.8575, F1: 0.8129, Accuracy: 0.9934, Codes:  2421

2017-03-19 19:07:25.485334: Epoch=5
Recall: 0.7377, Precision: 0.9034, F1: 0.8122, Accuracy: 0.9936, Codes:  2421

2017-03-19 19:08:06.296227: Epoch=6
Recall: 0.7567, Precision: 0.8902, F1: 0.8180, Accuracy: 0.9937, Codes:  2421

2017-03-19 19:08:47.085971: Epoch=7
Recall: 0.7633, Precision: 0.8906, F1: 0.8221, Accuracy: 0.9938, Codes:  2421

2017-03-19 19:09:28.128360: Epoch=8
Recall: 0.7943, Precision: 0.8682, F1: 0.829

In [22]:
from wordtagginghelper import merge_dictionaries

# Accumulators for labels / predictions across all folds (td = training data, vd = test data).
cv_wd_td_ys_by_tag = defaultdict(list)
cv_wd_td_predictions_by_tag = defaultdict(list)
cv_wd_vd_ys_by_tag = defaultdict(list)
cv_wd_vd_predictions_by_tag = defaultdict(list)

# Each result is the 4-tuple returned by evaluate_fold:
# (train predictions, test predictions, train labels, test labels)
for td_preds, vd_preds, td_ys, vd_ys in results:
    merge_pairs = (
        (td_ys, cv_wd_td_ys_by_tag),
        (vd_ys, cv_wd_vd_ys_by_tag),
        (td_preds, cv_wd_td_predictions_by_tag),
        (vd_preds, cv_wd_vd_predictions_by_tag),
    )
    for fold_dict, cv_dict in merge_pairs:
        merge_dictionaries(fold_dict, cv_dict)

In [27]:
# Micro-averaged metrics over the test-fold labels and predictions merged across all folds.
class2metrics = ResultsProcessor.compute_metrics(cv_wd_vd_ys_by_tag, cv_wd_vd_predictions_by_tag)
micro_metrics = micro_rpfa(class2metrics.values())

In [28]:
# Display the cross-validated micro metrics computed in the previous cell.
micro_metrics

Recall: 0.8074, Precision: 0.8547, F1: 0.8304, Accuracy: 0.9938, Codes: 33259