In [1]:
!wget -qq https://raw.githubusercontent.com/svinkapeppa/bachelor-thesis/master/conlleval.py
!wget -qq https://raw.githubusercontent.com/svinkapeppa/bachelor-thesis/master/utils.py
!wget -qq https://raw.githubusercontent.com/svinkapeppa/bachelor-thesis/master/install
!pip install git+https://www.github.com/keras-team/keras-contrib.git

Collecting git+https://www.github.com/keras-team/keras-contrib.git
  Cloning https://www.github.com/keras-team/keras-contrib.git to /tmp/pip-req-build-tayz11pf
  Running command git clone -q https://www.github.com/keras-team/keras-contrib.git /tmp/pip-req-build-tayz11pf
Building wheels for collected packages: keras-contrib
  Building wheel for keras-contrib (setup.py) ... [?25l[?25hdone
  Stored in directory: /tmp/pip-ephem-wheel-cache-hfr3kx6g/wheels/11/27/c8/4ed56de7b55f4f61244e2dc6ef3cdbaff2692527a2ce6502ba
Successfully built keras-contrib
Installing collected packages: keras-contrib
Successfully installed keras-contrib-2.0.8


In [2]:
!chmod 777 install
!./install

--2019-06-26 13:45:40--  https://raw.githubusercontent.com/glample/tagger/master/dataset/eng.train
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3281527 (3.1M) [text/plain]
Saving to: ‘eng.train’


2019-06-26 13:45:41 (72.3 MB/s) - ‘eng.train’ saved [3281527/3281527]

--2019-06-26 13:45:41--  https://raw.githubusercontent.com/glample/tagger/master/dataset/eng.testa
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 827009 (808K) [text/plain]
Saving to: ‘eng.testa’


2019-06-26 13:45:41 (23.7 MB/s) - ‘eng.testa’ saved [827009/827009]

--2019-

In [3]:
from keras.models import Model
from keras.layers import TimeDistributed, Conv1D, Dense, Embedding, Input, Dropout, LSTM, Bidirectional, MaxPooling1D, Flatten, concatenate
from keras.initializers import RandomUniform
from keras.optimizers import SGD, Adam
from keras.losses import sparse_categorical_crossentropy
from keras_contrib.layers import CRF
from keras_contrib.losses import crf_loss

import utils
import random
import time
import math
import pickle

Using TensorFlow backend.


In [0]:
NUM_EPOCHS = 10000
BATCH_SIZE = 48
LSTM_DIM = 200
CHAR_EMB_DIM = 27
KERNEL_SIZE = 3
CHAR_FILTERS = 27
DROPOUT = 0.47
RECURRENT_DROPOUT = 0.47

In [0]:
with open('features/all_onehot.train', 'rb') as f:
    features_train = pickle.load(f)
with open('features/all_onehot.testa', 'rb') as f:
    features_dev = pickle.load(f)
with open('features/all_onehot.testb', 'rb') as f:
    features_test = pickle.load(f)

In [0]:
with open('features/gazetteer_PERLOC.train', 'rb') as f:
    gaze_train = pickle.load(f)
with open('features/gazetteer_PERLOC.testa', 'rb') as f:
    gaze_dev = pickle.load(f)
with open('features/gazetteer_PERLOC.testb', 'rb') as f:
    gaze_test = pickle.load(f)

In [7]:
train_sentences = utils.read_sentences('data/train')
valid_sentences = utils.read_sentences('data/valid')
test_sentences = utils.read_sentences('data/test')

print('Number of TRAIN sentences: {}'.format(len(train_sentences)))
print('Number of VALID sentences: {}'.format(len(valid_sentences)))
print('Number of TEST sentences: {}'.format(len(test_sentences)))

Number of TRAIN sentences: 14041
Number of VALID sentences: 3250
Number of TEST sentences: 3453


In [0]:
utils.convert_tags(train_sentences)
utils.convert_tags(valid_sentences)
utils.convert_tags(test_sentences)

In [0]:
tag_idx, idx_tag = utils.create_tag_mapping([train_sentences, valid_sentences, test_sentences])

In [0]:
word_idx, idx_word, word_embeddings = utils.create_word_mapping('glove/glove.6B.300d.txt')

In [0]:
char_idx, idx_char = utils.create_char_mapping([train_sentences, valid_sentences, test_sentences])

In [0]:
case_idx, case_embeddings = utils.create_case_mapping()

In [0]:
max_word_length = utils.get_max_word_length([train_sentences, valid_sentences, test_sentences])

In [0]:
utils.add_auxiliary_information(train_sentences)
utils.add_auxiliary_information(valid_sentences)
utils.add_auxiliary_information(test_sentences)

In [0]:
train_batches = utils.create_batches(train_sentences, BATCH_SIZE, max_word_length, word_idx,
                                     char_idx, tag_idx, features_train, gaze_train)
valid_batches = utils.create_batches(valid_sentences, BATCH_SIZE, max_word_length, word_idx,
                                     char_idx, tag_idx, features_dev, gaze_dev)
test_batches = utils.create_batches(test_sentences, BATCH_SIZE, max_word_length, word_idx,
                                    char_idx, tag_idx, features_test, gaze_test)

In [0]:
def CharCNNBiLSTM(word_vocab_size, case_vocab_size, char_vocab_size,
                  word_embeddings_dim, case_embeddings_dim, max_word_length,
                  word_embeddings, case_embeddings, tag_set_size):
    word_input = Input(shape=(None,))
    word_embeddings = Embedding(
        word_vocab_size, word_embeddings_dim, weights=[word_embeddings], trainable=True
    )(word_input)

    case_input = Input(shape=(None,))
    case_embeddings = Embedding(
        case_vocab_size, case_embeddings_dim, weights=[case_embeddings], trainable=True
    )(case_input)
    
    char_input = Input(shape=(None, max_word_length))
    char_embeddings = TimeDistributed(Embedding(
        char_vocab_size, CHAR_EMB_DIM, embeddings_initializer=RandomUniform(minval=-math.sqrt(3 / LSTM_DIM), maxval=math.sqrt(3 / LSTM_DIM))
    ))(char_input)
    char_embeddings = Dropout(DROPOUT)(char_embeddings)
    char_embeddings = TimeDistributed(Conv1D(
        kernel_size=KERNEL_SIZE, filters=CHAR_FILTERS, padding='same', activation='tanh', strides=1
    ))(char_embeddings)
    char_embeddings = TimeDistributed(MaxPooling1D(max_word_length))(char_embeddings)
    char_embeddings = TimeDistributed(Flatten())(char_embeddings)

    feature_input = Input(shape=(None, 198))
    gaze_input = Input(shape=(None, 4))

    embeddings = concatenate([word_embeddings, case_embeddings, char_embeddings, feature_input, gaze_input])
    embeddings = Dropout(DROPOUT)(embeddings)
    
    output = Bidirectional(LSTM(
        LSTM_DIM, return_sequences=True, dropout=DROPOUT, recurrent_dropout=RECURRENT_DROPOUT, implementation=2
    ))(embeddings)

    lstm = TimeDistributed(Dense(LSTM_DIM, activation='elu'))(output)
    lstm = CRF(tag_set_size, sparse_target=True)(lstm)
    gaze = TimeDistributed(Dense(4, activation='softmax'))(output)
    shape = TimeDistributed(Dense(152, activation='softmax'))(output)
    position = TimeDistributed(Dense(46, activation='softmax'))(output)
    
    return Model(inputs=[word_input, case_input, char_input, feature_input, gaze_input], outputs=[lstm, gaze, shape, position])

In [17]:
model = CharCNNBiLSTM(len(word_idx), len(case_idx), len(char_idx),
              len(word_embeddings[0]), len(case_idx), max_word_length,
              word_embeddings, case_embeddings, len(tag_idx))
model.compile(loss=[crf_loss, utils.weighted_sparse_categorical_crossentropy, sparse_categorical_crossentropy, sparse_categorical_crossentropy],
              optimizer=SGD(lr=0.04, decay=5e-5, momentum=0.9, clipnorm=10, nesterov=True))

W0626 13:47:48.462174 140252629301120 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0626 13:47:48.500872 140252629301120 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0626 13:47:48.511817 140252629301120 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0626 13:47:48.526026 140252629301120 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0626 13:47:48.527259 1402526293

In [0]:
import numpy as np
from sklearn.metrics import f1_score
from conlleval import evaluate


def evaluate_model(model, batches, idx_tag):
    pos_true, pos_pred = [], []
    shp_true, shp_pred = [], []
    true_seqs, pred_seqs = [], []
    pos_total, pos_correct = 0, 0
    shp_total, shp_correct = 0, 0

    for batch in batches:
        tag = batch['tag']
        shp = batch['shape']
        pos = batch['position']

        tag = tag.reshape((tag.shape[0], tag.shape[1]))
        shp = shp.reshape((shp.shape[0], shp.shape[1]))
        pos = pos.reshape((pos.shape[0], pos.shape[1]))

        lstm, _, shape, position = model.predict([batch['word'], batch['case'], batch['char'], batch['features'], batch['gaze']], verbose=False)
        lstm = lstm.argmax(axis=-1)
        shape = shape.argmax(axis=-1)
        position = position.argmax(axis=-1)

        for seq_ind, seq_len in enumerate(batch['lengths']):
            true_seqs.append(' '.join([idx_tag[ind.item()] for ind in tag[seq_ind, 1: seq_len + 1]]))
            pred_seqs.append(' '.join([idx_tag[ind.item()] for ind in lstm[seq_ind, 1: seq_len + 1]]))
            
            true_pos = np.array([ind.item() for ind in pos[seq_ind, 1: seq_len + 1]])
            pred_pos = np.array([ind.item() for ind in position[seq_ind, 1: seq_len + 1]])
            
            pos_true += list(true_pos)
            pos_pred += list(pred_pos)
            
            pos_correct += np.sum(true_pos == pred_pos)
            pos_total += len(pred_pos)
            
            true_shp = np.array([ind.item() for ind in shp[seq_ind, 1: seq_len + 1]])
            pred_shp = np.array([ind.item() for ind in shape[seq_ind, 1: seq_len + 1]])
            
            shp_true += list(true_shp)
            shp_pred += list(pred_shp)
            
            shp_correct += np.sum(true_shp == pred_shp)
            shp_total += len(pred_shp)

    f, precision, recall = evaluate(true_seqs, pred_seqs, verbose=False)
    pos_accuracy = 100 * pos_correct / pos_total
    pos_f = 100 * f1_score(pos_true, pos_pred, average='macro') 
    shp_accuracy = 100 * shp_correct / shp_total
    shp_f = 100 * f1_score(shp_true, shp_pred, average='macro') 
    
    return f, precision, recall, pos_accuracy, pos_f, shp_accuracy, shp_f

In [0]:
for epoch in range(1, NUM_EPOCHS + 1):
    random.shuffle(train_batches)
    
    print('----------------------------------- EPOCH: {} -----------------------------------'.format(epoch))
    print('----------------------------------- Training -----------------------------------')
    
    start_time = time.time()

    for batch in train_batches:
        model.train_on_batch([batch['word'], batch['case'], batch['char'], batch['features'], batch['gaze']], [batch['tag'], batch['gazetteers'], batch['shape'], batch['position']])
    
    finish_time = time.time()
    
    print('Time: {:.2f}s'.format(finish_time - start_time))
    
    print('---------------------------------- Evaluating ----------------------------------')
    
    start_time = time.time()
    
    f, precision, recall, pos_acc, pos_f, shp_acc, shp_f = evaluate_model(model, train_batches, idx_tag)
    
    print('================================== Train Data ==================================')
    print('F1 = {:.2f}%, Precision = {:.2f}%, Recall = {:.2f}%'.format(f, precision, recall))
    print('POS F1 = {:.2f}%, Accuracy = {:.2f}%'.format(pos_f, pos_acc))
    print('Shape F1 = {:.2f}%, Accuracy = {:.2f}%'.format(shp_f, shp_acc))
    
    f, precision, recall, pos_acc, pos_f, shp_acc, shp_f = evaluate_model(model, valid_batches, idx_tag)
    
    print('================================== Valid Data ==================================')
    print('F1 = {:.2f}%, Precision = {:.2f}%, Recall = {:.2f}%'.format(f, precision, recall))
    print('POS F1 = {:.2f}%, Accuracy = {:.2f}%'.format(pos_f, pos_acc))
    print('Shape F1 = {:.2f}%, Accuracy = {:.2f}%'.format(shp_f, shp_acc))
    
    f, precision, recall, pos_acc, pos_f, shp_acc, shp_f = evaluate_model(model, test_batches, idx_tag)
    
    print('================================== Test  Data ==================================')
    print('F1 = {:.2f}%, Precision = {:.2f}%, Recall = {:.2f}%'.format(f, precision, recall))
    print('POS F1 = {:.2f}%, Accuracy = {:.2f}%'.format(pos_f, pos_acc))
    print('Shape F1 = {:.2f}%, Accuracy = {:.2f}%'.format(shp_f, shp_acc))
    
    finish_time = time.time()
    
    print('Time: {:.2f}s\n'.format(finish_time - start_time))

----------------------------------- EPOCH: 1 -----------------------------------
----------------------------------- Training -----------------------------------


  num_elements)


Time: 69.26s
---------------------------------- Evaluating ----------------------------------


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


F1 = 69.23%, Precision = 69.71%, Recall = 69.47%
POS F1 = 37.08%, Accuracy = 82.09%
Shape F1 = 11.76%, Accuracy = 87.65%
F1 = 73.40%, Precision = 72.30%, Recall = 72.84%
POS F1 = 37.28%, Accuracy = 82.05%
Shape F1 = 13.65%, Accuracy = 87.60%
F1 = 67.51%, Precision = 68.84%, Recall = 68.17%
POS F1 = 37.37%, Accuracy = 82.67%
Shape F1 = 13.44%, Accuracy = 86.33%
Time: 27.41s

----------------------------------- EPOCH: 2 -----------------------------------
----------------------------------- Training -----------------------------------
Time: 49.52s
---------------------------------- Evaluating ----------------------------------
F1 = 78.58%, Precision = 78.37%, Recall = 78.47%
POS F1 = 52.36%, Accuracy = 90.33%
Shape F1 = 19.65%, Accuracy = 92.62%
F1 = 79.82%, Precision = 79.42%, Recall = 79.62%
POS F1 = 53.50%, Accuracy = 90.00%
Shape F1 = 23.31%, Accuracy = 92.78%
F1 = 75.93%, Precision = 76.24%, Recall = 76.08%
POS F1 = 54.58%, Accuracy = 90.25%
Shape F1 = 22.46%, Accuracy = 91.48%
Time

In [0]:
# 90.78