# NAACL 2018 Shared Task - Metaphor Detection

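This notebook trains a bidirectional LSTM that tags every token of a sentence as metaphor or non-metaphor, using the VU Amsterdam Metaphor Corpus (VUAMC), and scores the predictions for the test set against the shared task's gold labels.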

## Prerequisites 

- Facebook FastText Embeddings for English
- https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md

## Preflight Checks

- Install the dependencies listed in `requirements.txt`
- (optional) Download `vuamc.zip` from http://ota.ahds.ac.uk/headers/2541.xml

Shared task repository: https://github.com/EducationalTestingService/metaphor/tree/master/NAACL-FLP-shared-task

In [14]:
# TODO: Check for dependencies
import utils
import corpus
import evaluate
import features
import numpy

import os
import collections

from keras.utils import to_categorical
from keras.layers import TimeDistributed, Bidirectional, LSTM, Input, Masking, Dense
from keras.models import Model
from keras import backend as kerasbackend

In [2]:
# Check for VUAMC CSV files and generate if necessary

# Both CSV files are read when the corpora are loaded, so the warning has to
# fire when EITHER of them is missing, not only when both are missing.
required_csv_files = ['source/vuamc_corpus_test.csv', 'source/vuamc_corpus_train.csv']
missing_csv_files = [path for path in required_csv_files if not os.path.exists(path)]

if missing_csv_files:
    print('VUAMC training and test data not found. Generating...')
    # NOTE(review): the generation calls below are disabled, so nothing is
    # actually generated yet; re-enable them once they are known to work.
    # utils.download_vuamc_xml()
    # utils.generate_vuamc_csv()

In [4]:
# Training corpus: built from the corpus CSV and the gold-label verb token CSV,
# then validated before anything else touches it.
c_train = corpus.VUAMC(
    'source/vuamc_corpus_train.csv',
    'source/verb_tokens_train_gold_labels.csv',
)
c_train.validate_corpus()
print('Loaded and validated training corpus')

# Test corpus: same loader, but switched to mode='test' and fed the verb token
# CSV that does not carry the "gold_labels" suffix.
c_test = corpus.VUAMC(
    'source/vuamc_corpus_test.csv',
    'source/verb_tokens_test.csv',
    mode='test',
)
c_test.validate_corpus()
print('Loaded and validated test corpus')

Loaded and validated training corpus
Loaded and validated test corpus


In [5]:
# Quantify how imbalanced the two label classes are in the training data
training_labels = c_train.label_list
number_of_all_labels = len(training_labels)
count_of_label_classes = collections.Counter(training_labels)

# Label 0 is counted as non-metaphor, label 1 as metaphor.
non_metaphor_count = count_of_label_classes[0]
metaphor_count = count_of_label_classes[1]

percentage_of_non_metaphor_tokens = round(non_metaphor_count / number_of_all_labels * 100)
percentage_of_metaphor_tokens = round(metaphor_count / number_of_all_labels * 100)
ratio = utils.simplify_ratio(percentage_of_non_metaphor_tokens, percentage_of_metaphor_tokens)

# Sanity check: the two rounded shares have to cover the whole label list.
assert percentage_of_non_metaphor_tokens + percentage_of_metaphor_tokens == 100

print('Percentage of metaphor tokens: {}'.format(percentage_of_metaphor_tokens))
print('Percentage of non-metaphor tokens: {}'.format(percentage_of_non_metaphor_tokens))
ratio_first = ratio[0]
ratio_second = ratio[1]
print('Ratio: {}:{}'.format(ratio_first, ratio_second))

Percentage of metaphor tokens: 3
Percentage of non-metaphor tokens: 97
Ratio: 1:32


In [6]:
# Global configuration
# Everything a reader might want to tune lives in this one cell.

# Number of time steps per sentence: second dimension of the model input and
# also handed to the evaluation step when predictions are mapped back.
MAX_SENTENCE_LENGTH = 50
# Size of one word vector: last dimension of the model input and the
# dimension the embeddings object is created with.
EMBEDDING_DIM = 300
# Optimizer name passed to model.compile().
KERAS_OPTIMIZER = 'rmsprop'
# Metrics passed to model.compile().
KERAS_METRICS = ['categorical_accuracy']
# Number of training epochs passed to model.fit().
KERAS_EPOCHS = 1
# Batch size used for both model.fit() and model.predict().
KERAS_BATCH_SIZE = 32

In [7]:
# Build the input tensors and labels for both corpora.
# Alternative embedding source: features.Word2Vec()
word_vectors = features.DummyEmbeddings(EMBEDDING_DIM)
x, y = features.generate_input_and_labels(
    c_train.sentences,
    Vectors=word_vectors,
)
x_test, y_test = features.generate_input_and_labels(
    c_test.sentences,
    Vectors=word_vectors,
)

# The vectors are not needed once the tensors exist, so release them.
del word_vectors
print('Deleted Word Embeddings')

# Model input is used as-is; the labels are one-hot encoded over two classes.
x_input = x
y_labels = to_categorical(y, 2)

Deleted Word Embeddings


In [8]:
# Generate Training and Validation split

# Share of the samples held out for validation, and the seed that makes the
# shuffle (and therefore the split) identical on every run.
VALIDATION_FRACTION = 0.2
SPLIT_SEED = 42

num_samples = x_input.shape[0]

# A dedicated generator keeps the split reproducible without reseeding
# numpy's global random state.
shuffle_rng = numpy.random.RandomState(SPLIT_SEED)
indices = shuffle_rng.permutation(num_samples)
data = x_input[indices]
labels = y_labels[indices]
num_validation_samples = int(VALIDATION_FRACTION * num_samples)

# Split on a positive index: slicing with -num_validation_samples breaks when
# that value is 0, because data[:-0] is empty and data[-0:] is everything.
split_index = num_samples - num_validation_samples

x_train = data[:split_index]
y_train = labels[:split_index]
x_val = data[split_index:]
y_val = labels[split_index:]

print('Shape of Train Data tensor:', x_train.shape)
print('Shape of Train Labels tensor:', y_train.shape)
print('Shape of Validation Data tensor:', x_val.shape)
print('Shape of validation Labels tensor:', y_val.shape)

Shape of Train Data tensor: (6299, 50, 300)
Shape of Train Labels tensor: (6299, 50, 2)
Shape of Validation Data tensor: (1574, 50, 300)
Shape of validation Labels tensor: (1574, 50, 2)


In [10]:
# Generate loss_weight, since our dataset contains far more non-metaphor
# tokens than metaphor tokens.
# The weight is derived from the class percentages computed in the class
# imbalance cell instead of being hard-coded, so it follows the corpus.
if percentage_of_metaphor_tokens <= 0:
    # Without any metaphor share there is nothing to up-weight, and the
    # division below would fail.
    raise ValueError('Cannot derive loss_weight: no metaphor tokens in the training data')

# How many non-metaphor tokens there are per metaphor token (97 / 3 -> 32).
loss_weight = round(percentage_of_non_metaphor_tokens / percentage_of_metaphor_tokens)

# KERAS_LOSS = 'categorical_crossentropy'
# Class 0 (non-metaphor) keeps weight 1, class 1 (metaphor) gets loss_weight.
KERAS_LOSS = utils.weighted_categorical_crossentropy([1, loss_weight])
print('loss_weights 1 : {}'.format(loss_weight))

loss_weights 1 : 32


In [12]:
# Build the network: masking -> bidirectional LSTM -> per-token softmax
inputs = Input(shape=(MAX_SENTENCE_LENGTH, EMBEDDING_DIM))

# Time steps whose vector consists only of -1 are masked out.
masked = Masking(mask_value=[-1] * EMBEDDING_DIM)(inputs)

# return_sequences=True keeps one output per time step for the tagger.
encoded = Bidirectional(
    LSTM(100, return_sequences=True, dropout=0, recurrent_dropout=0.25)
)(masked)

# Two-way softmax applied to every time step.
outputs = TimeDistributed(Dense(2, activation='softmax'))(encoded)

model = Model(inputs=inputs, outputs=outputs)
model.compile(
    optimizer=KERAS_OPTIMIZER,
    loss=KERAS_LOSS,
    metrics=KERAS_METRICS,
)

# Train, then score once more on the validation split
model.fit(
    x_train,
    y_train,
    batch_size=KERAS_BATCH_SIZE,
    epochs=KERAS_EPOCHS,
    validation_data=(x_val, y_val),
)
scores = model.evaluate(x_val, y_val)

Train on 6299 samples, validate on 1574 samples
Epoch 1/1


In [15]:
# Generate list of label predictions for each sentence
# The model ends in a two-way softmax per time step, so predict() yields one
# score pair per token.
float_predictions = model.predict(x_test, batch_size=KERAS_BATCH_SIZE)

# Pick the higher-scoring class per token. predict() already returns a numpy
# array, so numpy.argmax over the last axis does this directly and avoids
# creating and evaluating extra backend graph ops each time the cell is run.
label_predictions = numpy.argmax(float_predictions, axis=-1)

# Write prediction to CSV file
predictions_file = 'predictions.csv'
standard_file = 'source/verb_tokens_test_gold_labels.csv'

rows = evaluate.corpus_evaluation(c_test, label_predictions, MAX_SENTENCE_LENGTH)
evaluate.csv_evalutation(rows, predictions_file)

# NOTE(review): the recorded output shows identical precision, recall and F1;
# confirm that precision_recall_f1 scores the metaphor class rather than
# averaging over both classes.
results = evaluate.precision_recall_f1(predictions_file, standard_file)

print(results)

Result(precision=0.6616130988477865, recall=0.6616130988477865, f1=0.6616130988477865)
