In [2]:
import pandas as pd
import numpy as np

import gensim.downloader as api
from gensim.models import KeyedVectors
from gensim.models import word2vec

from sklearn.model_selection import train_test_split as split

import contractions

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv1D, Bidirectional, LSTM, Dense, Concatenate, Dropout, Embedding, GlobalMaxPooling1D
from tensorflow.keras.metrics import binary_crossentropy, categorical_crossentropy, Accuracy, Recall, Precision
from tensorflow.keras.initializers import Constant

In [3]:
X = pd.read_feather('train.feather').sample(1000000).reset_index(drop=True)
Y = X.pop('key')

X_train, X_test, y_train, y_test = split(X, Y, test_size=0.3)

def totensor(df):
    return tf.convert_to_tensor(np.array(df.map(lambda x: x.tolist()).values.tolist()))

X_train = totensor(X_train.squeeze())
X_test = totensor(X_test.squeeze())
y_train = tf.convert_to_tensor(y_train)
y_test = tf.convert_to_tensor(y_test)

In [4]:
n_filters=128
sent_len = 20

weights = api.load('glove-wiki-gigaword-200').vectors

inp = Input(shape=(sent_len,))

emb = Embedding(input_dim=weights.shape[0],
                output_dim=weights.shape[1],
                embeddings_initializer=Constant(weights),
                trainable=False)

conv2 = Conv1D(filters=n_filters,
              activation='relu',
              padding='same',
              kernel_size=2)

conv3 = Conv1D(filters=n_filters,
              activation='relu',
              padding='same',
              kernel_size=3)

conv5 = Conv1D(filters=n_filters,
              activation='relu',
              padding='same',
              kernel_size=5)

model2 = GlobalMaxPooling1D()(conv2(emb(inp)))
model3 = GlobalMaxPooling1D()(conv3(emb(inp)))
model5 = GlobalMaxPooling1D()(conv5(emb(inp)))

conc = Concatenate()([model2, model3, model5])
decode = Dropout(0.3)(Dense(256)(conc))
out = Dense(1, activation='sigmoid')(decode)

model = Model(inputs=inp, outputs=out)
model.compile(loss=binary_crossentropy, optimizer='adam', metrics=[Accuracy(), Recall()])

In [7]:
model.fit(X_train, y_train,
          epochs=10,
          batch_size=32,
          validation_data=(X_test, y_test),
          verbose=1)

Train on 700000 samples, validate on 300000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f7f64c576d8>

In [9]:
model.evaluate(X_test, y_test, batch_size=64)



[0.1996402103018937, 0.84675336, 0.9901246]

In [None]:
# Test architecture, unused

n_filters=256
k_size=5
embedding_len=300
ngram = 10
sent_len = 50
lstm_count = 512

left_in = Input(shape=(k_size, sent_len, embedding_len))
main_in = Input(shape=(1, sent_len, embedding_len))
right_in = Input(shape=(k_size, sent_len, embedding_len))

left_conv = Conv1D(filters=n_filters,
                   activation='tanh',
                   padding='same',
                   kernel_size=ngram,
                   input_shape=(sent_len, embedding_len))
main_conv = Conv1D(filters=n_filters,
                   activation='tanh',
                   padding='same',
                   kernel_size=ngram,
                   input_shape=(1, embedding_len))
right_conv = Conv1D(filters=n_filters,
                    activation='tanh',
                    padding='same',
                    kernel_size=ngram,
                    input_shape=(sent_len, embedding_len))

context_bilstm1 = Bidirectional(LSTM(lstm_count,
                                    input_shape=(k_size, n_filters),
                                    recurrent_dropout=0.3,
                                    return_sequences=True))
context_bilstm2 = Bidirectional(LSTM(lstm_count,
                                    input_shape=(k_size, n_filters),
                                    recurrent_dropout=0.3,
                                    return_sequences=True))

left_conv = TimeDistributed(context_conv)(left_in)
mid_conv = TimeDistributed(main_conv)(main_in)
right_conv = TimeDistributed(context_conv)(right_in)

left_pool = TimeDistributed(GlobalMaxPooling1D())(left_conv)
mid_pool = TimeDistributed(GlobalMaxPooling1D())(mid_conv)
right_pool = TimeDistributed(GlobalMaxPooling1D())(right_conv)

left_context = context_bilstm1(context_bilstm2(left_pool))
right_context = context_bilstm1(context_bilstm2(right_pool))

left_encoded = Dropout(0.3)(AttentionWithContext()(left_context))
mid_encoded = Dropout(0.3)(Dense(300)(Flatten()(mid_pool)))
right_encoded = Dropout(0.3)(AttentionWithContext()(right_context))

encoding = Concatenate()([left_encoded, mid_encoded, right_encoded])
decoding = Dropout(0.3)(Dense(512)(encoding))

model = Model(inputs=[left_in, main_in, right_in], outputs=decoding)
model.compile(loss=binary_crossentropy, optimizer='adadelta', metrics=[Accuracy(), Recall()])