# Siamese network 2, 03062017

In [11]:
from datetime import datetime
from IPython.display import SVG

import pandas as pd
import numpy as np

from keras.models import Model
from keras.layers import Dense, Dropout, Input, LSTM, Embedding, Bidirectional, \
    Lambda, RepeatVector, merge, Permute, Reshape
from keras.layers.merge import concatenate, multiply
from keras.callbacks import EarlyStopping, ModelCheckpoint, ProgbarLogger, TensorBoard
from keras.layers.normalization import BatchNormalization
from keras import backend as K
from keras_tqdm import TQDMNotebookCallback

from utils import load_embeddings, extract_questions_from_dataframe, save_submission

# Reload imported project modules automatically before each cell executes,
# so edits to local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Widen pandas' column display to 250 characters so long text cells are not truncated.
pd.set_option('max_colwidth', 250)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
class Config(object):
    """Hyper-parameters and token conventions used throughout this notebook."""

    # Number of pretrained word vectors; the embedding matrix has
    # VOCABULARY_SIZE + OFFSET rows (see the Embedding layer's input_dim).
    VOCABULARY_SIZE = 1193514
    # Width of each word vector.
    EMBEDDING_DIMENSION = 200
    # Number of ids reserved ahead of the vocabulary
    # (presumably for the three special tokens below -- confirm in utils).
    OFFSET = 3
    OOV_TOKEN = 0  # id for out-of-vocabulary words
    EOS_TOKEN = 1  # id marking the end of a sentence
    PAD_TOKEN = 2  # id used to pad up to MAX_SENTENCE_LENGTH
    # Fixed number of token ids per encoded question.
    MAX_SENTENCE_LENGTH = 60
    # Units in the dense layer that follows the merged sentence vectors.
    DENSE_LAYER_SIZE = 150
    # Dropout rate applied in the classifier head.
    DROPOUT = 0.4

    def stamp(self, comment):
        """Return `comment` prefixed with the current local time.

        Args:
            comment: Free-form label appended after the timestamp.

        Returns:
            A string of the form ``YYYYMMDD_HHMM_<comment>``.
        """
        template = '{date:%Y%m%d_%H%M}_{comment}'
        current_time = datetime.now()
        return template.format(date=current_time, comment=comment)

In [4]:
# Build the run configuration first; both helpers below receive it as `config`.
current_config = Config()

# Raw training pairs, read from the working directory.
train_dataframe = pd.read_csv('train.csv')

# Pretrained embedding matrix plus the word -> row-index lookup that goes with it.
embedding_weights, word2idx = load_embeddings(
    'glove.twitter.27B.200d.txt', config=current_config)

# Encode both question columns as id sequences and pull out the labels.
# prediction_mode=False: presumably tells the helper that labels are present -- confirm in utils.
questions_A, questions_B, labels = extract_questions_from_dataframe(
    train_dataframe,
    config=current_config,
    word2idx=word2idx,
    prediction_mode=False,
)

404290 preprocessed questions loaded from disk


In [5]:
# Sanity check: show the dimensions of the encoded first-question matrix
# (the recorded output is (404290, 60)).
questions_A.shape

(404290, 60)

In [14]:
def shared_attention(inputs):
    """Re-weight the time steps of a sequence with a single attention vector.

    A softmax Dense layer scores the time steps separately for every feature
    channel; those scores are averaged over the channels into one weight per
    time step, broadcast back to the input's shape and multiplied
    element-wise with `inputs`.

    Args:
        inputs: Keras tensor with static shape (batch, time_steps, features).

    Returns:
        Keras tensor with the same shape as `inputs`, scaled by the attention
        weights.

    NOTE(review): a new Dense layer is instantiated on every call, so the two
    siamese branches that call this function learn separate attention weights.
    Confirm whether that is intended or whether the layer should be tied.
    """
    # Take the sizes from the tensor itself. The previous version used
    # EMBEDDING_DIMENSION for the feature axis, which only worked because the
    # BiLSTM output width (2 * units) happened to equal the embedding width.
    _, time_steps, feature_dim = K.int_shape(inputs)

    a = Permute((2, 1))(inputs)  # -> (batch, features, time_steps)
    # Shape-preserving reshape kept from the original graph; it only re-declares
    # the static shape.
    a = Reshape(target_shape=(feature_dim, time_steps))(a)
    # Dense acts on the last axis: softmax over time steps, per feature channel.
    a = Dense(time_steps, activation='softmax')(a)
    # Collapse the feature axis so all channels share one attention vector.
    a = Lambda(lambda x: K.mean(x, axis=1))(a)  # -> (batch, time_steps)
    a = RepeatVector(n=feature_dim)(a)  # -> (batch, features, time_steps)
    a_probs = Permute(dims=(2, 1))(a)  # -> (batch, time_steps, features)
    output_attention_mul = multiply([inputs, a_probs])
    return output_attention_mul

# ---------------------------------------------------------------------------
# Siamese encoder: every layer below is created once and applied to both
# questions so that the two branches share their weights.
# ---------------------------------------------------------------------------

# First BiLSTM returns the whole sequence so attention can weight each time step.
shared_lstm_layer_1 = Bidirectional(
    LSTM(units=100,
         return_sequences=True,
         dropout=current_config.DROPOUT)  # was a literal 0.4 duplicating the config value
)

# Second BiLSTM returns only its final output: one vector per question.
shared_lstm_layer_2 = Bidirectional(
    LSTM(units=100,
         return_sequences=False,
         dropout=current_config.DROPOUT)
)

# Frozen embedding lookup initialised from the pretrained matrix.
shared_embedding_layer = Embedding(
    input_dim=current_config.VOCABULARY_SIZE + current_config.OFFSET,
    output_dim=current_config.EMBEDDING_DIMENSION,
    input_length=current_config.MAX_SENTENCE_LENGTH,
    weights=[embedding_weights],
    trainable=False
)

# One normalisation layer for both branches. Previously each branch built its
# own BatchNormalization whose output was never consumed, so the layers were
# dead code and absent from the model. They are now tied and fed to attention.
shared_normalization_layer = BatchNormalization()

# Branch A
input_A = Input(shape=(current_config.MAX_SENTENCE_LENGTH,))
embeddings_A = shared_embedding_layer(input_A)
initial_representation_A = shared_lstm_layer_1(embeddings_A)
norm_initial_representation_A = shared_normalization_layer(initial_representation_A)
attended_representation_A = shared_attention(norm_initial_representation_A)
sentence_representation_A = shared_lstm_layer_2(attended_representation_A)

# Branch B (same layers, second question)
input_B = Input(shape=(current_config.MAX_SENTENCE_LENGTH,))
embeddings_B = shared_embedding_layer(input_B)
initial_representation_B = shared_lstm_layer_1(embeddings_B)
norm_initial_representation_B = shared_normalization_layer(initial_representation_B)
attended_representation_B = shared_attention(norm_initial_representation_B)
sentence_representation_B = shared_lstm_layer_2(attended_representation_B)

# Classifier head on the concatenated sentence vectors.
merged_model = concatenate([sentence_representation_A, sentence_representation_B])
dropout_1 = Dropout(current_config.DROPOUT)(merged_model)
# NOTE(review): this Dense layer has no activation, so the head is linear up to
# the final sigmoid -- confirm this is deliberate.
dense_1 = Dense(current_config.DENSE_LAYER_SIZE)(dropout_1)
dropout_2 = Dropout(current_config.DROPOUT)(dense_1)
merged = BatchNormalization()(dropout_2)

# Single sigmoid unit: one probability-like score per question pair.
predictions = Dense(1, activation='sigmoid')(merged)

model = Model(inputs=[input_A, input_B], outputs=predictions)
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_13 (InputLayer)            (None, 60)            0                                            
____________________________________________________________________________________________________
input_14 (InputLayer)            (None, 60)            0                                            
____________________________________________________________________________________________________
embedding_8 (Embedding)          (None, 60, 200)       238703400   input_13[0][0]                   
                                                                   input_14[0][0]                   
____________________________________________________________________________________________________
bidirectional_11 (Bidirectional) (None, 60, 200)       240800      embedding_8[0][0]       

In [15]:
# Callbacks
# Weights file name is timestamped so successive runs do not overwrite each other.
checkpoint_path = current_config.stamp(comment='1') + '.h5'

# Stop once validation loss has not improved for 15 consecutive epochs.
early_stopping = EarlyStopping(patience=15, monitor='val_loss')

# Keep only the weights of the epoch with the lowest validation loss.
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    save_weights_only=True,
    save_best_only=True,
)

In [16]:
# Train on both question matrices; Keras holds out the trailing 20% of the
# samples (taken before shuffling) as the validation set.
fit_options = dict(
    epochs=150,
    batch_size=1024,
    validation_split=0.2,
    callbacks=[early_stopping, model_checkpoint],
)
training_logs = model.fit(x=[questions_A, questions_B], y=labels, **fit_options)

Train on 400 samples, validate on 100 samples


Widget Javascript not detected.  It may not be installed or enabled properly.


Widget Javascript not detected.  It may not be installed or enabled properly.


Widget Javascript not detected.  It may not be installed or enabled properly.


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 1/100







Widget Javascript not detected.  It may not be installed or enabled properly.


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 2/100







Widget Javascript not detected.  It may not be installed or enabled properly.


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 3/100







Widget Javascript not detected.  It may not be installed or enabled properly.


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 4/100


KeyboardInterrupt: 

In [None]:
# Persist the full model after training.
# NOTE(review): absolute path -- presumably the execution platform's output
# directory; confirm before running elsewhere.
final_model_path = '/output/model_final.bin'
model.save(final_model_path)