In [1]:
import numpy as np

from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input, Lambda, BatchNormalization
from keras.optimizers import RMSprop, Adam
from keras import backend as K
import pandas as pd

Using TensorFlow backend.


# Utility functions for siamese network

In [2]:
def euclidean_distance(vects):
    '''Euclidean distance between the two tensors packed in `vects`.

    `vects` must unpack into exactly two tensors. Their squared differences
    are summed along axis 1 (the axis is kept), and the sum is floored at
    K.epsilon() so the square root is never taken of a value below it.
    '''
    left, right = vects
    squared_sum = K.sum(K.square(left - right), axis=1, keepdims=True)
    return K.sqrt(K.maximum(squared_sum, K.epsilon()))

In [3]:
def eucl_dist_output_shape(shapes):
    '''Shape produced by the distance layer.

    `shapes` must unpack into exactly two shapes; the result is a 2-tuple
    made of the first shape's leading entry and 1.
    '''
    first_shape, _ = shapes
    leading_dim = first_shape[0]
    return (leading_dim, 1)

In [4]:
def contrastive_loss(y_true, y_pred):
    '''Contrastive loss (Hadsell, Chopra, LeCun, 2006).
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    With y_true == 1 the loss is the squared predicted distance; with
    y_true == 0 it is the squared shortfall of the distance below the
    margin (zero once the distance reaches the margin). The mean over all
    elements is returned.
    '''
    margin = 1
    # Term active where y_true is 1: penalises any distance.
    pull_term = y_true * K.square(y_pred)
    # Term active where y_true is 0: penalises distances smaller than the margin.
    push_term = (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))
    return K.mean(pull_term + push_term)

In [5]:
def create_base_network(input_dim):
    '''Build the base network that is meant to be shared between branches.

    Architecture, in order: Dense(1000, relu) -> Dropout(0.2) ->
    BatchNormalization -> Dense(500, relu) -> Dropout(0.2) ->
    BatchNormalization -> Dense(300, relu).

    input_dim: number of features in one input vector.
    Returns the assembled Sequential model.
    '''
    layer_stack = [
        Dense(1000, input_shape=(input_dim,), activation='relu'),
        Dropout(0.2),
        BatchNormalization(),
        Dense(500, activation='relu'),
        Dropout(0.2),
        BatchNormalization(),
        Dense(300, activation='relu'),
    ]

    network = Sequential()
    for layer in layer_stack:
        network.add(layer)
    return network

In [6]:
def compute_accuracy(predictions, labels, threshold=0.001):
    '''Compute classification accuracy with a fixed threshold on distances.

    A pair is predicted positive when its distance is strictly below
    `threshold`; accuracy is the fraction of pairs whose predicted class
    matches the true label.

    The earlier form, `labels[predictions.ravel() < 0.001].mean()`, returned
    the mean label among predicted positives only. That is precision rather
    than accuracy: pairs predicted negative were never counted, and the
    result was NaN whenever no distance fell below the threshold.

    predictions: array-like of distances; flattened before thresholding.
    labels: array-like of 0/1 labels, one per prediction (1 = positive pair).
    threshold: distance cut-off, 0.001 by default.
    Returns the accuracy as a float between 0 and 1.
    '''
    predicted_positive = np.asarray(predictions).ravel() < threshold
    # Cast to float first so labels read from an object-dtype table compare cleanly.
    actual_positive = np.asarray(labels, dtype=float).ravel() == 1
    return np.mean(predicted_positive == actual_positive)

# Network definition

In [7]:
input_dim = 625  # number of features fed to a single branch of the network

# One base network instance is applied to both inputs, so both branches
# use the same weights.
base_network = create_base_network(input_dim)

input_a = Input(shape=(input_dim,))
input_b = Input(shape=(input_dim,))
processed_a = base_network(input_a)
processed_b = base_network(input_b)

# The model output is the Euclidean distance between the two branch outputs.
distance = Lambda(
    euclidean_distance,
    output_shape=eucl_dist_output_shape,
)([processed_a, processed_b])

model = Model([input_a, input_b], distance)
model.compile(loss=contrastive_loss, optimizer='Adam')

Loading the weights

In [8]:
# Load weights from the HDF5 file into the model built above.
# NOTE(review): presumably this file was saved from a model with the same
# architecture; confirm where it comes from.
model.load_weights("weights_siamese_model.h5")

# Evaluation on test set

In [9]:
# The test set is a tab-separated file in the working directory.
data = pd.read_csv("./test.csv", sep="\t")

In [11]:
number_of_features = 1190 # total number of feature columns; also used below as the index of the label column

In [12]:
# Column layout: [0, 565) first-mention features, [565, 1130) second-mention
# features, [1130, 1190) features common to both mentions.
test_common_features = data.values[:, 1130:1190]

# Each branch input is the mention's own 565 columns followed by the 60
# common columns (625 columns in total).
test_first_mention = np.hstack((data.values[:, :565], test_common_features))
test_second_mention = np.hstack((data.values[:, 565:1130], test_common_features))

# last column consists of labels
test_labels = data.values[:, number_of_features]

In [13]:
# Predicted distance for every test pair (the model's output is the distance layer).
test_pred = model.predict([test_first_mention, test_second_mention], verbose=1)
# Score the distances against the labels using compute_accuracy's fixed threshold.
test_acc = compute_accuracy(test_pred, test_labels)
print('* Accuracy on test set: %0.2f%%' % (100 * test_acc))



Let's compute the predicted distance for a single example from the test set, say, the example at index 20.

In [16]:
# Slicing with 20:21 (rather than indexing with 20) keeps a 2-D array with a
# single row for each input.
first = test_first_mention[20:21,:]
second = test_second_mention[20:21,:]
# Call form of print: the bare `print expr` statement is a SyntaxError on
# Python 3, while this form prints the same on Python 2 and 3.
print(model.predict([first, second]))

[[ 0.00031623]]


If the predicted distance is <b>below 0.001</b>, we consider the pair coreferent.