In [1]:
import matchzoo as mz
from matchzoo import DataGenerator
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
import scipy

from sklearn.metrics import precision_recall_fscore_support

Using TensorFlow backend.


# Load Data and Preprocess It
This data is based on WikiQA, but formatted differently. Basically, I took all the matches, and for each match I also generated 5 negative samples. To see the dataset, see the "matchzoo experiments" folder.

In [2]:
# Read the train and test CSVs (first column used as the index) and wrap each
# resulting frame in a MatchZoo data pack. Train is loaded first, then test.
train_raw, test_raw = (
    mz.pack(pd.read_csv(csv_path, index_col=0))
    for csv_path in (
        'matchzoo experiments/toy_matchzoo_train.csv',
        'matchzoo experiments/toy_matchzoo_test.csv',
    )
)

# Cast the label column of both packs to float32 (train first, then test).
for raw_pack in (train_raw, test_raw):
    raw_pack.relation['label'] = raw_pack.relation['label'].astype('float32')
del raw_pack  # keep the notebook namespace free of the loop variable

# The preprocessor is fitted on the training pack only; the fitted instance is
# then used to transform both splits.
preprocessor = mz.preprocessors.DSSMPreprocessor()
preprocessor.fit(train_raw)
# NOTE(review): bare expression in the middle of a cell -- nothing is displayed
# for it. Kept as-is; move it to the end of a cell if the context should be shown.
preprocessor.context

train_preprocessed, test_preprocessed = (
    preprocessor.transform(train_raw),
    preprocessor.transform(test_raw),
)

Processing text_left with chain_transform of TokenizeUnit => LowercaseUnit => PuncRemovalUnit => StopRemovalUnit => NgramLetterUnit: 100%|█| 716/716 [00:00<00:00, 8531.69it/s]
Processing text_right with chain_transform of TokenizeUnit => LowercaseUnit => PuncRemovalUnit => StopRemovalUnit => NgramLetterUnit: 100%|█| 1025/1025 [00:00<00:00, 3818.81it/s]
Processing text_left with extend: 100%|██████████████████████████████████████████| 716/716 [00:00<00:00, 178874.36it/s]
Processing text_right with extend: 100%|███████████████████████████████████████████████████| 1025/1025 [00:00<?, ?it/s]
Building VocabularyUnit from a datapack.: 100%|███████████████████████████| 112803/112803 [00:00<00:00, 3131196.64it/s]
Processing text_left with chain_transform of TokenizeUnit => LowercaseUnit => PuncRemovalUnit => StopRemovalUnit => NgramLetterUnit => WordHashingUnit: 100%|█| 716/716 [00:00<00:00, 6187.63it/s]
Processing text_right with chain_transform of TokenizeUnit => LowercaseUnit => PuncRemoval

# Build Model
Here we set the loss function to MatchZoo's rank cross entropy (`RankCrossEntropyLoss`) with 5 negative samples per positive example. Am I doing this correctly? I feel like there may be an issue with the negative samples.

In [3]:
# Configure a DSSM model. The input shapes come from the fitted preprocessor's
# context; the MLP hyper-parameters are set explicitly below.
model = mz.models.DSSM()
model.params['input_shapes'] = preprocessor.context['input_shapes']
model.params['mlp_num_layers'] = 3
model.params['mlp_num_units'] = 300
model.params['mlp_num_fan_out'] = 128
model.params['mlp_activation_func'] = 'relu'

# Ranking task trained with rank cross entropy, configured for 5 negatives per
# positive (num_neg=5). The training data must be laid out to match this.
ranking_task = mz.tasks.Ranking(loss=mz.losses.RankCrossEntropyLoss(num_neg=5))
model.params['task'] = ranking_task

# Fill any parameter that was not set explicitly above before building.
model.guess_and_fill_missing_params()

model.build()
model.compile()

# Sanity check: show which loss object ended up on the task. This reads through
# the same public `params` accessor used for every assignment above instead of
# reaching into the private `_params` attribute.
print(model.params['task'].loss)


Parameter "name" set to DSSM.
<matchzoo.losses.rank_cross_entropy_loss.RankCrossEntropyLoss object at 0x000002390E21EAC8>


In [4]:
# Split each preprocessed pack into its (inputs, labels) pair; train is
# unpacked first, then test.
(x_train, y_train), (x_test, y_test) = (
    train_preprocessed.unpack(),
    test_preprocessed.unpack(),
)

# Train Model
The batch size is a multiple of 6 so that every batch holds whole groups of 1 matching example plus its 5 negative samples. I also set `shuffle=False`, so each group of 6 keeps the label order 1, 0, 0, 0, 0, 0.

In [5]:
# The loss is configured with num_neg=5, so rows are consumed in groups of
# 6 (1 positive followed by its 5 negatives). Both the batch size and the
# number of rows in each split must therefore be multiples of the group size;
# otherwise a group would be cut in half and positives/negatives would be
# silently misaligned.
GROUP_SIZE = 6            # 1 positive + 5 negatives
GROUPS_PER_BATCH = 20
BATCH_SIZE = GROUPS_PER_BATCH * GROUP_SIZE  # 120, always a multiple of GROUP_SIZE

assert len(y_train) % GROUP_SIZE == 0, (
    "training rows must form whole groups of %d (got %d rows)" % (GROUP_SIZE, len(y_train)))
assert len(y_test) % GROUP_SIZE == 0, (
    "validation rows must form whole groups of %d (got %d rows)" % (GROUP_SIZE, len(y_test)))

# shuffle=False keeps the rows of every group adjacent and in their original
# order within each batch.
history = model.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=BATCH_SIZE, epochs=50, shuffle=False, verbose=2)


Train on 4986 samples, validate on 1248 samples
Epoch 1/50
 - 5s - loss: 1.7942 - val_loss: 1.7891
Epoch 2/50
 - 4s - loss: 1.7958 - val_loss: 1.7945
Epoch 3/50
 - 4s - loss: 1.7928 - val_loss: 1.8051
Epoch 4/50
 - 4s - loss: 1.7858 - val_loss: 1.8445
Epoch 5/50
 - 4s - loss: 1.7705 - val_loss: 1.9621
Epoch 6/50
 - 4s - loss: 1.7501 - val_loss: 1.9852
Epoch 7/50
 - 4s - loss: 1.7209 - val_loss: 2.0466
Epoch 8/50


KeyboardInterrupt: 

# Evaluate

Here we predict. Since the loss is a cross entropy, the model outputs logits, so they have to be mapped into the [0, 1] range (e.g. with the sigmoid function) before a cutoff can be applied. I'm not sure whether an element-wise sigmoid is the right choice, though: I think the MatchZoo cross entropy uses a softmax, which may be causing problems.

In [None]:
# Score every test row, then turn the raw scores into probabilities.
#
# The model was trained with RankCrossEntropyLoss(num_neg=5), i.e. a softmax
# over each group of 6 candidates (1 positive + 5 negatives), not an
# independent per-row sigmoid. The scores are therefore normalized with a
# softmax *within each group of 6*, which is the probability the loss actually
# optimized. An element-wise sigmoid on these scores is not calibrated.
GROUP_SIZE = 6  # 1 positive + 5 negatives, same row layout as used for training

raw_scores = np.asarray(model.predict(x_test))  # assumes one score per row -- TODO confirm
if raw_scores.size % GROUP_SIZE:
    raise ValueError(
        "expected the number of predictions to be a multiple of %d, got %d"
        % (GROUP_SIZE, raw_scores.size))

grouped_scores = raw_scores.reshape(-1, GROUP_SIZE)
# Subtract the per-group maximum before exponentiating for numerical stability.
grouped_scores = grouped_scores - grouped_scores.max(axis=1, keepdims=True)
exp_scores = np.exp(grouped_scores)
# Restore the original shape so downstream cells see the same layout as before.
# With these probabilities a cutoff of 0.5 marks a row positive only when it
# holds more than half of its group's probability mass.
y_pred = (exp_scores / exp_scores.sum(axis=1, keepdims=True)).reshape(raw_scores.shape)

print(y_pred[0:10])
print(y_test[0:10])

In [None]:
# Binarize the predicted scores at 0.5 and score them against the true labels,
# treating label 1 as the positive class.
y_pred_cutoff = y_pred > 0.5

prec, recall, f1beta, support = precision_recall_fscore_support(
    y_test,
    y_pred_cutoff,
    average='binary',
    pos_label=1,
)

# One metric per line: precision, recall, F-score.
print(prec, recall, f1beta, sep='\n')
# F1 recomputed through the dedicated helper with the same settings.
print(f1_score(y_test, y_pred_cutoff, average='binary', pos_label=1))