In [1]:
from sklearn.externals import joblib
import numpy as np
from numpy import argmax
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
from nltk.translate.bleu_score import corpus_bleu

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [65]:
# Name of the trained model to load. It selects the data/<model_name>/ directory
# that the pickled splits, tokenizers and the .h5 model file are read from.
model_name = 'model_01081_32'

In [76]:
# Load the pickled train/test splits stored next to the selected model.
# The full-dataset load is intentionally left disabled:
# dataset = joblib.load('data/dataset.pkl')
train = joblib.load(f'data/{model_name}/train.pkl')
test = joblib.load(f'data/{model_name}/test.pkl')

In [66]:
# Restore the pickled tokenizers and length values saved alongside the model.
eng_tokenizer = joblib.load(f'data/{model_name}/eng_tokenizer.pkl')
targ_tokenizer = joblib.load(f'data/{model_name}/targ_tokenizer.pkl')
eng_length = joblib.load(f'data/{model_name}/eng_length.pkl')
targ_length = joblib.load(f'data/{model_name}/targ_length.pkl')

# Vocabulary size = number of distinct words seen by the tokenizer, plus one.
eng_vocab_size = 1 + len(eng_tokenizer.word_counts)
targ_vocab_size = 1 + len(targ_tokenizer.word_counts)

In [67]:
# Load the already-encoded source arrays for both splits.
trainX = joblib.load(f'data/{model_name}/trainX.pkl')
testX = joblib.load(f'data/{model_name}/testX.pkl')

In [None]:
# Optional rebuild path: refit the tokenizers and recompute lengths/vocabulary
# sizes from the full dataset instead of using the pickled values loaded earlier.
#
# Disabled by default: `dataset` is not loaded anywhere else in this notebook
# (its load is commented out), so running these statements unguarded raises
# NameError on a fresh kernel, and running them at all overwrites the pickled
# tokenizers.
# NOTE(review): presumably the saved model expects the pickled tokenizers' ids;
# confirm before enabling this.
REBUILD_TOKENIZERS = False

if REBUILD_TOKENIZERS:
    # Same path as the commented-out dataset load earlier in the notebook.
    dataset = joblib.load('data/dataset.pkl')

    eng_tokenizer = Tokenizer()
    eng_tokenizer.fit_on_texts(dataset[:,0])
    eng_length = max(len(line.split()) for line in dataset[:,0])
    eng_vocab_size = len(eng_tokenizer.word_counts) + 1

    targ_tokenizer = Tokenizer()
    targ_tokenizer.fit_on_texts(dataset[:,1])
    targ_length = max(len(line.split()) for line in dataset[:,1])
    targ_vocab_size = len(targ_tokenizer.word_counts) + 1

In [68]:
# Display the encoded training sources loaded from trainX.pkl.
# The commented lines would instead re-encode them from the raw text in `train`:
# trainX = eng_tokenizer.texts_to_sequences(train[:,0])
# trainX = pad_sequences(trainX, maxlen=eng_length, padding='post')
trainX

array([[  36, 1133,    0,    0,    0],
       [   7,   19,   54,  126,    0],
       [  18,  674,    2,    0,    0],
       ...,
       [   7,  714, 2831,    0,    0],
       [   3,    5,  210,    2,    0],
       [   1,  902,    5,    0,    0]], dtype=int32)

In [69]:
# Display the encoded test sources loaded from testX.pkl.
# The commented lines would instead re-encode them from the raw text in `test`:
# testX = eng_tokenizer.texts_to_sequences(test[:,0])
# testX = pad_sequences(testX, maxlen=eng_length, padding='post')
testX

array([[  6, 544, 617,   0,   0],
       [  8, 176,   3, 387,   0],
       [  2,  20, 323,  82,   0],
       ...,
       [  1, 271,  69,   0,   0],
       [  7,  19, 430,   0,   0],
       [  7,  76, 307, 313,   0]], dtype=int32)

In [70]:
# The model file lives at data/<model_name>/<model_name>.h5.
model = load_model(f'data/{model_name}/{model_name}.h5')

## Evaluate model

In [11]:
def word_for_id(integer, tokenizer):
    """Reverse-lookup a vocabulary word from its integer id.

    Walks ``tokenizer.word_index`` (a word -> id mapping) in iteration order
    and returns the first word whose id equals ``integer``. Returns ``None``
    when no word carries that id.
    """
    matches = (
        token
        for token, token_id in tokenizer.word_index.items()
        if token_id == integer
    )
    return next(matches, None)
 
# generate target given source sequence
def predict_sequence(model, tokenizer, source):
    """Decode the model's prediction for one encoded source into a string.

    Args:
        model: object whose ``predict(source, verbose=0)`` returns a batch of
            per-position score vectors; only the first batch item is decoded.
        tokenizer: object exposing ``word_index`` (word -> integer id) for the
            output vocabulary.
        source: encoded input, passed to ``model.predict`` unchanged.

    Returns:
        The decoded words joined by single spaces. Decoding stops at the first
        position whose highest-scoring id has no word in
        ``tokenizer.word_index``; an empty string is returned if that happens
        at the first position.
    """
    prediction = model.predict(source, verbose=0)[0]

    # Build the id -> word map once instead of scanning the whole vocabulary
    # for every predicted position. setdefault keeps the first word seen for
    # an id, which matches a front-to-back scan of word_index.
    id_to_word = {}
    for word, index in tokenizer.word_index.items():
        id_to_word.setdefault(index, word)

    target = []
    for vector in prediction:
        word = id_to_word.get(int(argmax(vector)))
        if word is None:
            # No vocabulary entry for this id: treat it as end of sentence.
            break
        target.append(word)
    return ' '.join(target)
 
# evaluate the skill of the model
def evaluate_model(model, tokenizer, sources, raw_dataset):
    """Translate every encoded source and print sample output plus BLEU-1..4.

    Args:
        model: model handed to ``predict_sequence`` for each source.
        tokenizer: tokenizer used to turn predicted ids back into words.
        sources: iterable of 1-D encoded source arrays; each one is reshaped
            to ``(1, length)`` before prediction.
        raw_dataset: indexable of ``(raw_source, raw_target)`` text pairs,
            aligned by position with ``sources``.

    Returns:
        None. The first ten translations and four corpus-level BLEU scores are
        printed.
    """
    actual, predicted = list(), list()
    for i, source in enumerate(sources):
        # translate encoded source text
        source = source.reshape((1, source.shape[0]))
        # Use the tokenizer passed by the caller, not the notebook-level
        # `targ_tokenizer` global, so the parameter is actually honoured.
        translation = predict_sequence(model, tokenizer, source)
        raw_src, raw_target = raw_dataset[i]
        if i < 10:
            print(f'src: {raw_src}; target: {raw_target}; translation: {translation}')
        # corpus_bleu expects a list of reference translations per hypothesis.
        actual.append([raw_target.split()])
        predicted.append(translation.split())
    # calculate BLEU score
    # NOTE(review): the BLEU-3 weights sum to 0.9 rather than 1. They are kept
    # as-is so scores stay comparable with earlier runs; consider
    # (1/3, 1/3, 1/3, 0).
    bleu_settings = (
        ('BLEU-1', (1.0, 0, 0, 0)),
        ('BLEU-2', (0.5, 0.5, 0, 0)),
        ('BLEU-3', (0.3, 0.3, 0.3, 0)),
        ('BLEU-4', (0.25, 0.25, 0.25, 0.25)),
    )
    for label, weights in bleu_settings:
        print(f'{label}: {corpus_bleu(actual, predicted, weights=weights)}')
 

In [77]:
# Evaluate on the training split: prints ten sample translations and BLEU-1..4.
print('train')
evaluate_model(model, targ_tokenizer, trainX, train)

train
src: thats helpful; target: eso es de ayuda; translation: eso es ayuda ayuda
src: he was very tired; target: el estaba muy cansado; translation: el estaba muy cansado
src: she trusted you; target: ella confiaba en ti; translation: ella confiaba en en
src: im just curious; target: tan solo soy curioso; translation: solo estoy soy
src: i also like cakes; target: tambien me gusta los pasteles; translation: me gusta el los
src: see for yourself; target: miralo tu mismo; translation: miralo tu
src: can i have some; target: me das un poco; translation: me un un mas
src: thats my fault; target: eso es mi culpa; translation: es es culpa
src: i didnt hear you; target: yo no te oi; translation: no no he oi
src: i need my pills; target: necesito mis pastillas; translation: necesito mis pastillas
BLEU-1: 0.7563666277049743
BLEU-2: 0.6575261754621241
BLEU-3: 0.5592416169792461
BLEU-4: 0.33944535229828865


In [78]:
# Evaluate on the held-out test split: prints ten sample translations and BLEU-1..4.
print('test')
evaluate_model(model, targ_tokenizer, testX, test)

test
src: im being careful; target: estoy siendo cuidadoso; translation: estoy impresionada
src: the room is dark; target: la habitacion es oscura; translation: el habitacion esta echada
src: you can trust us; target: puedes confiar en nosotros; translation: puedes puedes con
src: i was speechless; target: me quede sin palabras; translation: estaba avergonzado
src: am i the only one; target: soy el unico; translation: estoy solo de
src: he went back home; target: el volvio a casa; translation: el a a a
src: have a nice trip; target: buen viaje; translation: se un un
src: im immune; target: soy inmune; translation: estoy confiable
src: i made fun of him; target: me burle de el; translation: le hice por
src: do you feel bad; target: esta mal; translation: me siento mal
BLEU-1: 0.38148339343680593
BLEU-2: 0.24823427433392617
BLEU-3: 0.17434706388794965
BLEU-4: 0.07528697760240183


In [None]:
# NOTE(review): this cell repeats the test-split evaluation from the preceding
# cell and has not been executed; consider removing it.
print('test')
evaluate_model(model, targ_tokenizer, testX, test)