##### Spanish-to-English Model Evaluation
    Here we will evaluate the model for spanish-to-english translation, with the test dataset.

In [1]:
from pickle import load
from numpy import array
from numpy import argmax
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
from nltk.translate.bleu_score import corpus_bleu

# load a clean dataset
def load_clean_sentences(filename):
	return load(open(filename, 'rb'))

# fit a tokenizer
def create_tokenizer(lines):
	tokenizer = Tokenizer()
	tokenizer.fit_on_texts(lines)
	return tokenizer

# max sentence length
def max_length(lines):
	return max(len(line.split()) for line in lines)

# encode and pad sequences
def encode_sequences(tokenizer, length, lines):
	# integer encode sequences
	X = tokenizer.texts_to_sequences(lines)
	# pad sequences with 0 values
	X = pad_sequences(X, maxlen=length, padding='post')
	return X

# map an integer to a word
def word_for_id(integer, tokenizer):
	for word, index in tokenizer.word_index.items():
		if index == integer:
			return word
	return None

# generate target given source sequence
def predict_sequence(model, tokenizer, source):
	prediction = model.predict(source, verbose=0)[0]
	integers = [argmax(vector) for vector in prediction]
	target = list()
	for i in integers:
		word = word_for_id(i, tokenizer)
		if word is None:
			break
		target.append(word)
	return ' '.join(target)

# evaluate the skill of the model
def evaluate_model(model, tokenizer, sources, raw_dataset):
	actual, predicted = list(), list()
	for i, source in enumerate(sources):
		# translate encoded source text
		source = source.reshape((1, source.shape[0]))
		translation = predict_sequence(model, tokenizer, source)
		raw_src,raw_target = raw_dataset[i]
		print('src=[%s], target=[%s], predicted=[%s]' % (raw_src, raw_target, translation))
		actual.append([raw_target.split()])
		predicted.append(translation.split())

# load datasets
dataset = load_clean_sentences('dataset/spanish-english-both-80000.txt')
train = load_clean_sentences('dataset/spanish-english-train-80000.txt')
test = load_clean_sentences('dataset/spanish-english-test-80000.txt')
# prepare english tokenizer
eng_tokenizer = create_tokenizer(dataset[:, 0])
eng_vocab_size = len(eng_tokenizer.word_index) + 1
eng_length = max_length(dataset[:, 0])
# prepare german tokenizer
spa_tokenizer = create_tokenizer(dataset[:, 1])
spa_vocab_size = len(spa_tokenizer.word_index) + 1
spa_length = max_length(dataset[:, 1])
# prepare data
trainX = encode_sequences(eng_tokenizer, eng_length, train[:, 0])
testX = encode_sequences(eng_tokenizer, eng_length, test[:, 0])

# load complete model spanish english
model = load_model('Models\model_spa_eng_80000.h5')


train
test


In [2]:
print("The document count",eng_tokenizer.document_count)
print("The count of words",len(eng_tokenizer.word_counts))
#print("The word index",eng_tokenizer.word_index)
print("The document count",spa_tokenizer.document_count)
print("The count of words",len(spa_tokenizer.word_counts))

The document count 80000
The count of words 20905
The document count 80000
The count of words 11203


In [6]:
#save tokenizers to use in UI API
import pickle

# saving
with open('Tokenizers/spa_tokenizer_80000_eng_spa.pickle', 'wb') as handle:
    pickle.dump(spa_tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# loading
with open('Tokenizers/spa_tokenizer_80000_eng_spa.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)  
    print(len(tokenizer.word_index) + 1)

[[5, 24]]
11204


In [4]:
print('test')
evaluate_model(model, spa_tokenizer, testX, test)

test
src=[el dictador llego al poder hace quince anos], target=[the dictator came to power fifteen years ago], predicted=[the the ran the he he fifteen the]
src=[realmente quiero otra oportunidad], target=[i really want another chance], predicted=[i really want another chance]
src=[no tengo carro pero mi hermana si], target=[i have a car but my sister does], predicted=[i have a but but but sister]
src=[no pongas el vaso cerca del borde de la mesa], target=[put the glass near the edge of the table], predicted=[take the the of of the table table table]
src=[a tom le gusta ver television], target=[tom likes watching tv], predicted=[tom likes watching tv]
src=[subi de puntillas a mi dormitorio], target=[i went up to my bedroom on tiptoe], predicted=[i left out head side room]
src=[por que me preguntas esto], target=[why are you asking me this], predicted=[why did you ask for me]
src=[tom no debe ir a ningun lado], target=[tom have to go anywhere], predicted=[tom have be anywhere anywhere]


    We can see that some translations are really good, but others are not as good. We have to consider that even though we are using bidirectional layers, and it took really long to perform 100 epochs in training, the amount of data used is still small. More data would be necessary for the model to perform better.