In [3]:
import os

# The imports below need the directory that contains the `research` package to
# be the working directory (presumably the repository root, one level above
# this notebook -- TODO confirm). Only move up when `research/` is not already
# visible from here: an unconditional chdir climbs one more directory every
# time this cell is re-run, which breaks the imports and any relative paths.
if not os.path.isdir('research'):
    os.chdir('../')

from research.utils import load_vocabulary, Tokenizer
from research.scorer import Scorer
from research.greedy_optimizer import GreedyOptimizer

### Model loading

In [4]:
# Shared objects used by every cell below.
# vocab: its `itos` attribute is used later to turn predicted indices into tokens.
vocab = load_vocabulary()
# tokenizer: turns raw sentences into the token sequences passed to the scorer.
tokenizer = Tokenizer()
# scorer: scores translations and gives next-word probabilities.
scorer = Scorer()

### Translation scoring

In [5]:
# Score an existing German translation of an English sentence with `scorer`.
english_sentence = 'I think that machine translation is very interesting subject.'
german_translation_from_google = 'Ich denke, dass maschinelle Übersetzung ein sehr interessantes Thema ist.'

# score_texts takes the raw source and target strings; the result is indexed
# with [0] below to print the score of this single sentence pair.
score = scorer.score_texts(english_sentence, german_translation_from_google)

print('Translation scoring')
print('English sentence:', english_sentence)
print('Translation from google translate to score:', german_translation_from_google)
print('Log likelihood of translation:', score[0])

Translation scoring
English sentence: I think that machine translation is very interesting subject.
Translation from google translate to score: Ich denke, dass maschinelle Übersetzung ein sehr interessantes Thema ist.
Log likehood of translation: -7.1561403


### Predicting the next word of a translation

In [6]:
# Show the most probable next tokens for a partially written translation.
TOP_K = 3  # number of candidate next tokens to display

unfinished_translation = 'Ich denke, dass maschinelle Übersetzung ein sehr'
english_tokens = tokenizer.tokenize(english_sentence)
german_tokens = tokenizer.tokenize(unfinished_translation)

# Each argument is wrapped in a list: a batch holding one sentence pair.
next_word_probs = scorer.next_word_probabilities([english_tokens], [german_tokens])

# topk returns (values, indices); only the indices are needed. Row 0 is the
# single pair in the batch. Convert to plain ints before looking them up in
# vocab.itos instead of indexing with one-element tensors.
top_indices = next_word_probs.topk(TOP_K)[1][0].tolist()
top_tokens = [vocab.itos[index] for index in top_indices]

print('English sentence:', english_sentence)
print('Unfinished translation:', unfinished_translation)
print('Top {} next tokens: '.format(TOP_K), *top_tokens)

English sentence: I think that machine translation is very interesting subject.
Unfinished translation: Ich denke, dass maschinelle Übersetzung ein sehr
Top 3 next tokens:  ▁interessante ▁interessant ▁Interessant


### Greedy optimization

In [12]:
# Produce a translation with GreedyOptimizer, then score it with `scorer`.
english_tokens = tokenizer.tokenize(english_sentence)
optimizer = GreedyOptimizer(english_sentence)

# The last token returned by optimize() is dropped before printing and scoring
# (presumably an end-of-sentence marker -- TODO confirm in GreedyOptimizer).
german_tokens = optimizer.optimize()[:-1]

print('English sentence:', english_sentence)
print('Translated tokens', ' '.join(german_tokens))
# score_tokenized_texts takes batches of token sequences, hence the list
# wrapping; [0] selects the score of this single pair.
print('Log likelihood of translation:', scorer.score_tokenized_texts([english_tokens], [german_tokens])[0])

English sentence: I think that machine translation is very interesting subject.
Translated tokens ▁Ich ▁denke , ▁dass ▁die ▁ maschine lle ▁Übersetzung ▁sehr ▁interessant ▁ist .
Log likehood of translation: -6.6289263
