In [1]:
import os
import subprocess
import torch
import re
import pickle
import time
# Move the working directory up one level before the remaining imports run
# (presumably so the `research` package resolves from the project root — TODO confirm).
# NOTE: the path is relative, so re-running this cell moves up one more level each time.
os.chdir('../')

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import entropy

from IPython.display import HTML as html_print
from research.utils import load_vocabulary, Tokenizer, Aligner, RelaxedTargetField, OneHotEncoder, bleu
from research.scorer import Scorer
from research.greedy_optimizer import GreedyOptimizer
from research.beam_optimizer import BeamOptimizer
from research.continuous_optimizer import ContinuousOptimizer
from research.exponentiated_gradient_optimizer import ExponentiatedGradientOptimizer

In [2]:
# Notebook-wide objects, created once here.
# `vocab` is not referenced again in the visible cells.
vocab = load_vocabulary()
# `tokenizer` is read as a module-level global by load_test_data() and beam_test().
tokenizer = Tokenizer()

In [3]:
# Directory holding the WMT test files; the two file paths below are derived from it.
WMT_DIR = '/home/pma/wmt/'
# German side (used as the reference translations in beam_test).
german_path = os.path.join(WMT_DIR, 'test.de')
# English side (used as the source sentences in beam_test).
english_path = os.path.join(WMT_DIR, 'test.en')

def cstr(s, color='black'):
    """Return ``s`` wrapped in a ``<text>`` tag whose inline style sets ``color``.

    Args:
        s: Value to place inside the tag (formatted with ``str.format``).
        color: Value inserted after ``style=color:``; defaults to ``'black'``.

    Returns:
        The resulting markup as a string. ``s`` is inserted verbatim (no escaping).
    """
    template = "<text style=color:{}>{}</text>"
    return template.format(color, s)

def load_test_data(path):
    """Read a whitespace-tokenized text file and detokenize every line.

    Each line is split on whitespace and the resulting token list is passed to
    the module-level ``tokenizer.detokenize``.

    Args:
        path: Path of the text file to read.

    Returns:
        A list with one detokenized entry per line of the file (empty list for
        an empty file).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open(path, encoding='utf-8') as f:
        # Iterating the handle streams lines instead of materializing them first.
        return [tokenizer.detokenize(line.split()) for line in f]

def load_reference_data(path):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of strings, one per line, each stripped of leading and trailing
        whitespace (including the newline). Empty list for an empty file.
    """
    # Decode explicitly as UTF-8: this loader is used for the German references,
    # and relying on the locale default can mis-decode or fail on non-ASCII text.
    with open(path, encoding='utf-8') as f:
        return [line.strip() for line in f]

class BeamTestResult:
    """Plain container for the outputs of one ``beam_test`` run.

    The three constructor arguments are stored unchanged as attributes of the
    same name: ``translations``, ``scores`` and ``bleus``.
    """

    def __init__(self, translations, scores, bleus):
        self.translations, self.scores, self.bleus = translations, scores, bleus

    def save(self, filename):
        """Pickle this object into ``filename`` with the highest pickle protocol.

        The file is opened in binary write mode, so an existing file is overwritten.
        """
        with open(filename, 'wb') as sink:
            pickle.Pickler(sink, pickle.HIGHEST_PROTOCOL).dump(self)
            
    
def beam_test(k=50, beamsize=15, temperature=1.):
    """Translate the first ``k`` English test sentences with ``BeamOptimizer``.

    For every sentence pair the function prints the source sentence, the
    produced translation, its negated normalized score from ``Scorer``, its
    BLEU against the German reference, and the wall-clock time spent inside
    ``optimizer.optimize()``. A summary line with the average BLEU follows.

    Args:
        k: Maximum number of sentence pairs to process.
        beamsize: Second positional argument handed to ``BeamOptimizer``.
        temperature: Passed to both ``Scorer`` and ``BeamOptimizer``.

    Returns:
        A ``BeamTestResult`` holding the per-sentence translations (token
        sequences), scores and BLEU values. The per-sentence timings are
        additionally attached as ``result.times``. The result is also pickled
        to ``beam_test_{k}_{beamsize}.pkl`` in the current working directory.
    """
    scorer = Scorer(temperature=temperature)
    english_sentences = load_test_data(english_path)
    references = load_reference_data(german_path)

    bleus = []
    scores = []
    translations = []
    times = []
    # NOTE(review): a recorded run of this loop aborted at sentence 29 with
    # "OSError: [Errno 12] Cannot allocate memory"; the allocation responsible
    # is not visible in this function — investigate the helpers it calls.
    sentence_pairs = zip(english_sentences[:k], references)
    for i, (english_sentence, reference) in enumerate(sentence_pairs, start=1):
        print('Sentence', i)
        print(english_sentence)
        english_tok = tokenizer.tokenize(english_sentence)
        optimizer = BeamOptimizer(english_sentence, beamsize, temperature=temperature)

        start_ts = time.time()
        hypotheses = optimizer.optimize()
        # First entry of the first result, minus its final token
        # (presumably an end-of-sequence marker — TODO confirm).
        translation = hypotheses[0][0][:-1]
        time_elapsed = time.time() - start_ts

        print(' '.join(translation))
        score = -scorer.score_tokenized_texts(
            [english_tok], [translation], relaxed=False, normalize=True).item()
        obleu = bleu(reference, ' '.join(translation))
        print('dscore = ', score)
        print('bleu = ', obleu)
        print('time elapsed = ', time_elapsed, 's')
        print()

        bleus.append(obleu)
        scores.append(score)
        translations.append(translation)
        times.append(time_elapsed)

    # Average over the sentences actually processed: the data may hold fewer
    # than k pairs, and k == 0 must not raise ZeroDivisionError.
    avg_bleu = sum(bleus) / len(bleus) if bleus else 0.0
    print('Beam size = ', beamsize, 'avg bleu = ', avg_bleu)

    res = BeamTestResult(translations, scores, bleus)
    # The timings were previously measured and then thrown away; keep them on
    # the result (as an extra attribute) so they are pickled with the rest.
    res.times = times
    res.save(f'beam_test_{k}_{beamsize}.pkl')
    return res

### Beam size = 1

In [4]:
# Run with beam width 1 over (up to) the first 100 test sentences.
# Besides printing per-sentence results, the call pickles its result to
# 'beam_test_100_1.pkl' in the current working directory.
beam_test(k=100, beamsize=1)

Sentence 1
28-Year-Old Chef Found Dead at San Francisco Mall
▁28 - Jahr - O ld ▁Chef ▁Found ▁Dead
dscore =  0.5151540637016296
bleu =  0.0
time elapsed =  0.5672750473022461 s

Sentence 2
A 28-year-old chef who had recently moved to San Francisco was found dead in the stairwell of a local mall this week.
▁Ein ▁28 - jährige r ▁Küchen chef , ▁der ▁kürz lich ▁nach ▁San ▁Francisco ▁ zog , ▁wurde ▁diese ▁Woche ▁im ▁Trepp en haus ▁einer ▁lokale n ▁Mal l ▁to t ▁auf gefunden .
dscore =  0.49215784668922424
bleu =  31.29
time elapsed =  2.7126717567443848 s

Sentence 3
But the victim's brother says he can't think of anyone who would want to hurt him, saying, "Things were finally going well for him."
▁Aber ▁der ▁Bruder ▁des ▁Opfer s ▁sagt , ▁er ▁könne ▁niemand en ▁denken , ▁der ▁ihn ▁verletz en ▁würde , ▁und ▁sagt : ▁" Es ▁ging ▁ihm ▁endlich ▁gut . "
dscore =  0.5355032086372375
bleu =  11.46
time elapsed =  2.7048144340515137 s

Sentence 4
The body found at the Westfield Mall Wednesday morning 

dscore =  0.5059648752212524
bleu =  0.0
time elapsed =  3.1731925010681152 s

Sentence 27
The question of whether she wants children makes her livid: "We women do not need to be married or have children to feel fulfilled," the Hollywood star says.
▁Die ▁Frage , ▁ob ▁sie ▁Kinder ▁haben ▁möchte , ▁ist ▁ihre ▁Lü ge : ▁" Wir ▁Frauen ▁brauchen ▁nicht ▁verhe irate t ▁zu ▁werden ▁oder ▁Kinder ▁zu ▁haben , ▁um ▁sich ▁erfüllt ▁zu ▁fühl en ", ▁sagt ▁der ▁Hollywood - Star .
dscore =  0.6018204689025879
bleu =  22.27
time elapsed =  3.826101303100586 s

Sentence 28
In her new film, "Mother's Day - love isn't child's play", Aniston stars as a single mother with two sons.
▁In ▁ihre m ▁neue n ▁Film ▁" M other ' s ▁Day ▁- ▁love ▁is n ' t ▁child ' s ▁play " ▁ schlägt ▁An ist on ▁als ▁eine ▁allein er ziehen de ▁Mutter ▁mit ▁zwei ▁Söhne n .
dscore =  0.39282190799713135
bleu =  54.6
time elapsed =  3.7147116661071777 s

Sentence 29
The film is released in Germany on 25 August.
▁Der ▁Film ▁wird ▁am ▁25. 

OSError: [Errno 12] Cannot allocate memory