# English - Italian Embeddings (3 versions)
`w266 Final Project: Crosslingual Word Embeddings`

Instead of traning on randomly substituted words, here we'll choose the translation that is closest to the context embedding vector.

# Notebook Setup

In [4]:
# general imports
from __future__ import print_function
import time
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

# tell matplotlib not to open a new window
%matplotlib inline

# autoreload modules
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


__Base Paths__

In [79]:
BASE = '/home/mmillervedam/Data'
PROJ = '/home/mmillervedam/ProjectRepo'
GTT_BASE = PROJ + '/BaselineModels/data/ground_truth_translations/'
#PROJ = '/Users/mona/OneDrive/repos/final_proj/W266-Fall-2017-Final-Project'

# directory to save pickled embeddings
SAVE_TO = BASE + '/embeddings'

__Globals__ - _the parameters below fully determine all 3 models in this NB_

In [116]:
# Data
LANG = ('en','it')
FULL_TEXT = "/home/miwamoto/en_it_shuf.txt"
VOCAB_INDEX = BASE + '/vocab/en_it_small.pkl'
PANLEX = BASE + '/panlex/en_it_dict.pkl'
GTT_PATH = GTT_BASE + "%s-%s-clean.csv" % (LANG[0], LANG[1])

# Model
EMBEDDING_SIZE = 200

# Training
nBATCHES = 50000 # <<< 1 epoch with our 1 million sentence corpus
BATCH_SIZE = 48
WINDOW_SIZE = 4
MAX_EPOCHS = 5 # fail safe
ALPHA = 0.5 # authors use a much smaller learning rate but train longer

# Load Data

In [102]:
from parsing import Corpus, BilingualVocabulary, batch_generator, get_common_words

In [64]:
# load corpus
raw_data = Corpus(FULL_TEXT)

In [65]:
# load panlex dictionary
with open(PANLEX,'rb') as f:
    translations = pickle.load(f)

In [66]:
# load vocabulary
vocab = BilingualVocabulary([], languages = LANG)
with open(VOCAB_INDEX,'rb') as f:
    vocab.load_from_index(pickle.load(f))

In [67]:
# confirmations
print('... loaded %s panlex translations'%(len(translations)))
print('... loaded %s word %s vocabulary'%(vocab.size,vocab.language))

... loaded 525091 panlex translations
... loaded 20003 word ('en', 'it') vocabulary


In [77]:
# Validation Words (for training printout)
TEST_WORDS = vocab.to_ids(['en_the','en_last', 'it_si', 'it_suo'])
print('... test word ids:', TEST_WORDS)

... test word ids: [3, 239, 10020, 10040]


In [117]:
# Ground Truth Translations
GTT_DF = pd.read_csv(GTT_PATH, names = [LANG[0], LANG[1]], sep=' ', header=None)
print('... loaded %s ground truth translations.'%(len(GTT_DF)))

... loaded 103613 ground truth translations.


In [118]:
a = get_common_words(vocab)

In [119]:
a

{'en_comics',
 'en_basel',
 'en_counterpart',
 'it_migliora',
 'en_things',
 'en_attested',
 'en_karnataka',
 'it_migliore',
 'en_expenses',
 'it_ritmica',
 'en_masterpiece',
 'en_state',
 'en_distortion',
 'en_killers',
 'it_platone',
 'en_disk',
 'it_lentamente',
 'it_promettente',
 'it_consolidare',
 'en_bengali',
 'en_interim',
 'en_inevitable',
 'en_examination',
 'en_ethics',
 'it_precisione',
 'it_foto',
 'en_extended',
 'it_queen',
 'en_southwest',
 'it_sonata',
 'en_wake',
 'en_crash',
 'en_transformation',
 'en_exact',
 'it_ginevra',
 'it_sino',
 'it_apre',
 'en_get',
 'en_oceans',
 'en_gear',
 'it_spartiacque',
 'it_consumo',
 'en_complex',
 'it_vegetazione',
 'it_limitare',
 'it_cattura',
 'en_storms',
 'en_ran',
 'en_czechoslovakia',
 'en_saint',
 'en_bmw',
 'en_develop',
 'en_timing',
 'en_initiation',
 'it_sorprendente',
 'it_cuori',
 'it_improvviso',
 'en_bottom',
 'it_improvvisa',
 'en_mall',
 'it_cuore',
 'it_mostra',
 'en_stefan',
 'it_space',
 'it_morente',
 'en_beg

In [120]:
# Evaluation Words (for reporting recall)
eval_words = [w for w in get_common_words(vocab) if w.startswith(LANG[1])]
EVAL_IDS = vocab.to_ids(eval_words)
print('... loaded %s evaluation words.' % (len(EVAL_IDS)))

... loaded 8516 evaluation words.


# Method 1: Random Translations

### Initialize the model

In [39]:
from models import BiW2V_random

# create model
model_1 = BiW2V_random(bilingual_dict = translations,
                       vocab = vocab, H = EMBEDDING_SIZE)

# intialize TF graphs
model_1.BuildCoreGraph()
model_1.BuildTrainingGraph()
model_1.BuildValidationGraph()

... TF graph created for BiW2V model.
... TF graph created for BiW2V training.
... TF graph created for BiW2V validation.


### Training

In [40]:
# fresh data generator
DATA_GENERATOR = batch_generator(raw_data, vocab, BATCH_SIZE, WINDOW_SIZE, MAX_EPOCHS)

In [41]:
# train
start = time.time()
model_1.train(nBATCHES, DATA_GENERATOR, TEST_WORDS, learning_rate = ALPHA)
tot = (time.time() - start)
print('... {} batches trained in {} seconds'.format(nBATCHES, tot))

... Model Initialized
	 <tf.Variable 'Embedding_Layer/ContextEmbeddings:0' shape=(20003, 200) dtype=float32_ref>
	 <tf.Variable 'Hidden_Layer/WordEmbeddings:0' shape=(20003, 200) dtype=float32_ref>
	 <tf.Variable 'Hidden_Layer/b:0' shape=(20003,) dtype=float32_ref>
... Starting Training
... STEP 0 : Average Loss : 0.000264484786987
   [en_the] closest:  it_cristina, en_commented, it_sacerdotale, it_amiga, it_vantaggio, en_muhammad, it_gemello, it_esistente,
   [en_last] closest:  en_n, en_ridge, it_kg, it_cancellare, it_professionistico, en_quad, it_consentire, it_arti,
   [it_si] closest:  it_compiti, it_limitazione, en_sculptor, en_switching, en_solidarity, en_hereford, it_u, it_pesante,
   [it_suo] closest:  it_abitanti, it_presentarsi, en_convert, en_regulate, it_notazione, en_son-in-law, it_compresa, it_girone,
... STEP 50000 : Average Loss : 4.10927392767
... STEP 100000 : Average Loss : 3.85516811703
   [en_the] closest:  en_a, en_its, en_an, en_one, en_his, it_la, it_riunirsi, 

### Save the Embeddings.

In [43]:
# context 
filename = SAVE_TO + '/en_it_rand_500K_V_dec19.pkl'
with open(filename, 'wb') as f:
    # Pickle the 'data' dictionary using the highest protocol available.
    pickle.dump(model_1.context_embeddings, f, pickle.HIGHEST_PROTOCOL)

# word
filename = SAVE_TO + '/en_it_rand_500K_U_dec19.pkl'
with open(filename, 'wb') as f:
    # Pickle the 'data' dictionary using the highest protocol available.
    pickle.dump(model_1.word_embeddings, f, pickle.HIGHEST_PROTOCOL)

### Evaluation

In [101]:
model_1.evaluate(source_lang = LANG[0], 
                 target_lang = LANG[1], 
                 gtt = GTT_DF, 
                 sample = EVAL_IDS,
                 top_k = 5,
                 verbose = True)

... Model Initialized
sim shape (8516, 20003)
vocab size 20003
word it_migliora
half of vocab 10003
word it_migliore
half of vocab 10004
word it_ritmica
half of vocab 10005
word it_platone
half of vocab 10006
word it_lentamente
half of vocab 10007
word it_promettente
half of vocab 10008
word it_consolidare
half of vocab 10009
word it_precisione
half of vocab 10010
word it_foto
half of vocab 10011
word it_queen
half of vocab 10012
word it_sonata
half of vocab 10013
word it_ginevra
half of vocab 10014
word it_sino
half of vocab 10015
word it_apre
half of vocab 10016
word it_spartiacque
half of vocab 10017
word it_consumo
half of vocab 10018
word it_vegetazione
half of vocab 10019
word it_limitare
half of vocab 10020
word it_cattura
half of vocab 10021
word it_sorprendente
half of vocab 10022
word it_cuori
half of vocab 10023
word it_improvviso
half of vocab 10024
word it_improvvisa
half of vocab 10025
word it_cuore
half of vocab 10026
word it_mostra
half of vocab 10027
word it_space
half

word it_suicida
half of vocab 10230
word it_dos
half of vocab 10231
word it_triassico
half of vocab 10232
word it_savona
half of vocab 10233
word it_anale
half of vocab 10234
word it_cina
half of vocab 10235
word it_costi
half of vocab 10236
word it_guardare
half of vocab 10237
word it_master
half of vocab 10238
word it_osservatori
half of vocab 10239
word it_veneti
half of vocab 10240
word it_veneto
half of vocab 10241
word it_stabile
half of vocab 10242
word it_proprietà
half of vocab 10243
word it_lezione
half of vocab 10244
word it_opzione
half of vocab 10245
word it_journal
half of vocab 10246
word it_clan
half of vocab 10247
word it_dotazione
half of vocab 10248
word it_denti
half of vocab 10249
word it_clay
half of vocab 10250
word it_settimanale
half of vocab 10251
word it_video
half of vocab 10252
word it_lubiana
half of vocab 10253
word it_dente
half of vocab 10254
word it_justice
half of vocab 10255
word it_rettangolare
half of vocab 10256
word it_saccheggio
half of vocab 10

word it_inizi
half of vocab 10457
word it_predecessore
half of vocab 10458
word it_incontri
half of vocab 10459
word it_perfetta
half of vocab 10460
word it_riunito
half of vocab 10461
word it_incontra
half of vocab 10462
word it_precedenza
half of vocab 10463
word it_emerso
half of vocab 10464
word it_marginale
half of vocab 10465
word it_gola
half of vocab 10466
word it_circolo
half of vocab 10467
word it_riposo
half of vocab 10468
word it_sorella
half of vocab 10469
word it_sorelle
half of vocab 10470
word it_carica
half of vocab 10471
word it_lirico
half of vocab 10472
word it_conosciuto
half of vocab 10473
word it_elevato
half of vocab 10474
word it_flashback
half of vocab 10475
word it_ultimi
half of vocab 10476
word it_scorrere
half of vocab 10477
word it_parere
half of vocab 10478
word it_nazionale
half of vocab 10479
word it_confusione
half of vocab 10480
word it_battuta
half of vocab 10481
word it_temperamento
half of vocab 10482
word it_edizione
half of vocab 10483
word it_f

word it_notizie
half of vocab 10681
word it_maschi
half of vocab 10682
word it_rimanenti
half of vocab 10683
word it_teatro
half of vocab 10684
word it_messico
half of vocab 10685
word it_douglas
half of vocab 10686
word it_rimanente
half of vocab 10687
word it_caserma
half of vocab 10688
word it_esistenti
half of vocab 10689
word it_decisiva
half of vocab 10690
word it_albert
half of vocab 10691
word it_ungheresi
half of vocab 10692
word it_elfi
half of vocab 10693
word it_morso
half of vocab 10694
word it_ungherese
half of vocab 10695
word it_bicchiere
half of vocab 10696
word it_visuale
half of vocab 10697
word it_scritto
half of vocab 10698
word it_verona
half of vocab 10699
word it_piave
half of vocab 10700
word it_motocicletta
half of vocab 10701
word it_predatori
half of vocab 10702
word it_blues
half of vocab 10703
word it_permanenza
half of vocab 10704
word it_jordan
half of vocab 10705
word it_radici
half of vocab 10706
word it_impegnato
half of vocab 10707
word it_poesia
hal

word it_one
half of vocab 10911
word it_vetro
half of vocab 10912
word it_onu
half of vocab 10913
word it_geologia
half of vocab 10914
word it_improvvisamente
half of vocab 10915
word it_rappresaglia
half of vocab 10916
word it_quinta
half of vocab 10917
word it_stato
half of vocab 10918
word it_manifestare
half of vocab 10919
word it_mite
half of vocab 10920
word it_mito
half of vocab 10921
word it_amante
half of vocab 10922
word it_hobbit
half of vocab 10923
word it_tragedia
half of vocab 10924
word it_vestibolo
half of vocab 10925
word it_vecchi
half of vocab 10926
word it_minerali
half of vocab 10927
word it_grosso
half of vocab 10928
word it_hamilton
half of vocab 10929
word it_adattamento
half of vocab 10930
word it_sita
half of vocab 10931
word it_camillo
half of vocab 10932
word it_sith
half of vocab 10933
word it_percorso
half of vocab 10934
word it_dominare
half of vocab 10935
word it_compleanno
half of vocab 10936
word it_perugia
half of vocab 10937
word it_ponte
half of voc

word it_sciogliere
half of vocab 11139
word it_religiosa
half of vocab 11140
word it_religioso
half of vocab 11141
word it_kenya
half of vocab 11142
word it_prigionia
half of vocab 11143
word it_campus
half of vocab 11144
word it_evita
half of vocab 11145
word it_christine
half of vocab 11146
word it_mandato
half of vocab 11147
word it_faida
half of vocab 11148
word it_rivolta
half of vocab 11149
word it_provocato
half of vocab 11150
word it_soffrire
half of vocab 11151
word it_leggendario
half of vocab 11152
word it_altamente
half of vocab 11153
word it_manto
half of vocab 11154
word it_somme
half of vocab 11155
word it_programmazione
half of vocab 11156
word it_detenuto
half of vocab 11157
word it_diploma
half of vocab 11158
word it_passaggio
half of vocab 11159
word it_riferimento
half of vocab 11160
word it_pinna
half of vocab 11161
word it_altrimenti
half of vocab 11162
word it_pinne
half of vocab 11163
word it_biblioteche
half of vocab 11164
word it_cantiere
half of vocab 11165
w

word it_roma
half of vocab 11364
word it_conca
half of vocab 11365
word it_concezione
half of vocab 11366
word it_bacini
half of vocab 11367
word it_bacino
half of vocab 11368
word it_invaso
half of vocab 11369
word it_nasconde
half of vocab 11370
word it_fino
half of vocab 11371
word it_match
half of vocab 11372
word it_fossili
half of vocab 11373
word it_deceduto
half of vocab 11374
word it_atti
half of vocab 11375
word it_atto
half of vocab 11376
word it_inverno
half of vocab 11377
word it_turchia
half of vocab 11378
word it_vocale
half of vocab 11379
word it_farmaci
half of vocab 11380
word it_museo
half of vocab 11381
word it_prodotto
half of vocab 11382
word it_weber
half of vocab 11383
word it_prodotti
half of vocab 11384
word it_rare
half of vocab 11385
word it_lisbona
half of vocab 11386
word it_ristretto
half of vocab 11387
word it_cartuccia
half of vocab 11388
word it_asilo
half of vocab 11389
word it_aeroplani
half of vocab 11390
word it_labirinto
half of vocab 11391
word i

word it_splendido
half of vocab 11592
word it_prestato
half of vocab 11593
word it_roditore
half of vocab 11594
word it_asburgo
half of vocab 11595
word it_latte
half of vocab 11596
word it_testimone
half of vocab 11597
word it_muro
half of vocab 11598
word it_muri
half of vocab 11599
word it_record
half of vocab 11600
word it_tipologia
half of vocab 11601
word it_formaggio
half of vocab 11602
word it_mura
half of vocab 11603
word it_studenti
half of vocab 11604
word it_cacciare
half of vocab 11605
word it_possesso
half of vocab 11606
word it_barletta
half of vocab 11607
word it_citazione
half of vocab 11608
word it_proclamazione
half of vocab 11609
word it_picco
half of vocab 11610
word it_quadri
half of vocab 11611
word it_passanti
half of vocab 11612
word it_dozzina
half of vocab 11613
word it_strisce
half of vocab 11614
word it_ascesa
half of vocab 11615
word it_crescere
half of vocab 11616
word it_accettare
half of vocab 11617
word it_banchetto
half of vocab 11618
word it_sovente


word it_sconvolto
half of vocab 11816
word it_secondario
half of vocab 11817
word it_cattività
half of vocab 11818
word it_dodici
half of vocab 11819
word it_inseguito
half of vocab 11820
word it_blood
half of vocab 11821
word it_ampiezza
half of vocab 11822
word it_bmw
half of vocab 11823
word it_secco
half of vocab 11824
word it_sacro
half of vocab 11825
word it_messaggio
half of vocab 11826
word it_impossibile
half of vocab 11827
word it_apparato
half of vocab 11828
word it_concorrente
half of vocab 11829
word it_trilogia
half of vocab 11830
word it_frase
half of vocab 11831
word it_calcolato
half of vocab 11832
word it_cipolla
half of vocab 11833
word it_antiochia
half of vocab 11834
word it_luciano
half of vocab 11835
word it_sperimentazione
half of vocab 11836
word it_mela
half of vocab 11837
word it_obiettivi
half of vocab 11838
word it_sepolta
half of vocab 11839
word it_realismo
half of vocab 11840
word it_teorica
half of vocab 11841
word it_fase
half of vocab 11842
word it_go

word it_boy
half of vocab 12041
word it_anteriore
half of vocab 12042
word it_comunione
half of vocab 12043
word it_ace
half of vocab 12044
word it_ghana
half of vocab 12045
word it_localizzazione
half of vocab 12046
word it_conservatore
half of vocab 12047
word it_gilda
half of vocab 12048
word it_funzionario
half of vocab 12049
word it_individuo
half of vocab 12050
word it_saturn
half of vocab 12051
word it_erroneamente
half of vocab 12052
word it_ludwig
half of vocab 12053
word it_meno
half of vocab 12054
word it_avere
half of vocab 12055
word it_riscatto
half of vocab 12056
word it_marte
half of vocab 12057
word it_brutta
half of vocab 12058
word it_razziale
half of vocab 12059
word it_brutto
half of vocab 12060
word it_colonna
half of vocab 12061
word it_promontorio
half of vocab 12062
word it_borghese
half of vocab 12063
word it_assistito
half of vocab 12064
word it_lussemburgo
half of vocab 12065
word it_animali
half of vocab 12066
word it_tracce
half of vocab 12067
word it_cerc

word it_mono
half of vocab 12266
word it_orecchie
half of vocab 12267
word it_leeds
half of vocab 12268
word it_lucca
half of vocab 12269
word it_generazione
half of vocab 12270
word it_estratti
half of vocab 12271
word it_passante
half of vocab 12272
word it_tubo
half of vocab 12273
word it_min
half of vocab 12274
word it_mio
half of vocab 12275
word it_precedente
half of vocab 12276
word it_toronto
half of vocab 12277
word it_adolescente
half of vocab 12278
word it_mutato
half of vocab 12279
word it_precedenti
half of vocab 12280
word it_mia
half of vocab 12281
word it_cuba
half of vocab 12282
word it_sterminio
half of vocab 12283
word it_tubi
half of vocab 12284
word it_cubo
half of vocab 12285
word it_franchigia
half of vocab 12286
word it_definitivamente
half of vocab 12287
word it_reverendo
half of vocab 12288
word it_coperto
half of vocab 12289
word it_ferma
half of vocab 12290
word it_fermo
half of vocab 12291
word it_personal
half of vocab 12292
word it_stabilimento
half of vo

word it_arcangelo
half of vocab 12494
word it_aristocratica
half of vocab 12495
word it_cerchi
half of vocab 12496
word it_cabriolet
half of vocab 12497
word it_floyd
half of vocab 12498
word it_disponibile
half of vocab 12499
word it_ostilità
half of vocab 12500
word it_certificazione
half of vocab 12501
word it_organizzatore
half of vocab 12502
word it_verificare
half of vocab 12503
word it_only
half of vocab 12504
word it_preludio
half of vocab 12505
word it_general
half of vocab 12506
word it_inviata
half of vocab 12507
word it_riflessione
half of vocab 12508
word it_inviato
half of vocab 12509
word it_damasco
half of vocab 12510
word it_cinetica
half of vocab 12511
word it_rifornimento
half of vocab 12512
word it_rifornimenti
half of vocab 12513
word it_schiavo
half of vocab 12514
word it_indicare
half of vocab 12515
word it_maledizione
half of vocab 12516
word it_raccolta
half of vocab 12517
word it_alare
half of vocab 12518
word it_inserito
half of vocab 12519
word it_raccolto
h

word it_barrett
half of vocab 12718
word it_maurizio
half of vocab 12719
word it_estrema
half of vocab 12720
word it_lobo
half of vocab 12721
word it_timone
half of vocab 12722
word it_estremi
half of vocab 12723
word it_elisabetta
half of vocab 12724
word it_lotus
half of vocab 12725
word it_estremo
half of vocab 12726
word it_connecticut
half of vocab 12727
word it_sassoni
half of vocab 12728
word it_annata
half of vocab 12729
word it_sassone
half of vocab 12730
word it_promosso
half of vocab 12731
word it_attuale
half of vocab 12732
word it_paraguay
half of vocab 12733
word it_scienze
half of vocab 12734
word it_volontariamente
half of vocab 12735
word it_scienza
half of vocab 12736
word it_funk
half of vocab 12737
word it_chitarra
half of vocab 12738
word it_monetazione
half of vocab 12739
word it_paleolitico
half of vocab 12740
word it_ammettere
half of vocab 12741
word it_sufficientemente
half of vocab 12742
word it_bolivia
half of vocab 12743
word it_pronto
half of vocab 12744
w

word it_enzimi
half of vocab 12944
word it_enzima
half of vocab 12945
word it_tendenze
half of vocab 12946
word it_citroën
half of vocab 12947
word it_stanley
half of vocab 12948
word it_seduta
half of vocab 12949
word it_seduto
half of vocab 12950
word it_accademica
half of vocab 12951
word it_requiem
half of vocab 12952
word it_castiglione
half of vocab 12953
word it_massima
half of vocab 12954
word it_cap
half of vocab 12955
word it_musa
half of vocab 12956
word it_gilbert
half of vocab 12957
word it_cellulare
half of vocab 12958
word it_brianza
half of vocab 12959
word it_settembre
half of vocab 12960
word it_north
half of vocab 12961
word it_prendere
half of vocab 12962
word it_rifiuta
half of vocab 12963
word it_norfolk
half of vocab 12964
word it_prof
half of vocab 12965
word it_cricket
half of vocab 12966
word it_prefazione
half of vocab 12967
word it_gangster
half of vocab 12968
word it_sanscrito
half of vocab 12969
word it_dietro
half of vocab 12970
word it_prevalente
half of

word it_deriva
half of vocab 13172
word it_vite
half of vocab 13173
word it_segretario
half of vocab 13174
word it_apollo
half of vocab 13175
word it_chicago
half of vocab 13176
word it_einstein
half of vocab 13177
word it_sottogenere
half of vocab 13178
word it_martina
half of vocab 13179
word it_partner
half of vocab 13180
word it_simpatia
half of vocab 13181
word it_quiz
half of vocab 13182
word it_quattrocento
half of vocab 13183
word it_terapia
half of vocab 13184
word it_stretto
half of vocab 13185
word it_vicario
half of vocab 13186
word it_marcatore
half of vocab 13187
word it_convivenza
half of vocab 13188
word it_paese
half of vocab 13189
word it_definitivo
half of vocab 13190
word it_sopravvivenza
half of vocab 13191
word it_specialmente
half of vocab 13192
word it_hart
half of vocab 13193
word it_hans
half of vocab 13194
word it_cobra
half of vocab 13195
word it_swing
half of vocab 13196
word it_tono
half of vocab 13197
word it_diventare
half of vocab 13198
word it_pari
hal

word it_rovine
half of vocab 13396
word it_sacramento
half of vocab 13397
word it_islamica
half of vocab 13398
word it_miseria
half of vocab 13399
word it_mantenimento
half of vocab 13400
word it_corre
half of vocab 13401
word it_bracci
half of vocab 13402
word it_ricchezze
half of vocab 13403
word it_sogno
half of vocab 13404
word it_sogna
half of vocab 13405
word it_terminato
half of vocab 13406
word it_villaggi
half of vocab 13407
word it_precipitare
half of vocab 13408
word it_destra
half of vocab 13409
word it_meriti
half of vocab 13410
word it_rendimento
half of vocab 13411
word it_ossidazione
half of vocab 13412
word it_catalogna
half of vocab 13413
word it_protocolli
half of vocab 13414
word it_apparentemente
half of vocab 13415
word it_riconquistare
half of vocab 13416
word it_governo
half of vocab 13417
word it_nasa
half of vocab 13418
word it_alimentazione
half of vocab 13419
word it_leone
half of vocab 13420
word it_demografia
half of vocab 13421
word it_andato
half of voca

word it_praterie
half of vocab 13621
word it_bar
half of vocab 13622
word it_infinite
half of vocab 13623
word it_principessa
half of vocab 13624
word it_infinito
half of vocab 13625
word it_note
half of vocab 13626
word it_razzismo
half of vocab 13627
word it_metalli
half of vocab 13628
word it_metallo
half of vocab 13629
word it_carmelo
half of vocab 13630
word it_benzina
half of vocab 13631
word it_sostentamento
half of vocab 13632
word it_austin
half of vocab 13633
word it_ammissione
half of vocab 13634
word it_successore
half of vocab 13635
word it_divisi
half of vocab 13636
word it_vendetta
half of vocab 13637
word it_alunni
half of vocab 13638
word it_faith
half of vocab 13639
word it_delibera
half of vocab 13640
word it_belgi
half of vocab 13641
word it_torrenti
half of vocab 13642
word it_andando
half of vocab 13643
word it_avvistamento
half of vocab 13644
word it_mtv
half of vocab 13645
word it_segretamente
half of vocab 13646
word it_bobby
half of vocab 13647
word it_disegna

word it_prua
half of vocab 13849
word it_coste
half of vocab 13850
word it_costa
half of vocab 13851
word it_solito
half of vocab 13852
word it_imposta
half of vocab 13853
word it_leggermente
half of vocab 13854
word it_mini
half of vocab 13855
word it_imposte
half of vocab 13856
word it_vesuvio
half of vocab 13857
word it_spazi
half of vocab 13858
word it_pantaloni
half of vocab 13859
word it_imposto
half of vocab 13860
word it_giudicare
half of vocab 13861
word it_sedicesimo
half of vocab 13862
word it_mistica
half of vocab 13863
word it_troia
half of vocab 13864
word it_composizione
half of vocab 13865
word it_colorado
half of vocab 13866
word it_vogue
half of vocab 13867
word it_onda
half of vocab 13868
word it_vuoto
half of vocab 13869
word it_monster
half of vocab 13870
word it_organismo
half of vocab 13871
word it_promessa
half of vocab 13872
word it_metropoli
half of vocab 13873
word it_vocabolario
half of vocab 13874
word it_nord
half of vocab 13875
word it_cittadini
half of v

word it_fatto
half of vocab 14074
word it_avanzare
half of vocab 14075
word it_cooperazione
half of vocab 14076
word it_promozionale
half of vocab 14077
word it_produttiva
half of vocab 14078
word it_generi
half of vocab 14079
word it_dallas
half of vocab 14080
word it_produttivo
half of vocab 14081
word it_rumore
half of vocab 14082
word it_lesione
half of vocab 14083
word it_lesioni
half of vocab 14084
word it_calcestruzzo
half of vocab 14085
word it_doppi
half of vocab 14086
word it_troupe
half of vocab 14087
word it_pisano
half of vocab 14088
word it_percentuale
half of vocab 14089
word it_disperato
half of vocab 14090
word it_dom
half of vocab 14091
word it_guide
half of vocab 14092
word it_introdurre
half of vocab 14093
word it_guida
half of vocab 14094
word it_guido
half of vocab 14095
word it_disturbo
half of vocab 14096
word it_custodia
half of vocab 14097
word it_scudetto
half of vocab 14098
word it_bell
half of vocab 14099
word it_vandali
half of vocab 14100
word it_maiorca


word it_residente
half of vocab 14302
word it_autorità
half of vocab 14303
word it_criminalità
half of vocab 14304
word it_marcia
half of vocab 14305
word it_fedelmente
half of vocab 14306
word it_cambio
half of vocab 14307
word it_soldi
half of vocab 14308
word it_dottrina
half of vocab 14309
word it_idraulica
half of vocab 14310
word it_dottrine
half of vocab 14311
word it_idraulico
half of vocab 14312
word it_marzo
half of vocab 14313
word it_prevedere
half of vocab 14314
word it_sufficiente
half of vocab 14315
word it_violentemente
half of vocab 14316
word it_imparato
half of vocab 14317
word it_caccia
half of vocab 14318
word it_telefonia
half of vocab 14319
word it_carte
half of vocab 14320
word it_scarso
half of vocab 14321
word it_carta
half of vocab 14322
word it_palla
half of vocab 14323
word it_gigi
half of vocab 14324
word it_infine
half of vocab 14325
word it_grave
half of vocab 14326
word it_bestia
half of vocab 14327
word it_genio
half of vocab 14328
word it_diversità
ha

word it_brescia
half of vocab 14526
word it_coppia
half of vocab 14527
word it_venerazione
half of vocab 14528
word it_budget
half of vocab 14529
word it_osso
half of vocab 14530
word it_élite
half of vocab 14531
word it_rumeno
half of vocab 14532
word it_anticarro
half of vocab 14533
word it_particolarità
half of vocab 14534
word it_vari
half of vocab 14535
word it_comunista
half of vocab 14536
word it_fertilità
half of vocab 14537
word it_omosessuali
half of vocab 14538
word it_scomparsa
half of vocab 14539
word it_inevitabile
half of vocab 14540
word it_scorrimento
half of vocab 14541
word it_franz
half of vocab 14542
word it_pratico
half of vocab 14543
word it_frank
half of vocab 14544
word it_padrona
half of vocab 14545
word it_legname
half of vocab 14546
word it_evento
half of vocab 14547
word it_calabria
half of vocab 14548
word it_pratica
half of vocab 14549
word it_progressiva
half of vocab 14550
word it_miniera
half of vocab 14551
word it_recente
half of vocab 14552
word it_m

word it_preghiere
half of vocab 14752
word it_preghiera
half of vocab 14753
word it_ionico
half of vocab 14754
word it_umili
half of vocab 14755
word it_capire
half of vocab 14756
word it_volendo
half of vocab 14757
word it_vienna
half of vocab 14758
word it_semplificato
half of vocab 14759
word it_danzica
half of vocab 14760
word it_comandata
half of vocab 14761
word it_chiarezza
half of vocab 14762
word it_strutturato
half of vocab 14763
word it_urss
half of vocab 14764
word it_fortificazione
half of vocab 14765
word it_linea
half of vocab 14766
word it_seguito
half of vocab 14767
word it_transetto
half of vocab 14768
word it_committente
half of vocab 14769
word it_pasto
half of vocab 14770
word it_intero
half of vocab 14771
word it_elettore
half of vocab 14772
word it_marittima
half of vocab 14773
word it_arcipelago
half of vocab 14774
word it_talento
half of vocab 14775
word it_angeles
half of vocab 14776
word it_birmingham
half of vocab 14777
word it_radicale
half of vocab 14778
w

word it_lucia
half of vocab 14977
word it_partecipato
half of vocab 14978
word it_guarigione
half of vocab 14979
word it_eagle
half of vocab 14980
word it_porti
half of vocab 14981
word it_matera
half of vocab 14982
word it_porto
half of vocab 14983
word it_porta
half of vocab 14984
word it_intera
half of vocab 14985
word it_noto
half of vocab 14986
word it_porte
half of vocab 14987
word it_giornata
half of vocab 14988
word it_fungo
half of vocab 14989
word it_getto
half of vocab 14990
word it_sinodo
half of vocab 14991
word it_beato
half of vocab 14992
word it_sottosegretario
half of vocab 14993
word it_superman
half of vocab 14994
word it_regola
half of vocab 14995
word it_minimo
half of vocab 14996
word it_educazione
half of vocab 14997
word it_basilea
half of vocab 14998
word it_servita
half of vocab 14999
word it_promotore
half of vocab 15000
word it_belluno
half of vocab 15001
word it_immigrati
half of vocab 15002
word it_crisi
half of vocab 15003
word it_temporale
half of vocab 

word it_caos
half of vocab 15205
word it_common
half of vocab 15206
word it_meeting
half of vocab 15207
word it_droga
half of vocab 15208
word it_pink
half of vocab 15209
word it_dimenticato
half of vocab 15210
word it_misurazione
half of vocab 15211
word it_mystery
half of vocab 15212
word it_aumentare
half of vocab 15213
word it_sky
half of vocab 15214
word it_antagonisti
half of vocab 15215
word it_parker
half of vocab 15216
word it_gian
half of vocab 15217
word it_ospite
half of vocab 15218
word it_amministratore
half of vocab 15219
word it_psicoanalisi
half of vocab 15220
word it_osseo
half of vocab 15221
word it_perso
half of vocab 15222
word it_casino
half of vocab 15223
word it_persa
half of vocab 15224
word it_oreste
half of vocab 15225
word it_carico
half of vocab 15226
word it_lunghezza
half of vocab 15227
word it_turisti
half of vocab 15228
word it_sponsorizzazione
half of vocab 15229
word it_restante
half of vocab 15230
word it_ercole
half of vocab 15231
word it_unione
hal

word it_lena
half of vocab 15431
word it_normanni
half of vocab 15432
word it_problema
half of vocab 15433
word it_pietà
half of vocab 15434
word it_vivo
half of vocab 15435
word it_arenaria
half of vocab 15436
word it_sostanza
half of vocab 15437
word it_idrogeno
half of vocab 15438
word it_prime
half of vocab 15439
word it_sostanze
half of vocab 15440
word it_acciaio
half of vocab 15441
word it_postale
half of vocab 15442
word it_sindacati
half of vocab 15443
word it_garanzia
half of vocab 15444
word it_tasso
half of vocab 15445
word it_mentore
half of vocab 15446
word it_sigma
half of vocab 15447
word it_scarica
half of vocab 15448
word it_difetto
half of vocab 15449
word it_difetti
half of vocab 15450
word it_nomenclatura
half of vocab 15451
word it_scarico
half of vocab 15452
word it_blog
half of vocab 15453
word it_cuoio
half of vocab 15454
word it_chimici
half of vocab 15455
word it_jeremy
half of vocab 15456
word it_mary
half of vocab 15457
word it_connesso
half of vocab 15458


word it_acquedotto
half of vocab 15660
word it_esclusivo
half of vocab 15661
word it_tastiera
half of vocab 15662
word it_lan
half of vocab 15663
word it_demetrio
half of vocab 15664
word it_satira
half of vocab 15665
word it_bivio
half of vocab 15666
word it_hannover
half of vocab 15667
word it_forno
half of vocab 15668
word it_esercito
half of vocab 15669
word it_matrice
half of vocab 15670
word it_volta
half of vocab 15671
word it_giapponese
half of vocab 15672
word it_matrici
half of vocab 15673
word it_celebrazione
half of vocab 15674
word it_cucciolo
half of vocab 15675
word it_rovina
half of vocab 15676
word it_controllato
half of vocab 15677
word it_james
half of vocab 15678
word it_blu
half of vocab 15679
word it_illustrato
half of vocab 15680
word it_jedi
half of vocab 15681
word it_adulti
half of vocab 15682
word it_modena
half of vocab 15683
word it_svezia
half of vocab 15684
word it_licenziato
half of vocab 15685
word it_propulsione
half of vocab 15686
word it_romantica
ha

word it_sentimentale
half of vocab 15888
word it_partecipare
half of vocab 15889
word it_elena
half of vocab 15890
word it_servire
half of vocab 15891
word it_battista
half of vocab 15892
word it_collina
half of vocab 15893
word it_tribù
half of vocab 15894
word it_comico
half of vocab 15895
word it_malata
half of vocab 15896
word it_leoni
half of vocab 15897
word it_filologia
half of vocab 15898
word it_ostile
half of vocab 15899
word it_segna
half of vocab 15900
word it_segno
half of vocab 15901
word it_registrato
half of vocab 15902
word it_centro
half of vocab 15903
word it_edge
half of vocab 15904
word it_mozart
half of vocab 15905
word it_guadagni
half of vocab 15906
word it_ragione
half of vocab 15907
word it_intento
half of vocab 15908
word it_guadagna
half of vocab 15909
word it_palo
half of vocab 15910
word it_rapire
half of vocab 15911
word it_nettuno
half of vocab 15912
word it_originariamente
half of vocab 15913
word it_minerva
half of vocab 15914
word it_modifica
half of 

word it_flora
half of vocab 16116
word it_già
half of vocab 16117
word it_giordano
half of vocab 16118
word it_escludere
half of vocab 16119
word it_rainbow
half of vocab 16120
word it_repubblicano
half of vocab 16121
word it_verde
half of vocab 16122
word it_uomini
half of vocab 16123
word it_tabelle
half of vocab 16124
word it_verdi
half of vocab 16125
word it_garage
half of vocab 16126
word it_naviglio
half of vocab 16127
word it_marmo
half of vocab 16128
word it_frosinone
half of vocab 16129
word it_sperimentare
half of vocab 16130
word it_zinco
half of vocab 16131
word it_settima
half of vocab 16132
word it_bellezza
half of vocab 16133
word it_francesco
half of vocab 16134
word it_saragozza
half of vocab 16135
word it_settimo
half of vocab 16136
word it_danneggiare
half of vocab 16137
word it_baviera
half of vocab 16138
word it_trasformazione
half of vocab 16139
word it_sottostante
half of vocab 16140
word it_notevolmente
half of vocab 16141
word it_anteprima
half of vocab 16142
w

word it_dialogo
half of vocab 16340
word it_mani
half of vocab 16341
word it_decidere
half of vocab 16342
word it_concluse
half of vocab 16343
word it_magnetica
half of vocab 16344
word it_concluso
half of vocab 16345
word it_disprezzo
half of vocab 16346
word it_conferire
half of vocab 16347
word it_correlazione
half of vocab 16348
word it_foster
half of vocab 16349
word it_trappola
half of vocab 16350
word it_rho
half of vocab 16351
word it_zucchero
half of vocab 16352
word it_wells
half of vocab 16353
word it_filadelfia
half of vocab 16354
word it_venti
half of vocab 16355
word it_monaco
half of vocab 16356
word it_oggetti
half of vocab 16357
word it_bonifica
half of vocab 16358
word it_processione
half of vocab 16359
word it_rivelare
half of vocab 16360
word it_completa
half of vocab 16361
word it_organico
half of vocab 16362
word it_disegno
half of vocab 16363
word it_petrolio
half of vocab 16364
word it_emilia
half of vocab 16365
word it_disegna
half of vocab 16366
word it_comple

word it_direttiva
half of vocab 16564
word it_vladimir
half of vocab 16565
word it_qualcuno
half of vocab 16566
word it_tabernacolo
half of vocab 16567
word it_una
half of vocab 16568
word it_resta
half of vocab 16569
word it_holding
half of vocab 16570
word it_rio
half of vocab 16571
word it_crostacei
half of vocab 16572
word it_decreto
half of vocab 16573
word it_radiazione
half of vocab 16574
word it_gabbia
half of vocab 16575
word it_bolzano
half of vocab 16576
word it_samuel
half of vocab 16577
word it_raccordo
half of vocab 16578
word it_neutrale
half of vocab 16579
word it_csi
half of vocab 16580
word it_cardiff
half of vocab 16581
word it_progettista
half of vocab 16582
word it_logan
half of vocab 16583
word it_occupato
half of vocab 16584
word it_occupata
half of vocab 16585
word it_guatemala
half of vocab 16586
word it_provvisorio
half of vocab 16587
word it_svedesi
half of vocab 16588
word it_tenda
half of vocab 16589
word it_patate
half of vocab 16590
word it_rigido
half of

word it_profondamente
half of vocab 16787
word it_persiano
half of vocab 16788
word it_spedizione
half of vocab 16789
word it_tripletta
half of vocab 16790
word it_esistere
half of vocab 16791
word it_maria
half of vocab 16792
word it_rex
half of vocab 16793
word it_pomerania
half of vocab 16794
word it_mario
half of vocab 16795
word it_levi
half of vocab 16796
word it_trevor
half of vocab 16797
word it_draft
half of vocab 16798
word it_furia
half of vocab 16799
word it_alta
half of vocab 16800
word it_conduzione
half of vocab 16801
word it_treno
half of vocab 16802
word it_treni
half of vocab 16803
word it_riprodurre
half of vocab 16804
word it_dinamico
half of vocab 16805
word it_considerato
half of vocab 16806
word it_egitto
half of vocab 16807
word it_folle
half of vocab 16808
word it_lione
half of vocab 16809
word it_pallamano
half of vocab 16810
word it_anime
half of vocab 16811
word it_offerte
half of vocab 16812
word it_milanese
half of vocab 16813
word it_bellissima
half of vo

word it_uno
half of vocab 17011
word it_tirare
half of vocab 17012
word it_rebecca
half of vocab 17013
word it_lacrime
half of vocab 17014
word it_neutroni
half of vocab 17015
word it_siena
half of vocab 17016
word it_elimina
half of vocab 17017
word it_performance
half of vocab 17018
word it_appuntamento
half of vocab 17019
word it_campioni
half of vocab 17020
word it_quinto
half of vocab 17021
word it_intellettuale
half of vocab 17022
word it_calais
half of vocab 17023
word it_notturno
half of vocab 17024
word it_berg
half of vocab 17025
word it_produzione
half of vocab 17026
word it_santana
half of vocab 17027
word it_invadere
half of vocab 17028
word it_parola
half of vocab 17029
word it_sonora
half of vocab 17030
word it_parole
half of vocab 17031
word it_camicia
half of vocab 17032
word it_aristotele
half of vocab 17033
word it_mantenuto
half of vocab 17034
word it_mccartney
half of vocab 17035
word it_comprensione
half of vocab 17036
word it_invasori
half of vocab 17037
word it_

word it_circonferenza
half of vocab 17236
word it_ottomano
half of vocab 17237
word it_attrezzature
half of vocab 17238
word it_sintassi
half of vocab 17239
word it_finanziaria
half of vocab 17240
word it_organizzativa
half of vocab 17241
word it_divisa
half of vocab 17242
word it_corteo
half of vocab 17243
word it_finanziario
half of vocab 17244
word it_artificiale
half of vocab 17245
word it_duo
half of vocab 17246
word it_sarajevo
half of vocab 17247
word it_lisa
half of vocab 17248
word it_fratello
half of vocab 17249
word it_triangolare
half of vocab 17250
word it_apprendere
half of vocab 17251
word it_gambo
half of vocab 17252
word it_costellazione
half of vocab 17253
word it_benedettino
half of vocab 17254
word it_avellino
half of vocab 17255
word it_ricambio
half of vocab 17256
word it_domanda
half of vocab 17257
word it_latina
half of vocab 17258
word it_sospetto
half of vocab 17259
word it_latini
half of vocab 17260
word it_risoluzione
half of vocab 17261
word it_due
half of 

word it_casi
half of vocab 17460
word it_conan
half of vocab 17461
word it_scambiato
half of vocab 17462
word it_fili
half of vocab 17463
word it_filo
half of vocab 17464
word it_gerarchia
half of vocab 17465
word it_film
half of vocab 17466
word it_fabbricato
half of vocab 17467
word it_pinocchio
half of vocab 17468
word it_ministero
half of vocab 17469
word it_preparazione
half of vocab 17470
word it_fabbricati
half of vocab 17471
word it_raf
half of vocab 17472
word it_rai
half of vocab 17473
word it_eseguire
half of vocab 17474
word it_marcello
half of vocab 17475
word it_ram
half of vocab 17476
word it_reato
half of vocab 17477
word it_trans
half of vocab 17478
word it_rap
half of vocab 17479
word it_gamma
half of vocab 17480
word it_collettivo
half of vocab 17481
word it_braccio
half of vocab 17482
word it_collettiva
half of vocab 17483
word it_braccia
half of vocab 17484
word it_cinghiale
half of vocab 17485
word it_famiglie
half of vocab 17486
word it_incredibile
half of vocab 

word it_congresso
half of vocab 17689
word it_alonso
half of vocab 17690
word it_derivare
half of vocab 17691
word it_camerun
half of vocab 17692
word it_monica
half of vocab 17693
word it_carpi
half of vocab 17694
word it_baseball
half of vocab 17695
word it_comporre
half of vocab 17696
word it_garantito
half of vocab 17697
word it_analitica
half of vocab 17698
word it_fondatore
half of vocab 17699
word it_alternativa
half of vocab 17700
word it_delfino
half of vocab 17701
word it_sistematicamente
half of vocab 17702
word it_tortura
half of vocab 17703
word it_thriller
half of vocab 17704
word it_misero
half of vocab 17705
word it_autonomia
half of vocab 17706
word it_nel
half of vocab 17707
word it_bulgaro
half of vocab 17708
word it_diplomatico
half of vocab 17709
word it_debito
half of vocab 17710
word it_costruzione
half of vocab 17711
word it_bulgari
half of vocab 17712
word it_alive
half of vocab 17713
word it_migrazione
half of vocab 17714
word it_esemplare
half of vocab 17715


word it_corpo
half of vocab 17914
word it_enea
half of vocab 17915
word it_colture
half of vocab 17916
word it_specchi
half of vocab 17917
word it_seri
half of vocab 17918
word it_intelligenza
half of vocab 17919
word it_coltura
half of vocab 17920
word it_angela
half of vocab 17921
word it_femminile
half of vocab 17922
word it_venus
half of vocab 17923
word it_elisa
half of vocab 17924
word it_angelo
half of vocab 17925
word it_casa
half of vocab 17926
word it_esecutivo
half of vocab 17927
word it_parentela
half of vocab 17928
word it_agrario
half of vocab 17929
word it_statuto
half of vocab 17930
word it_agraria
half of vocab 17931
word it_legislatura
half of vocab 17932
word it_devastante
half of vocab 17933
word it_incisione
half of vocab 17934
word it_mammiferi
half of vocab 17935
word it_rubrica
half of vocab 17936
word it_generazioni
half of vocab 17937
word it_prostituzione
half of vocab 17938
word it_linux
half of vocab 17939
word it_agricoltori
half of vocab 17940
word it_inf

word it_pippo
half of vocab 18139
word it_certo
half of vocab 18140
word it_documentata
half of vocab 18141
word it_khmer
half of vocab 18142
word it_artiglieria
half of vocab 18143
word it_stimata
half of vocab 18144
word it_terziario
half of vocab 18145
word it_loggia
half of vocab 18146
word it_deserto
half of vocab 18147
word it_visconte
half of vocab 18148
word it_utile
half of vocab 18149
word it_visconti
half of vocab 18150
word it_modesto
half of vocab 18151
word it_isolamento
half of vocab 18152
word it_esercizi
half of vocab 18153
word it_dance
half of vocab 18154
word it_gigantesca
half of vocab 18155
word it_salvare
half of vocab 18156
word it_adriatico
half of vocab 18157
word it_marittimo
half of vocab 18158
word it_accumulo
half of vocab 18159
word it_soffitto
half of vocab 18160
word it_ghiaccio
half of vocab 18161
word it_nevada
half of vocab 18162
word it_indipendente
half of vocab 18163
word it_segmento
half of vocab 18164
word it_invasione
half of vocab 18165
word i

word it_tana
half of vocab 18363
word it_sottomarino
half of vocab 18364
word it_nintendo
half of vocab 18365
word it_incredibilmente
half of vocab 18366
word it_costante
half of vocab 18367
word it_sheffield
half of vocab 18368
word it_felicità
half of vocab 18369
word it_dotato
half of vocab 18370
word it_terni
half of vocab 18371
word it_stilo
half of vocab 18372
word it_intervento
half of vocab 18373
word it_colombo
half of vocab 18374
word it_essere
half of vocab 18375
word it_dotata
half of vocab 18376
word it_confessione
half of vocab 18377
word it_scienziati
half of vocab 18378
word it_pompei
half of vocab 18379
word it_indagine
half of vocab 18380
word it_eccezione
half of vocab 18381
word it_combinata
half of vocab 18382
word it_rana
half of vocab 18383
word it_confessioni
half of vocab 18384
word it_marcato
half of vocab 18385
word it_comandante
half of vocab 18386
word it_imprenditori
half of vocab 18387
word it_cerimonia
half of vocab 18388
word it_imprenditore
half of voc

{'it_migliora': 0}

In [None]:
evaluate(self, source_lang, target_lang, gtt, sample, verbose=True)

# Method 2: Most Common Target Translation

### Initialize the model

In [46]:
from models import BiW2V_mle

# create model
model_2 = BiW2V_mle(bilingual_dict = translations,
                       vocab = vocab, H = EMBEDDING_SIZE)

# intialize TF graphs
model_2.BuildCoreGraph()
model_2.BuildTrainingGraph()
model_2.BuildValidationGraph()

... TF graph created for BiW2V model.
... TF graph created for BiW2V training.
... TF graph created for BiW2V validation.


### Training

In [50]:
# fresh data generator
DATA_GENERATOR = batch_generator(raw_data, vocab, BATCH_SIZE, WINDOW_SIZE, MAX_EPOCHS)

In [51]:
# train
start = time.time()
model_2.train(nBATCHES, DATA_GENERATOR, TEST_WORDS, learning_rate = ALPHA)
tot = (time.time() - start)
print('... {} batches trained in {} seconds'.format(nBATCHES, tot))

... Model Initialized
	 <tf.Variable 'Embedding_Layer/ContextEmbeddings:0' shape=(20003, 200) dtype=float32_ref>
	 <tf.Variable 'Hidden_Layer/WordEmbeddings:0' shape=(20003, 200) dtype=float32_ref>
	 <tf.Variable 'Hidden_Layer/b:0' shape=(20003,) dtype=float32_ref>
... Starting Training
... STEP 0 : Average Loss : 0.00252045059204
   [en_the] closest:  en_bern, en_mini, it_9, en_trout, en_gather, en_rival, en_homes, it_politica,
   [en_last] closest:  it_led, en_idaho, it_funzioni, en_up, it_svevia, it_trascrizione, en_climbing, it_foto,
   [it_si] closest:  en_6th, en_batting, en_reporter, en_shortened, it_alpini, it_tecnologie, en_addresses, en_credits,
   [it_suo] closest:  en_overhead, en_rico, en_venice, it_provenza, it_raven, it_battezzato, it_fire, it_normativa,
... STEP 5000 : Average Loss : 4.71148335021
... STEP 10000 : Average Loss : 4.22817595466
   [en_the] closest:  en_a, en_to, en_and, en_bern, en_mini, en_in, en_trout, it_politica,
   [en_last] closest:  it_led, en_idah

### Save the Embeddings.

In [52]:
# context 
filename = SAVE_TO + '/en_it_mle_50K_V_dec19.pkl'
with open(filename, 'wb') as f:
    # Pickle the 'data' dictionary using the highest protocol available.
    pickle.dump(model_2.context_embeddings, f, pickle.HIGHEST_PROTOCOL)

# word
filename = SAVE_TO + '/en_it_mle_50K_U_dec19.pkl'
with open(filename, 'wb') as f:
    # Pickle the 'data' dictionary using the highest protocol available.
    pickle.dump(model_2.word_embeddings, f, pickle.HIGHEST_PROTOCOL)

# Method 3: Closest Translation

### Initialize the model

In [56]:
from models import BiW2V_nn

# create model
model_3 = BiW2V_nn(bilingual_dict = translations,
                   vocab = vocab, H = EMBEDDING_SIZE)

# intialize TF graphs
model_3.BuildCoreGraph()
model_3.BuildTrainingGraph()
model_3.BuildValidationGraph()

... TF graph created for BiW2V model.
... TF graph created for BiW2V training.
... TF graph created for BiW2V validation.


### Training

In [57]:
# fresh data generator
DATA_GENERATOR = batch_generator(raw_data, vocab, BATCH_SIZE, WINDOW_SIZE, MAX_EPOCHS)

In [58]:
# train
nBATCHES = 5000 # Takes too long w/ nn so we'll only do 5K
start = time.time()
model_3.train(nBATCHES, DATA_GENERATOR, TEST_WORDS, learning_rate = ALPHA)
tot = (time.time() - start)
print('... {} batches trained in {} seconds'.format(nBATCHES, tot))

... Model Initialized
	 <tf.Variable 'Embedding_Layer/ContextEmbeddings:0' shape=(20003, 200) dtype=float32_ref>
	 <tf.Variable 'Hidden_Layer/WordEmbeddings:0' shape=(20003, 200) dtype=float32_ref>
	 <tf.Variable 'Hidden_Layer/b:0' shape=(20003,) dtype=float32_ref>
... Starting Training
... STEP 0 : Average Loss : 0.02340858078
   [en_the] closest:  en_nutrition, it_ma, en_prolonged, it_falco, it_vercelli, en_artists, en_beliefs, it_idraulico,
   [en_last] closest:  en_earn, en_renowned, it_decadimento, it_boy, en_korea, en_basement, en_resulting, en_told,
   [it_si] closest:  en_qualities, it_generato, it_scuole, en_employer, en_situation, en_chorus, en_to, en_inadequate,
   [it_suo] closest:  en_sat, it_rimpiazzare, en_playing, it_triangolare, it_realmente, en_profile, it_tutta, it_stirpe,
... STEP 500 : Average Loss : 6.30888293695
... STEP 1000 : Average Loss : 5.52120716763
   [en_the] closest:  en_nutrition, en_artists, it_falco, it_ma, en_beliefs, it_saturno, it_anime, it_siccit

### Save the Embeddings.

In [59]:
# context 
filename = SAVE_TO + '/en_it_nn_5K_V_dec19.pkl'
with open(filename, 'wb') as f:
    # Pickle the 'data' dictionary using the highest protocol available.
    pickle.dump(model_2.context_embeddings, f, pickle.HIGHEST_PROTOCOL)

# word
filename = SAVE_TO + '/en_it_nn_5K_U_dec19.pkl'
with open(filename, 'wb') as f:
    # Pickle the 'data' dictionary using the highest protocol available.
    pickle.dump(model_2.word_embeddings, f, pickle.HIGHEST_PROTOCOL)