In [1]:
from src.orchestration import Orchestrator
from src.model.evaluation import Evaluator
import pandas as pd
import os

In [2]:
words = pd.read_csv('./MUSE/data/crosslingual/dictionaries/en-cz.5000-6500.txt', sep='\t', header=None, names=['en', 'cz'])
reduced = words.sample(100)

# Running all together

In [4]:
orch = Orchestrator(en_words=reduced['en'].tolist(), cz_words=reduced['cz'].tolist(),
                    vector_sizes=[100, 300], window_sizes=[8, 10])

In [24]:
orch.run_all()

------------------------------
------------------------------
INTERLEAVED
Context window: 8
Vector size: 100
Model type: CBOW
------------------------------
P@1: 14.00%
P@5: 30.00%
P@10: 37.00%
Relevance: 21.60%
------------------------------
------------------------------
MUSE
Context window: 4
Vector size: 100
Model type: CBOW
------------------------------
Supervised
P@1: 7.00%
P@5: 11.00%
P@10: 15.00%
Relevance: 9.12%
Unsupervised
P@1: 0.00%
P@5: 0.00%
P@10: 0.00%
Relevance: 0.00%
------------------------------
------------------------------
INTERLEAVED
Context window: 8
Vector size: 100
Model type: Skip-Gram
------------------------------
P@1: 10.00%
P@5: 19.00%
P@10: 21.00%
Relevance: 13.78%
------------------------------
------------------------------
MUSE
Context window: 4
Vector size: 100
Model type: Skip-Gram
------------------------------
Supervised
P@1: 11.00%
P@5: 16.00%
P@10: 17.00%
Relevance: 12.56%
Unsupervised
P@1: 1.00%
P@5: 1.00%
P@10: 1.00%
Relevance: 1.00%
--------

In [5]:
all_stats = orch.all_stats_df()
all_stats

Unnamed: 0,Model type,W2V type,Vector size,Context window,P@1,P@5,P@10,Relevance
0,Interleaved,CBOW,100,8,16.0,35.0,40.0,24.54
1,MUSE supervised,CBOW,100,4,5.0,12.0,17.0,8.73
2,MUSE unsupervised,CBOW,100,4,0.0,0.0,0.0,0.0
3,Interleaved,Skip-Gram,100,8,11.0,21.0,23.0,15.05
4,MUSE supervised,Skip-Gram,100,4,5.0,5.0,14.0,6.46
5,MUSE unsupervised,Skip-Gram,100,4,0.0,0.0,2.0,0.23
6,Interleaved,CBOW,100,10,19.0,38.0,43.0,27.87
7,MUSE supervised,CBOW,100,5,9.0,14.0,17.0,11.32
8,MUSE unsupervised,CBOW,100,5,0.0,0.0,0.0,0.0
9,Interleaved,Skip-Gram,100,10,13.0,21.0,26.0,16.48


# Testing only one model

In [4]:
from src.model.word2vec import W2V
from src.model.muse import MUSE
en_model = W2V(path='./models/cbow3x300/en')
cz_model = W2V(path='./models/cbow3x300/cz')

In [15]:
muse =  MUSE(en_model=en_model, cz_model=cz_model,
            model_config={'window': 4, 'vector_size': 300, 'sg': 0}, supervised=True)

In [16]:
muse.run_adversarial()

In [21]:
Evaluator(muse).p_at_k_metric(en_words=reduced['en'].tolist(), cz_words=reduced['cz'].tolist(), k=10)

20.0