In [2]:
from transformers import AutoTokenizer
import torch
import torch.nn as nn
import torch.distributed as dist
import numpy as np
import pandas as pd
import pickle as pkl

In [3]:
# Tokenizer for the 'bert-base-multilingual-cased' vocabulary; the similarity helper
# defined further down reads this module-level name to turn words into token ids.
tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-cased')
# Shared cosine-similarity module, built with torch's default constructor arguments.
cos_sim = torch.nn.CosineSimilarity()
# CUDA device when available, otherwise CPU.
# NOTE(review): `device` is not referenced by any cell visible in this notebook.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
def get_word_embeddings(path_to_model):
    """Load a checkpoint onto the CPU and return its word-embedding weights.

    Args:
        path_to_model: Path of a file readable by ``torch.load``.

    Returns:
        The object stored under the
        ``"bert.embeddings.word_embeddings.weight"`` key of the loaded mapping.

    Raises:
        KeyError: If the loaded mapping has no such key.
    """
    checkpoint = torch.load(path_to_model, map_location="cpu")
    embedding_weights = checkpoint["bert.embeddings.word_embeddings.weight"]
    return embedding_weights

In [5]:
def _mean_subword_embedding(word, word_embeddings):
    """Average the embedding rows selected by the token ids of ``word``."""
    # encode() output is trimmed by one id on each side; for this BERT tokenizer those
    # are presumably the [CLS]/[SEP] special tokens -- TODO confirm.
    # The explicit integer dtype keeps an empty id list from becoming a float64 index array.
    token_ids = np.asarray(tokenizer.encode(word)[1:-1], dtype=np.int64)
    return word_embeddings[token_ids, :].mean(axis=0)


def compute_mean_cosine_similarity(word_pairs, word_embeddings):
    """Mean cosine similarity between the averaged embeddings of word pairs.

    Each word is tokenized with the module-level ``tokenizer``; the embedding
    rows of its tokens are averaged, and the two averaged vectors of a pair are
    compared with the module-level ``cos_sim``.

    Args:
        word_pairs: Iterable of ``(lang_word, en_word)`` 2-tuples.
        word_embeddings: Embedding matrix indexed as ``[token_ids, :]``
            (assumed to be a 2-D torch tensor of shape (vocab, dim) -- confirm
            against the loaded checkpoint).

    Returns:
        The mean of all non-NaN pair similarities as a NumPy float. NaN is
        returned, without emitting NumPy's "Mean of empty slice" warning, when
        there are no pairs or every pair similarity is NaN.
    """
    pair_scores = []

    with torch.no_grad():
        for lang_word, en_word in word_pairs:
            lang_vector = _mean_subword_embedding(lang_word, word_embeddings)
            en_vector = _mean_subword_embedding(en_word, word_embeddings)
            # cos_sim compares along dim 1 by default, hence the (1, dim) reshape.
            score = cos_sim(lang_vector.reshape(1, -1), en_vector.reshape(1, -1))
            pair_scores.append(score.item())

    similarities = np.asarray(pair_scores, dtype=np.float64)
    # Drop NaN scores (e.g. presumably produced when a word yields no token ids).
    similarities = similarities[~np.isnan(similarities)]
    if similarities.size == 0:
        # Nothing left to average: report NaN explicitly instead of triggering
        # a RuntimeWarning from averaging an empty array.
        return np.float64(np.nan)
    return similarities.mean()


In [6]:
def compute_mean_cosine_similarity_wordnet(word_pos_dict, word_embeddings):
    """Print mean cosine similarities overall and per part-of-speech tag.

    Args:
        word_pos_dict: Mapping whose keys are indexable as ``key[0]`` (the word)
            and ``key[1]`` (its POS tag) and whose values are the translations.
        word_embeddings: Embedding matrix forwarded unchanged to
            ``compute_mean_cosine_similarity``.

    Returns:
        None. Five lines are printed: all words, then NOUN, VERB, ADJ and ADV.
    """
    every_pair = []
    pairs_by_tag = {"NOUN": [], "VERB": [], "ADJ": [], "ADV": []}

    # Single pass: each pair goes to the overall list and, when its tag is one
    # of the four tracked tags, to that tag's bucket as well.
    for key, translation in word_pos_dict.items():
        pair = (key[0], translation)
        tag = key[1]
        every_pair.append(pair)
        bucket = pairs_by_tag.get(tag)
        if bucket is not None:
            bucket.append(pair)

    # (pairs, message template) in the order the lines are reported.
    report_plan = (
        (every_pair, "All words | Mean cosine similarity = {m}"),
        (pairs_by_tag["NOUN"], "All NOUNs | Mean cosine similarity = {m}"),
        (pairs_by_tag["VERB"], "All VERBs | Mean cosine similarity = {m}"),
        (pairs_by_tag["ADJ"], "All ADJs | Mean cosine similarity = {m}"),
        (pairs_by_tag["ADV"], "All ADVs | Mean cosine similarity = {m}"),
    )
    for pairs, template in report_plan:
        group_mean = compute_mean_cosine_similarity(pairs, word_embeddings)
        print(template.format(m=group_mean))

In [15]:
embeddings_baseline = get_word_embeddings("/mnt/nas_home/mrgj4/nlvr2_dataset/cc_pretrain/pytorch_model_best.bin")

## SW

In [16]:
with open("./panlex_dictionaries/sw_to_en_dict.pickle",'rb') as handle_1:
    sw_to_en_dict = pkl.load(handle_1)

with open("./infused_test_set/sw_to_en_marvl_dict.pickle",'rb') as handle_2:
    sw_to_en_marvl_dict = pkl.load(handle_2)

### Baseline

In [17]:
compute_mean_cosine_similarity(list(sw_to_en_marvl_dict.items()), embeddings_baseline)

0.18425346160636666

In [18]:
# NOTE(review): `embeddings_baseline_ft` is never assigned in the cells visible here
# (only `embeddings_baseline` is); unless it is defined elsewhere, this call raises
# NameError on Restart & Run All -- confirm which checkpoint it should load.
compute_mean_cosine_similarity_wordnet(sw_to_en_dict, embeddings_baseline_ft)

All words | Mean cosine similarity = 0.18919581820941783
All NOUNs | Mean cosine similarity = 0.207730880408052
All VERBs | Mean cosine similarity = 0.15045521574208381
All ADJs | Mean cosine similarity = 0.1677729550142101
All ADVs | Mean cosine similarity = 0.13368910961400937


### WIT pretraining

In [19]:
embeddings_wit_sw =  get_word_embeddings("/mnt/nas_home/mrgj4/final_repo/thesis-code/experiment_1/sw/seed1/finetune/NLVR2_ctrl_muniter_base/pytorch_model_best.bin")

In [20]:
compute_mean_cosine_similarity(list(sw_to_en_marvl_dict.items()), embeddings_wit_sw)

0.18618910572149977

In [21]:
compute_mean_cosine_similarity_wordnet(sw_to_en_dict, embeddings_wit_sw)

All words | Mean cosine similarity = 0.1896265890370076
All NOUNs | Mean cosine similarity = 0.20814037093355492
All VERBs | Mean cosine similarity = 0.1509691119135823
All ADJs | Mean cosine similarity = 0.1683298956514472
All ADVs | Mean cosine similarity = 0.13344225569025558


### Best model - WIT pretraining with code-switching + code-switched finetuning

In [22]:
embeddings_codeswitching_sw =  get_word_embeddings("/mnt/nas_home/mrgj4/final_repo/thesis-code/experiment_3/sw/seed1/pretrain_cs_finetune_cs/NLVR2_ctrl_muniter_base/pytorch_model_best.bin")

In [23]:
compute_mean_cosine_similarity(list(sw_to_en_marvl_dict.items()), embeddings_codeswitching_sw)

0.18712524190002583

In [24]:
compute_mean_cosine_similarity_wordnet(sw_to_en_dict, embeddings_codeswitching_sw)

All words | Mean cosine similarity = 0.19040289691418058
All NOUNs | Mean cosine similarity = 0.2089250207820141
All VERBs | Mean cosine similarity = 0.15155054509903576
All ADJs | Mean cosine similarity = 0.16905893699122712
All ADVs | Mean cosine similarity = 0.13574119499409754


## TA

In [25]:
with open("./panlex_dictionaries/ta_to_en_dict.pickle",'rb') as handle_1:
    ta_to_en_dict = pkl.load(handle_1)

with open("./infused_test_set/ta_to_en_marvl_dict.pickle",'rb') as handle_2:
    ta_to_en_marvl_dict = pkl.load(handle_2)

### Baseline

In [26]:
compute_mean_cosine_similarity(list(ta_to_en_marvl_dict.items()), embeddings_baseline)

0.15174966074305227

In [27]:
# NOTE(review): `embeddings_baseline_ft` is never assigned in the cells visible here
# (only `embeddings_baseline` is); unless it is defined elsewhere, this call raises
# NameError on Restart & Run All -- confirm which checkpoint it should load.
compute_mean_cosine_similarity_wordnet(ta_to_en_dict, embeddings_baseline_ft)

All words | Mean cosine similarity = 0.19977740609031042
All NOUNs | Mean cosine similarity = 0.20765580186589835
All VERBs | Mean cosine similarity = 0.17347964875070365
All ADJs | Mean cosine similarity = 0.19912358018008466
All ADVs | Mean cosine similarity = 0.17600692763174455


### WIT pretraining

In [28]:
embeddings_wit_ta =  get_word_embeddings("/mnt/nas_home/mrgj4/final_repo/thesis-code/experiment_1/ta/seed1/finetune/NLVR2_ctrl_muniter_base/pytorch_model_best.bin")

In [29]:
compute_mean_cosine_similarity(list(ta_to_en_marvl_dict.items()), embeddings_wit_ta)

0.15454914044774948

In [30]:
compute_mean_cosine_similarity_wordnet(ta_to_en_dict, embeddings_wit_ta)

All words | Mean cosine similarity = 0.20129560611709824
All NOUNs | Mean cosine similarity = 0.20912236065700676
All VERBs | Mean cosine similarity = 0.17509699353059968
All ADJs | Mean cosine similarity = 0.20073141315354942
All ADVs | Mean cosine similarity = 0.1777401501296959


### Best model - WIT pretraining with code-switching + code-switched finetuning

In [31]:
embeddings_codeswitching_ta =  get_word_embeddings("/mnt/nas_home/mrgj4/final_repo/thesis-code/experiment_3/ta/seed1/pretrain_cs_finetune_cs/NLVR2_ctrl_muniter_base/pytorch_model_best.bin")

In [32]:
compute_mean_cosine_similarity(list(ta_to_en_marvl_dict.items()), embeddings_codeswitching_ta)

0.15510254572982987

In [33]:
compute_mean_cosine_similarity_wordnet(ta_to_en_dict, embeddings_codeswitching_ta)

All words | Mean cosine similarity = 0.2019679782650549
All NOUNs | Mean cosine similarity = 0.20982041865765622
All VERBs | Mean cosine similarity = 0.1757129264646493
All ADJs | Mean cosine similarity = 0.20131341016982707
All ADVs | Mean cosine similarity = 0.17859935869999968


## TR

In [34]:
with open("./panlex_dictionaries/tr_to_en_dict.pickle",'rb') as handle_1:
    tr_to_en_dict = pkl.load(handle_1)

with open("./infused_test_set/tr_to_en_marvl_dict.pickle",'rb') as handle_2:
    tr_to_en_marvl_dict = pkl.load(handle_2)

### Baseline

In [35]:
compute_mean_cosine_similarity(list(tr_to_en_marvl_dict.items()), embeddings_baseline)

0.1894214367178821

In [36]:
# NOTE(review): `embeddings_baseline_ft` is never assigned in the cells visible here
# (only `embeddings_baseline` is); unless it is defined elsewhere, this call raises
# NameError on Restart & Run All -- confirm which checkpoint it should load.
compute_mean_cosine_similarity_wordnet(tr_to_en_dict, embeddings_baseline_ft)

All words | Mean cosine similarity = 0.249461998311177
All NOUNs | Mean cosine similarity = 0.266069126730038
All VERBs | Mean cosine similarity = 0.19543854782843875
All ADJs | Mean cosine similarity = 0.23810544077775023
All ADVs | Mean cosine similarity = 0.17988731655558304


### WIT pretraining

In [38]:
embeddings_wit_tr =  get_word_embeddings("/mnt/nas_home/mrgj4/final_repo/thesis-code/experiment_1/tr/seed1/finetune/NLVR2_ctrl_muniter_base/pytorch_model_best.bin")

In [39]:
compute_mean_cosine_similarity(list(tr_to_en_marvl_dict.items()), embeddings_wit_tr)

0.19052003919906368

In [40]:
compute_mean_cosine_similarity_wordnet(tr_to_en_dict, embeddings_wit_tr)

All words | Mean cosine similarity = 0.25030890732911854
All NOUNs | Mean cosine similarity = 0.2671009984484636
All VERBs | Mean cosine similarity = 0.1961022545246521
All ADJs | Mean cosine similarity = 0.23853598638105328
All ADVs | Mean cosine similarity = 0.1794000202645054


### Best model - WIT pretraining with code-switching + code-switched finetuning

In [41]:
embeddings_codeswitching_tr =  get_word_embeddings("/mnt/nas_home/mrgj4/final_repo/thesis-code/experiment_3/tr/seed1/pretrain_cs_finetune_cs/NLVR2_ctrl_muniter_base/pytorch_model_best.bin")

In [42]:
compute_mean_cosine_similarity(list(tr_to_en_marvl_dict.items()), embeddings_codeswitching_tr)

0.19205984606920407

In [43]:
compute_mean_cosine_similarity_wordnet(tr_to_en_dict, embeddings_codeswitching_tr)

All words | Mean cosine similarity = 0.2508677836228365
All NOUNs | Mean cosine similarity = 0.26760591893497354
All VERBs | Mean cosine similarity = 0.19667106047865543
All ADJs | Mean cosine similarity = 0.23917952845609936
All ADVs | Mean cosine similarity = 0.18089005361474225


## ID

In [44]:
with open("./panlex_dictionaries/id_to_en_dict.pickle",'rb') as handle_1:
    id_to_en_dict = pkl.load(handle_1)

with open("./infused_test_set/id_to_en_marvl_dict.pickle",'rb') as handle_2:
    id_to_en_marvl_dict = pkl.load(handle_2)

### Baseline

In [45]:
compute_mean_cosine_similarity(list(id_to_en_marvl_dict.items()), embeddings_baseline)

0.2634194772362136

In [46]:
# NOTE(review): `embeddings_baseline_ft` is never assigned in the cells visible here
# (only `embeddings_baseline` is); unless it is defined elsewhere, this call raises
# NameError on Restart & Run All -- confirm which checkpoint it should load.
compute_mean_cosine_similarity_wordnet(id_to_en_dict, embeddings_baseline_ft)

All words | Mean cosine similarity = 0.27093752962862155
All NOUNs | Mean cosine similarity = 0.2964528954075636
All VERBs | Mean cosine similarity = 0.1912645551180642
All ADJs | Mean cosine similarity = 0.2546986460569315
All ADVs | Mean cosine similarity = 0.19206390762021452


### WIT pretraining

In [47]:
embeddings_wit_id =  get_word_embeddings("/mnt/nas_home/mrgj4/final_repo/thesis-code/experiment_1/id/seed1/finetune/NLVR2_ctrl_muniter_base/pytorch_model_best.bin")

In [48]:
compute_mean_cosine_similarity(list(id_to_en_marvl_dict.items()), embeddings_wit_id)

0.2650132598827716

In [49]:
compute_mean_cosine_similarity_wordnet(id_to_en_dict, embeddings_wit_id)

All words | Mean cosine similarity = 0.2723488596419061
All NOUNs | Mean cosine similarity = 0.2980598641464467
All VERBs | Mean cosine similarity = 0.19266673897446576
All ADJs | Mean cosine similarity = 0.2558594752883049
All ADVs | Mean cosine similarity = 0.19032268647910253


### Best model - WIT pretraining with code-switching + code-switched finetuning

In [50]:
embeddings_codeswitching_id =  get_word_embeddings("/mnt/nas_home/mrgj4/final_repo/thesis-code/experiment_3/id/seed1/pretrain_cs_finetune_cs/NLVR2_ctrl_muniter_base/pytorch_model_best.bin")

In [51]:
compute_mean_cosine_similarity(list(id_to_en_marvl_dict.items()), embeddings_codeswitching_id)

0.2669301095054073

In [52]:
compute_mean_cosine_similarity_wordnet(id_to_en_dict, embeddings_codeswitching_id)

All words | Mean cosine similarity = 0.2732572265662238
All NOUNs | Mean cosine similarity = 0.2988746051746351
All VERBs | Mean cosine similarity = 0.19384401493943015
All ADJs | Mean cosine similarity = 0.25659379923583076
All ADVs | Mean cosine similarity = 0.1929276655653094


## ZH

In [53]:
with open("./panlex_dictionaries/zh_to_en_dict.pickle",'rb') as handle_1:
    zh_to_en_dict = pkl.load(handle_1)

with open("./infused_test_set/zh_to_en_marvl_dict.pickle",'rb') as handle_2:
    zh_to_en_marvl_dict = pkl.load(handle_2)

### Baseline

In [54]:
compute_mean_cosine_similarity(list(zh_to_en_marvl_dict.items()), embeddings_baseline)

0.20628649625810794

In [55]:
# NOTE(review): `embeddings_baseline_ft` is never assigned in the cells visible here
# (only `embeddings_baseline` is); unless it is defined elsewhere, this call raises
# NameError on Restart & Run All -- confirm which checkpoint it should load.
compute_mean_cosine_similarity_wordnet(zh_to_en_dict, embeddings_baseline_ft)

All words | Mean cosine similarity = 0.20053107023758296
All NOUNs | Mean cosine similarity = 0.20857513535854397
All VERBs | Mean cosine similarity = 0.15893877996625555
All ADJs | Mean cosine similarity = 0.19798586701689852
All ADVs | Mean cosine similarity = 0.1946136680236639


### WIT pretraining

In [56]:
embeddings_wit_zh =  get_word_embeddings("/mnt/nas_home/mrgj4/final_repo/thesis-code/experiment_1/zh/seed1/finetune/NLVR2_ctrl_muniter_base/pytorch_model_best.bin")

In [57]:
compute_mean_cosine_similarity(list(zh_to_en_marvl_dict.items()), embeddings_wit_zh)

0.20714019160213865

In [59]:
compute_mean_cosine_similarity_wordnet(zh_to_en_dict, embeddings_wit_zh)

All words | Mean cosine similarity = 0.20157428169215286
All NOUNs | Mean cosine similarity = 0.20980931524383256
All VERBs | Mean cosine similarity = 0.16049203230503462
All ADJs | Mean cosine similarity = 0.19803184850985092
All ADVs | Mean cosine similarity = 0.19441348640821782


### Best model - WIT pretraining with code-switching + code-switched finetuning

In [61]:
embeddings_codeswitching_zh =  get_word_embeddings("/mnt/nas_home/mrgj4/final_repo/thesis-code/experiment_3/zh/seed1/pretrain_cs_finetune_cs/NLVR2_ctrl_muniter_base/pytorch_model_best.bin")

In [62]:
compute_mean_cosine_similarity(list(zh_to_en_marvl_dict.items()), embeddings_codeswitching_zh)

0.20864392522868003

In [63]:
compute_mean_cosine_similarity_wordnet(zh_to_en_dict, embeddings_codeswitching_zh)

All words | Mean cosine similarity = 0.2020548308254375
All NOUNs | Mean cosine similarity = 0.21032037653364355
All VERBs | Mean cosine similarity = 0.1611949286628304
All ADJs | Mean cosine similarity = 0.19818321678964237
All ADVs | Mean cosine similarity = 0.19501665415094221
