In [1]:
import pandas as pd 
import numpy as np 
import pickle
from tqdm import tqdm
from sacrebleu import CHRF
# Shared sentence-level chrF2 scorer used by compute_chrf2.
# sacrebleu's CHRF takes beta as a constructor argument; setting an attribute
# named BETA on an existing instance does not configure the scorer, so request
# beta=2 (chrF2) explicitly when building it.
chrf = CHRF(beta=2, lowercase=True)

In [2]:
def compute_chrf2(dataframe, metric=None):
    """Score each row's hypothesis against its reference at sentence level.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns ``"mt"`` (hypothesis) and ``"ref"``
        (reference); every value in both columns has to be a ``str``.
    metric : optional
        Object exposing ``sentence_score(hyp, [ref])`` whose result carries a
        ``.score`` attribute. Defaults to the notebook-level ``chrf`` scorer.

    Returns
    -------
    list
        One score per row, in row order.

    Raises
    ------
    TypeError
        If a hypothesis or reference is not a ``str``.
    """
    if metric is None:
        metric = chrf

    scores = []
    # Walk the two columns positionally. Looking values up with
    # ``dataframe["mt"][i]`` is label-based, so it fails (or silently reorders
    # rows) as soon as the index is not exactly 0..n-1.
    pairs = zip(dataframe["mt"], dataframe["ref"])
    for position, (hyp, ref) in enumerate(tqdm(pairs, total=len(dataframe))):
        if not isinstance(hyp, str) or not isinstance(ref, str):
            raise TypeError(
                "row {}: expected str for 'mt' and 'ref', got {} and {}".format(
                    position, type(hyp).__name__, type(ref).__name__
                )
            )
        scores.append(metric.sentence_score(hyp, [ref]).score)
    return scores

In [11]:
def _load_pickle(path):
    """Return the object unpickled from ``path``."""
    # SECURITY: unpickling can execute arbitrary code; only point this at
    # files produced by your own pipeline.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def process_df(
    lp: str,
    ckpt: str,
    data_root: str = "/home/nunomg/mt-hallucinations/HALO/fairseq/data-bin",
    bicleaner_root: str = "/home/nunomg/bicleaner/heldout_wmt18",
    unk_id: int = 3,
    bicleaner_threshold: float = 0.5,
    n_lowest: int = 10000,
):
    """Attach per-sentence statistics to the held-out frame, filter it, save it.

    Steps, in order:

    1. Read ``dataframes/dataframe_test_beam5.pkl`` for the given model.
    2. Add the columns ``comet-qe``, ``laser``, ``chrf2``, ``f1_bpe``,
       ``f2_bpe``, ``repscore_bpe``, ``f1_word``, ``f2_word`` and
       ``repscore_word`` (all but ``chrf2`` come from pickles in ``stats/``).
    3. Add ``unk_flag`` (1 when ``unk_id`` occurs in the row's ``src_ids``).
    4. Add ``bicleaner_score`` by looking up each row's ``idx`` in the
       ``<lp>.bicleaner.classified`` file (score = last tab-separated field).
    5. Drop duplicate ``src`` rows, rows with ``unk_flag`` set, and rows whose
       bicleaner score is below ``bicleaner_threshold``; pickle the result.
    6. Pickle the ``n_lowest`` rows by ``comet-qe`` and by ``laser`` (each
       re-sorted by index).

    Parameters
    ----------
    lp : str
        Language pair used in directory and file names, e.g. ``"de-en"``.
    ckpt : str
        Checkpoint directory name, e.g. ``"checkpoint_best"``.
    data_root : str
        Directory containing the ``wmt18_<lp>_heldout`` model directories.
    bicleaner_root : str
        Directory containing ``<lp>/<lp>.bicleaner.classified``.
    unk_id : int
        Token id that marks a source sentence as containing an unknown token
        (presumably the ``<unk>`` id of the model dictionary -- confirm).
    bicleaner_threshold : float
        Minimum bicleaner score a row needs to be kept.
    n_lowest : int
        Size of the low-COMET-QE and low-LASER subsets.

    Returns
    -------
    None
        Results are written to three pickle files under ``dataframes/``.
    """
    model_path = data_root + "/wmt18_" + lp + "_heldout/" + ckpt
    stats_path = model_path + "/stats/"
    dataframes_path = model_path + "/dataframes/"
    heldoutwstats_save_path = dataframes_path + "heldoutwstats_bicleaner.pkl"
    lowcomet_save_path = dataframes_path + "heldout_lowcomet_w_bicleaner.pkl"
    lowlaser_save_path = dataframes_path + "heldout_lowlaser_w_bicleaner.pkl"

    df = pd.read_pickle(dataframes_path + "dataframe_test_beam5.pkl")

    # Column order below mirrors the order in which the stats are attached.
    df["comet-qe"] = _load_pickle(stats_path + "comet_qe_da_scores.pkl")
    df["laser"] = _load_pickle(stats_path + "laser_scores.pkl")
    df["chrf2"] = compute_chrf2(df)
    for column, filename in (
        ("f1_bpe", "f1_scores.pkl"),
        ("f2_bpe", "f2_scores.pkl"),
        ("repscore_bpe", "rep_scores.pkl"),
        ("f1_word", "f1_scores_str.pkl"),
        ("f2_word", "f2_scores_str.pkl"),
        ("repscore_word", "repscores_str.pkl"),
    ):
        df[column] = _load_pickle(stats_path + filename)

    # One positional pass over the column: avoids building a full row with
    # df.loc[i] per sentence and does not assume index labels are positions.
    df["unk_flag"] = [int(unk_id in ids) for ids in tqdm(df["src_ids"])]

    classified_path = bicleaner_root + "/" + lp + "/" + lp + ".bicleaner.classified"
    # Iterate the file instead of str.splitlines(): splitlines also breaks on
    # Unicode separators (e.g. U+2028) that may occur inside sentence text,
    # which would shift every following line number.
    with open(classified_path, "r", encoding="utf-8") as f:
        scores = [float(line.rsplit("\t", 1)[-1]) for line in f]
    # "idx" holds each row's line number in the classified file.
    df["bicleaner_score"] = [scores[line_no] for line_no in df["idx"].values]

    df_nodup = df.drop_duplicates(subset="src")
    df_nodup = df_nodup.loc[df_nodup["unk_flag"] == 0]
    df_nodup = df_nodup.loc[df_nodup["bicleaner_score"] >= bicleaner_threshold]

    df_nodup.to_pickle(heldoutwstats_save_path)
    print("Heldout with stats saved.")

    df_lowcomet = df_nodup.sort_values(by="comet-qe").iloc[:n_lowest].sort_index()
    df_lowcomet.to_pickle(lowcomet_save_path)
    print("Low COMET-QE with stats saved.")

    df_laser = df_nodup.sort_values(by="laser").iloc[:n_lowest].sort_index()
    df_laser.to_pickle(lowlaser_save_path)
    print("Low LASER with stats saved.")

In [12]:
# Build and save the filtered held-out frames for the de-en model.
process_df(lp="de-en", ckpt="checkpoint_best")

100%|██████████| 1941913/1941913 [10:12<00:00, 3169.30it/s]
100%|██████████| 1941913/1941913 [02:48<00:00, 11519.89it/s]
100%|██████████| 1941913/1941913 [00:06<00:00, 299424.94it/s]


Heldout with stats saved.
Low COMET-QE with stats saved.
Low LASER with stats saved.


In [5]:
# Build and save the filtered held-out frames for the en-ru model.
# NOTE(review): the output recorded under this cell ("Lowcomet with stats
# saved.", no LASER line) does not match what process_df prints, so it comes
# from an earlier definition -- re-run to refresh it.
process_df(lp="en-ru", ckpt="checkpoint_best")

100%|██████████| 827858/827858 [04:30<00:00, 3056.39it/s] 
100%|██████████| 827858/827858 [01:13<00:00, 11234.09it/s]
100%|██████████| 827858/827858 [00:08<00:00, 103141.92it/s]


Heldout with stats saved.
Lowcomet with stats saved.


In [7]:
# process_df saves the filtered held-out frame as "heldoutwstats_bicleaner.pkl"
# (without "_w"), so read that file name back here.
df = pd.read_pickle("/home/nunomg/mt-hallucinations/HALO/fairseq/data-bin/wmt18_de-en_heldout/checkpoint_best/dataframes/heldoutwstats_bicleaner.pkl")

In [None]:
# Post-process the saved attention maps so that each one has shape
# (len(src_ids), len(mt_ids)), then pickle the resulting list.
lp = "de-en"
ckpt = "checkpoint_best"

model_path = "/home/nunomg/mt-hallucinations/HALO/fairseq/data-bin/wmt18_" + lp + "_heldout/" + ckpt
stats_path = model_path + "/stats/"
attn_list = {}  # not used in this cell; kept in case later cells rely on it
# SECURITY: unpickling can execute arbitrary code; only load trusted files.
with open(stats_path + "stats_test_attention_beam5.pkl", 'rb') as f:
    x_attn = pickle.load(f)

df = pd.read_pickle(model_path + "/dataframes/dataframe_test_beam5.pkl")

attn_samples = []
token_logprobs_samples = []  # not used in this cell; kept in case later cells rely on it
# Iterate the two id columns directly instead of calling df.loc[idx] twice per
# row, which builds a full row Series on every access.
rows = zip(df.index.values, df["src_ids"], df["mt_ids"])
for idx, src_ids, mt_ids in tqdm(rows, total=len(df)):
    expected_shape = (len(src_ids), len(mt_ids))
    attn = np.array(x_attn[idx])
    if attn.shape != expected_shape:
        # Drop rows that are entirely zero (presumably padding -- confirm).
        attn = attn[~np.all(attn == 0, axis=1)]
    assert attn.shape == expected_shape, (
        "attention for row {} has shape {}, expected {}".format(idx, attn.shape, expected_shape)
    )
    attn_samples.append(attn)

with open(stats_path + "stats_test_attention_beam5_proc.pkl", "wb") as f:
    pickle.dump(attn_samples, f)