In [1]:
import pandas as pd
import os

from ast import literal_eval

# The relative paths used below (e.g. "gold_standard/...") are resolved from the
# parent directory of this notebook. Only move up when that folder is not
# already visible, so re-running this cell does not climb one level each time.
if not os.path.isdir("gold_standard"):
    os.chdir("..")

In [2]:
# Location of the annotated gold standard (semicolon-separated CSV).
GOLD_STANDARD_PATH = "gold_standard/gold_standard_annot_final.csv"

# One folder per model; the OUTP_FN file inside each folder is loaded.
MODEL_FOLDERS = [
    "CatBoostFeatDrop",
    "CatBoostVecsOnly",
    "RandomForestFreqsOnly",
    "XGBAllFeats",
]
OUTP_FN = "output.csv"

# Columns stored as stringified Python lists; read_df parses them back.
EVAL_FIELDS = ["variants", "Appropriate", "Too bad", "Too good"]

# Number of leading entries of each `variants` list that is evaluated.
TOP_K = 4


def read_df(path: str):
    """Load a semicolon-separated CSV and decode its list-valued columns.

    The column named "Unnamed: 0" becomes the index. Every column listed in
    EVAL_FIELDS that is present in the file is converted cell by cell with
    `ast.literal_eval`; all other columns are returned as pandas parsed them.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The loaded DataFrame.
    """
    frame = pd.read_csv(path, sep=";", index_col="Unnamed: 0")
    present_fields = [name for name in EVAL_FIELDS if name in frame.columns]
    for name in present_fields:
        frame[name] = frame[name].apply(literal_eval)
    return frame

In [3]:
# Gold-standard annotations, then one output frame per model keyed by folder name.
gold_standard = read_df(GOLD_STANDARD_PATH)
model_dfs = {
    folder: read_df(f"{folder}/{OUTP_FN}")
    for folder in MODEL_FOLDERS
}

In [4]:
# Preview the first rows of the gold-standard annotations.
gold_standard.head()

Unnamed: 0,Masked_sentence,Right_answer,Wrong_answer,Filename,Delete,variants,Appropriate,Too bad,Too good,Consistent,In duplicate names
153493,The amount of people who has no occupation in...,stable,the same,exam/Exam2017/OBy_100-200/2017_OBy_120_1,0,"[state, dependable, consistent, steady, prospe...","[consistent, harmonious, coherent]","[state, dependable, prosperous, volatile, reli...",[steady],True,False
83294,Some politicians have come up with an idea to ...,disadvantages,backwards,exam/Exam2017/EGe_1-99/2017_EGe_19_2,0,"[cons, limitations, shortcomings, weaknesses, ...","[cons, limitations, weaknesses, pitfalls, prob...","[benefits, characteristics, alternatives, opti...","[shortcomings, risks, challenges, dangers, haz...",True,False
77723,"As for disadvantages, global warming and air ...",number,amount,exam/Exam2016/2016_MTsy_8_2,0,"[amount, quantity, level, part, value, member,...","[amount, quantity, count, rate, multiplicity]","[level, part, value, member, mark, category, p...","[proportion, multitude]",True,False
74220,It is slightly below 30°C in Yakutsk and 30°C...,trend,tendency,exam/Exam2017/ESa_1-69/2017_ESa_69_1,0,"[tendency, consistency, phenomenon, resurgence...","[tendency, upsurge, pattern, paradigm]","[consistency, phenomenon, resurgence, craze, f...",[shift],True,False
53390,The number of men who are aged between 15 and...,number,part,exam/Exam2014/2014_EPa_22_1,0,"[amount, quantity, level, value, member, count...","[amount, quantity, count, rate, portion, total]","[level, value, member, mark, category, quality...",[proportion],True,False


In [5]:
# Tally the values found in the "Consistent" column.
gold_standard["Consistent"].value_counts()

True    76
Name: Consistent, dtype: int64

In [6]:
# Tally the values found in the "In duplicate names" column.
gold_standard["In duplicate names"].value_counts()

False    76
Name: In duplicate names, dtype: int64

In [7]:
# First rows and (rows, columns) shape of the CatBoostFeatDrop output.
model_dfs["CatBoostFeatDrop"].head(), model_dfs["CatBoostFeatDrop"].shape

(                                          Masked_sentence   Right_answer  \
 153493   The amount of people who has no occupation in...         stable   
 83294   Some politicians have come up with an idea to ...  disadvantages   
 77723    As for disadvantages, global warming and air ...         number   
 74220    It is slightly below 30°C in Yakutsk and 30°C...          trend   
 53390    The number of men who are aged between 15 and...         number   
 
        Wrong_answer                                           variants  
 153493     the same  [state, dependable, consistent, steady, prospe...  
 83294     backwards  [cons, advantages, limitations, shortcomings, ...  
 77723        amount  [amount, quantity, level, part, value, count, ...  
 74220      tendency  [tendency, consistency, phenomenon, resurgence...  
 53390          part  [amount, quantity, level, value, count, rate, ...  ,
 (76, 4))

In [8]:
# First rows and (rows, columns) shape of the CatBoostVecsOnly output.
model_dfs["CatBoostVecsOnly"].head(), model_dfs["CatBoostVecsOnly"].shape

(                                          Masked_sentence   Right_answer  \
 153493   The amount of people who has no occupation in...         stable   
 83294   Some politicians have come up with an idea to ...  disadvantages   
 77723    As for disadvantages, global warming and air ...         number   
 74220    It is slightly below 30°C in Yakutsk and 30°C...          trend   
 53390    The number of men who are aged between 15 and...         number   
 
        Wrong_answer                                           variants  
 153493     the same  [state, dependable, consistent, steady, prospe...  
 83294     backwards  [cons, advantages, limitations, shortcomings, ...  
 77723        amount  [amount, quantity, value, count, multitude, ha...  
 74220      tendency  [tendency, consistency, phenomenon, resurgence...  
 53390          part  [amount, quantity, value, multitude, variety, ...  ,
 (76, 4))

In [9]:
# First rows and (rows, columns) shape of the RandomForestFreqsOnly output.
model_dfs["RandomForestFreqsOnly"].head(), model_dfs["RandomForestFreqsOnly"].shape

(                                          Masked_sentence   Right_answer  \
 153493   The amount of people who has no occupation in...         stable   
 83294   Some politicians have come up with an idea to ...  disadvantages   
 77723    As for disadvantages, global warming and air ...         number   
 74220    It is slightly below 30°C in Yakutsk and 30°C...          trend   
 53390    The number of men who are aged between 15 and...         number   
 
        Wrong_answer                                           variants  
 153493     the same  [state, dependable, consistent, steady, prospe...  
 83294     backwards  [cons, advantages, limitations, shortcomings, ...  
 77723        amount  [amount, quantity, level, part, value, count, ...  
 74220      tendency  [tendency, consistency, phenomenon, resurgence...  
 53390          part  [amount, quantity, level, value, count, rate, ...  ,
 (76, 4))

In [10]:
# First rows and (rows, columns) shape of the XGBAllFeats output.
model_dfs["XGBAllFeats"].head(), model_dfs["XGBAllFeats"].shape

(                                          Masked_sentence   Right_answer  \
 153493   The amount of people who has no occupation in...         stable   
 83294   Some politicians have come up with an idea to ...  disadvantages   
 77723    As for disadvantages, global warming and air ...         number   
 74220    It is slightly below 30°C in Yakutsk and 30°C...          trend   
 53390    The number of men who are aged between 15 and...         number   
 
        Wrong_answer                                           variants  
 153493     the same  [state, dependable, consistent, steady, prospe...  
 83294     backwards  [cons, limitations, shortcomings, weaknesses, ...  
 77723        amount  [amount, quantity, level, part, value, count, ...  
 74220      tendency  [tendency, consistency, craze, fad, upsurge, p...  
 53390          part  [amount, quantity, level, value, count, propor...  ,
 (76, 4))

In [11]:
# One flag per model (in MODEL_FOLDERS order): does its index match the gold standard's?
tuple(
    gold_standard.index.equals(model_dfs[folder].index)
    for folder in MODEL_FOLDERS
)

(True, True, True, True)

In [12]:
def _ranked_overlap(ranked, labelled):
    """Return the distinct items of `ranked` that also occur in `labelled`.

    The result keeps the order of `ranked`, so it is identical on every run.
    Building it with `list(set(...) & set(...))` made the order depend on
    Python's per-process string hashing and discarded the original ordering.

    Args:
        ranked: Sequence of candidate items, in the order they should be kept.
        labelled: Collection of items that carry the label of interest.

    Returns:
        A list with each matching item once, in `ranked` order.
    """
    wanted = set(labelled)
    # dict.fromkeys drops repeated items while preserving first-seen order.
    return [item for item in dict.fromkeys(ranked) if item in wanted]


# Gold-standard label columns that every model's top variants are matched against.
LABEL_COLUMNS = ("Too good", "Too bad", "Appropriate")

# One record per gold-standard row: the sentence and its answers, followed by,
# for every model, which of its first TOP_K variants fall under each label.
comparison_rows = []

for idx in gold_standard.index:
    gs_row = gold_standard.loc[idx]

    record = {
        "idx": idx,
        "Masked_sentence": gs_row["Masked_sentence"],
        "Right_answer": gs_row["Right_answer"],
        "Wrong_answer": gs_row["Wrong_answer"],
    }

    for model, model_df in model_dfs.items():
        # Only the first TOP_K variants of each model take part in the comparison.
        top_variants = model_df.loc[idx, "variants"][:TOP_K]
        for label in LABEL_COLUMNS:
            record[f"{model} {label}"] = _ranked_overlap(top_variants, gs_row[label])

    comparison_rows.append(record)

new_df = pd.DataFrame(comparison_rows)

In [13]:
# Preview the first rows of the per-model comparison table.
new_df.head()

Unnamed: 0,idx,Masked_sentence,Right_answer,Wrong_answer,CatBoostFeatDrop Too good,CatBoostFeatDrop Too bad,CatBoostFeatDrop Appropriate,CatBoostVecsOnly Too good,CatBoostVecsOnly Too bad,CatBoostVecsOnly Appropriate,RandomForestFreqsOnly Too good,RandomForestFreqsOnly Too bad,RandomForestFreqsOnly Appropriate,XGBAllFeats Too good,XGBAllFeats Too bad,XGBAllFeats Appropriate
0,153493,The amount of people who has no occupation in...,stable,the same,[steady],"[state, dependable]",[consistent],[steady],"[state, dependable]",[consistent],[steady],"[state, dependable]",[consistent],[steady],"[state, dependable]",[consistent]
1,83294,Some politicians have come up with an idea to ...,disadvantages,backwards,[shortcomings],[],"[limitations, cons]",[shortcomings],[],"[limitations, cons]",[shortcomings],[],"[limitations, cons]",[shortcomings],[],"[limitations, cons, weaknesses]"
2,77723,"As for disadvantages, global warming and air ...",number,amount,[],"[level, part]","[quantity, amount]",[],[value],"[quantity, count, amount]",[],"[level, part]","[quantity, amount]",[],"[level, part]","[quantity, amount]"
3,74220,It is slightly below 30°C in Yakutsk and 30°C...,trend,tendency,[],"[consistency, resurgence, phenomenon]",[tendency],[],"[consistency, resurgence, phenomenon]",[tendency],[],"[consistency, resurgence, phenomenon]",[tendency],[],"[fad, craze, consistency]",[tendency]
4,53390,The number of men who are aged between 15 and...,number,part,[],"[value, level]","[quantity, amount]",[],"[value, multitude]","[quantity, amount]",[],"[value, level]","[quantity, amount]",[],"[value, level]","[quantity, amount]"


In [14]:
# Create the output folder if needed so the export also works when "tables/"
# does not exist yet, then write the comparison as a semicolon-separated CSV.
os.makedirs("tables", exist_ok=True)
new_df.to_csv("tables/model_outputs_comparison.csv", sep=";")