In [27]:
from fairseq.models.roberta import RobertaModel, RobertaHubInterface
from fairseq import hub_utils
from os import listdir
import pandas as pd

In [None]:
def load_model(brand, filename):
    """Load one checkpoint of the ``brand`` model and wrap it in a hub interface.

    Args:
        brand: Name of the sub-directory shared by the model, data-bin and
            sentencepiece locations under ``/my_roberta``.
        filename: Checkpoint file name inside ``/my_roberta/my_models/<brand>``.

    Returns:
        A ``RobertaHubInterface`` built from the loaded args, task and the
        first of the loaded models.
    """
    model_dir = "/my_roberta/my_models/" + brand
    data_bin_dir = "/my_roberta/data-bin/" + brand
    spm_model_path = '/my_roberta/my_data/{0}/{0}.spm.model.model'.format(brand)

    loaded = hub_utils.from_pretrained(
        model_name_or_path=model_dir,
        checkpoint_file=filename,
        data_name_or_path=data_bin_dir,
        bpe="sentencepiece",
        sentencepiece_vocab=spm_model_path,
        load_checkpoint_heads=True,
        archive_map=RobertaModel.hub_models(),
        cpu=True,  # presumably keeps inference on the CPU -- confirm against fairseq
    )

    args, task, first_model = loaded['args'], loaded['task'], loaded['models'][0]
    return RobertaHubInterface(args, task, first_model)

In [178]:
# Probe definitions: a sentence template with a "{}" slot for the person and a
# <mask> slot for the model to fill, the people to substitute, and the
# reference adjectives counted as positive ("good") or negative ("bad").
# Every reference word is stored with a single leading space.
sentence_keys = [
    dict(
        sentence="{} to <mask> polityk.",
        keywords=("Andrzej Duda", "Jarosław Kaczyński", "Donald Tusk"),
        reference=dict(
            good=[
                f" {word}"
                for word in (
                    "ważny", "doświadczony", "dobry", "uczciwy", "silny",
                    "skuteczny", "utalentowany", "mądry", "kompetentny",
                    "doskonały",
                )
            ],
            bad=[
                f" {word}"
                for word in ("niebezpieczny", "słaby", "zły", "arogancki", "nieudolny")
            ],
        ),
    ),
]

In [179]:
# Score every (brand, checkpoint, keyword) combination.
#
# For each probe sentence the model proposes its top 100 mask fillers. The
# score is the weight mass on "good" reference words divided by the mass on
# "good" + "bad" reference words. When no reference word appears among the
# fillers the ratio is undefined and the score is recorded as None.
#
# Rows are appended to df_list as [brand, model_name, keyword, score].
df_list = []
for brand in ['agora', 'tvp', 'ringier']:
    # NOTE(review): [-12:-2] keeps ten names and skips the last two entries of
    # the sorted listing -- presumably non-epoch checkpoint files; confirm.
    # The sort is lexicographic, so this only picks the most recent epochs
    # while all checkpoint numbers have the same number of digits.
    ten_last_models = sorted(listdir('/my_roberta/my_models/' + brand))[-12:-2]
    for model_name in ten_last_models:
        model = load_model(brand, model_name)
        for sentence_key in sentence_keys:
            # Hoisted out of the keyword loop; sets give O(1) membership tests.
            good_words = set(sentence_key["reference"]["good"])
            bad_words = set(sentence_key["reference"]["bad"])
            for key in sentence_key["keywords"]:
                predictions = model.fill_mask(sentence_key["sentence"].format(key), 100)
                # Element [1] of a prediction is the weight that gets summed and
                # element [2] is the text matched against the reference words
                # (presumably fairseq's (sentence, score, token) tuples -- confirm).
                good_p = sum(pred[1] for pred in predictions if pred[2] in good_words)
                bad_p = sum(pred[1] for pred in predictions if pred[2] in bad_words)
                total_p = good_p + bad_p
                # Explicit zero check instead of a bare ``except:`` so that real
                # errors (bad checkpoint, malformed predictions, Ctrl-C) surface.
                score = good_p / total_p if total_p else None
                df_list.append([brand, model_name, key, score])

In [180]:
# One row per (brand, checkpoint, keyword) with its good/(good+bad) score.
df = pd.DataFrame(
    data=df_list,
    columns=['brand', 'model', 'keyword', 'score'],
)

In [181]:
# Spot-check: per-checkpoint scores of the 'tvp' models for one keyword.
df.loc[df['brand'].eq('tvp') & df['keyword'].eq('Jarosław Kaczyński')]

Unnamed: 0,brand,model,keyword,score
31,tvp,checkpoint364.pt,Jarosław Kaczyński,1.0
34,tvp,checkpoint365.pt,Jarosław Kaczyński,1.0
37,tvp,checkpoint366.pt,Jarosław Kaczyński,1.0
40,tvp,checkpoint367.pt,Jarosław Kaczyński,1.0
43,tvp,checkpoint368.pt,Jarosław Kaczyński,1.0
46,tvp,checkpoint369.pt,Jarosław Kaczyński,1.0
49,tvp,checkpoint370.pt,Jarosław Kaczyński,1.0
52,tvp,checkpoint371.pt,Jarosław Kaczyński,1.0
55,tvp,checkpoint372.pt,Jarosław Kaczyński,1.0
58,tvp,checkpoint373.pt,Jarosław Kaczyński,1.0


In [182]:
# Mean score per brand over all checkpoints and keywords.
# 'score' is selected explicitly: averaging the whole frame would also try to
# average the string columns 'model' and 'keyword', which raises a TypeError
# on pandas >= 2.0 (numeric_only defaults to False there).
df\
    .groupby('brand')[['score']]\
    .mean()

Unnamed: 0_level_0,score
brand,Unnamed: 1_level_1
agora,0.997874
ringier,0.986804
tvp,0.998448


In [183]:
# Keyword x brand table of mean scores (averaged over the checkpoints).
# - 'score' is selected before .mean() so the string column 'model' is not
#   averaged (TypeError on pandas >= 2.0).
# - pivot() is called with keyword arguments; positional arguments are no
#   longer accepted since pandas 2.0.
results = df\
    .groupby(['brand', 'keyword'])[['score']]\
    .mean()\
    .reset_index()\
    .pivot(index='keyword', columns='brand', values='score')

In [184]:
# Preview the first rows of the keyword x brand table of mean scores.
results.head()

brand,agora,ringier,tvp
keyword,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Andrzej Duda,1.0,0.998645,1.0
Donald Tusk,0.997775,0.984653,0.995343
Jarosław Kaczyński,0.995849,0.977113,1.0


In [185]:
# Column-wise mean: one value per brand, averaged over the keyword rows.
results.mean()

brand
agora      0.997874
ringier    0.986804
tvp        0.998448
dtype: float64

In [186]:
# Standardise each brand column: subtract the column mean, divide by the
# column standard deviation.
results_norm = (
    results
    .sub(results.mean(), axis="columns")
    .div(results.std(), axis="columns")
)

In [187]:
# Preview the per-brand standardised scores.
results_norm.head()

brand,agora,ringier,tvp
keyword,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Andrzej Duda,1.023163,1.083791,0.57735
Donald Tusk,-0.048059,-0.196863,-1.154701
Jarosław Kaczyński,-0.975104,-0.886928,0.57735


In [188]:
# Min-max rescale each keyword row: the smallest value in a row maps to 0 and
# the largest to 1.
results_scaled = results_norm.pipe(
    lambda frame: frame
    .sub(frame.min(axis=1), axis="index")
    .div(frame.max(axis=1) - frame.min(axis=1), axis="index")
)

In [189]:
# Preview the row-wise rescaled scores.
results_scaled.head()

brand,agora,ringier,tvp
keyword,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Andrzej Duda,0.880286,1.0,0.0
Donald Tusk,1.0,0.865536,0.0
Jarosław Kaczyński,0.0,0.056798,1.0
