In [55]:
import json
import glob
import pandas as pd
from IPython.core.display import display, HTML 

# Build one record per dumped result file: the first top-level key of the JSON
# document is split on "--" to obtain the corpus (first part) and the model
# (last part), and the values stored under that key are merged into the record.
RUNS = []
# glob.glob() yields paths in a filesystem-dependent order; sorting them keeps
# the DataFrame's row index identical from one run (or machine) to the next.
for file in sorted(glob.glob("dumped-results/*.json")):
    # JSON is read as UTF-8 explicitly instead of relying on the locale default.
    with open(file, encoding="utf-8") as f:
        data = json.load(f)
    # Only the first key of each file is used; any further keys are ignored.
    c = next(iter(data))
    RUNS.append({"corpus": c.split("--")[0], "model": c.split("--")[-1], **data[c]})


df = pd.DataFrame(RUNS)
# Keep the identification and score columns only, ordered by ascending
# recall on the positive class. A stable sort keeps tied rows in load order.
filtered = df[
    ["corpus", "model", "fscore", "fscore-positive", "recall-positive", "precision-positive"]
].sort_values(by="recall-positive", kind="mergesort")
def whatisenriched(val):
    """Return the enrichment label for the model name ``val``.

    The markers are tried in order and the first one contained in ``val``
    decides the result: "LinearEnriched" -> "Linear", "EnriLSTM" -> "LSTM",
    "EnriAttention" -> "Attention". An empty string is returned when none
    of them occurs.
    """
    marker_labels = (
        ("LinearEnriched", "Linear"),
        ("EnriLSTM", "LSTM"),
        ("EnriAttention", "Attention"),
    )
    return next((label for marker, label in marker_labels if marker in val), "")
def whatismsd(val):
    """Return the MSD label for the model name ``val``.

    "Agglo." when the name contains "AggloMSD"; otherwise an empty string
    when it contains "NoMorph"; otherwise "Split".
    """
    if "AggloMSD" in val:
        return "Agglo."
    # Every name without either marker falls through to "Split".
    return "" if "NoMorph" in val else "Split"
def whatisencoder(val):
    """Return the encoder label for the model name ``val``.

    The markers are tried in the order listed below and the first one
    contained in ``val`` wins, so an "Enri*" marker takes precedence over
    "HAN", which in turn takes precedence over "AttentPool". An empty string
    is returned when no marker occurs in the name.
    """
    # (marker searched in the name, label returned) -- order is significant.
    encoder_markers = (
        ("EnriLSTM", "LSTM"),
        ("EnriAttention", "Attention"),
        ("HAN", "HAN"),
        ("AttentPool", "AttentPool"),
    )
    for marker, label in encoder_markers:
        if marker in val:
            return label
    return ""

def whatmetadata(val):
    """Return the metadata label for the model name ``val``.

    Names for which ``whatisenriched`` returns an empty value get an empty
    string. Otherwise the result is "Form,Century" when the name contains
    "NoAuthorCitation", "Form,Century,Citation" when it contains "NoAuthor",
    and "All" in every other case.
    """
    if not whatisenriched(val):
        return ""
    # "NoAuthor" is a prefix of "NoAuthorCitation", so the longer marker has
    # to be tested first.
    for marker, label in (
        ("NoAuthorCitation", "Form,Century"),
        ("NoAuthor", "Form,Century,Citation"),
    ):
        if marker in val:
            return label
    return "All"
    
def whatmodel(val):
    """Return the pretrained-embedding label for the model name ``val``.

    "FastText" when the name contains "FastText"; otherwise "Word2Vec" when
    it contains "Pretrained"; otherwise an empty string.
    """
    # "FastText" is checked first, so a name carrying both markers is
    # labelled "FastText".
    if "FastText" in val:
        return "FastText"
    return "Word2Vec" if "Pretrained" in val else ""

def hidden(val):
    """Return the size label ("256" or "128") for the model name ``val``.

    Names containing the substring "128" are labelled "256"; all other names
    are labelled "128". The test is a plain substring match, so any
    occurrence of "128" in the name triggers the first case.
    """
    # NOTE(review): a name containing "128" maps to "256" and vice versa --
    # this looks like it could be inverted; confirm against how the runs
    # were named before relying on the "Size" column.
    return "256" if "128" in val else "128"

# Add the descriptive columns derived from each row's model name. The order
# of the assignments fixes the column order of the displayed tables:
# Enriched, MSD, Encoder, Metadata, Pretrained, Bert, Size.
for _column, _labeller in (
    ("Enriched", whatisenriched),
    ("MSD", whatismsd),
    ("Encoder", whatisencoder),
    ("Metadata", whatmetadata),
    ("Pretrained", whatmodel),
):
    filtered[_column] = [_labeller(_name) for _name in filtered["model"]]
# Boolean flag: True when the model name contains "Bert".
filtered["Bert"] = filtered["model"].str.contains("Bert")
filtered["Size"] = [hidden(_name) for _name in filtered["model"]]


def get(filt):
    """Render two HTML tables for the rows of ``filtered`` selected by ``filt``.

    The first table shows every selected row with the "corpus" column left
    out. The second one, placed under a "Without all metadata" heading, keeps
    only the rows whose "Metadata" value is neither "All" nor
    "Form,Century,Citation".

    filt: boolean mask used to index the module-level ``filtered`` frame.
    Returns None; the tables are emitted through ``display``.
    """
    shown_columns = [name for name in filtered.columns if name != "corpus"]
    selection = filtered[filt][shown_columns]
    display(HTML(selection.to_html()))

    display(HTML("<h3>Without all metadata</h3>"))
    metadata = selection["Metadata"]
    reduced_metadata = (metadata != "All") & (metadata != "Form,Century,Citation")
    display(HTML(selection[reduced_metadata].to_html()))

## Main models

In [56]:
# Show the runs whose corpus name contains "model".
main_model_runs = filtered["corpus"].str.contains("model")
get(main_model_runs)

Unnamed: 0,model,fscore,fscore-positive,recall-positive,precision-positive,Enriched,MSD,Encoder,Metadata,Pretrained,Bert,Size
70,LinearBertTokenOnlyWithHighway,0.775342,0.582474,0.450199,0.824818,,Split,,,,True,128
36,model-128-LinearBertTokenOnly,0.776813,0.585859,0.462151,0.8,,Split,,,,True,256
105,model-128-LinearBertTokenOnlyWithHighway,0.777847,0.58794,0.466135,0.795918,,Split,,,,True,256
44,LinearBertTokenOnly,0.780217,0.593137,0.482072,0.770701,,Split,,,,True,128
27,LinearVanillaAggloMSD-AttentPool-Pretrained,0.820632,0.668281,0.549801,0.851852,,Agglo.,AttentPool,,Word2Vec,False,128
17,model-128-LinearVanillaAggloMSD-EnriAttention,0.84108,0.706444,0.589641,0.880952,Attention,Agglo.,Attention,All,,False,256
34,model-128-LinearVanilla,0.831596,0.690583,0.613546,0.789744,,Split,,,,False,256
25,LinearVanillaAggloMSD,0.856086,0.73516,0.641434,0.860963,,Agglo.,,,,False,128
91,LinearVanillaAggloMSD-HAN,0.852922,0.730088,0.657371,0.820895,,Agglo.,HAN,,,False,128
72,model-128-LinearVanillaAggloMSD-AttentPool,0.856249,0.736142,0.661355,0.83,,Agglo.,AttentPool,,,False,256


Unnamed: 0,model,fscore,fscore-positive,recall-positive,precision-positive,Enriched,MSD,Encoder,Metadata,Pretrained,Bert,Size
70,LinearBertTokenOnlyWithHighway,0.775342,0.582474,0.450199,0.824818,,Split,,,,True,128
36,model-128-LinearBertTokenOnly,0.776813,0.585859,0.462151,0.8,,Split,,,,True,256
105,model-128-LinearBertTokenOnlyWithHighway,0.777847,0.58794,0.466135,0.795918,,Split,,,,True,256
44,LinearBertTokenOnly,0.780217,0.593137,0.482072,0.770701,,Split,,,,True,128
27,LinearVanillaAggloMSD-AttentPool-Pretrained,0.820632,0.668281,0.549801,0.851852,,Agglo.,AttentPool,,Word2Vec,False,128
34,model-128-LinearVanilla,0.831596,0.690583,0.613546,0.789744,,Split,,,,False,256
25,LinearVanillaAggloMSD,0.856086,0.73516,0.641434,0.860963,,Agglo.,,,,False,128
91,LinearVanillaAggloMSD-HAN,0.852922,0.730088,0.657371,0.820895,,Agglo.,HAN,,,False,128
72,model-128-LinearVanillaAggloMSD-AttentPool,0.856249,0.736142,0.661355,0.83,,Agglo.,AttentPool,,,False,256
79,LinearVanilla-NoMorph-EnriLSTM-Pretrained-NoAuthorCitation-FastText,0.86482,0.751678,0.669323,0.857143,LSTM,,LSTM,"Form,Century",FastText,False,128


## Metaphors

In [58]:
# Show the runs whose corpus name contains "metaphors" but not "inversed".
corpus_names = filtered["corpus"]
get(corpus_names.str.contains("metaphors") & ~corpus_names.str.contains("inversed"))

Unnamed: 0,model,fscore,fscore-positive,recall-positive,precision-positive,Enriched,MSD,Encoder,Metadata,Pretrained,Bert,Size
71,LinearVanilla-NoMorph-EnriLSTM-Pretrained-NoAuthorCitation,0.580797,0.266275,0.154108,0.978395,LSTM,,LSTM,"Form,Century",Word2Vec,False,128
89,LinearVanillaAggloMSD-HAN-Pretrained,0.592286,0.287495,0.168206,0.988571,,Agglo.,HAN,,Word2Vec,False,128
4,LinearBertTokenOnly,0.617832,0.335344,0.202722,0.969767,,Split,,,,True,128
93,LinearVanilla,0.620245,0.339501,0.205153,0.983683,,Split,,,,False,128
33,LinearVanillaAggloMSD-EnriAttention-Pretrained,0.620459,0.340152,0.206125,0.972477,Attention,Agglo.,Attention,All,Word2Vec,False,128
29,LinearVanillaAggloMSD,0.621939,0.342328,0.206612,0.997653,,Agglo.,,,,False,128
45,LinearVanillaAggloMSD-Pretrained,0.627415,0.352753,0.214876,0.98441,,Agglo.,,,Word2Vec,False,128
87,LinearBertTokenOnlyWithHighway,0.639578,0.375734,0.23335,0.963855,,Split,,,,True,128
99,LinearVanillaAggloMSD-AttentPool,0.641323,0.379095,0.236266,0.95858,,Agglo.,AttentPool,,,False,128
96,LinearVanillaAggloMSD-EnriAttention-Pretrained,0.647235,0.389922,0.244531,0.961759,Attention,Agglo.,Attention,All,Word2Vec,False,128


Unnamed: 0,model,fscore,fscore-positive,recall-positive,precision-positive,Enriched,MSD,Encoder,Metadata,Pretrained,Bert,Size
71,LinearVanilla-NoMorph-EnriLSTM-Pretrained-NoAuthorCitation,0.580797,0.266275,0.154108,0.978395,LSTM,,LSTM,"Form,Century",Word2Vec,False,128
89,LinearVanillaAggloMSD-HAN-Pretrained,0.592286,0.287495,0.168206,0.988571,,Agglo.,HAN,,Word2Vec,False,128
4,LinearBertTokenOnly,0.617832,0.335344,0.202722,0.969767,,Split,,,,True,128
93,LinearVanilla,0.620245,0.339501,0.205153,0.983683,,Split,,,,False,128
29,LinearVanillaAggloMSD,0.621939,0.342328,0.206612,0.997653,,Agglo.,,,,False,128
45,LinearVanillaAggloMSD-Pretrained,0.627415,0.352753,0.214876,0.98441,,Agglo.,,,Word2Vec,False,128
87,LinearBertTokenOnlyWithHighway,0.639578,0.375734,0.23335,0.963855,,Split,,,,True,128
99,LinearVanillaAggloMSD-AttentPool,0.641323,0.379095,0.236266,0.95858,,Agglo.,AttentPool,,,False,128
10,LinearVanillaAggloMSD-HAN-Pretrained,0.650062,0.395214,0.248906,0.958802,,Agglo.,HAN,,Word2Vec,False,128
30,LinearVanillaAggloMSD-AttentPool,0.657093,0.407834,0.258143,0.97075,,Agglo.,AttentPool,,,False,128


## Not metaphors

In [59]:
# Show the runs whose corpus name contains both "metaphors" and "inversed".
corpus_names = filtered["corpus"]
get(corpus_names.str.contains("metaphors") & corpus_names.str.contains("inversed"))

Unnamed: 0,model,fscore,fscore-positive,recall-positive,precision-positive,Enriched,MSD,Encoder,Metadata,Pretrained,Bert,Size
65,LinearVanillaAggloMSD-HAN,0.76309,0.549223,0.461874,0.677316,,Agglo.,HAN,,,False,128
95,LinearVanillaAggloMSD,0.772122,0.567127,0.492375,0.668639,,Agglo.,,,,False,128
8,LinearBertTokenOnly,0.764104,0.552727,0.496732,0.622951,,Split,,,,True,128
56,LinearVanilla,0.775209,0.573529,0.509804,0.655462,,Split,,,,False,128
47,LinearBertLemma-HAN,0.799044,0.617761,0.522876,0.754717,,Split,HAN,,,True,128
32,LinearBertTokenOnlyWithHighway,0.778675,0.581609,0.551198,0.615572,,Split,,,,True,128


Unnamed: 0,model,fscore,fscore-positive,recall-positive,precision-positive,Enriched,MSD,Encoder,Metadata,Pretrained,Bert,Size
65,LinearVanillaAggloMSD-HAN,0.76309,0.549223,0.461874,0.677316,,Agglo.,HAN,,,False,128
95,LinearVanillaAggloMSD,0.772122,0.567127,0.492375,0.668639,,Agglo.,,,,False,128
8,LinearBertTokenOnly,0.764104,0.552727,0.496732,0.622951,,Split,,,,True,128
56,LinearVanilla,0.775209,0.573529,0.509804,0.655462,,Split,,,,False,128
47,LinearBertLemma-HAN,0.799044,0.617761,0.522876,0.754717,,Split,HAN,,,True,128
32,LinearBertTokenOnlyWithHighway,0.778675,0.581609,0.551198,0.615572,,Split,,,,True,128
