In [17]:
import pandas as pd
import numpy as np

# Result files, one per dataset. They are read as tab-separated text even
# though the file names end in ``.csv``.
perf_amazon_reviews_multi_en = 'nlp_stitching-amazon_reviews_multi_en.csv'
perf_dbpedia_14 = 'nlp_stitching-dbpedia_14.csv'
perf_trec = 'nlp_stitching-trec.csv'

# ``index_col=0`` turns the first column of each file into the row index.
trec = pd.read_csv(perf_trec, sep="\t", index_col=0)
dbpedia = pd.read_csv(perf_dbpedia_14, sep="\t", index_col=0)
amazon = pd.read_csv(perf_amazon_reviews_multi_en, sep="\t", index_col=0)

# Only the last expression of a cell is rendered, so a single frame is shown
# here as a sanity check of the loaded schema.
amazon

Unnamed: 0,seed,embed_type,embed_transformer,classifier_transformer,precision,recall,fscore,stitched
0,0,absolute,bert-base-cased,bert-base-cased,0.576667,0.5718,0.572847,False
1,0,absolute,bert-base-cased,bert-base-uncased,0.211683,0.2100,0.209411,True
2,0,absolute,bert-base-cased,google/electra-base-discriminator,0.170813,0.1740,0.171458,True
3,0,absolute,bert-base-cased,roberta-base,0.180815,0.1868,0.181710,True
4,0,absolute,bert-base-cased,xlm-roberta-base,0.165387,0.1722,0.165767,True
...,...,...,...,...,...,...,...,...
245,4,relative,xlm-roberta-base,bert-base-cased,0.483128,0.4848,0.472236,True
246,4,relative,xlm-roberta-base,bert-base-uncased,0.469431,0.4768,0.459729,True
247,4,relative,xlm-roberta-base,google/electra-base-discriminator,0.435226,0.4546,0.436225,True
248,4,relative,xlm-roberta-base,roberta-base,0.459044,0.4700,0.455128,True


In [28]:
def display_results(df):
    """Print and return f-score statistics per ``(embed_type, stitched)`` group.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing at least the columns ``seed``, ``precision``,
        ``recall``, ``embed_transformer``, ``classifier_transformer``,
        ``embed_type`` and ``stitched``. The first five are dropped before
        aggregating; every remaining column is aggregated.

    Returns
    -------
    pd.DataFrame
        Indexed by ``(embed_type, stitched)``, with one ``mean`` / ``std`` /
        ``count`` column triple per aggregated column.

    Raises
    ------
    KeyError
        If any of the dropped or grouping columns is missing from ``df``.
    """
    ignored_columns = ['seed', 'precision', 'recall', 'embed_transformer', 'classifier_transformer']
    summary = (
        df.drop(columns=ignored_columns)
        .groupby(['embed_type', 'stitched'])
        # String names select pandas' own group-wise reductions; passing the
        # NumPy callables (np.mean / np.std) here is deprecated by pandas.
        .agg(['mean', 'std', 'count'])
    )
    # The callers assign the return value, which suppresses rich display, so
    # the table is printed explicitly.
    print(summary)
    return summary

In [29]:
# Summarise the TREC runs per (embed_type, stitched); the result is reused when building the LaTeX rows.
trec_df = display_results(trec)

                       fscore                
                         mean       std count
embed_type stitched                          
absolute   False     0.913593  0.013512    25
           True      0.215081  0.038997   100
relative   False     0.872361  0.024718    25
           True      0.744857  0.053133   100


In [30]:
# Summarise the DBpedia runs per (embed_type, stitched); the result is reused when building the LaTeX rows.
dbpedia_df = display_results(dbpedia)

                       fscore                
                         mean       std count
embed_type stitched                          
absolute   False     0.985899  0.005201    25
           True      0.070190  0.014582   100
relative   False     0.974862  0.018109    25
           True      0.807910  0.191644   100


In [31]:
# Summarise the Amazon Reviews runs per (embed_type, stitched); the result is reused when building the LaTeX rows.
amazon_df = display_results(amazon)

                       fscore                
                         mean       std count
embed_type stitched                          
absolute   False     0.591791  0.014637    25
           True      0.193744  0.017599   100
relative   False     0.558625  0.015485    25
           True      0.442808  0.029609   100


In [60]:
def latex_float(f):
    """Format a number as fixed-point text with exactly two decimals.

    The ``.2f`` presentation type never produces exponent notation, so the
    result can be placed in a LaTeX math cell as-is; no mantissa/exponent
    rewriting is required.

    Parameters
    ----------
    f : float
        Value to format.

    Returns
    -------
    str
        ``f`` rendered with two digits after the decimal point.
    """
    return "{0:.2f}".format(f)

def extract_mean_std(df: pd.DataFrame, model_type: str, stitching: bool) -> str:
    r"""Render the f-score of one group as a LaTeX ``$mean \pm std$`` cell.

    Parameters
    ----------
    df : pd.DataFrame
        Summary frame whose rows are indexed by ``(embed_type, stitched)`` and
        whose columns contain ``('fscore', 'mean')`` and ``('fscore', 'std')``.
    model_type : str
        First level of the row key (e.g. ``"absolute"`` or ``"relative"``).
    stitching : bool
        Second level of the row key.

    Returns
    -------
    str
        Mean and standard deviation multiplied by 100 and rounded to two
        decimals, or ``"?"`` when the requested row or statistic is missing
        (or ``df`` has no ``.loc``), so that a table row can still be emitted.
    """
    try:
        # Address the row by its full key and the column by its top level
        # explicitly, rather than letting pandas guess whether
        # ``(model_type, stitching)`` means (row, column).
        stats = df.loc[(model_type, stitching), 'fscore']
        # Only mean and std are converted to percentages; ``count`` is not a
        # score and is left untouched.
        percent = (stats[['mean', 'std']] * 100).round(2)
        mean_txt = latex_float(percent['mean'])
        std_txt = latex_float(percent['std'])
    except (AttributeError, KeyError):
        # Deliberate best-effort: a missing entry becomes a placeholder cell.
        return "?"
    return rf"${mean_txt} \pm {std_txt}$"
    
# Row template: model type, stitching label, then one cell per dataset.
classification_rel = r"{} & {} & {} & {} & {} \\[1ex]"

# Column order of the table: TREC, DBpedia, Amazon Reviews.
_summaries = (trec_df, dbpedia_df, amazon_df)
# Stitching flag paired with the label printed in the table.
_stitch_labels = ((False, 'Non-Stitch'), (True, 'Stitch'))

for available_model_type in ("absolute", "relative"):
    for stitching, stit_name in _stitch_labels:
        cells = [
            extract_mean_std(summary, available_model_type, stitching)
            for summary in _summaries
        ]
        s = classification_rel.format(available_model_type, stit_name, *cells)
        print(s)

absolute & Non-Stitch & $91.36 \pm 1.35$ & $98.59 \pm 0.52$ & $59.18 \pm 1.46$ \\[1ex]
absolute & Stitch & $21.51 \pm 3.90$ & $7.02 \pm 1.46$ & $19.37 \pm 1.76$ \\[1ex]
relative & Non-Stitch & $87.24 \pm 2.47$ & $97.49 \pm 1.81$ & $55.86 \pm 1.55$ \\[1ex]
relative & Stitch & $74.49 \pm 5.31$ & $80.79 \pm 19.16$ & $44.28 \pm 2.96$ \\[1ex]


In [36]:
# Inspect the summary with the (embed_type, stitched) index levels flattened into ordinary columns.
amazon_df.reset_index()

Unnamed: 0_level_0,embed_type,stitched,fscore,fscore,fscore
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count
0,absolute,False,0.591791,0.014637,25
1,absolute,True,0.193744,0.017599,100
2,relative,False,0.558625,0.015485,25
3,relative,True,0.442808,0.029609,100


In [46]:
# Look up a single statistic with one explicit (row key, column key) access
# instead of chained indexing on an ambiguous ``.loc[a, b]``.
amazon_df.loc[('absolute', True), ('fscore', 'mean')]

0.1937444515128519