In [2]:
import sys
import os

# Project root = parent directory of the notebook's working directory.
# os.path.dirname is used instead of splitting on os.path.sep and re-joining
# behind a hard-coded "/" so the result is also valid where the filesystem
# root is not "/" (e.g. Windows drive letters).
thesis_path = os.path.dirname(os.getcwd())
# Make the project root importable; the membership check keeps sys.path from
# accumulating duplicate entries when this cell is re-run.
if thesis_path not in sys.path:
    sys.path.append(thesis_path)


In [None]:
from typing import Dict, List
import pandas as pd
import pickle 
from glob import glob
from models.metrics import ASPMetrics
import json
import os

def get_per_sample_metrics():
    """Return the per-sample metrics of all 01_performance metric pickles.

    The result is cached as a pickled DataFrame at
    ``<thesis_path>/evaluations/metrics/01_performance_per_sample_metrics.pkl``.
    If that file exists it is read and returned unchanged; otherwise the frame
    is rebuilt from the experiment pickles and written to the cache.

    Returns:
        pandas.DataFrame: one row per record produced by
        ``metrics.metrics_per_sample(...)``, with ``seed``, ``model``,
        ``checkpoint`` and ``dataset`` entries added to each record.
    """
    cache_path = os.path.join(thesis_path, "evaluations", "metrics",
                              "01_performance_per_sample_metrics.pkl")
    if os.path.exists(cache_path):
        return pd.read_pickle(cache_path)

    # Load the three dataset splits and the type inventory that
    # metrics_per_sample needs.
    data_dir = os.path.join(thesis_path, "data", "mlowner")
    datasets: Dict[str, List[dict]] = {}
    for dataset_name in ("lowner_train", "lowner_dev", "lowner_test"):
        dataset_path = os.path.join(data_dir, dataset_name + ".json")
        with open(dataset_path, "r", encoding="utf-8") as file:
            datasets[dataset_name] = json.load(file)
    with open(os.path.join(data_dir, "lowner_types.json"), "r", encoding="utf-8") as file:
        types = json.load(file)

    pattern = os.path.join(thesis_path, "experiments", "01_performance", "data",
                           "**", "04_metrics", "**", "*.pkl")
    all_metrics = []
    for metrics_filepath in glob(pattern, recursive=True):
        fp = metrics_filepath.split(os.path.sep)
        # NOTE(review): pickle.load can execute arbitrary code; only run this on
        # metric files produced by this project.
        with open(metrics_filepath, "rb") as file:
            metrics: ASPMetrics = pickle.load(file)
        # Run metadata is encoded in the path. The file name is sliced at fixed
        # offsets: first 4 characters -> checkpoint, the remainder after one
        # separator character up to the first "." -> dataset.
        dataset = fp[-1][5:].split(".")[0]
        # NOTE(review): only the last character of the directory name is used,
        # so a multi-digit seed would be truncated - confirm seeds are 0-9.
        seed = fp[-4][-1]
        model = fp[-2]
        checkpoint = fp[-1][:4]
        per_sample = metrics.metrics_per_sample(datasets[dataset], types)
        for sample_metrics in per_sample.to_dict(orient="records"):
            all_metrics.append({
                "seed": seed,
                "model": model,
                "checkpoint": checkpoint,
                "dataset": dataset,
                **sample_metrics
            })

    metrics_df = pd.DataFrame.from_records(all_metrics)
    # Create the cache directory first so the rebuild is not lost to a
    # missing-directory error when writing the pickle.
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    metrics_df.to_pickle(cache_path)
    return metrics_df

In [None]:
# Build the per-sample metrics frame, or read it from the pickle cache if present.
# NOTE(review): a later cell rebinds `metrics_df` to the per-file F1 table, so the
# per-sample frame is no longer reachable under this name after that cell runs.
metrics_df = get_per_sample_metrics()

In [25]:
import pandas as pd
import pickle 
from glob import glob
from models.metrics import ASPMetrics

# Collect one overall-F1 record per metrics pickle of the 01_performance runs.
metrics_glob = os.path.join(
    thesis_path, "experiments", "01_performance", "data",
    "**", "04_metrics", "**", "*.pkl",
)

all_metrics = []
for pkl_path in glob(metrics_glob, recursive=True):
    # Run metadata (seed, model, checkpoint, dataset) is read off the path parts.
    parts = pkl_path.split(os.path.sep)
    with open(pkl_path, "rb") as handle:
        metrics: ASPMetrics = pickle.load(handle)
    record = {
        "seed": parts[-4][-1],
        "model": parts[-2],
        "checkpoint": parts[-1][:4],
        "dataset": parts[-1][5:].split(".")[0],
        "f1": float(metrics.f1().cpu().numpy()),
    }
    all_metrics.append(record)
all_metrics

[{'seed': '2',
  'model': 't5_asp_lownergaz',
  'checkpoint': 'best',
  'dataset': 'lowner_dev',
  'f1': 0.8342000246047974},
 {'seed': '2',
  'model': 't5_asp_lownergaz',
  'checkpoint': 'last',
  'dataset': 'lowner_train',
  'f1': 0.9279000163078308},
 {'seed': '2',
  'model': 't5_asp_lownergaz',
  'checkpoint': 'best',
  'dataset': 'lowner_test',
  'f1': 0.8389000296592712},
 {'seed': '2',
  'model': 't5_asp_lownergaz',
  'checkpoint': 'last',
  'dataset': 'lowner_test',
  'f1': 0.8457000255584717},
 {'seed': '2',
  'model': 't5_asp_lownergaz',
  'checkpoint': 'last',
  'dataset': 'lowner_dev',
  'f1': 0.8327999711036682},
 {'seed': '2',
  'model': 't5_asp_lownergaz',
  'checkpoint': 'best',
  'dataset': 'lowner_train',
  'f1': 0.9132000207901001},
 {'seed': '2',
  'model': 't5_asp_gaz',
  'checkpoint': 'best',
  'dataset': 'lowner_dev',
  'f1': 0.8737999796867371},
 {'seed': '2',
  'model': 't5_asp_gaz',
  'checkpoint': 'last',
  'dataset': 'lowner_train',
  'f1': 0.973599970340728

In [26]:
# Tabulate the collected records: one row per metrics pickle.
metrics_df = pd.DataFrame.from_records(data=all_metrics)
metrics_df.head(n=5)

Unnamed: 0,seed,model,checkpoint,dataset,f1
0,2,t5_asp_lownergaz,best,lowner_dev,0.8342
1,2,t5_asp_lownergaz,last,lowner_train,0.9279
2,2,t5_asp_lownergaz,best,lowner_test,0.8389
3,2,t5_asp_lownergaz,last,lowner_test,0.8457
4,2,t5_asp_lownergaz,last,lowner_dev,0.8328


In [68]:
# Row with the highest F1 on the test split.
# idxmax() returns an index *label*, so the row is fetched with .loc; a positional
# .iloc lookup would only be correct while the frame keeps its default RangeIndex.
test_f1 = metrics_df.loc[metrics_df["dataset"] == "lowner_test", "f1"]
metrics_df.loc[test_f1.idxmax()]

seed                              1
model         t5_asp_lownergaz_sent
checkpoint                     last
dataset                 lowner_test
f1                           0.8963
Name: 93, dtype: object

In [50]:
# Mean and standard deviation of F1, scaled to percent, for every
# (model, checkpoint, dataset) combination.
group_cols = ["model", "checkpoint", "dataset"]
f1_mean = metrics_df.pivot_table(index=group_cols, values="f1", aggfunc="mean")
f1_std = metrics_df.pivot_table(index=group_cols, values="f1", aggfunc="std")
avg_f1 = f1_mean.mul(100).reset_index()
std_f1 = f1_std.mul(100).reset_index()
avg_f1.head(), std_f1.head()

(    model checkpoint       dataset         f1
 0  t5_asp       best    lowner_dev  85.423334
 1  t5_asp       best   lowner_test  85.496666
 2  t5_asp       best  lowner_train  97.253335
 3  t5_asp       last    lowner_dev  85.279999
 4  t5_asp       last   lowner_test  85.483332,
     model checkpoint       dataset        f1
 0  t5_asp       best    lowner_dev  0.285014
 1  t5_asp       best   lowner_test  0.149779
 2  t5_asp       best  lowner_train  0.175026
 3  t5_asp       last    lowner_dev  0.199749
 4  t5_asp       last   lowner_test  0.161658)

In [56]:
# Mean F1 rows restricted to the "last" checkpoint on the dev split.
last_dev_rows = (avg_f1["dataset"] == "lowner_dev") & (avg_f1["checkpoint"] == "last")
dev_f1 = avg_f1.loc[last_dev_rows].rename(columns={"f1": "avg_f1"})
dev_f1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dev_f1.rename(columns={"f1": "avg_f1"}, inplace=True)


Unnamed: 0,model,checkpoint,dataset,avg_f1
3,t5_asp,last,lowner_dev,85.279999
9,t5_asp_gaz,last,lowner_dev,87.273331
15,t5_asp_gaz_sent,last,lowner_dev,88.05
21,t5_asp_lownergaz,last,lowner_dev,83.676666
27,t5_asp_lownergaz_sent,last,lowner_dev,89.193334
33,t5_asp_sent,last,lowner_dev,86.353334


In [61]:
# Add the F1 standard deviation next to the mean dev-set F1.
# The two tables are matched on their (model, checkpoint, dataset) keys rather
# than on row labels: avg_f1 and std_f1 come from separate pivot_table calls,
# and pivot_table drops all-NaN rows by default (e.g. the std of a single run),
# so their row labels are not guaranteed to line up.
key_cols = ["model", "checkpoint", "dataset"]
std_by_key = std_f1.set_index(key_cols)["f1"].rename("std_f1")
# join(on=...) keeps dev_f1's own index and leaves NaN where no std exists.
dev_f1.join(std_by_key, on=key_cols)

Unnamed: 0,model,checkpoint,dataset,avg_f1,std_f1
3,t5_asp,last,lowner_dev,85.279999,0.199749
9,t5_asp_gaz,last,lowner_dev,87.273331,0.151437
15,t5_asp_gaz_sent,last,lowner_dev,88.05,0.104403
21,t5_asp_lownergaz,last,lowner_dev,83.676666,0.376876
27,t5_asp_lownergaz_sent,last,lowner_dev,89.193334,0.34588
33,t5_asp_sent,last,lowner_dev,86.353334,0.196301
