In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Switch matplotlib to seaborn's default theme so every figure below shares one look.
sns.set_theme()

## Non-Incremental vs. Incremental Training (Figure)

In [None]:
# --- Settings shared by every panel of the figure ---------------------------
input_type = "norm"
split = "test"
n_partialss = [1, 4]  # 1 is labelled non-incremental, any other value incremental
metrics = ["macro_f1", "micro_f1", "cohen_kappa", "mcc", "hat3", "hat5", "hat8"]

# Axis label for the x-axis; it is also stored as the index name of each frame.
x_label = "Proportion of input clinical findings"

fig, axs = plt.subplots(nrows=2, ncols=4, figsize=(18, 9), dpi=300)

# metric name -> DataFrame with one column per training regime, indexed by "prop".
# Kept at notebook scope because the table cell below aggregates it.
metric_dfs = dict()
for ax, metric in zip(axs.flat, metrics):  # axs.flat walks the grid row by row
    # load data for plotting
    dfs = list()
    for n_partials in n_partialss:
        run_dir = (
            "../../training/dx/models_increment/"
            "encoder-BioLinkBERT__optimizer-AdamW__scheduler-linear__lr-5e-05"
            f"__n_partials-{n_partials}__input_type-{input_type}"
            "__label_type-outicd__scheme-everyk"
        )
        df = pd.read_csv(
            f"{run_dir}/eval_results/incremental_{split}_{metric}.csv",
            index_col="prop",
        )
        regime = "non-incremental training" if n_partials == 1 else "incremental training"
        # Keep only the metric column, named after the regime so it becomes the legend entry.
        df = df[metric].to_frame(name=regime).rename_axis(x_label)
        dfs.append(df)
    metric_df = pd.concat(objs=dfs, axis="columns")
    metric_dfs[metric] = metric_df

    metric_df.plot(ax=ax).legend(loc="lower right")
    # Decorate after plotting: pandas writes the index name into the x-label
    # while drawing, so labels set beforehand would be overwritten.
    ax.set_title(metric, fontsize=14)
    ax.set_xlabel(x_label)
    ax.set_ylabel("Performance")

# The grid has more slots than metrics; hide the leftovers instead of
# rendering empty axes frames.
for unused_ax in axs.ravel()[len(metrics):]:
    unused_ax.set_visible(False)

# Run the layout pass last so it can take titles, labels and legends into account.
fig.tight_layout(pad=5.0)

## Non-Incremental vs. Incremental Training (Table)

In [None]:
# Average every curve over all input proportions: one named Series per metric,
# indexed by training regime.
metric_cols = [
    metric_dfs[metric_name].mean(axis=0).rename(metric_name)
    for metric_name in metrics
]

# Rows: training regimes, columns: metrics. Left as the final expression so
# the notebook renders it as a table.
pd.concat(metric_cols, axis=1)