In [1]:
import pandas as pd
import wandb
import matplotlib.pyplot as plt
import numpy as np
import os
import os.path as osp
import matplotlib

# Lift pandas' display truncation so whole frames are rendered in cell outputs.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, None)

# Matplotlib style sheets used for every figure in this notebook.
# NOTE(review): "science"/"ieee" are not built-in matplotlib styles; presumably
# they come from the SciencePlots package — confirm it is installed.
plt.style.use(["science", "ieee"])

# Weights & Biases client; it is created before the output directory so that
# it is available even if the directory creation below raises.
api = wandb.Api()

# Directory that receives the PDF figures saved by the plotting cells.
out_dir = "../output/analysis_pnml_from_pretrained"
os.makedirs(out_dir, exist_ok=True)

# Project is specified by <entity/project-name>
runs = api.runs("kobybibas/pnml_for_dnn")


FileNotFoundError: [Errno 2] No such file or directory: '../output/analysis_pnml_from_pretrained'

In [2]:
# Number of leading rows kept from each result frame of the default run set.
total_samples = 600

# Selects which experiment set is analysed. The 3-epoch set is loaded without
# truncation, exactly as in the original version of this cell.
use_three_epoch_runs = False

# W&B run id for each prune amount, per experiment set.
three_epoch_run_ids = {
    "0.7": "kqitnlwh",
    "0.8": "i7flad6i",
    "0.9": "3o6srigx",
    "0.95": "3htljc6s",
    "0.99": "11auktcw",
}
default_run_ids = {
    "0.7": "mbqz5e61",
    "0.8": "1guf6xf2",
    "0.9": "1lrf17di",
    "0.95": "1t1ydpao",
    "0.99": "3swolu27",
}


def load_res_df(run_id, max_samples=None):
    """Fetch ``res_df.pkl`` from one W&B run and return it as a DataFrame.

    The file is downloaded with ``replace=True`` and then read back from the
    relative path ``res_df.pkl``, so every call overwrites the copy left by
    the previous one; the returned frame is already in memory at that point.

    Args:
        run_id: Id of the run inside the ``kobybibas/pnml_for_dnn`` project.
        max_samples: If given, only the first ``max_samples`` rows are kept.

    Returns:
        The (optionally truncated) result DataFrame of the run.
    """
    run = api.run(f"kobybibas/pnml_for_dnn/{run_id}")
    run.file("res_df.pkl").download(replace=True)
    # NOTE: unpickling can execute arbitrary code — only load files from
    # runs you trust.
    res_df = pd.read_pickle("res_df.pkl")
    if max_samples is None:
        return res_df
    return res_df.iloc[:max_samples]


if use_three_epoch_runs:
    selected_run_ids, max_samples = three_epoch_run_ids, None
else:
    selected_run_ids, max_samples = default_run_ids, total_samples

# Result frame per prune amount, in increasing prune order.
df_dict = {
    prune_amount: load_res_df(run_id, max_samples)
    for prune_amount, run_id in selected_run_ids.items()
}

# Per-level names kept for any cell that still refers to them directly.
prune_0_df, prune_1_df, prune_2_df, prune_3_df, prune_4_df = df_dict.values()


In [3]:
# One summary record per prune amount: accuracy, mean / worst-case logloss of
# both learners, mean pNML regret, and the number of evaluated rows.
performance_list = [
    {
        "Prune amount": prune_level,
        "pNML accuracy (%)": 100 * res_df["pnml_is_correct"].mean(),
        "ERM accuracy (%)": 100 * res_df["erm_is_correct"].mean(),
        "pNML logloss": res_df["pnml_logloss"].mean(),
        "ERM logloss": res_df["erm_logloss"].mean(),
        "pNML max logloss": res_df["pnml_logloss"].max(),
        "ERM max logloss": res_df["erm_logloss"].max(),
        "Regret": res_df["pnml_regret"].mean(),
        "Test set size": len(res_df),
    }
    for prune_level, res_df in df_dict.items()
]

# Round for display and index the table by prune amount.
performance_df = pd.DataFrame(performance_list)
performance_df = performance_df.round(2)
performance_df = performance_df.set_index("Prune amount")
performance_df


Unnamed: 0_level_0,pNML accuracy (%),ERM accuracy (%),pNML logloss,ERM logloss,pNML max logloss,ERM max logloss,Regret,Test set size
Prune amount,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.7,94.17,94.17,0.16,0.24,10.36,14.47,0.06,600
0.8,94.33,95.17,0.13,0.18,10.09,11.76,0.04,600
0.9,93.83,93.83,0.18,0.25,11.07,13.97,0.05,600
0.95,96.5,96.33,0.13,0.15,10.62,11.69,0.02,600
0.99,91.33,91.0,0.22,0.22,5.36,5.52,0.0,600


In [7]:
# Histogram of the pNML regret, one panel per prune amount.
# The bins span [0, log(2)]; log(2) is the value this notebook reports as the
# theoretical maximum of the regret.
bins = np.linspace(0.0, np.log(2), 100)

# Largest regret observed across all prune amounts, compared with that bound.
regret_max = max(df["pnml_regret"].max() for df in df_dict.values())
print(
    f"Max regret: {regret_max:.3f}. "
    f"Theoretical max log(2): {np.log(2):.3f}. "
    f"Difference: {regret_max - np.log(2):.3f}"
)
fig, axs = plt.subplots(len(df_dict), 1, dpi=200, sharex=True, sharey=True)

for i, (prune_amount, df) in enumerate(df_dict.items()):
    ax = axs[i]
    ax.hist(df["pnml_regret"], bins=bins, alpha=1.0, color="C0")
    # Log-scaled counts keep the sparse high-regret bins visible.
    ax.set_yscale("log")
    ax.set_ylabel("Count")
    # Panel annotation, positioned in axes-relative coordinates.
    ax.text(
        0.02,
        0.85,
        f"Prune amount: {float(prune_amount):3.2f}",
        ha="left",
        va="top",
        transform=ax.transAxes,
    )
axs[-1].set_xlabel("Regret")
# Leave a small margin to the right of the log(2) bound.
plt.xlim(0, np.log(2) + 0.05)
plt.tight_layout()
plt.savefig(osp.join(out_dir, "regret_hist.pdf"))
plt.show()


Max regret_max=0.661. Theoretical max: np.log(2)=0.693. Difference=regret_max-np.log(2)=-0.032520649912667454 


In [None]:
# Regret distribution, split by whether pNML classified the sample correctly.
bins = np.linspace(0.0, np.log(2), 100)
fig, axs = plt.subplots(len(df_dict), 1, dpi=200, sharex=True, sharey=True)

for ax, (prune_amount, df) in zip(axs, df_dict.items()):
    # Explicit == True / == False comparisons are kept as in the original
    # row selection.
    pnml_correct_mask = df["pnml_is_correct"] == True
    pnml_incorrect_mask = df["pnml_is_correct"] == False
    regret_for_correct = df.loc[pnml_correct_mask, "pnml_regret"]
    regret_for_incorrect = df.loc[pnml_incorrect_mask, "pnml_regret"]

    # Drawn in this order: the incorrect group first, then the fainter
    # correct group on top of it.
    histogram_specs = (
        (regret_for_incorrect, 0.75, "C3", "pNML incorrectly classified"),
        (regret_for_correct, 0.25, "C2", "pNML correctly classified"),
    )
    for regret_values, opacity, colour, legend_label in histogram_specs:
        ax.hist(
            regret_values,
            bins=bins,
            alpha=opacity,
            color=colour,
            label=legend_label,
            density=True,
        )

    ax.set_yscale("log")
    ax.set_ylabel("Frequency")
    # Panel annotation, positioned in axes-relative coordinates.
    ax.text(
        0.02,
        0.85,
        f"Prune amount: {float(prune_amount):3.2f}",
        ha="left",
        va="top",
        transform=ax.transAxes,
    )

axs[-1].set_xlabel("Regret")

# A single legend above the top panel serves the whole figure.
legend_options = {
    "loc": "upper center",
    "bbox_to_anchor": (0.5, 1.75),
    "frameon": True,
    "shadow": False,
    "ncol": 2,
    "prop": {"size": 6},
}
axs[0].legend(**legend_options)

plt.xlim(0, np.log(2) + 0.05)
plt.tight_layout()
plt.savefig(osp.join(out_dir, "regret_hist_for_pnml_correct_and_incorrect.pdf"))
plt.show()


In [None]:
# pNML regret distribution, split by whether ERM classified the sample correctly.
bins = np.linspace(0.0, np.log(2), 100)
fig, axs = plt.subplots(len(df_dict), 1, dpi=200, sharex=True, sharey=True)

# Keyword arguments shared by both histograms of every panel.
shared_hist_kwargs = {"bins": bins, "density": True}

for ax, (prune_amount, df) in zip(axs, df_dict.items()):
    # Explicit == True / == False comparisons are kept as in the original
    # row selection.
    erm_correct_mask = df["erm_is_correct"] == True
    erm_incorrect_mask = df["erm_is_correct"] == False
    regret_for_correct = df.loc[erm_correct_mask, "pnml_regret"]
    regret_for_incorrect = df.loc[erm_incorrect_mask, "pnml_regret"]

    # The incorrect group is drawn first; the fainter correct group overlays it.
    ax.hist(
        regret_for_incorrect,
        alpha=0.75,
        color="C3",
        label="ERM incorrectly classified",
        **shared_hist_kwargs,
    )
    ax.hist(
        regret_for_correct,
        alpha=0.25,
        color="C2",
        label="ERM correctly classified",
        **shared_hist_kwargs,
    )

    ax.set_yscale("log")
    ax.set_ylabel("Frequency")
    # Panel annotation, positioned in axes-relative coordinates.
    panel_title = f"Prune amount: {float(prune_amount):3.2f}"
    ax.text(0.02, 0.85, panel_title, ha="left", va="top", transform=ax.transAxes)

axs[-1].set_xlabel("Regret")
# A single legend above the top panel serves the whole figure.
axs[0].legend(
    loc="upper center",
    bbox_to_anchor=(0.5, 1.75),
    frameon=True,
    shadow=False,
    ncol=2,
    prop={"size": 6},
)
plt.xlim(0, np.log(2) + 0.05)
plt.tight_layout()
plt.savefig(osp.join(out_dir, "regret_hist_for_erm_correct_and_incorrect.pdf"))
plt.show()


In [None]:
# Logloss histograms of pNML and ERM, one panel per prune amount.

# Extreme logloss values over both learners and all prune amounts.
loss_max = max(
    [df["pnml_logloss"].max() for df in df_dict.values()]
    + [df["erm_logloss"].max() for df in df_dict.values()]
)
# Not used by this cell; kept in case another cell reads it.
loss_min = min(
    [df["pnml_logloss"].min() for df in df_dict.values()]
    + [df["erm_logloss"].min() for df in df_dict.values()]
)

# Linear bins from 0 to the largest observed loss. (A logarithmic binning was
# previously assigned here and immediately overwritten, so it never took
# effect and has been removed.)
bins = np.linspace(0.0, loss_max, 100)


fig, axs = plt.subplots(len(df_dict), 1, dpi=200, sharex=True, sharey=True)

for i, (prune_amount, df) in enumerate(df_dict.items()):
    ax = axs[i]
    # ERM is drawn semi-transparent on top of the opaque pNML histogram.
    ax.hist(df["pnml_logloss"], bins=bins, label="pNML", alpha=1.0, color="C0")
    ax.hist(df["erm_logloss"], bins=bins, alpha=0.5, color="C1", label="ERM")

    # Panel annotation, positioned in axes-relative coordinates.
    ax.text(
        0.975,
        0.8,
        f"Prune amount: {float(prune_amount):3.2f}",
        ha="right",
        va="top",
        transform=ax.transAxes,
    )
    ax.set_yscale("log")
    ax.set_ylabel("Count")
# A single legend above the top panel serves the whole figure.
axs[0].legend(
    loc="upper center",
    bbox_to_anchor=(0.5, 1.75),
    frameon=True,
    shadow=False,
    ncol=2,
    prop={"size": 6},
)
axs[-1].set_xlabel("Logloss")
axs[-1].set_xlim(left=0.0)
plt.tight_layout()
plt.savefig(osp.join(out_dir, "logloss_hist.pdf"))
plt.show()


In [None]:
# Histograms of the probability each learner assigned to the true label,
# one panel per prune amount.
bins = np.linspace(0.0, 1.0, 100)


def _prob_of_true_label(row, probs_column):
    """Return the entry of ``row[probs_column]`` at the row's true-label index."""
    return row[probs_column][int(row["test_true_label"])]


fig, axs = plt.subplots(len(df_dict), 1, dpi=200, sharex=True, sharey=True)

for ax, (prune_amount, df) in zip(axs, df_dict.items()):
    # Row-wise lookup: pick the true label's probability from each learner.
    pnml_probs_for_correct = df.apply(
        _prob_of_true_label, axis=1, args=("pnml_probs",)
    )
    erm_probs_for_correct = df.apply(
        _prob_of_true_label, axis=1, args=("erm_probs",)
    )

    # ERM is drawn semi-transparent on top of the opaque pNML histogram.
    ax.hist(pnml_probs_for_correct, bins=bins, label="pNML", alpha=1.0, color="C0")
    ax.hist(erm_probs_for_correct, bins=bins, alpha=0.5, color="C1", label="ERM")

    # Panel annotation, positioned in axes-relative coordinates.
    panel_title = f"Prune amount: {float(prune_amount):3.2f}"
    ax.text(0.02, 0.85, panel_title, ha="left", va="top", transform=ax.transAxes)
    ax.set_yscale("log")
    ax.set_ylabel("Count")

# A single legend above the top panel serves the whole figure.
legend_options = {
    "loc": "upper center",
    "bbox_to_anchor": (0.5, 1.75),
    "frameon": True,
    "shadow": False,
    "ncol": 2,
    "prop": {"size": 6},
}
axs[0].legend(**legend_options)
axs[-1].set_xlabel("Probability assignment for the correct label")
axs[-1].set_xlim(left=0.0)
plt.tight_layout()
plt.savefig(osp.join(out_dir, "prob_for_correct_label_hist.pdf"))
plt.show()
