In [None]:
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from src.plots.latex import set_size, update_rcParams, HUE_ORDER

In [None]:
# Load the optimizer experiment results and attach the derived columns the
# plots below rely on:
#   acc_diff - difference between the two accuracy columns (Acc1 - Acc2)
#   err1     - complement of Acc1
#   err-mean - complement of Acc-mean
df = pd.read_parquet("../reports/optimizer.parquet").assign(
    acc_diff=lambda frame: frame["Acc1"] - frame["Acc2"],
    err1=lambda frame: 1 - frame["Acc1"],
    # "err-mean" is not a valid keyword name, hence the dict unpacking.
    **{"err-mean": lambda frame: 1 - frame["Acc-mean"]},
)

# Quick sanity output: row count and the optimizer labels present.
print(len(df))
print(df["Optimizer"].unique())
df.head()

## EDA

In [None]:
# Exploratory overview for GAT2017: a grid of bar charts (one panel per
# metric, wrapped after three columns) with optimizers on the x-axis and one
# bar colour per dataset. sharey=False lets every panel pick its own y-range.
#
# matplotlib 3.6 renamed the bundled "seaborn" style sheet to "seaborn-v0_8"
# and 3.8 removed the old name, so use whichever name this install provides.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"

with plt.style.context(seaborn_style):
    g = sns.catplot(
        data=df[df["Model"] == "GAT2017"],
        x="Optimizer",
        y="Value",
        hue="Dataset",
        hue_order=HUE_ORDER,
        col="Metric",
        col_wrap=3,
        kind="bar",
        sharey=False,
    )

In [None]:
# Exploratory bar chart for GAT2017: the derived "err1" column per optimizer,
# one bar colour per dataset (ordered by HUE_ORDER).
#
# matplotlib 3.6 renamed the bundled "seaborn" style sheet to "seaborn-v0_8"
# and 3.8 removed the old name, so use whichever name this install provides.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"

with plt.style.context(seaborn_style):
    g = sns.catplot(
        data=df[df["Model"] == "GAT2017"],
        x="Optimizer",
        y="err1",
        hue="Dataset",
        hue_order=HUE_ORDER,
        kind="bar",
    )

In [None]:
# Two exploratory line plots for a single model, both against "L2eps" on a
# log-10 x-axis: (1) the selected metric's "Value", (2) the "err1" column.
# Neither figure is written to disk; they are only displayed.
#
# NOTE(review): this cell filters on an "L2" column and plots "L2eps", while
# the frame is loaded from optimizer.parquet - confirm those columns exist
# there; the cell may have been carried over from another experiment.
model = "GCN2017"
metric = "PI"

# matplotlib 3.6 renamed the bundled "seaborn" style sheet to "seaborn-v0_8"
# and 3.8 removed the old name, so use whichever name this install provides.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"

# The second context layers a single rcParam override on top of the style:
# grid lines for both major and minor ticks.
with plt.style.context(seaborn_style), plt.style.context({"axes.grid.which": "both"}):
    nrows, ncols = 1, 1
    # Fixed 4.5 x 3 inch figures. The original computed a size with
    # set_size(fraction=0.5) and immediately overwrote it with these values,
    # so that call is dropped.
    height = 3
    width = 1.5 * height

    # Rows for the chosen metric and model with a non-negative "L2" value.
    pf = df.loc[(df["Metric"] == metric) & (df["Model"] == model) & (df["L2"] >= 0)]

    # Figure 1: metric value per dataset, no confidence band.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(width, height))
    sns.lineplot(
        data=pf,
        x="L2eps",
        y="Value",
        hue="Dataset",
        hue_order=HUE_ORDER,
        legend=True,
        ci=None,
        ax=axes,
    )
    axes.set_xscale("log", base=10)
    axes.get_yaxis().set_major_formatter(mpl.ticker.ScalarFormatter())
    axes.set_ylabel(f"{metric}")
    # The anchor lies outside the unit axes box, so the legend is drawn
    # outside the plotting area (below and to the right).
    lgd = axes.legend(
        loc="lower right",
        ncol=4,
        bbox_to_anchor=(1.85, -0.66),
    )

    # Figure 2: error rate per dataset with a 99% confidence band; the
    # legend stays wherever seaborn places it.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(width, height))
    sns.lineplot(
        data=pf,
        x="L2eps",
        y="err1",
        hue="Dataset",
        hue_order=HUE_ORDER,
        ci=99,
        ax=axes,
        legend=True,
    )
    axes.set_xscale("log", base=10)
    axes.get_yaxis().set_major_formatter(mpl.ticker.ScalarFormatter())
    axes.set_ylabel("Error Rate")

## Paper Plot

In [None]:
# Show the first two rows as a reminder of the column layout before plotting.
df.iloc[:2]

In [None]:
# Paper figures, two PDFs per model:
#   ../reports/optim1_<model>.pdf - "Value" of the "PI" metric per optimizer
#   ../reports/optim2_<model>.pdf - "err-mean" per optimizer
# Both are grouped bar charts with one bar per dataset (ordered by HUE_ORDER)
# and no error bars (ci=None). Only the first chart carries a legend.
models = ["GCN2017", "GAT2017"]

# matplotlib 3.6 renamed the bundled "seaborn" style sheet to "seaborn-v0_8"
# and 3.8 removed the old name, so use whichever name this install provides.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"

for model in models:
    with plt.style.context(seaborn_style), update_rcParams(
        {"axes.grid.which": "both",  "lines.linewidth": 1, "lines.markersize": 5}
    ):
        nrows, ncols = 1, 1
        width, height = set_size(fraction=0.5)

        # Both charts draw from the same rows: metric "PI" for this model.
        pf = df.loc[(df["Metric"] == "PI") & (df["Model"] == model)]

        # Chart 1: disagreement per optimizer.
        fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(width, height))
        sns.barplot(
            data=pf,
            x="Optimizer",
            y="Value",
            hue="Dataset",
            hue_order=HUE_ORDER,
            order=["Adam", "SGD-0.9M", "SGD-0.0M"],
            ci=None,
            ax=axes,
        )
        axes.set_ylabel("Disagreement $d$")
        lgd = axes.legend(
            loc="upper left",
            ncol=2,
        )
        fig.savefig(f"../reports/optim1_{model}.pdf", bbox_inches="tight")

        # Chart 2: error rate per optimizer; the legend seaborn adds is
        # removed again so only chart 1 shows it.
        fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(width, height))
        g = sns.barplot(
            data=pf,
            x="Optimizer",
            y="err-mean",
            hue="Dataset",
            order=["Adam", "SGD-0.9M", "SGD-0.0M"],
            hue_order=HUE_ORDER,
            ci=None,
            ax=axes,
        )
        axes.get_legend().remove()
        axes.set_ylabel("Error Rate")
        fig.savefig(f"../reports/optim2_{model}.pdf", bbox_inches="tight")

In [None]:
# Paper figure per model: grouped bar chart of the "False PI" metric's
# "Value" per optimizer, one bar per dataset (ordered by HUE_ORDER), no error
# bars. Saved as ../reports/optim1_<model>_fpi.pdf.
models = ["GCN2017", "GAT2017"]

# matplotlib 3.6 renamed the bundled "seaborn" style sheet to "seaborn-v0_8"
# and 3.8 removed the old name, so use whichever name this install provides.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"

for model in models:
    with plt.style.context(seaborn_style), update_rcParams(
        {"axes.grid.which": "both",  "lines.linewidth": 1, "lines.markersize": 5}
    ):
        nrows, ncols = 1, 1
        width, height = set_size(fraction=0.5)

        pf = df.loc[(df["Metric"] == "False PI") & (df["Model"] == model)]

        fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(width, height))
        sns.barplot(
            data=pf,
            x="Optimizer",
            y="Value",
            hue="Dataset",
            hue_order=HUE_ORDER,
            order=["Adam", "SGD-0.9M", "SGD-0.0M"],
            ci=None,
            ax=axes,
        )
        axes.set_ylabel("False Disagr. $d_{False}$")
        lgd = axes.legend(
            loc="upper left",
            ncol=2,
        )
        fig.savefig(f"../reports/optim1_{model}_fpi.pdf", bbox_inches="tight")
            
        

In [None]:
# Appendix figures: one grouped bar chart for every (model, metric) pair
# found in df - "Value" per optimizer, one bar per dataset (ordered by
# HUE_ORDER), no error bars. Each chart is saved as
# ../reports/appendix/optim1_<model>_<metric>.pdf.
#
# `models` is not used by the loop below, which iterates over every model
# present in df; the assignment is kept so the name stays defined.
models = ["GCN2017", "GAT2017"]

# Y-axis label for each metric name.
metric_to_name = {
    "PI": "Disagreement $d$",
    "NormPI": "Norm. Dis. $d_{Norm}$",
    "False PI": "False Dis. $d_{False}$",
    "True PI": "True Dis. $d_{True}$",
    "MAE": "MAE",
    "SymKL": "Symmetric KL-Div",
}

# matplotlib 3.6 renamed the bundled "seaborn" style sheet to "seaborn-v0_8"
# and 3.8 removed the old name, so use whichever name this install provides.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"

# savefig does not create missing directories, so make sure the target exists.
appendix_dir = Path("../reports/appendix")
appendix_dir.mkdir(parents=True, exist_ok=True)

for model in df["Model"].unique():
    for metric in df["Metric"].unique():
        with plt.style.context(seaborn_style), update_rcParams(
            {"axes.grid.which": "both",  "lines.linewidth": 1, "lines.markersize": 5}
        ):
            nrows, ncols = 1, 1
            width, height = set_size(fraction=0.5)

            pf = df.loc[(df["Metric"] == metric) & (df["Model"] == model)]

            fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(width, height))
            sns.barplot(
                data=pf,
                x="Optimizer",
                y="Value",
                hue="Dataset",
                hue_order=HUE_ORDER,
                order=["Adam", "SGD-0.9M", "SGD-0.0M"],
                ci=None,
                ax=axes,
            )
            # Metrics without a label entry fall back to their raw name
            # instead of aborting the whole loop with a KeyError.
            axes.set_ylabel(metric_to_name.get(metric, metric))

            lgd = axes.legend(
                loc="best",
                ncol=2,
            )
            fig.savefig(appendix_dir / f"optim1_{model}_{metric}.pdf", bbox_inches="tight")
                
            