In [None]:
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from src.plots.latex import set_size, update_rcParams, HUE_ORDER

In [None]:
# Load the proportional-split results and derive the columns used by the plots below.
df = pd.read_parquet("../reports/proportionalsplit.parquet")
print(len(df))
df = df.assign(
    **{
        "acc_diff": df["Acc1"] - df["Acc2"],
        "err1": 1 - df["Acc1"],
        "err-mean": 1 - df["Acc-mean"],
        # Whatever is not held out for validation or test is the training fraction.
        "Train Size": 1 - df["Val Size"] - df["Test Size"],
    }
)
df.head()

In [None]:
# Overview for the GCN2017 rows: one panel per metric (three per row),
# each panel with its own y-range.
gcn_rows = df.loc[df["Model"] == "GCN2017"]
sns.relplot(
    kind="line",
    data=gcn_rows,
    x="Train Size",
    y="Value",
    hue="Dataset",
    col="Metric",
    col_wrap=3,
    facet_kws={"sharey": False},
)

In [None]:
# Single-panel view of the "err1" column against the training fraction for GCN2017.
# NOTE(review): the rows are not filtered by "Metric" here. If every run appears once
# per metric, each err1 value is repeated and the error band may come out narrower
# than intended — confirm against the data layout.
gcn_rows = df.loc[df["Model"] == "GCN2017"]
sns.relplot(
    kind="line",
    data=gcn_rows,
    x="Train Size",
    y="err1",
    hue="Dataset",
    facet_kws={"sharey": False},
)

In [None]:
model = "GCN2017"
metric = "NormPI"


def _use_log_log_axes(ax, tick_positions, ylabel):
    """Put ``ax`` on log-log scales with one labelled x tick per train size.

    The scales are set *before* the ticks on purpose: changing an axis scale
    installs that scale's default locator and formatter, so ticks and labels
    configured earlier would be thrown away.

    Parameters
    ----------
    ax : matplotlib Axes to configure.
    tick_positions : sequence of x positions; each is labelled with two decimals.
    ylabel : text for the y-axis label.
    """
    ax.set_yscale("log")
    ax.set_xscale("log", base=10)
    # Plain decimal numbers on the y axis instead of powers of ten.
    ax.get_yaxis().set_major_formatter(mpl.ticker.ScalarFormatter())
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(
        [f"{t:.2f}" for t in tick_positions],
        rotation=30,
        horizontalalignment="right",
    )
    ax.set_ylabel(ylabel)


with plt.style.context("seaborn"), plt.style.context(
    {"axes.grid.which": "both", "lines.linewidth": 1.0}
):
    nrows, ncols = 1, 1
    # Fixed 7.5 x 5 figure; the previous set_size() result was overwritten
    # immediately, so that call has been dropped.
    height = 5
    width = 1.5 * height

    # Rows for the selected metric and model; both figures share these rows and ticks.
    pf = df.loc[(df["Metric"] == metric) & (df["Model"] == model)]
    xticks = sorted(pf["Train Size"].unique())

    # Figure 1: the metric value against the training fraction.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(width, height))
    sns.lineplot(
        data=pf,
        x="Train Size",
        y="Value",
        hue="Dataset",
        hue_order=HUE_ORDER,
        legend=True,
        ax=axes,
    )
    _use_log_log_axes(axes, xticks, f"{metric}")
    lgd = axes.legend(loc="lower right", ncol=4, bbox_to_anchor=(1.85, -0.66))

    # Figure 2: the "err1" column on the same rows. The first figure carries the legend.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(width, height))
    sns.lineplot(
        data=pf,
        x="Train Size",
        y="err1",
        hue="Dataset",
        hue_order=HUE_ORDER,
        ax=axes,
        legend=False,
    )
    # Previously the ticks were set before the x scale here, which reset them.
    _use_log_log_axes(axes, xticks, "Error Rate")
        

## Paper Plots

In [None]:
datasets = ["CiteSeer", "Pubmed", "CS", "Physics", "Computers", "Photo", "WikiCS"]
markers = ["X", "v", "o", "D", "s", "P", "^"]

# Settings shared by both line plots: colour and marker both encode the dataset,
# solid lines only, no error band.
line_kwargs = dict(
    x="Train Size",
    hue="Dataset",
    hue_order=HUE_ORDER,
    style="Dataset",
    style_order=HUE_ORDER,
    markers=markers,
    dashes=False,
    ci=None,
)

# Per model, two PDFs are written: propsplit1_<model>.pdf (the "PI" metric value)
# and propsplit2_<model>.pdf (the "err-mean" column).
for model in df["Model"].unique():
    with plt.style.context("seaborn"), update_rcParams(
        {
            "axes.grid.which": "both",
            "lines.linewidth": 1.0,
            "lines.markersize": 4,
        }
    ):
        nrows, ncols = 1, 1
        width, height = set_size(fraction=0.5)

        pf = df.loc[(df["Metric"] == "PI") & (df["Model"] == model)]
        # One x tick per distinct train size, labelled with two decimals.
        xticks = list(sorted(pf["Train Size"].unique()))
        xlabels = [f"{t:.2f}" for t in xticks]

        # --- Figure 1: disagreement, carries the legend ---
        fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(width, height))
        sns.lineplot(data=pf, y="Value", legend=True, ax=axes, **line_kwargs)
        axes.set_yscale("log")
        axes.set_yticks([0.01, 0.05, 0.1])
        axes.yaxis.set_major_formatter(mpl.ticker.ScalarFormatter())
        axes.set_xscale("log", base=10)
        axes.set_xticks(xticks)
        axes.set_xticklabels(xlabels, rotation=30, horizontalalignment="right")
        # NOTE(review): this formatter is installed after set_xticklabels and so
        # replaces the fixed label strings — confirm that is intended.
        axes.xaxis.set_major_formatter(mpl.ticker.ScalarFormatter())
        axes.set_ylabel("Disagreement $d$")
        lgd = axes.legend(
            loc="lower right",
            ncol=4,
            bbox_to_anchor=(1.85, -0.71),
        )
        fig.savefig(f"../reports/propsplit1_{model}.pdf", bbox_inches="tight")

        # --- Figure 2: error rate on the same rows, no legend ---
        fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(width, height))
        sns.lineplot(data=pf, y="err-mean", legend=False, ax=axes, **line_kwargs)
        axes.set_xscale("log", base=10)
        axes.set_xticks(xticks)
        axes.set_xticklabels(xlabels, rotation=30, horizontalalignment="right")
        axes.xaxis.set_major_formatter(mpl.ticker.ScalarFormatter())
        axes.set_yscale("log")
        axes.set_yticks([0.4, 0.1, 0.04])
        axes.yaxis.set_major_formatter(mpl.ticker.ScalarFormatter())
        axes.set_ylabel("Error Rate")
        fig.savefig(f"../reports/propsplit2_{model}.pdf", bbox_inches="tight")

In [None]:
datasets = ["CiteSeer", "Pubmed", "CS", "Physics", "Computers", "Photo", "WikiCS"]
markers = ["X", "v", "o", "D", "s", "P", "^"]

# Per model, one PDF is written: propsplit1_<model>_fpi.pdf, showing the
# "False PI" metric value against the training fraction.
for model in df["Model"].unique():
    with plt.style.context("seaborn"), update_rcParams(
        {
            "axes.grid.which": "both",
            "lines.linewidth": 1.0,
            "lines.markersize": 4,
        }
    ):
        nrows, ncols = 1, 1
        width, height = set_size(fraction=0.5)
        fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(width, height))

        is_false_pi = df["Metric"] == "False PI"
        is_model = df["Model"] == model
        pf = df.loc[is_false_pi & is_model]

        # Colour and marker both encode the dataset; lines stay solid.
        sns.lineplot(
            data=pf,
            x="Train Size",
            y="Value",
            ax=axes,
            legend=True,
            ci=99,
            hue="Dataset",
            hue_order=HUE_ORDER,
            style="Dataset",
            style_order=HUE_ORDER,
            markers=markers,
            dashes=False,
        )

        # y axis: log scale with hand-picked ticks, printed as plain decimals.
        axes.set_yscale("log")
        axes.set_yticks([0.04, 0.1, 0.2])
        axes.yaxis.set_major_formatter(mpl.ticker.ScalarFormatter())

        # x axis: log scale with one tick per distinct train size.
        xticks = list(sorted(pf["Train Size"].unique()))
        axes.set_xscale("log", base=10)
        axes.set_xticks(xticks)
        axes.set_xticklabels(
            [f"{t:.2f}" for t in xticks],
            rotation=30,
            horizontalalignment="right",
        )
        # NOTE(review): this formatter is installed after set_xticklabels and so
        # replaces the fixed label strings — confirm that is intended.
        axes.xaxis.set_major_formatter(mpl.ticker.ScalarFormatter())

        axes.set_ylabel("False Disagr. $d_{False}$")
        lgd = axes.legend(
            loc="lower right",
            ncol=4,
            bbox_to_anchor=(1.85, -0.71),
        )
        fig.savefig(f"../reports/propsplit1_{model}_fpi.pdf", bbox_inches="tight")


In [None]:
datasets = ["CiteSeer", "Pubmed", "CS", "Physics", "Computers", "Photo", "WikiCS"]
markers = ["X", "v", "o", "D", "s", "P", "^"]
# y-axis label for each metric name found in the "Metric" column.
metric_to_name = {
    "PI": "Disagreement $d$",
    "NormPI": "Norm. Dis. $d_{Norm}$",
    "False PI": "False Dis. $d_{False}$",
    "True PI": "True Dis. $d_{True}$",
    "MAE": "MAE",
    "SymKL": "Symmetric KL-Div",
}

# savefig does not create missing directories, so make sure the target exists
# before the first figure is written.
Path("../reports/appendix").mkdir(parents=True, exist_ok=True)

# One PDF per (error-band setting, model, metric):
#   ../reports/appendix/propsplit1_<model>_<metric>_<sd|nosd>.pdf
# NOTE(review): every figure stays open until the cell finishes; consider
# plt.close(fig) after saving if inline display of all of them is not needed.
for ci, ci_txt in zip(["sd", None], ["sd", "nosd"]):
    for model in df["Model"].unique():
        for metric in df["Metric"].unique():
            with plt.style.context("seaborn"), update_rcParams(
                {
                    "axes.grid.which": "both",
                    "lines.linewidth": 1.0,
                    "lines.markersize": 4,
                }
            ):
                nrows, ncols = 1, 1
                width, height = set_size(fraction=0.5)
                fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(width, height))

                pf = df.loc[(df["Metric"] == metric) & (df["Model"] == model)]
                # Colour and marker both encode the dataset; lines stay solid.
                sns.lineplot(
                    data=pf,
                    x="Train Size",
                    y="Value",
                    hue="Dataset",
                    hue_order=HUE_ORDER,
                    legend=True,
                    ci=ci,
                    ax=axes,
                    style="Dataset",
                    markers=markers,
                    style_order=HUE_ORDER,
                    dashes=False,
                )
                axes.set_yscale("log")
                axes.get_yaxis().set_major_formatter(mpl.ticker.ScalarFormatter())
                # One x tick per distinct train size; the scale is set before the ticks.
                xticks = list(sorted(pf["Train Size"].unique()))
                axes.set_xscale("log", base=10)
                axes.set_xticks(xticks)
                axes.set_xticklabels(
                    [f"{t:.2f}" for t in xticks],
                    rotation=30,
                    horizontalalignment="right",
                )
                # NOTE(review): this formatter is installed after set_xticklabels and
                # so replaces the fixed label strings — confirm that is intended.
                axes.get_xaxis().set_major_formatter(mpl.ticker.ScalarFormatter())
                # Fall back to the raw metric name so a metric that is missing from
                # metric_to_name does not abort the whole run with a KeyError.
                axes.set_ylabel(metric_to_name.get(metric, str(metric)))
                lgd = axes.legend(
                    loc="lower right",
                    ncol=3,
                    bbox_to_anchor=(1, -0.7),
                )
                fig.savefig(f"../reports/appendix/propsplit1_{model}_{metric}_{ci_txt}.pdf", bbox_inches="tight")
