In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Root of the sweep output; every sub-directory name is used as a dataset name
# by the loader cells below.
results_dir = "../graphconformal_sweep/"
# os.listdir also returns plain files (e.g. .DS_Store) and gives no ordering
# guarantee, so keep directories only and sort for a reproducible order.
datasets = sorted(
    entry
    for entry in os.listdir(results_dir)
    if os.path.isdir(os.path.join(results_dir, entry))
)
sns.set(font_scale=1.4, style="white")

## Split (train/validation fraction sweep)

In [None]:
# Load the train/valid-fraction sweep: one DataFrame per result file found
# under <results_dir>/<dataset>/split/<train>_<valid>/alpha_<alpha>/, each
# tagged with the configuration it was read from.
all_results = []
for dataset in datasets:
    for train_frac in [0.2, 0.3]:
        for valid_frac in [0.2, 0.1]:
            for alpha in [0.1, 0.2, 0.3, 0.4]:
                split_dir = os.path.join(results_dir, dataset, "split", f"{train_frac}_{valid_frac}", f"alpha_{alpha}")
                # A dataset without this configuration on disk is skipped
                # instead of aborting the whole load with FileNotFoundError
                # (the nspc loader guards the same way).
                if not os.path.isdir(split_dir):
                    continue
                methods = os.listdir(split_dir)
                for method in methods:
                    # Files with "_params" in their name are not read as results.
                    if "_params" not in method:
                        # Method name = file name up to the first dot.
                        method_name = method.split(".")[0]
                        result_file = os.path.join(split_dir, method)
                        results = pd.read_csv(result_file)
                        results["dataset"] = dataset
                        results["train_frac"] = train_frac
                        results["valid_frac"] = valid_frac
                        results["alpha"] = alpha
                        results["method"] = method_name
                        all_results.append(results)

In [None]:
# NOTE(review): this cell starts `all_results` over, so anything the previous
# cell collected is gone once it has run. The directory walk is the same one
# the "n samples per class" section performs into `all_nspc_results`.
# Confirm which loader `full_df` is meant to be built from.
all_results = []
nspc_alpha_grid = [
    (nspc, alpha)
    for nspc in [10, 20, 40, 80]
    for alpha in [0.1, 0.2, 0.3, 0.4]
]
for dataset in datasets:
    for nspc, alpha in nspc_alpha_grid:
        split_dir = os.path.join(results_dir, dataset, "nspc", str(nspc), f"alpha_{alpha}")
        if os.path.exists(split_dir):
            methods = os.listdir(split_dir)
            for method in methods:
                # Files with "_params" in their name are not read as results.
                if "_params" in method:
                    continue
                method_name = method.split(".")[0]
                result_file = os.path.join(split_dir, method)
                # Tag the frame with the configuration it was read from.
                results = pd.read_csv(result_file).assign(
                    dataset=dataset,
                    nspc=nspc,
                    alpha=alpha,
                    method=method_name,
                )
                all_results.append(results)

In [None]:
# Stack every per-file frame into one table. Each frame comes from read_csv
# with its own default 0..n-1 index, so renumber the rows instead of carrying
# repeated index labels into the filtering and plotting cells.
full_df = pd.concat(all_results, ignore_index=True)

In [None]:
# Distinct method names present in the loaded results.
full_df["method"].unique()

In [None]:
# Methods kept by the efficiency plots and the nspc box plots.
basic_methods = [
    "tps",
    "tps_classwise",
    "aps_randomized",
    "daps",
    "raps",
    "naps_uniform",
    "naps_hyperbolic",
    "naps_exponential",
]

In [None]:
# Small datasets: efficiency against alpha, one panel per dataset.
small_datasets = ["CiteSeer", "Amazon_Photos"]
keep_rows = full_df["dataset"].isin(small_datasets) & full_df["method"].isin(basic_methods)
small_df = full_df[keep_rows]

# relplot is figure-level, so `ax` holds the returned FacetGrid (not an Axes).
ax = sns.relplot(
    data=small_df,
    x="alpha",
    y="efficiency",
    hue="method",
    col="dataset",
    kind="line",
)
ax.savefig("figures/small_datasets_efficiency.pdf")

In [None]:
# Medium datasets 1: efficiency against alpha, one panel per dataset.
med_1_datasets = ["Cora", "PubMed"]
med_1_df = full_df[full_df["dataset"].isin(med_1_datasets)]
med_1_df = med_1_df[med_1_df["method"].isin(basic_methods)]

# relplot is figure-level and always builds its own figure; it does not take an
# `ax` argument (seaborn warns and drops it). The call therefore no longer
# passes the `ax` left over from an earlier cell, so this cell runs on its own.
# `ax` is the returned FacetGrid.
ax = sns.relplot(data=med_1_df, x="alpha", y="efficiency", hue="method", col="dataset", kind="line")
ax.savefig("figures/med_1_datasets_efficiency.pdf")

In [None]:
# Medium datasets 2: efficiency against alpha, one panel per dataset.
med_2_datasets = ["Coauthor_CS", "Coauthor_Physics"]
med_2_df = full_df[full_df["dataset"].isin(med_2_datasets)]
med_2_df = med_2_df[med_2_df["method"].isin(basic_methods)]

# relplot is figure-level and always builds its own figure; it does not take an
# `ax` argument (seaborn warns and drops it). The call therefore no longer
# passes the `ax` left over from an earlier cell, so this cell runs on its own.
# `ax` is the returned FacetGrid.
ax = sns.relplot(data=med_2_df, x="alpha", y="efficiency", hue="method", col="dataset", kind="line")
ax.savefig("figures/med_2_datasets_efficiency.pdf")

In [None]:
# Large datasets: efficiency against alpha for Flickr.
large_datasets = ["Flickr"]
keep_rows = full_df["dataset"].isin(large_datasets) & full_df["method"].isin(basic_methods)
large_df = full_df[keep_rows]

# `ax` is the FacetGrid returned by the figure-level relplot.
ax = sns.relplot(
    data=large_df,
    x="alpha",
    y="efficiency",
    hue="method",
    col="dataset",
    kind="line",
)
ax.savefig("figures/large_datasets_nspc_efficiency.pdf")

In [None]:
# OGB datasets: efficiency against alpha (only ogbn-arxiv is selected).
ogb_datasets = ["ogbn-arxiv"]
keep_rows = full_df["dataset"].isin(ogb_datasets) & full_df["method"].isin(basic_methods)
ogb_df = full_df[keep_rows]

# `ax` is the FacetGrid returned by the figure-level relplot.
ax = sns.relplot(
    data=ogb_df,
    x="alpha",
    y="efficiency",
    hue="method",
    col="dataset",
    kind="line",
)
ax.savefig("figures/ogb_datasets_nspc_efficiency.pdf")

In [None]:
# CiteSeer: box plots of the two stratified-coverage metrics at alpha = 0.1.
small_datasets = ["CiteSeer"]
# NOTE(review): "naps" is not one of the names in `basic_methods` (which lists
# naps_uniform / naps_hyperbolic / naps_exponential) - confirm it matches a
# value in full_df.method.
methods = ["tps", "tps_classwise", "aps_randomized", "raps", "naps"]
keep_rows = full_df["dataset"].isin(small_datasets) & full_df["method"].isin(methods)
small_df = full_df[keep_rows]

sns.set(rc={"figure.figsize": (14, 12)}, font_scale=1.3, style="white")
alpha_01_df = small_df[small_df["alpha"] == 0.1]
for metric, pdf_path in [
    ("label_stratified_coverage", "figures/split/citeseer_label_stratified_coverage.pdf"),
    ("size_stratified_coverage", "figures/split/citeseer_size_stratified_coverage.pdf"),
]:
    # catplot is figure-level; `ax` is the returned FacetGrid.
    ax = sns.catplot(alpha_01_df, x=metric, y="method", kind="box")
    ax.savefig(pdf_path)

In [None]:
# Cora: box plots of the two stratified-coverage metrics at alpha = 0.1.
small_datasets = ["Cora"]
# NOTE(review): "naps" is not one of the names in `basic_methods` (which lists
# naps_uniform / naps_hyperbolic / naps_exponential) - confirm it matches a
# value in full_df.method.
methods = ["tps", "tps_classwise", "aps_randomized", "raps", "naps"]
small_df = full_df[full_df["dataset"].isin(small_datasets)]
small_df = small_df[small_df["method"].isin(methods)]

# Label each row with its "<train_frac>_<valid_frac>" split. The column is added
# with .assign, which returns a new frame, instead of writing into a filtered
# slice of full_df (that form triggers pandas' SettingWithCopyWarning).
# Needs the train_frac / valid_frac columns, which only the split loader adds.
small_df = small_df.assign(
    split=lambda frame: frame["train_frac"].astype(str) + "_" + frame["valid_frac"].astype(str)
)

sns.set(rc={"figure.figsize": (14, 12)}, font_scale=1.3, style="white")
# catplot is figure-level; `ax` is the returned FacetGrid.
ax = sns.catplot(small_df[(small_df.alpha==0.1)], x="label_stratified_coverage", y="method", kind="box")
ax.savefig("figures/split/cora_label_stratified_coverage.pdf")

ax = sns.catplot(small_df[(small_df.alpha==0.1)], x="size_stratified_coverage", y="method", kind="box")
ax.savefig("figures/split/cora_size_stratified_coverage.pdf")

## Number of samples per class (nspc)

In [None]:
# Root of the sweep output; every sub-directory name is used as a dataset name.
results_dir = "../graphconformal_sweep/"
# os.listdir also returns plain files (e.g. .DS_Store) and gives no ordering
# guarantee, so keep directories only and sort for a reproducible order.
datasets = sorted(
    entry
    for entry in os.listdir(results_dir)
    if os.path.isdir(os.path.join(results_dir, entry))
)

In [None]:
# Load the samples-per-class sweep: one DataFrame per result file found under
# <results_dir>/<dataset>/nspc/<nspc>/alpha_<alpha>/, each tagged with the
# configuration it was read from. Configurations missing on disk are skipped.
all_nspc_results = []
nspc_values = [10, 20, 40, 80]
alpha_values = [0.1, 0.2, 0.3, 0.4]
for dataset in datasets:
    for nspc in nspc_values:
        for alpha in alpha_values:
            split_dir = os.path.join(results_dir, dataset, "nspc", str(nspc), f"alpha_{alpha}")
            if os.path.exists(split_dir):
                methods = os.listdir(split_dir)
                for method in methods:
                    # Files with "_params" in their name are not read as results.
                    if "_params" in method:
                        continue
                    # Method name = file name up to the first dot.
                    method_name = method.split(".")[0]
                    result_file = os.path.join(split_dir, method)
                    results = pd.read_csv(result_file).assign(
                        dataset=dataset,
                        nspc=nspc,
                        alpha=alpha,
                        method=method_name,
                    )
                    all_nspc_results.append(results)

In [None]:
# Stack every per-file frame into one table. Each frame comes from read_csv
# with its own default 0..n-1 index, so renumber the rows instead of carrying
# repeated index labels into the filtering and plotting cells.
ns_df = pd.concat(all_nspc_results, ignore_index=True)

In [None]:
# Amazon_Photos: per-nspc box plots of the stratified-coverage metrics at
# alpha = 0.1, restricted to `basic_methods`.
small_datasets = ["Amazon_Photos"]
small_df = ns_df[ns_df["dataset"].isin(small_datasets)]
small_df = small_df[small_df["method"].isin(basic_methods)]

for nspc in [10, 20, 40]:
    tdf = small_df[(small_df.nspc == nspc)]
    alpha_df = tdf[(tdf.alpha==0.1)]
    for metric in ["label_stratified_coverage", "size_stratified_coverage"]:
        # catplot is figure-level and creates its own figure, so no
        # plt.figure() beforehand (that only left an empty figure behind).
        # Saving through the returned FacetGrid writes exactly this plot
        # instead of whatever pyplot considers current; FacetGrid.savefig
        # uses a tight bounding box by default.
        grid = sns.catplot(alpha_df, x=metric, y="method", kind="box")
        grid.savefig(f"figures/nspc/amazon_photos_{nspc}_{metric}.pdf")

In [None]:
# Flickr: per-nspc box plots of the stratified-coverage metrics at
# alpha = 0.1, restricted to `basic_methods`.
large_datasets = ["Flickr"]
large_df = ns_df[ns_df["dataset"].isin(large_datasets)]
large_df = large_df[large_df["method"].isin(basic_methods)]

for nspc in [10, 20, 40, 80]:
    tdf = large_df[(large_df.nspc == nspc)]
    alpha_df = tdf[(tdf.alpha==0.1)]
    for metric in ["label_stratified_coverage", "size_stratified_coverage"]:
        # catplot is figure-level and creates its own figure, so no
        # plt.figure() beforehand (that only left an empty figure behind).
        grid = sns.catplot(alpha_df, x=metric, y="method", kind="box")
        # The files are named after Flickr. They used to be written as
        # amazon_photos_<nspc>_*.pdf, overwriting the Amazon_Photos figures.
        # FacetGrid.savefig uses a tight bounding box by default.
        grid.savefig(f"figures/nspc/flickr_{nspc}_{metric}.pdf")

## APS: randomized vs. non-randomized

In [None]:
# Split sweep: efficiency distribution of "aps" vs "aps_randomized" per
# dataset, drawn as split violins. Cora goes into its own figure.
sns.set(rc={'figure.figsize': (15,10)}, font_scale=1, style="white")
aps_df = full_df[full_df.method.isin(["aps", "aps_randomized"])]

for subset, pdf_path in [
    (aps_df[aps_df.dataset != "Cora"], "figures/split/aps_randomized_efficiency.pdf"),
    (aps_df[aps_df.dataset == "Cora"], "figures/split/aps_randomized_efficiency_cora.pdf"),
]:
    # catplot is figure-level and creates its own figure, so no plt.figure()
    # beforehand (that only left an empty figure behind). Saving through the
    # returned FacetGrid writes exactly this plot; FacetGrid.savefig uses a
    # tight bounding box by default.
    grid = sns.catplot(subset, x="efficiency", y="dataset", hue="method", split=True, kind="violin", gap=.1, inner="quart")
    grid.savefig(pdf_path)

In [None]:
# nspc sweep: efficiency distribution of "aps" vs "aps_randomized" per
# dataset, drawn as split violins. Cora goes into its own figure.
aps_df = ns_df[ns_df.method.isin(["aps", "aps_randomized"])]

for subset, pdf_path in [
    (aps_df[aps_df.dataset != "Cora"], "figures/nspc/aps_randomized_efficiency.pdf"),
    (aps_df[aps_df.dataset == "Cora"], "figures/nspc/aps_randomized_efficiency_cora.pdf"),
]:
    # catplot is figure-level and creates its own figure, so no plt.figure()
    # beforehand (that only left an empty figure behind). Saving through the
    # returned FacetGrid writes exactly this plot; FacetGrid.savefig uses a
    # tight bounding box by default.
    grid = sns.catplot(subset, x="efficiency", y="dataset", hue="method", split=True, kind="violin", gap=.1, inner="quart")
    grid.savefig(pdf_path)

## DAPS vs DTPS

In [None]:
# Split sweep, alpha = 0.1: "daps" vs "dtps" on four metrics, one bar chart
# each for PubMed and Cora.
sns.set(rc={'figure.figsize': (15,10)}, font_scale=1.5, style="white")
methods = ["daps", "dtps"]
dps_df = full_df[(full_df.method.isin(methods)) & (full_df.alpha == 0.1)]
# Long format: one row per (dataset, method, metric, value) so the metric can
# sit on the y axis.
melted = dps_df.melt(id_vars=["dataset", "method"], var_name="metric", value_name="value", value_vars=["efficiency", "coverage", "label_stratified_coverage", "size_stratified_coverage"])

# The earlier plt.tight_layout() ran before any figure existed, so it only
# created an empty figure; bbox_inches="tight" on savefig already trims the
# output, so the call is dropped.
for dataset_name, pdf_path in [
    ("PubMed", "figures/split/daps_dtps_pubmed.pdf"),
    ("Cora", "figures/split/daps_dtps_cora.pdf"),
]:
    # barplot is axes-level: draw on an explicit Axes and save its Figure.
    fig, bar_ax = plt.subplots()
    sns.barplot(data=melted[melted.dataset == dataset_name], x="value", y="metric", hue="method", ax=bar_ax)
    fig.savefig(pdf_path, bbox_inches = "tight")

In [None]:
# nspc sweep: "daps" vs "dtps" on four metrics, one bar chart per alpha.
sns.set(rc={'figure.figsize': (15,10)}, font_scale=1, style="white")

methods = ["daps", "dtps"]
# One pass per alpha replaces two copy-pasted blocks; the file names come out
# as daps_dtps_0.1.pdf and daps_dtps_0.2.pdf.
for alpha in [0.1, 0.2]:
    dps_df = ns_df[(ns_df.method.isin(methods)) & (ns_df.alpha == alpha)]
    # Long format: one row per (dataset, method, metric, value).
    melted = dps_df.melt(id_vars=["dataset", "method"], var_name="metric", value_name="value", value_vars=["efficiency", "coverage", "label_stratified_coverage", "size_stratified_coverage"])

    # catplot is figure-level and creates its own figure, so no plt.figure()
    # beforehand (that only left an empty figure behind). Saving through the
    # returned FacetGrid writes exactly this plot; FacetGrid.savefig uses a
    # tight bounding box by default.
    grid = sns.catplot(data=melted, x="value", y="metric", hue="method", kind="bar")
    grid.savefig(f"figures/nspc/daps_dtps_{alpha}.pdf")

## CFGNN

In [None]:
# Distinct method names present in the loaded results.
full_df["method"].unique()

In [None]:
# Split sweep: efficiency of "cfgnn_aps" vs "cfgnn_orig" per dataset.
sns.set(rc={'figure.figsize': (18,10)}, font_scale=1, style="white")

full_cf = full_df[full_df.method.isin(["cfgnn_aps", "cfgnn_orig"])]

# catplot is figure-level and creates its own figure, so no plt.figure()
# beforehand (that only left an empty figure behind). Saving through the
# returned FacetGrid writes exactly this plot; FacetGrid.savefig uses a tight
# bounding box by default.
grid = sns.catplot(data=full_cf, x="efficiency", y="dataset", hue="method", kind="bar")
grid.savefig("figures/split/cfgnn_aps_vs_orig_efficiency.pdf")

In [None]:
# nspc sweep: efficiency of "cfgnn_aps" vs "cfgnn_orig" per dataset, one
# figure for each of nspc = 10 and nspc = 20.
n_cf = ns_df[ns_df.method.isin(["cfgnn_aps", "cfgnn_orig"])]

for nspc in [10, 20]:
    # catplot is figure-level and creates its own figure, so no plt.figure()
    # beforehand (that only left an empty figure behind). Saving through the
    # returned FacetGrid writes exactly this plot; FacetGrid.savefig uses a
    # tight bounding box by default.
    grid = sns.catplot(data=n_cf[n_cf.nspc == nspc], x="efficiency", y="dataset", hue="method", kind="bar")
    grid.savefig(f"figures/nspc/cfgnn_aps_vs_orig_efficiency_{nspc}.pdf")
