# Results part 1

This will constitute the first part of the results section. The main point to make here is: "On highly structured graphs such as the ones used here, MMD behaves well."

In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
from pyprojroot import here
from pathlib import Path
import matplotlib.pyplot as plt

In [None]:
# CSV files with the gaussian-noise MMD results for the clustering-histogram
# descriptor, one per eps_graph setting (8, 16, 32), resolved from the
# project root returned by here().
clustering_eps_8 = here().joinpath(
    "data/systematic/human/fixed_length_kernels/eps_graph/8/gaussian_noise/clustering_histogram/gaussian_noise_mmds.csv"
)
clustering_eps_16 = here().joinpath(
    "data/systematic/human/fixed_length_kernels/eps_graph/16/gaussian_noise/clustering_histogram/gaussian_noise_mmds.csv"
)
clustering_eps_32 = here().joinpath(
    "data/systematic/human/fixed_length_kernels/eps_graph/32/gaussian_noise/clustering_histogram/gaussian_noise_mmds.csv"
)


In [None]:
def _kernel_title(kernel_name: str) -> str:
    """Return a readable facet title for one kernel column name."""
    if "linear" in kernel_name:
        return "Linear Kernel"
    if "sigma" in kernel_name:
        # Kernel columns look like "sigma=0.01"; keep only the value after
        # the "=" and print it compactly when it parses as a number.
        _, _, raw_sigma = kernel_name.partition("=")
        try:
            sigma_text = f"{float(raw_sigma):g}"
        except ValueError:
            sigma_text = raw_sigma.strip() or kernel_name
        return r"RBF Kernel $\sigma$ = " + sigma_text
    return kernel_name


def build_plot(path: Path) -> None:
    """Plot min-max normalised MMD against perturbation level per kernel.

    Reads the CSV at ``path``, rescales every kernel column (any column whose
    name contains ``"sigma"`` or ``"linear"``) to ``[0, 1]``, reshapes the
    table to long form and draws one line-plot facet per kernel.

    Args:
        path: CSV file with ``run`` and ``perturb`` columns plus one column
            per kernel configuration.

    Returns:
        None. The figure is created as a side effect on the current
        matplotlib state.
    """
    df = pd.read_csv(path)
    kernel_cols = [
        col for col in df.columns if "sigma" in col or "linear" in col
    ]
    for col in kernel_cols:
        lowest = df[col].min()
        value_range = df[col].max() - lowest
        if value_range == 0:
            # A constant column would otherwise become 0/0 = NaN and render
            # as an empty panel; show it as a flat line at 0 instead.
            df[col] = 0.0
        else:
            df[col] = (df[col] - lowest) / value_range

    x_label = r"Std. ($\AA$)"
    df = df.melt(
        id_vars=["run", "perturb"], var_name="kernel", value_name="mmd"
    )
    df = df.rename(columns={"perturb": x_label, "mmd": "MMD"})

    # One Axes per kernel configuration, four facets per row.
    palette = sns.color_palette("mako_r", df.kernel.nunique())
    # NOTE(review): newer seaborn releases deprecate `ci` in favour of
    # `errorbar=("ci", 100)`; kept as-is because the installed seaborn
    # version is not visible from here.
    g = sns.relplot(
        data=df,
        x=x_label,
        y="MMD",
        col="kernel",
        hue="kernel",
        kind="line",
        col_wrap=4,
        height=2.7,
        aspect=0.8,
        palette=palette,
        ci=100,
    )

    # hue duplicates col, so the legend carries no extra information.
    plt.legend([], [], frameon=False)
    if g.legend is not None:
        g.legend.remove()

    # Title each facet from the kernel it actually shows rather than from a
    # positional list, so the labels stay right for any number or order of
    # kernel columns in the CSV.
    for ax, kernel_name in zip(g.axes.flatten(), g.col_names):
        ax.set_title(_kernel_title(kernel_name))
    plt.tight_layout()

In [None]:
# Draw the per-kernel MMD facets for each clustering-histogram result file
# (eps_graph 8, 16 and 32), in that order.
for mmd_csv in (clustering_eps_8, clustering_eps_16, clustering_eps_32):
    build_plot(mmd_csv)

In [None]:
# CSV files with the gaussian-noise MMD results for the degree-histogram
# descriptor, one per eps_graph setting (8, 16, 32).
degree_eps_8 = here().joinpath(
    "data/systematic/human/fixed_length_kernels/eps_graph/8/gaussian_noise/degree_histogram/gaussian_noise_mmds.csv"
)
degree_eps_16 = here().joinpath(
    "data/systematic/human/fixed_length_kernels/eps_graph/16/gaussian_noise/degree_histogram/gaussian_noise_mmds.csv"
)
degree_eps_32 = here().joinpath(
    "data/systematic/human/fixed_length_kernels/eps_graph/32/gaussian_noise/degree_histogram/gaussian_noise_mmds.csv"
)

# Draw the per-kernel MMD facets for each file, in the same 8/16/32 order.
for mmd_csv in (degree_eps_8, degree_eps_16, degree_eps_32):
    build_plot(mmd_csv)


In [None]:
def normalize(df: pd.DataFrame) -> pd.DataFrame:
    """Min-max rescale every column except the run identifier to ``[0, 1]``.

    Columns whose name contains ``"run"`` are left untouched. A column whose
    values are all equal has no range to scale by and is set to 0 rather
    than becoming NaN through a 0/0 division.

    Args:
        df: Table to rescale. It is modified in place.

    Returns:
        The same ``df`` object, for call chaining.
    """
    cols = [col for col in df.columns if "run" not in col]
    for col in cols:
        lowest = df[col].min()
        value_range = df[col].max() - lowest
        if value_range == 0:
            df[col] = 0.0
        else:
            df[col] = (df[col] - lowest) / value_range
    return df

In [None]:
# Load the eps_graph/8 clustering-histogram MMD results for every
# perturbation type; each table is passed through normalize() on load.
add_edges = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/add_edges/clustering_histogram/add_edges_mmds.csv"))
gaussian_noise = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/gaussian_noise/clustering_histogram/gaussian_noise_mmds.csv"))
remove_edges = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/remove_edges/clustering_histogram/removedge_mmds.csv"))
rewire_edges = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/rewire_edges/clustering_histogram/rewireedge_mmds.csv"))
shear = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/shear/clustering_histogram/shear_mmds.csv"))
taper = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/taper/clustering_histogram/taper_mmds.csv"))
twist = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/twist/clustering_histogram/twist_mmds.csv"))

# Overlay the sigma=0.01 curve of every perturbation on one Axes. Each line
# is labelled so the curves can be told apart in the legend.
sns.lineplot(data=add_edges, x="perturb", y="sigma=0.01", label="Add edges")
sns.lineplot(data=remove_edges, x="perturb", y="sigma=0.01", label="Remove edges")
sns.lineplot(data=rewire_edges, x="perturb", y="sigma=0.01", label="Rewire edges")
sns.lineplot(data=shear, x="perturb", y="sigma=0.01", label="Shear")
sns.lineplot(data=taper, x="perturb", y="sigma=0.01", label="Taper")
sns.lineplot(data=twist, x="perturb", y="sigma=0.01", label="Twist")
sns.lineplot(data=gaussian_noise, x="perturb", y="sigma=0.01", label="Gaussian noise")
plt.legend(title="Perturbation")
plt.title("Clustering Histogram")


In [None]:
# Load the eps_graph/8 degree-histogram MMD results for every perturbation
# type; each table is passed through normalize() on load.
add_edges = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/add_edges/degree_histogram/add_edges_mmds.csv"))
add_edges.sort_values(by=["perturb", "run"], inplace=True)
gaussian_noise = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/gaussian_noise/degree_histogram/gaussian_noise_mmds.csv"))
remove_edges = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/remove_edges/degree_histogram/removedge_mmds.csv"))
rewire_edges = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/rewire_edges/degree_histogram/rewireedge_mmds.csv"))
shear = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/shear/degree_histogram/shear_mmds.csv"))
taper = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/taper/degree_histogram/taper_mmds.csv"))
twist = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/twist/degree_histogram/twist_mmds.csv"))

# Overlay the sigma=0.01 curve of every perturbation on one Axes. Each line
# is labelled so the curves can be told apart in the legend.
sns.lineplot(data=add_edges, x="perturb", y="sigma=0.01", label="Add edges")
sns.lineplot(data=remove_edges, x="perturb", y="sigma=0.01", label="Remove edges")
sns.lineplot(data=rewire_edges, x="perturb", y="sigma=0.01", label="Rewire edges")
sns.lineplot(data=shear, x="perturb", y="sigma=0.01", label="Shear")
sns.lineplot(data=taper, x="perturb", y="sigma=0.01", label="Taper")
sns.lineplot(data=twist, x="perturb", y="sigma=0.01", label="Twist")
sns.lineplot(data=gaussian_noise, x="perturb", y="sigma=0.01", label="Gaussian noise")
plt.legend(title="Perturbation")
plt.title("Degree Histogram")

In [None]:
# Load the eps_graph/8 laplacian-spectrum-histogram MMD results for every
# perturbation type; each table is passed through normalize() on load.
add_edges = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/add_edges/laplacian_spectrum_histogram/add_edges_mmds.csv"))
add_edges.sort_values(by=["perturb", "run"], inplace=True)
gaussian_noise = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/gaussian_noise/laplacian_spectrum_histogram/gaussian_noise_mmds.csv"))
remove_edges = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/remove_edges/laplacian_spectrum_histogram/remove_edges_mmds.csv"))
rewire_edges = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/rewire_edges/laplacian_spectrum_histogram/rewire_edges_mmds.csv"))
shear = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/shear/laplacian_spectrum_histogram/shear_mmds.csv"))
taper = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/taper/laplacian_spectrum_histogram/taper_mmds.csv"))
twist = normalize(pd.read_csv(here() / "data/systematic/human/fixed_length_kernels/eps_graph/8/twist/laplacian_spectrum_histogram/twist_mmds.csv"))

# Overlay the sigma=0.01 curve of every perturbation on one Axes. Each line
# is labelled so the curves can be told apart in the legend.
sns.lineplot(data=add_edges, x="perturb", y="sigma=0.01", label="Add edges")
sns.lineplot(data=remove_edges, x="perturb", y="sigma=0.01", label="Remove edges")
sns.lineplot(data=rewire_edges, x="perturb", y="sigma=0.01", label="Rewire edges")
sns.lineplot(data=shear, x="perturb", y="sigma=0.01", label="Shear")
sns.lineplot(data=taper, x="perturb", y="sigma=0.01", label="Taper")
sns.lineplot(data=twist, x="perturb", y="sigma=0.01", label="Twist")
sns.lineplot(data=gaussian_noise, x="perturb", y="sigma=0.01", label="Gaussian noise")
plt.legend(title="Perturbation")
plt.title("Laplacian Histogram")