In [None]:
import pandas as pd
from process_bulk import ProcessBulk
import matplotlib.pyplot as plt
import seaborn as sns
import evaluation_helpers
from process_geography import Ward, LocalAuthority
import os
from copy import deepcopy
import numpy as np
from scipy import stats
import create_plots

In [None]:
pd.options.mode.copy_on_write = True
pd.set_option("future.no_silent_downcasting", True)

In [None]:
# Folder, ward-level subfolder and index workbook of the bulk census tables.
bulk_folder_name = "bulk_data"
ward_folder_name_tenure = "dc4201ew_htward"
index_sheet_name = "Cell Numbered DC Tables 3.2.xlsx"

# Folder, workbook and sheet holding the 2011 census confidence intervals.
cl_folder_name = "confidence_intervals"
cl_sheet_name = "2011censusconfidenceintervalsdec2013_tcm77-271638.xlsx"
cl_table_name = "95% CI widths (LA level)"

# Every figure produced by this notebook is saved below this folder.
fig_folder_name = "figs/population_analysis/comparison"

# Reader for the confidence-interval workbook.
p_cl = ProcessBulk(bulk_folder=cl_folder_name, index_sheet=cl_sheet_name)

# Reader for the bulk census tables; the name `p` is used by later cells.
p = ProcessBulk(bulk_folder=bulk_folder_name, index_sheet=index_sheet_name)

# Tenure by ethnic group by age - Household Reference Persons
table_name_tenure = "DC4201EW"

In [None]:
# list of ethnicities for filtering
# The five aggregate ("...: Total") ethnic groups.
general_ethnicities = [
    "White: Total",
    "Mixed/multiple ethnic group: Total",
    "Asian/Asian British: Total",
    "Black/African/Caribbean/Black British: Total",
    "Other ethnic group: Total",
]
# The 18 detailed ethnic groups, each prefixed with its aggregate group.
detailed_ethnicities = [
    "White: English/Welsh/Scottish/Northern Irish/British",
    "White: Irish",
    "White: Gypsy or Irish Traveller",
    "White: Other White",
    "Mixed/multiple ethnic group: White and Black Caribbean",
    "Mixed/multiple ethnic group: White and Black African",
    "Mixed/multiple ethnic group: White and Asian",
    "Mixed/multiple ethnic group: Other Mixed",
    "Asian/Asian British: Indian",
    "Asian/Asian British: Pakistani",
    "Asian/Asian British: Bangladeshi",
    "Asian/Asian British: Chinese",
    "Asian/Asian British: Other Asian",
    "Black/African/Caribbean/Black British: African",
    "Black/African/Caribbean/Black British: Caribbean",
    "Black/African/Caribbean/Black British: Other Black",
    "Other ethnic group: Arab",
    "Other ethnic group: Any other ethnic group",
]


# Shortened labels, one per entry of detailed_ethnicities and in the same order.
# NOTE(review): four entries (the first three "White: ..." ones and
# "Mixed/multiple ethnic group: Other Mixed") keep their group prefix - confirm
# this is intentional.
detailed_ethnicities_simplified = [
    "White: English/Welsh/Scottish/Northern Irish/British",
    "White: Irish",
    "White: Gypsy or Irish Traveller",
    "Other White",
    "White and Black Caribbean",
    "White and Black African",
    "White and Asian",
    "Mixed/multiple ethnic group: Other Mixed",
    "Indian",
    "Pakistani",
    "Bangladeshi",
    "Chinese",
    "Other Asian",
    "African",
    "Caribbean",
    "Other Black",
    "Arab",
    "Any other ethnic group",
]

# Tenure categories for filtering: the all-categories entry first, then each
# aggregate ("...: Total") category followed by its detailed categories.
all_tenure = [
    "All categories: Tenure",
    "Owned or shared ownership: Total",
    "Owned: Owned outright",
    "Owned: Owned with a mortgage or loan or shared ownership",
    "Social rented: Total",
    "Social rented: Rented from council (Local Authority)",
    "Social rented: Other social rented",
    "Private rented or living rent free: Total",
    "Private rented: Private landlord or letting agency",
    "Private rented: Other private rented or living rent free",
]
# The aggregate categories are exactly the entries ending in ": Total".
general_tenure = [tenure for tenure in all_tenure if tenure.endswith(": Total")]
# Everything after the leading all-categories entry that is not an aggregate.
detailed_tenure = [
    tenure for tenure in all_tenure[1:] if tenure not in general_tenure
]

In [None]:
# mu values passed to the KL-divergence helpers.
mus = [1.0, 0.5, 0.1, 0.01, 0.001, 0.0001]
# Full epsilon grid, plus two shorter grids taken from its low end.
epsilons = [0.001, 0.01, 0.1, 1, 3, 5, 10]
epsilons_short = epsilons[:4]
epsilons_shorter = epsilons[1:4]

In [None]:
column_names = ["Age", "Tenure", "EthnicGroup", "Dataset"]

# The ward (level=6) and local-authority (level=5) tables are read with
# otherwise identical settings, so both come from one parameterised call.
dfs_ward, dfs_la = [
    p.set_up(
        table_name=table_name_tenure,
        df_type="nested",
        column_names=column_names,
        num_nested_category=10,
        subfolder=ward_folder_name_tenure,
        level=geo_level,
    )
    for geo_level in (6, 5)
]

In [None]:
# Geography helpers from process_geography. Later cells call methods on both
# objects, so the names `ward` and `local_authority` must keep pointing at them.
ward = Ward()
local_authority = LocalAuthority()
# geo lookup file is the same for region and ward
geo_lookup = ward.get_geo_lookup_ward()

In [None]:
# colorschemes from https://personal.sron.nl/~pault/#fig:scheme_bright

# Palettes with 2 to 5 entries; each longer list appends one colour to colors_3.
colors_2 = ["#DDAA33", "#BB5566"]
colors_3 = ["#004488", "#DDAA33", "#BB5566"]
colors_4 = ["#004488", "#DDAA33", "#BB5566", "#1B7837"]
colors_5 = ["#004488", "#DDAA33", "#BB5566", "#1B7837", "#EE7733"]  # #CC3311
# 7-colour palette.
colors_bright = [
    "#4477AA",
    "#EE6677",
    "#228833",
    "#CCBB44",
    "#66CCEE",
    "#AA3377",
    "#BBBBBB",
]
# 7-colour palette.
colors_vibrant = [
    "#EE7733",
    "#0077BB",
    "#33BBEE",
    "#EE3377",
    "#CC3311",
    "#009988",
    "#BBBBBB",
]
# 9-colour palette.
colors_muted = [
    "#CC6677",
    "#332288",
    "#DDCC77",
    "#117733",
    "#88CCEE",
    "#882255",
    "#44AA99",
    "#999933",
    "#AA4499",
]
# One red followed by three greys.
colors_grey = ["#EF233C", "#B7BFCC", "#8894AA", "#2B2F42"]

In [None]:
# The three wards chosen for the diversity comparison: codes, plain names and
# labels annotated with the diversity level, all in the same order.
ward_codes = ["E36002358", "E36000439", "E36003322"]
ward_names = ["Teign Valley", "Newport East", "Nash Mills"]
ward_names_diversity = [
    "Teign Valley (Low Diversity)",
    "Newport East (Medium Diversity)",
    "Nash Mills (High Diversity)",
]

# ward with the smallest population in England (excluding Wales)
smallest_ward = ["E36001940"]
# ward with the largest population
largest_ward = ["E36006779"]
ward_names_large_small = ["Ennerdale", "Ladywood"]
ward_labels_large_small = ["Ennerdale (Small Ward)", "Ladywood (Large Ward)"]

# Extended selection: the three diversity wards, then the smallest ward, then
# the largest ward.
ward_codes_extended = ward_codes + smallest_ward + largest_ward
ward_names_extended = ward_names + ward_names_large_small
ward_labels_extended = ward_names_diversity + ward_labels_large_small

In [None]:
# Names of the RMSE metrics; rmse_dp_list is the subset whose names contain "dp".
rmse_list = ["rmse_data_error", "rmse_dp", "rmse_data_error_dp"]
rmse_dp_list = ["rmse_dp", "rmse_data_error_dp"]
# Names of the "significantly increased/decreased" counters plus the two
# ethnicity/minority counts.
inc_dec_total_list = [
    "significantly_decreased_data_error",
    "significantly_increased_data_error",
    "significantly_decreased",
    "significantly_increased",
    "significantly_decreased_data_error_dp",
    "significantly_increased_data_error_dp",
    "significantly_decreased_minority",
    "significantly_increased_minority",
    "significantly_decreased_data_error_dp_minority",
    "significantly_increased_data_error_dp_minority",
    "significantly_increased_zero",
    "significantly_increased_data_error_dp_zero",
    "significantly_decreased_zero",
    "significantly_decreased_data_error_dp_zero",
    "number_ethnicities",
    "number_minorities",
    "significantly_increased_data_error_zero",
    "significantly_increased_data_error_minority",
    "significantly_decreased_data_error_zero",
    "significantly_decreased_data_error_minority",
]
# Names of the population share / population count columns, split into a plain
# list and a "dp" list.
population_list = ["total %", "data error %"]
population_list_dp = ["dp %", "dp data error %"]
population_numbers_list = ["PopulationNumbers", "PopulationNumbersDataError"]
population_numbers_list_dp = ["PopulationNumbersDP", "PopulationNumbersDataErrorDP"]
inc_dec_list = [
    "significantly_decreased_data_error",
    "significantly_increased_data_error",
    "significantly_increased_data_error_zero",
    "significantly_decreased_data_error_zero",
]
# NOTE(review): the "data_error" names below use an upper-case "_DP" suffix,
# whereas inc_dec_total_list spells the same metrics with lower-case "_dp".
# Confirm which spelling the metrics frames actually use.
inc_dec_list_dp = [
    "significantly_decreased",
    "significantly_decreased_zero",
    "significantly_increased",
    "significantly_increased_zero",
    "significantly_decreased_data_error_DP",
    "significantly_increased_data_error_DP",
    "significantly_increased_data_error_DP_zero",
    "significantly_decreased_data_error_DP_zero",
]
# Names of the KL-divergence metrics.
kl_list = ["kl_divergence", "kl_divergence_data_error", "kl_divergence_data_error_dp"]
# Same three wards as ward_names_diversity, with lower-case annotations.
labels_wards = [
    "Teign Valley (low diversity)",
    "Newport East (medium diversity)",
    "Nash Mills (high diversity)",
]


# Feature-name groups; presumably used when building summary tables (not used
# in the visible part of this notebook).
table_features_pop = ["total", "total_dp"]
table_features_worst_case = ["largest_observed", "lowest_observed"]
table_features_pop_data_error = [
    "total",
    "total_data_error",
    "total_dp",
    "total_data_error_dp",
]

In [None]:
# Read the confidence intervals for all local authorities from the sheet named
# by cl_table_name ("95% CI widths (LA level)") of the confidence-interval workbook.
sheet_cl = p_cl.read_cl(cl_table_name)

In [None]:
# Local-authority view of the level-5 table, built with the geography lookup.
df_la = local_authority.get_local_authority(dfs_la["csv_df"], geo_lookup)

In [None]:
# Ward view of the level-6 table, built with the geography lookup.
# `ward` must still be the Ward() instance created earlier when this cell runs.
df_ward = ward.get_ward(dfs_ward["csv_df"], geo_lookup)

Now I load the five wards chosen for analysis: the three diversity wards plus the smallest and the largest ward.

In [None]:
# Keep only the all-ages, all-tenure rows for the five aggregate ethnic groups.
filter_dict_ward = {
    "Age": ["All categories: Age"],
    "Tenure": ["All categories: Tenure"],
    "EthnicGroup": general_ethnicities,
}

# Filtered data for each of the five wards in ward_codes_extended.
# Relies on `p` (ProcessBulk) and `ward` (Ward) still being the helper objects
# created at the top of the notebook.
wards = evaluation_helpers.get_filtered_df_ward_dict(
    p, ward, df_ward, ward_codes_extended, dfs_ward["lookup_df"], filter_dict_ward
)

In [None]:
# Epsilon grid for the comparison, taken from the "epsilon" column of the CSV.
eps = pd.read_csv("epsilons_comparison.csv")
epsilons_comparison = eps.loc[:, "epsilon"].tolist()

In [None]:
# Single seeded run (random_state=1) with the "geometric" setting, clipping
# enabled and rounding disabled, over every epsilon in epsilons_comparison.
wards_dp_geo_clip, metrics_df_geo_clip = evaluation_helpers.set_up_measurements_wards(
    wards,
    df_ward,
    sheet_cl,
    "geometric",
    epsilons_comparison,
    delta=0,
    sensitivity=2,
    clipping=True,
    rounding=False,
    random_state=1,
)
# KL divergence of that single run for every ward, epsilon and mu.
measurement_kl_divergence_df_geo_clip = evaluation_helpers.measure_kl_divergence(
    wards_dp_geo_clip, ward_codes_extended, epsilons_comparison, mus
)

In [None]:
# 25 repeated runs with the same settings as the single run.
# NOTE(review): no random_state is passed here, unlike the single run, so the
# repeats are presumably unseeded and not reproducible - confirm in the helper.
experiments_geo_clip = evaluation_helpers.set_up_measurements_wards_repeat(
    25,
    wards,
    df_ward,
    sheet_cl,
    "geometric",
    epsilons_comparison,
    delta=0,
    sensitivity=2,
    clipping=True,
    rounding=False,
)
# KL divergence for every repeat.
experiments_kl_divergence_df_geo_clip = (
    evaluation_helpers.measure_kl_divergence_experiments(
        experiments_geo_clip, ward_codes_extended, epsilons_comparison, mus
    )
)
# Frame of the repeated KL results; the plotting cells read its "epsilon",
# "value", "area_name", "mu" and "kl_divergence" columns.
df_clip_kl = evaluation_helpers.make_experiment_df_kl(
    experiments_kl_divergence_df_geo_clip,
    ward_names_extended,
    kl_list,
    epsilons_comparison,
    mus,
)

In [None]:
# Disabled cell: the export code below sits inside a single string literal, so
# none of it runs and nothing is written to trial_csvs.
""" for i in range(25):
    df_metrics = experiments_geo_clip[i]["metrics_df"]
    df_metrics.to_csv(
        os.path.join(
            "trial_csvs",
            f"trial_metrics_general_{i}.csv",
        ),
        index=True,
    )
    for code in ward_codes_extended:
        df_wards = experiments_geo_clip[i]["wards_dp"][code][0]
        df_wards.to_csv(
            os.path.join(
                "trial_csvs",
                f"trial_wards_general{code}_{i}.csv",
            ),
            index=True,
        )

df_clip_kl.to_csv(
    os.path.join(
        "trial_csvs", f"df_kl.csv"
    ),
    index=True,
) """

In [None]:
# Disabled cell: the read below is a string literal, so df_clip_kl is not
# reloaded from disk here.
""" df_clip_kl = pd.read_csv(os.path.join(
        "trial_csvs", f"df_kl.csv")) """

In [None]:
# Disabled cell: the reload code below is a string literal and never runs.
# NOTE(review): it stores one frame per ward code, while the disabled export
# cell reads experiments_geo_clip[i]["wards_dp"][code][0] - the two layouts
# differ, so check before re-enabling.
""" experiments_geo_clip = np.empty(25, dtype=object)
for i in range(25):
    metrics_df = pd.read_csv(
        os.path.join("trial_csvs", f"trial_metrics_general_{i}.csv")
    )
    wards_dp = {}
    for code in ward_codes_extended:
        wards_dp[code]= pd.read_csv(
            os.path.join("trial_csvs", f"trial_wards_general{code}_{i}.csv")
        )
    print(wards_dp)

    experiments_geo_clip[i] = {"wards_dp": wards_dp, "metrics_df": metrics_df} """

In [None]:
# Disabled cell: the export below is a string literal, so df_clip_kl is not
# written to disk here.
""" df_clip_kl.to_csv(
    os.path.join("trial_csvs", f"df_kl.csv"),
    index=True,
) """

In [None]:
# Frame of the repeated RMSE results; the plotting cells read its "epsilon",
# "value", "area_name" and "rmse" columns.
df_clip = evaluation_helpers.make_experiment_df(
    experiments_geo_clip, ward_names_extended, rmse_list, epsilons_comparison
)

In [None]:
# Disabled cell: the filter below is a string literal, so
# kl_divergence_diversity is never defined.
""" kl_divergence_diversity = measurement_kl_divergence_df_geo_clip[
    measurement_kl_divergence_df_geo_clip["area_name"].isin(ward_names)
]
kl_divergence_diversity """

In [None]:
def kl_divergence_areas(df, name=""):
    """Draw KL divergence against epsilon with one line per area and save it.

    Only the rows with ``mu == 0.0001`` are plotted. Lines are coloured with
    the notebook-level ``colors_5`` palette and the figure is written to the
    notebook-level ``fig_folder_name`` as ``kl_areas_general_{name}`` in PNG
    and SVG format.

    Args:
        df: frame with "epsilon", "kl_divergence", "area_name" and "mu" columns.
        name: suffix appended to the output file name.
    """
    plt.style.use("seaborn-v0_8-dark")
    fig, ax = plt.subplots(figsize=(12, 8))

    rows_for_mu = df.query("mu == 0.0001")
    sns.lineplot(
        data=rows_for_mu,
        x="epsilon",
        y="kl_divergence",
        hue="area_name",
        palette=colors_5,
        ax=ax,
    )

    ax.set_ylabel("KL Divergence", fontsize=10)
    ax.set_xlabel("Epsilon", fontsize=10)
    ax.set_title("KL Divergence")
    # Legend sits outside the axes, to the right of the plot.
    ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left", borderaxespad=0, fontsize=10)

    for extension in ("png", "svg"):
        target = os.path.join(fig_folder_name, f"kl_areas_general_{name}.{extension}")
        fig.savefig(target, bbox_inches="tight")

In [None]:
def rmse_experiments(df, ward_labels, color, fig_folder_name, title="", filename=""):
    """Plot the mean "rmse_dp" value per epsilon, one facet per area, with markers.

    Only rows where ``rmse == "rmse_dp"`` are drawn; repeated observations at
    the same epsilon are aggregated with ``np.mean``. The figure is saved to
    ``fig_folder_name`` as ``rmse_experiments_line_{filename}`` (PNG and SVG).

    Args:
        df: frame with "epsilon", "value", "area_name" and "rmse" columns.
        ward_labels: facet titles; the i-th label is put on the i-th facet, so
            the order has to match the facet order (assumed, not checked).
        color: palette used for the "area_name" hue.
        fig_folder_name: existing folder the figure files are written to.
        title: text appended to the "RMSE" figure title.
        filename: suffix appended to the output file name.
    """
    plt.style.use("seaborn-v0_8-dark")

    # relplot is figure-level and always builds its own figure. A preceding
    # plt.figure() call would only leave an empty figure behind, so none is made.
    grid = sns.relplot(
        x="epsilon",
        y="value",
        col="area_name",
        hue="area_name",
        palette=color,
        data=df.query('rmse == "rmse_dp"'),
        col_wrap=3,
        height=3,
        kind="line",
        marker="o",
        markersize=4,
        markeredgewidth=0.4,
        estimator=np.mean,
    )
    fig = grid.figure

    fig.suptitle(f"RMSE {title}", fontsize=14)
    sns.move_legend(
        grid, "upper right", bbox_to_anchor=(1.0, 0.9), ncol=1, title="", frameon=False
    )

    for i, ax in enumerate(grid.axes.flatten()):
        ax.set_title(ward_labels[i])
        # Facets share the x axis; show tick labels and axis labels on every one.
        ax.tick_params(labelbottom=True)
        ax.set_ylabel("RMSE", fontsize=10)
        ax.set_xlabel("Epsilon", visible=True, fontsize=10)

    # Leave room for the suptitle and between the facet rows.
    fig.subplots_adjust(top=0.9, wspace=None, hspace=0.35)

    for extension in ("png", "svg"):
        fig.savefig(
            os.path.join(
                fig_folder_name, f"rmse_experiments_line_{filename}.{extension}"
            ),
            bbox_inches="tight",
        )

In [None]:
def rmse_experiments_no_markers(
    df, ward_labels, color, fig_folder_name, title="", filename=""
):
    """Plot the mean "rmse_dp" value per epsilon, one facet per area, without markers.

    Only rows where ``rmse == "rmse_dp"`` are drawn; repeated observations at
    the same epsilon are aggregated with ``np.mean``. The figure is saved to
    ``fig_folder_name`` as ``rmse_experiments_line_no_markers_{filename}``
    (PNG and SVG).

    Args:
        df: frame with "epsilon", "value", "area_name" and "rmse" columns.
        ward_labels: facet titles; the i-th label is put on the i-th facet, so
            the order has to match the facet order (assumed, not checked).
        color: palette used for the "area_name" hue.
        fig_folder_name: existing folder the figure files are written to.
        title: text appended to the "RMSE" figure title.
        filename: suffix appended to the output file name.
    """
    plt.style.use("seaborn-v0_8-dark")

    # relplot is figure-level and always builds its own figure. A preceding
    # plt.figure() call would only leave an empty figure behind, so none is made.
    grid = sns.relplot(
        x="epsilon",
        y="value",
        col="area_name",
        hue="area_name",
        palette=color,
        data=df.query('rmse == "rmse_dp"'),
        col_wrap=3,
        height=3,
        kind="line",
        estimator=np.mean,
    )
    fig = grid.figure

    fig.suptitle(f"RMSE {title}", fontsize=14)
    sns.move_legend(
        grid, "upper right", bbox_to_anchor=(1.0, 0.9), ncol=1, title="", frameon=False
    )

    for i, ax in enumerate(grid.axes.flatten()):
        ax.set_title(ward_labels[i])
        # Facets share the x axis; show tick labels and axis labels on every one.
        ax.tick_params(labelbottom=True)
        ax.set_ylabel("RMSE", fontsize=10)
        ax.set_xlabel("Epsilon", visible=True, fontsize=10)

    # Leave room for the suptitle and between the facet rows.
    fig.subplots_adjust(top=0.9, wspace=None, hspace=0.35)

    for extension in ("png", "svg"):
        fig.savefig(
            os.path.join(
                fig_folder_name,
                f"rmse_experiments_line_no_markers_{filename}.{extension}",
            ),
            bbox_inches="tight",
        )

In [None]:
# Mean RMSE per epsilon (with markers) over the repeated runs in df_clip.
rmse_experiments(
    df=df_clip,
    ward_labels=ward_labels_extended,
    color=colors_5,
    fig_folder_name=fig_folder_name,
    title="- Geometric Mechanism",
    filename="_geo_25_trials_general",
)

In [None]:
# Same plot as the marker version, drawn as plain lines.
rmse_experiments_no_markers(
    df=df_clip,
    ward_labels=ward_labels_extended,
    color=colors_5,
    fig_folder_name=fig_folder_name,
    title="- Geometric Mechanism",
    filename="_geo_25_trials_general",
)

In [None]:
# Scatter version of the RMSE plot, drawn by the create_plots module from the
# same frame, labels, palette and output folder as the line plots.
create_plots.rmse_experiments_scatter(
    df_clip,
    ward_labels_extended,
    colors_5,
    fig_folder_name,
    "- Geometric Mechanism",
    filename="_geo_25_trials_general",
)

In [None]:
def rmse_line_experiments(df):
    """Plot the mean "rmse_dp" column per epsilon, one facet per area, and save it.

    Repeated observations at the same epsilon are aggregated with ``np.mean``.
    Uses the notebook-level ``colors_5`` palette and ``ward_labels_extended``
    facet titles, and writes ``rmse_areas_line_25_trials`` (PNG and SVG) to the
    notebook-level ``fig_folder_name``.

    Args:
        df: frame with "epsilon", "rmse_dp" and "area_name" columns.
    """
    plt.style.use("seaborn-v0_8-dark")
    grid = sns.relplot(
        x="epsilon",
        y="rmse_dp",
        col="area_name",
        hue="area_name",
        palette=colors_5,
        data=df,
        col_wrap=3,
        height=3,
        kind="line",
        marker="o",
        markersize=4,
        markeredgewidth=0.4,
        estimator=np.mean,
    )
    fig = grid.figure
    fig.suptitle("RMSE", fontsize=14)

    # Use the public legend accessor instead of the private _legend attribute.
    legend = grid.legend
    legend.set_bbox_to_anchor([1.0, 0.8])
    legend.set_title("")
    grid.set_ylabels("RMSE")
    grid.set_xlabels("Epsilon")

    for i, ax in enumerate(grid.axes.flatten()):
        # The i-th label goes on the i-th facet; order is assumed to match.
        ax.set_title(ward_labels_extended[i])
        ax.tick_params(labelbottom=True)
        ax.set_ylabel("RMSE", fontsize=10)
        ax.set_xlabel("Epsilon", visible=True, fontsize=10)

    # Leave room for the suptitle and between the facet rows.
    fig.subplots_adjust(top=0.9, wspace=None, hspace=0.35)
    for extension in ("png", "svg"):
        fig.savefig(
            os.path.join(fig_folder_name, f"rmse_areas_line_25_trials.{extension}"),
            bbox_inches="tight",
        )

In [None]:
# RMSE of the single seeded run, one facet per ward.
plt.style.use("seaborn-v0_8-dark")
# The grid is deliberately not called `p`: `p` is the ProcessBulk instance that
# earlier cells call, and overwriting it breaks those cells when they are re-run.
rmse_grid = sns.relplot(
    x="epsilon",
    y="rmse_dp",
    col="area_name",
    hue="area_name",
    palette=colors_5,
    data=metrics_df_geo_clip,
    col_wrap=3,
    height=3,
    kind="line",
    marker="o",
    markersize=4,
    markeredgewidth=0.4,
)
rmse_grid.figure.suptitle("RMSE", fontsize=14)
# Public legend accessor instead of the private _legend attribute.
rmse_legend = rmse_grid.legend
rmse_legend.set_bbox_to_anchor([1.0, 0.8])
rmse_legend.set_title("")
rmse_grid.set_ylabels("RMSE")
rmse_grid.set_xlabels("Epsilon")

for i, ax in enumerate(rmse_grid.axes.flatten()):
    # The i-th label goes on the i-th facet; order is assumed to match.
    ax.set_title(ward_labels_extended[i])
    ax.tick_params(labelbottom=True)
    ax.set_ylabel("RMSE", fontsize=10)
    ax.set_xlabel("Epsilon", visible=True, fontsize=10)

# Leave room for the suptitle and between the facet rows.
rmse_grid.figure.subplots_adjust(top=0.9, wspace=None, hspace=0.35)
for extension in ("png", "svg"):
    rmse_grid.figure.savefig(
        os.path.join(fig_folder_name, f"rmse_areas_line.{extension}"),
        bbox_inches="tight",
    )

In [None]:
def kl_experiments_line(
    df, mu, ward_labels, color, fig_folder_name, title="", filename=""
):
    """Plot the mean KL divergence per epsilon for one mu, one facet per area.

    Only rows where ``kl_divergence == "kl_divergence"`` and the ``mu`` column
    equals ``mu`` are drawn; repeated observations at the same epsilon are
    aggregated with ``np.mean``. The figure is saved to ``fig_folder_name`` as
    ``kl_areas_mu_{mu}_experiments_line_{filename}`` (PNG and SVG). The file
    name now follows ``mu`` instead of always saying 0.0001; for ``mu=0.0001``
    it is unchanged.

    Args:
        df: frame with "epsilon", "value", "area_name", "kl_divergence" and
            "mu" columns.
        mu: mu value to plot.
        ward_labels: facet titles; the i-th label is put on the i-th facet, so
            the order has to match the facet order (assumed, not checked).
        color: palette used for the "area_name" hue.
        fig_folder_name: existing folder the figure files are written to.
        title: text appended to the figure title.
        filename: suffix appended to the output file name.
    """
    plt.style.use("seaborn-v0_8-dark")

    # relplot is figure-level and always builds its own figure. A preceding
    # plt.figure() call would only leave an empty figure behind, so none is made.
    grid = sns.relplot(
        x="epsilon",
        y="value",
        col="area_name",
        hue="area_name",
        palette=color,
        data=df.query('kl_divergence == "kl_divergence" & mu == @mu'),
        col_wrap=3,
        height=3,
        kind="line",
        marker="o",
        markersize=4,
        markeredgewidth=0.4,
        estimator=np.mean,
    )
    fig = grid.figure

    fig.suptitle(f"KL Divergence for mu = {mu} {title}", fontsize=14)
    sns.move_legend(
        grid, "upper right", bbox_to_anchor=(1.0, 0.9), ncol=1, title="", frameon=False
    )

    for i, ax in enumerate(grid.axes.flatten()):
        ax.set_title(ward_labels[i])
        # Facets share the x axis; show tick labels and axis labels on every one.
        ax.tick_params(labelbottom=True)
        ax.set_ylabel("KL Divergence", fontsize=10)
        ax.set_xlabel("Epsilon", visible=True, fontsize=10)

    # Leave room for the suptitle and between the facet rows.
    fig.subplots_adjust(top=0.9, wspace=None, hspace=0.35)

    for extension in ("png", "svg"):
        fig.savefig(
            os.path.join(
                fig_folder_name,
                f"kl_areas_mu_{mu}_experiments_line_{filename}.{extension}",
            ),
            bbox_inches="tight",
        )
In [None]:
def kl_experiments_line_no_markers(
    df, mu, ward_labels, color, fig_folder_name, title="", filename=""
):
    """Plot the mean KL divergence per epsilon for one mu, without markers.

    Only rows where ``kl_divergence == "kl_divergence"`` and the ``mu`` column
    equals ``mu`` are drawn; repeated observations at the same epsilon are
    aggregated with ``np.mean``. The figure is saved to ``fig_folder_name`` as
    ``kl_areas_mu_{mu}_experiments_line_no_markers_general_{filename}`` (PNG
    and SVG). The file name now follows ``mu`` instead of always saying
    0.0001; for ``mu=0.0001`` it is unchanged.

    Args:
        df: frame with "epsilon", "value", "area_name", "kl_divergence" and
            "mu" columns.
        mu: mu value to plot.
        ward_labels: facet titles; the i-th label is put on the i-th facet, so
            the order has to match the facet order (assumed, not checked).
        color: palette used for the "area_name" hue.
        fig_folder_name: existing folder the figure files are written to.
        title: text appended to the figure title.
        filename: suffix appended to the output file name.
    """
    plt.style.use("seaborn-v0_8-dark")

    # relplot is figure-level and always builds its own figure. A preceding
    # plt.figure() call would only leave an empty figure behind, so none is made.
    grid = sns.relplot(
        x="epsilon",
        y="value",
        col="area_name",
        hue="area_name",
        palette=color,
        data=df.query('kl_divergence == "kl_divergence" & mu == @mu'),
        col_wrap=3,
        height=3,
        kind="line",
        estimator=np.mean,
    )
    fig = grid.figure

    fig.suptitle(f"KL Divergence for mu = {mu} {title}", fontsize=14)
    sns.move_legend(
        grid, "upper right", bbox_to_anchor=(1.0, 0.9), ncol=1, title="", frameon=False
    )

    for i, ax in enumerate(grid.axes.flatten()):
        ax.set_title(ward_labels[i])
        # Facets share the x axis; show tick labels and axis labels on every one.
        ax.tick_params(labelbottom=True)
        ax.set_ylabel("KL Divergence", fontsize=10)
        ax.set_xlabel("Epsilon", visible=True, fontsize=10)

    # Leave room for the suptitle and between the facet rows.
    fig.subplots_adjust(top=0.9, wspace=None, hspace=0.35)

    for extension in ("png", "svg"):
        fig.savefig(
            os.path.join(
                fig_folder_name,
                f"kl_areas_mu_{mu}_experiments_line_no_markers_general_{filename}.{extension}",
            ),
            bbox_inches="tight",
        )

In [None]:
# Scatter version of the KL plot for mu = 0.0001, drawn by the create_plots module.
# NOTE(review): filename has no leading underscore here, unlike every other
# call in this notebook ("_geo_25_trials_general") - confirm which form
# create_plots expects.
create_plots.kl_experiments_scatter(
    df_clip_kl,
    0.0001,
    ward_labels_extended,
    colors_5,
    fig_folder_name,
    "- Geometric Mechanism",
    filename="geo_25_trials_general",
)

In [None]:
# Mean KL divergence per epsilon (with markers) for mu = 0.0001.
kl_experiments_line(
    df=df_clip_kl,
    mu=0.0001,
    ward_labels=ward_labels_extended,
    color=colors_5,
    fig_folder_name=fig_folder_name,
    title="- Geometric Mechanism",
    filename="_geo_25_trials_general",
)

In [None]:
# Same plot as the marker version, drawn as plain lines.
kl_experiments_line_no_markers(
    df=df_clip_kl,
    mu=0.0001,
    ward_labels=ward_labels_extended,
    color=colors_5,
    fig_folder_name=fig_folder_name,
    title="- Geometric Mechanism",
    filename="_geo_25_trials_general",
)

In [None]:
# KL divergence of the single seeded run for mu = 0.0001, one facet per ward.
plt.style.use("seaborn-v0_8-dark")
# The grid is deliberately not called `p`: `p` is the ProcessBulk instance that
# earlier cells call, and overwriting it breaks those cells when they are re-run.
kl_grid = sns.relplot(
    x="epsilon",
    y="kl_divergence",
    col="area_name",
    hue="area_name",
    palette=colors_5,
    data=measurement_kl_divergence_df_geo_clip.query("mu == 0.0001"),
    col_wrap=3,
    height=3,
    kind="line",
    marker="o",
    markersize=4,
    markeredgewidth=0.4,
)
kl_grid.figure.suptitle("KL Divergence for mu = 0.0001", fontsize=14)
# Public legend accessor instead of the private _legend attribute.
kl_legend = kl_grid.legend
kl_legend.set_bbox_to_anchor([1.0, 0.8])
kl_legend.set_title("")
kl_grid.set_ylabels("KL Divergence")
kl_grid.set_xlabels("Epsilon")

for i, ax in enumerate(kl_grid.axes.flatten()):
    # The i-th label goes on the i-th facet; order is assumed to match.
    ax.set_title(ward_labels_extended[i])
    ax.tick_params(labelbottom=True)
    ax.set_ylabel("KL Divergence", fontsize=10)
    ax.set_xlabel("Epsilon", visible=True, fontsize=10)

# Leave room for the suptitle and between the facet rows.
kl_grid.figure.subplots_adjust(top=0.9, wspace=None, hspace=0.35)

for extension in ("png", "svg"):
    kl_grid.figure.savefig(
        os.path.join(fig_folder_name, f"kl_areas_mu_0.0001.{extension}"),
        bbox_inches="tight",
    )

In [None]:
# Disabled: kl_divergence_diversity is only assigned inside a disabled (string-literal) cell.
# kl_divergence_areas(kl_divergence_diversity, "_diversity")

In [None]:
# All areas of the single run in one axes; name is left at its default "", so
# the files are saved as kl_areas_general_.png / .svg.
kl_divergence_areas(measurement_kl_divergence_df_geo_clip)

In [None]:
# One colour per mu value; also used by later cells.
colors = ["red", "blue", "orange", "green", "purple", "yellow"]
# The loop variable is `ward_name`, not `ward`: `ward` is the Ward() helper that
# earlier cells call, and overwriting it with a string breaks those cells when
# they are re-run.
# NOTE(review): df_clip_kl is not filtered on its "kl_divergence" column here
# (kl_experiments_line filters on it), so the mean presumably mixes every KL
# variant in kl_list - confirm that this is intended.
for i, ward_name in enumerate(ward_names_extended):
    plt.style.use("seaborn-v0_8-dark")
    # A fresh figure per ward, so nothing is drawn onto an already open figure.
    fig, ax = plt.subplots()
    sns.lineplot(
        x="epsilon",
        y="value",
        hue="mu",
        palette=colors,
        data=df_clip_kl.query("area_name == @ward_name"),
        estimator=np.mean,
        errorbar=None,
        ax=ax,
    )
    ax.set_ylabel("KL Divergence", fontsize=10)
    ax.set_xlabel("Epsilon", fontsize=10)
    ax.set_title(
        f"KL Divergence Performance for Varying Mu Values {ward_labels_extended[i]}"
    )
    # Legend sits outside the axes, to the right of the plot.
    ax.legend(
        bbox_to_anchor=(1.02, 1),
        loc="upper left",
        borderaxespad=0,
        fontsize=10,
        title="mu",
    )
    for extension in ("png", "svg"):
        fig.savefig(
            os.path.join(fig_folder_name, f"kl_mean_{ward_name}_general.{extension}"),
            bbox_inches="tight",
        )

In [None]:
def kl_experiments_wards(
    df, ward_labels, color, fig_folder_name, title="", filename=""
):
    """For every ward, plot the mean KL divergence per epsilon with one facet per mu.

    Wards are taken from the notebook-level ``ward_names_extended``; the i-th
    entry of ``ward_labels`` is used in the title of the i-th ward's figure.
    Repeated observations at the same epsilon are aggregated with ``np.mean``
    and no error band is drawn. Each figure is saved to ``fig_folder_name`` as
    ``kl_relplot_mean_{ward}{filename}`` in PNG and SVG format. The PNG used to
    be written as ``kl_replot_mean_...``, a typo; both formats now share the
    ``relplot`` spelling.

    Args:
        df: frame with "epsilon", "value", "mu" and "area_name" columns.
        ward_labels: one title label per ward, in ``ward_names_extended`` order.
        color: palette used for the "mu" hue.
        fig_folder_name: existing folder the figure files are written to.
        title: accepted for signature compatibility; not used.
        filename: suffix appended to the output file name.
    """
    # NOTE(review): df is not filtered on a "kl_divergence" column here
    # (kl_experiments_line filters on it), so the mean presumably mixes every
    # KL variant present in df - confirm that this is intended.
    for i, ward_name in enumerate(ward_names_extended):
        plt.style.use("seaborn-v0_8-dark")

        # relplot is figure-level and builds its own figure, so no
        # plt.figure(i) call is needed (it would only add an unused figure).
        grid = sns.relplot(
            x="epsilon",
            y="value",
            col="mu",
            hue="mu",
            palette=color,
            data=df.query("area_name == @ward_name"),
            col_wrap=3,
            height=3,
            kind="line",
            marker="o",
            markersize=4,
            markeredgewidth=0.4,
            estimator=np.mean,
            errorbar=None,
        )
        fig = grid.figure

        fig.suptitle(
            f"KL Divergence Performance for Varying Mu Values {ward_labels[i]}",
            fontsize=14,
        )
        # Public legend accessor instead of the private _legend attribute.
        grid.legend.set_bbox_to_anchor([1.0, 0.8])

        for ax in grid.axes.flatten():
            ax.tick_params(labelbottom=True)
            ax.set_ylabel("KL Divergence", fontsize=10)
            ax.set_xlabel("Epsilon", visible=True, fontsize=10)
        # Leave room for the suptitle and between the facet rows.
        fig.subplots_adjust(top=0.91, wspace=None, hspace=0.3)

        for extension in ("png", "svg"):
            fig.savefig(
                os.path.join(
                    fig_folder_name,
                    f"kl_relplot_mean_{ward_name}{filename}.{extension}",
                ),
                bbox_inches="tight",
            )

In [None]:
# Per-ward facet plots of the repeated KL results, one facet per mu value.
kl_experiments_wards(
    df=df_clip_kl,
    ward_labels=ward_labels_extended,
    color=colors,
    fig_folder_name=fig_folder_name,
    filename="_general",
)

In [None]:
# Per-ward facet plots (one facet per mu) of the single seeded run.
# Neither `ward` nor `p` is assigned here: they are the Ward() and ProcessBulk
# helpers that earlier cells call, and overwriting them breaks those cells when
# they are re-run.
for ward_name in ward_names_extended:
    plt.style.use("seaborn-v0_8-dark")

    # relplot is figure-level and builds its own figure, so no plt.figure(i)
    # call is needed (it would only add an unused figure).
    ward_grid = sns.relplot(
        x="epsilon",
        y="kl_divergence",
        col="mu",
        hue="mu",
        palette=colors,
        data=measurement_kl_divergence_df_geo_clip.query("area_name == @ward_name"),
        col_wrap=3,
        height=3,
        kind="line",
        marker="o",
        markersize=4,
        markeredgewidth=0.4,
    )
    ward_grid.figure.suptitle(
        f"KL Divergence Performance for Varying Mu Values {ward_name}", fontsize=14
    )
    # Public legend accessor instead of the private _legend attribute.
    ward_grid.legend.set_bbox_to_anchor([1.0, 0.8])

    for ax in ward_grid.axes.flatten():
        ax.tick_params(labelbottom=True)
        ax.set_ylabel("KL-Divergence", fontsize=10)
        ax.set_xlabel("Epsilon", visible=True, fontsize=10)
    # Leave room for the suptitle and between the facet rows.
    ward_grid.figure.subplots_adjust(top=0.91, wspace=None, hspace=0.3)

    # Both formats use the "relplot" spelling; the PNG used to be written as
    # "kl_replot_...", a typo.
    for extension in ("png", "svg"):
        ward_grid.figure.savefig(
            os.path.join(fig_folder_name, f"kl_relplot_{ward_name}.{extension}"),
            bbox_inches="tight",
        )