In [None]:
import pandas as pd
from process_bulk import ProcessBulk
from process_geography import Ward, LocalAuthority
import evaluation_helpers
import os
import diff_priv_dataframe

In [None]:
# Opt in to pandas Copy-on-Write mode for every DataFrame operation in this session.
pd.options.mode.copy_on_write = True

In [None]:
# NOTE(review): `mus` and `diversity_levels` are not read anywhere in the visible
# cells; their meaning is not shown here — confirm they are still needed.
mus = [1.0, 0.5, 0.1, 0.01, 0.001, 0.0001]
# Full grid of epsilon values (presumably DP privacy budgets — confirm).
epsilons = [0.001, 0.01, 0.1, 0.5, 1, 3, 5, 10]
# Subset of `epsilons`; this is the list handed to the diff_priv_dataframe and
# evaluation_helpers calls further down.
epsilons_short = [0.001, 0.01, 0.1, 1]

diversity_levels = ["low", "medium", "high"]

In [None]:
# --- Census table under study -------------------------------------------------
# DC4201EW: Tenure by ethnic group by age - Household Reference Persons
table_name_tenure = "DC4201EW"

# --- Folder and file names ----------------------------------------------------
bulk_folder_name = "census_data"
index_sheet_name = "Cell Numbered DC Tables 3.2.xlsx"
ward_folder_name_tenure = "dc4201ew_htward"
# Sub-folder name that is later joined onto p.get_bulk_data_path() to locate the
# differentially private datasets.
dp_folder_name = "datasets_dp"

# --- Bulk-data processor ------------------------------------------------------
p = ProcessBulk(
    bulk_folder=bulk_folder_name,
    index_sheet=index_sheet_name,
)

In [None]:
# Lists of category labels used to build filters (see `filter_dict`).
# The strings must match the labels in the census lookup data exactly.

# list of ethnicities for filtering
# The five top-level "...: Total" ethnic groups.
general_ethnicities = [
    "White: Total",
    "Mixed/multiple ethnic group: Total",
    "Asian/Asian British: Total",
    "Black/African/Caribbean/Black British: Total",
    "Other ethnic group: Total",
]
# The 18 detailed ethnic sub-groups, listed in the same order as their parent
# groups above (4 White, 4 Mixed, 5 Asian, 3 Black, 2 Other).
detailed_ethnicities = [
    "White: English/Welsh/Scottish/Northern Irish/British",
    "White: Irish",
    "White: Gypsy or Irish Traveller",
    "White: Other White",
    "Mixed/multiple ethnic group: White and Black Caribbean",
    "Mixed/multiple ethnic group: White and Black African",
    "Mixed/multiple ethnic group: White and Asian",
    "Mixed/multiple ethnic group: Other Mixed",
    "Asian/Asian British: Indian",
    "Asian/Asian British: Pakistani",
    "Asian/Asian British: Bangladeshi",
    "Asian/Asian British: Chinese",
    "Asian/Asian British: Other Asian",
    "Black/African/Caribbean/Black British: African",
    "Black/African/Caribbean/Black British: Caribbean",
    "Black/African/Caribbean/Black British: Other Black",
    "Other ethnic group: Arab",
    "Other ethnic group: Any other ethnic group",
]

# Single-element list holding the first entry of `detailed_ethnicities`.
white = ["White: English/Welsh/Scottish/Northern Irish/British"]

# The overall total across all ethnic groups.
total_ethnicities = ["All categories: Ethnic group"]

# list of tenure categories for filtering
# Every tenure label: the overall total, the three "...: Total" groups and the
# six detailed categories, interleaved group by group.
all_tenure = [
    "All categories: Tenure",
    "Owned or shared ownership: Total",
    "Owned: Owned outright",
    "Owned: Owned with a mortgage or loan or shared ownership",
    "Social rented: Total",
    "Social rented: Rented from council (Local Authority)",
    "Social rented: Other social rented",
    "Private rented or living rent free: Total",
    "Private rented: Private landlord or letting agency",
    "Private rented: Other private rented or living rent free",
]
# Only the three "...: Total" tenure groups.
general_tenure = [
    "Owned or shared ownership: Total",
    "Social rented: Total",
    "Private rented or living rent free: Total",
]
# Only the six detailed tenure categories (no totals).
detailed_tenure = [
    "Owned: Owned outright",
    "Owned: Owned with a mortgage or loan or shared ownership",
    "Social rented: Rented from council (Local Authority)",
    "Social rented: Other social rented",
    "Private rented: Private landlord or letting agency",
    "Private rented: Other private rented or living rent free",
]

# The seven age bands.
detailed_age = [
    "Age 24 and under",
    "Age 25 to 34",
    "Age 35 to 49",
    "Age 50 to 64",
    "Age 65 to 74",
    "Age 75 to 84",
    "Age 85 and over",
]

# The overall total across all ages.
general_age = ["All categories: Age"]

In [None]:
column_names = ["Age", "Tenure", "EthnicGroup", "Dataset"]

# Keyword arguments common to both loads; only `level` differs between them.
set_up_shared_kwargs = dict(
    table_name=table_name_tenure,
    df_type="nested",
    column_names=column_names,
    num_nested_category=10,
    subfolder=ward_folder_name_tenure,
)

# Presumably level 6 is the ward geography and level 5 the local-authority
# geography (going by the variable names) — confirm against ProcessBulk.set_up.
dfs_ward = p.set_up(level=6, **set_up_shared_kwargs)
# NOTE(review): the level-5 load also reads from the ward subfolder — confirm
# that this is intended.
dfs_la = p.set_up(level=5, **set_up_shared_kwargs)

In [None]:
# Geography helpers imported from process_geography.
ward = Ward()
local_authority = LocalAuthority()
# Ward-level geography lookup; later cells read its "LAD11NM" and "CMWD11CD" columns.
geo_lookup = ward.get_geo_lookup_ward()

In [None]:
# Rows whose "LAD11NM" value contains the text "Birmingham".
is_birmingham = geo_lookup["LAD11NM"].str.contains("Birmingham")
geo_lookup_birmingham = geo_lookup[is_birmingham]

# All "CMWD11CD" codes of the Birmingham rows, as a plain Python list.
birmingham_code_column = geo_lookup_birmingham["CMWD11CD"]
ward_codes = birmingham_code_column.tolist()

In [None]:
# Shortcut to the "lookup_df" entry of the ward-level set-up result.
lookup = dfs_ward["lookup_df"]

In [None]:
# Shortcut to the "csv_df" entry of the ward-level set-up result.
# NOTE(review): the name `csv` is also the name of a standard-library module and
# would shadow it if that module were ever imported in this notebook.
csv = dfs_ward["csv_df"]

In [None]:
# Category selection passed to the evaluation helpers below: the age total
# only, crossed with the detailed tenure and detailed ethnicity categories.
# The keys are the first three entries of `column_names`.
filter_dict = {
    "Age": general_age,
    "Tenure": detailed_tenure,
    "EthnicGroup": detailed_ethnicities,
}

In [None]:
# Apply `filter_dict` to the ward-level lookup and CSV data. The helper returns
# three values, unpacked here as the reduced lookup, the reduced datasets and
# the reduced CSV data (presumably only the matching categories are kept —
# confirm in evaluation_helpers.get_reduced_data).
reduced_lookup, datasets_reduced, reduced_csv = evaluation_helpers.get_reduced_data(
    dfs_ward["lookup_df"], filter_dict, dfs_ward["csv_df"]
)

In [None]:
# Build the ward-level frame from the reduced CSV data and the geography lookup.
df_ward = ward.get_ward(reduced_csv, geo_lookup)
# Mapping indexed by ward code (it is subscripted with a ward-code string at the
# end of the notebook); built for the Birmingham `ward_codes` — presumably one
# entry per code, confirm in evaluation_helpers.get_filtered_df_ward_dict.
wards_tenure = evaluation_helpers.get_filtered_df_ward_dict(
    p, ward, df_ward, ward_codes, reduced_lookup, filter_dict
)

In [None]:
# Every value of the "GeographyCode" column of the reduced CSV data, as a list.
ward_codes_full = reduced_csv["GeographyCode"].values.tolist()

In [None]:
# Locations of the Laplace-based DP datasets:
#   <bulk data path>/<dp_folder_name>/laplace            base folder
#   .../laplace/rounding, /clipping, /clipping_rounding  post-processing variants
path_laplace = os.path.join(p.get_bulk_data_path(), dp_folder_name, "laplace")

path_laplace_rounding, path_laplace_clipping, path_laplace_rounding_clipping = (
    os.path.join(path_laplace, "rounding"),
    os.path.join(path_laplace, "clipping"),
    os.path.join(path_laplace, "clipping_rounding"),
)

In [None]:
# Locations of the geometric-mechanism DP datasets:
#   <bulk data path>/<dp_folder_name>/geometric   base folder
#   .../geometric/clipping                        clipped variant
path_geometric = os.path.join(p.get_bulk_data_path(), dp_folder_name, "geometric")
path_geometric_clipping = os.path.join(path_geometric, "clipping")

The following cells only need to be run once, to create the differentially private (DP) CSV files. They are disabled so that re-running the notebook does not regenerate the files every time.

In [None]:
# One-off generation of the geometric-mechanism DP CSV files (no clipping);
# `path_geometric` is passed as the target location.
# Disabled by default: set the flag to True and run this cell once when the
# files need to be (re)created. An explicit guard is used rather than wrapping
# the call in a string literal, so the cell produces no output while disabled
# and the call remains syntax-checked.
REGENERATE_GEOMETRIC = False

if REGENERATE_GEOMETRIC:
    diff_priv_dataframe.apply_geometric_to_dataframe(
        path_geometric,
        reduced_csv,
        datasets_reduced,
        ward,
        geo_lookup,
        table_name_tenure,
        sensitivity=2,
        epsilons=epsilons_short,
        clipping=False,
    )

In [None]:
# One-off generation of the geometric-mechanism DP CSV files with clipping;
# `path_geometric_clipping` is passed as the target location.
# Disabled by default: set the flag to True and run this cell once when the
# files need to be (re)created. An explicit guard is used rather than wrapping
# the call in a string literal, so the cell produces no output while disabled
# and the call remains syntax-checked.
REGENERATE_GEOMETRIC_CLIPPING = False

if REGENERATE_GEOMETRIC_CLIPPING:
    diff_priv_dataframe.apply_geometric_to_dataframe(
        path_geometric_clipping,
        reduced_csv,
        datasets_reduced,
        ward,
        geo_lookup,
        table_name_tenure,
        sensitivity=2,
        epsilons=epsilons_short,
        clipping=True,
    )

In [None]:
# One-off generation of the Laplace DP CSV files (no clipping, no rounding);
# `path_laplace` is passed as the target location.
# Disabled by default: set the flag to True and run this cell once when the
# files need to be (re)created. An explicit guard is used rather than wrapping
# the call in a string literal, so the cell produces no output while disabled
# and the call remains syntax-checked.
REGENERATE_LAPLACE = False

if REGENERATE_LAPLACE:
    diff_priv_dataframe.apply_laplace_to_dataframe(
        path_laplace,
        reduced_csv,
        datasets_reduced,
        ward,
        geo_lookup,
        table_name_tenure,
        sensitivity=2,
        epsilons=epsilons_short,
        clipping=False,
        rounding=False,
    )

In [None]:
# One-off generation of the Laplace DP CSV files with rounding (no clipping);
# `path_laplace_rounding` is passed as the target location.
# Disabled by default: set the flag to True and run this cell once when the
# files need to be (re)created. An explicit guard is used rather than wrapping
# the call in a string literal, so the cell produces no output while disabled
# and the call remains syntax-checked.
REGENERATE_LAPLACE_ROUNDING = False

if REGENERATE_LAPLACE_ROUNDING:
    diff_priv_dataframe.apply_laplace_to_dataframe(
        path_laplace_rounding,
        reduced_csv,
        datasets_reduced,
        ward,
        geo_lookup,
        table_name_tenure,
        sensitivity=2,
        epsilons=epsilons_short,
        clipping=False,
        rounding=True,
    )

In [None]:
# One-off generation of the Laplace DP CSV files with clipping (no rounding);
# `path_laplace_clipping` is passed as the target location.
# Disabled by default: set the flag to True and run this cell once when the
# files need to be (re)created. An explicit guard is used rather than wrapping
# the call in a string literal, so the cell produces no output while disabled
# and the call remains syntax-checked.
REGENERATE_LAPLACE_CLIPPING = False

if REGENERATE_LAPLACE_CLIPPING:
    diff_priv_dataframe.apply_laplace_to_dataframe(
        path_laplace_clipping,
        reduced_csv,
        datasets_reduced,
        ward,
        geo_lookup,
        table_name_tenure,
        sensitivity=2,
        epsilons=epsilons_short,
        clipping=True,
        rounding=False,
    )

In [None]:
# One-off generation of the Laplace DP CSV files with both clipping and
# rounding; `path_laplace_rounding_clipping` is passed as the target location.
# Disabled by default: set the flag to True and run this cell once when the
# files need to be (re)created. An explicit guard is used rather than wrapping
# the call in a string literal, so the cell produces no output while disabled
# and the call remains syntax-checked.
REGENERATE_LAPLACE_ROUNDING_CLIPPING = False

if REGENERATE_LAPLACE_ROUNDING_CLIPPING:
    diff_priv_dataframe.apply_laplace_to_dataframe(
        path_laplace_rounding_clipping,
        reduced_csv,
        datasets_reduced,
        ward,
        geo_lookup,
        table_name_tenure,
        sensitivity=2,
        epsilons=epsilons_short,
        clipping=True,
        rounding=True,
    )

In [None]:
# Load the Laplace DP results for each post-processing variant. Every call
# receives the same `wards_tenure`, `epsilons_short` and table name; only the
# source folder differs.
# NOTE(review): presumably reads the CSV files previously written to these
# folders and fails if they have not been generated yet — confirm in
# evaluation_helpers.get_csv_dp_dict.
wards_laplace = evaluation_helpers.get_csv_dp_dict(
    wards_tenure, epsilons_short, path_laplace, table_name_tenure
)
wards_laplace_rounding = evaluation_helpers.get_csv_dp_dict(
    wards_tenure, epsilons_short, path_laplace_rounding, table_name_tenure
)
wards_laplace_clipping = evaluation_helpers.get_csv_dp_dict(
    wards_tenure, epsilons_short, path_laplace_clipping, table_name_tenure
)
wards_laplace_rounding_clipping = evaluation_helpers.get_csv_dp_dict(
    wards_tenure, epsilons_short, path_laplace_rounding_clipping, table_name_tenure
)

In [None]:
# Load the geometric-mechanism DP results, without and with clipping. Both calls
# receive the same `wards_tenure`, `epsilons_short` and table name; only the
# source folder differs.
# NOTE(review): presumably reads the CSV files previously written to these
# folders and fails if they have not been generated yet — confirm in
# evaluation_helpers.get_csv_dp_dict.
wards_geometric = evaluation_helpers.get_csv_dp_dict(
    wards_tenure, epsilons_short, path_geometric, table_name_tenure
)
wards_geometric_clipping = evaluation_helpers.get_csv_dp_dict(
    wards_tenure, epsilons_short, path_geometric_clipping, table_name_tenure
)

In [None]:
# Display the item at position/key 0 of the entry stored for ward code "E36006764".
# NOTE(review): the ward code is hard-coded; presumably it is one of the
# Birmingham codes in `ward_codes` — confirm, otherwise this raises a KeyError.
wards_tenure["E36006764"][0]