In [None]:
import pandas as pd 
from process_bulk import ProcessBulk
from process_geography import Ward, LocalAuthority 
import evaluation_helpers
import os 
import diff_priv_dataframe
from copy import deepcopy
import data_error



For more plots, see birmingham_analysis_plots

In [None]:
# Opt in to pandas copy-on-write behaviour for the whole notebook session.
pd.set_option("mode.copy_on_write", True)


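To calculate the Townsend Deprivation Index, four indicators are needed per ward:

- Unemployment (DC6201EW: Economic activity by ethnic group by sex by age)
- Non-car ownership (DC4202EW: Tenure by car or van availability by ethnic group of Household Reference Person (HRP))
- Non-home ownership, i.e. Tenure = not owned (DC4201EW: Tenure by ethnic group by age - Household Reference Persons)
- Overcrowding (persons per room; measured here via DC4206EW, where an occupancy rating (bedrooms) of -1 or less counts as overcrowded)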

In [None]:
# Root folder of the bulk census download.
bulk_folder_name = "bulk_data"

# One table code plus ward-level subfolder per Townsend input table.

# Tenure by ethnic group by age - Household Reference Persons
table_name_tenure = "DC4201EW"
ward_folder_name_tenure = "dc4201ew_htward"

# Dwelling type by type of central heating in household by occupancy rating (bedrooms) by ethnic group of Household Reference Person (HRP)
table_name_occupancy = "DC4206EW"
ward_folder_name_occupancy = "dc4206ew_htward"

# Tenure by car or van availability by ethnic group of Household Reference Person (HRP)
table_name_car = "DC4202EW"
ward_folder_name_car = "dc4202ew_htward"

# Economic activity by ethnic group by sex by age
table_name_economic = "DC6201EW"
ward_folder_name_economic = "dc6201ew_htward"

# Index workbooks describing the table layouts; the economic-activity reader
# below is built on a different workbook (3.1) than the main reader (3.2).
index_sheet_name = "Cell Numbered DC Tables 3.2.xlsx"
index_sheet_name_economic = "Cell Numbered DC Tables 3.1.xlsx"
# index_sheet_name_occupation = 'cell_numbered_table_layouts_S1.xlsx'

# Workbook and sheet holding the published census confidence intervals.
cl_folder_name = "confidence_intervals"
cl_sheet_name = "2011censusconfidenceintervalsdec2013_tcm77-271638.xlsx"
cl_table_name = "95% CI widths (LA level)"

# One ProcessBulk reader per workbook (same construction order as before).
p_cl = ProcessBulk(bulk_folder=cl_folder_name, index_sheet=cl_sheet_name)
p = ProcessBulk(bulk_folder=bulk_folder_name, index_sheet=index_sheet_name)
p_economic = ProcessBulk(
    bulk_folder=bulk_folder_name,
    index_sheet=index_sheet_name_economic,
)

In [None]:
# Numeric part of the tenure table code, kept as a string of its digits.
table_num = "".join(ch for ch in table_name_tenure if ch.isdigit())

In [None]:
# Output folders for the perturbed datasets and derived results. All of them
# live under <bulk data path>/csv_files/datasets_dp_deprivation/<name>.
dp_folder_name = "csv_files/datasets_dp_deprivation"

# Per-table folders.
path_tenure = os.path.join(p.get_bulk_data_path(), dp_folder_name, "tenure")
path_occupancy = os.path.join(p.get_bulk_data_path(), dp_folder_name, "occupancy")
path_car = os.path.join(p.get_bulk_data_path(), dp_folder_name, "car")
path_economic = os.path.join(p.get_bulk_data_path(), dp_folder_name, "economic")

# Folders for the deprivation scores and their indicators.
path_deprivation_scores = os.path.join(
    p.get_bulk_data_path(), dp_folder_name, "deprivation_scores"
)
path_indicators = os.path.join(
    p.get_bulk_data_path(), dp_folder_name, "deprivation_indicators"
)

In [None]:
# Epsilon values passed as `epsilons=` to the diff_priv_dataframe mechanisms
# and to the *_dp indicator helpers further down.
epsilons_short = [0.001, 0.01, 0.1, 1]

In [None]:
# Category labels used to build the filter dictionaries for each table.
# The full label lists are spelled out once; the subset lists are derived
# from them so the two can never drift apart.

# --- Ethnic group ----------------------------------------------------------
general_ethnicities = [
    "White: Total",
    "Mixed/multiple ethnic group: Total",
    "Asian/Asian British: Total",
    "Black/African/Caribbean/Black British: Total",
    "Other ethnic group: Total",
]
detailed_ethnicities = [
    "White: English/Welsh/Scottish/Northern Irish/British",
    "White: Irish",
    "White: Gypsy or Irish Traveller",
    "White: Other White",
    "Mixed/multiple ethnic group: White and Black Caribbean",
    "Mixed/multiple ethnic group: White and Black African",
    "Mixed/multiple ethnic group: White and Asian",
    "Mixed/multiple ethnic group: Other Mixed",
    "Asian/Asian British: Indian",
    "Asian/Asian British: Pakistani",
    "Asian/Asian British: Bangladeshi",
    "Asian/Asian British: Chinese",
    "Asian/Asian British: Other Asian",
    "Black/African/Caribbean/Black British: African",
    "Black/African/Caribbean/Black British: Caribbean",
    "Black/African/Caribbean/Black British: Other Black",
    "Other ethnic group: Arab",
    "Other ethnic group: Any other ethnic group",
]
# Single-element list holding the first detailed group (White British).
white = detailed_ethnicities[:1]

total_ethnicities = ["All categories: Ethnic group"]

# --- Tenure ----------------------------------------------------------------
all_tenure = [
    "All categories: Tenure",
    "Owned or shared ownership: Total",
    "Owned: Owned outright",
    "Owned: Owned with a mortgage or loan or shared ownership",
    "Social rented: Total",
    "Social rented: Rented from council (Local Authority)",
    "Social rented: Other social rented",
    "Private rented or living rent free: Total",
    "Private rented: Private landlord or letting agency",
    "Private rented: Other private rented or living rent free",
]
# Group subtotals are exactly the labels ending in ": Total" ...
general_tenure = [label for label in all_tenure if label.endswith(": Total")]
# ... and the detailed labels are everything else except the grand total.
detailed_tenure = [
    label for label in all_tenure[1:] if not label.endswith(": Total")
]

# --- Occupancy rating ------------------------------------------------------
all_occupancy = [
    "All categories: Occupancy rating (bedrooms)",
    "Occupancy rating (bedrooms) of +2 or more",
    "Occupancy rating (bedrooms) of +1",
    "Occupancy rating (bedrooms) of 0",
    "Occupancy rating (bedrooms) of -1 or less",
]

# Every occupancy label except the leading grand total.
detailed_occupancy = all_occupancy[1:]

# --- Car or van availability -----------------------------------------------
detailed_cars = [
    "No cars or vans in household",
    "1 car or van in household",
    "2 or more cars or vans in household",
]

# --- Economic activity -----------------------------------------------------
all_economic = [
    "All categories: Economic activity",
    "Economically active: Total",
    "Economically active: In employment: Total",
    "Economically active: In employment: Employee: Total",
    "Economically active: In employment: Employee: Part-time",
    "Economically active: In employment: Employee: Full-time",
    "Economically active: In employment: Self-employed: Total",
    "Economically active: In employment: Self-employed: Part-time",
    "Economically active: In employment: Self-employed: Full-time",
    "Economically active: In employment: Full-time students",
    "Economically active: Unemployed: Total",
    "Economically active: Unemployed: Unemployed (excluding full time students)",
    "Economically active: Unemployed: Full-time students",
    "Economically inactive: Total",
    "Economically inactive: Retired",
    "Economically inactive: Student (including full-time students)",
    "Economically inactive: Looking after home or family",
    "Economically inactive: Long-term sick or disabled",
    "Economically inactive: Other",
]

# Leaf categories only: drop the grand total and every ": Total" subtotal.
economic_detailed = [
    label for label in all_economic[1:] if not label.endswith(": Total")
]

# The economically inactive leaf categories.
economic_inactive = [
    label
    for label in economic_detailed
    if label.startswith("Economically inactive: ")
]

In [None]:
# Geography helpers.
ward = Ward()
local_authority = LocalAuthority()

# The geo lookup file is the same for region and ward.
geo_lookup = ward.get_geo_lookup_ward()

# All ward codes, taken from the lookup's CMWD11CD column.
ward_codes = geo_lookup["CMWD11CD"].tolist()

# Four indicator stores, each mapping ward code -> its own fresh empty dict
# (a comprehension, not dict.fromkeys, so the inner dicts are not shared).

# Indicators from the unperturbed data.
indicators = {code: {} for code in ward_codes}

# Indicators from differentially private data (geometric mechanism, clipped).
indicators_geometric_clipping = {code: {} for code in ward_codes}

# Indicators from data with data error applied.
indicators_data_error = {code: {} for code in ward_codes}

# Indicators from data with data error applied and then made differentially
# private (geometric mechanism, clipped).
indicators_data_error_geometric_clipping = {code: {} for code in ward_codes}

In [None]:
# Load the ward-level tenure table (DC4201EW) through the main ProcessBulk
# reader. The result is used below as a mapping with "lookup_df" and "csv_df".
# NOTE(review): num_nested_category=10 presumably matches the ten labels in
# all_tenure, and level=6 the ward geography level — confirm against
# ProcessBulk.set_up.

column_names = ["Age", "Tenure", "EthnicGroup", "Dataset"]
dfs_tenure = p.set_up(
    table_name=table_name_tenure,
    df_type="nested",
    column_names=column_names,
    num_nested_category=10,
    subfolder=ward_folder_name_tenure,
    level=6,
)


# Read the confidence-interval sheet ("95% CI widths (LA level)"); it is passed
# to data_error.apply_data_error_to_dataframe for every table below.
sheet_cl = p_cl.read_cl(cl_table_name)

Indicators can only be calculated on non-negative data. Therefore, clipping has to be done as a post-processing step. 

In [None]:
# Tenure: extract the counts needed for the percentage of households that are
# not owner-occupied, i.e. rented or living rent free.
# The filter keeps all ages combined, the three top-level tenure groups and
# the detailed ethnic groups (not the ethnic-group totals).

filter_dict_tenure = {
    "Age": ["All categories: Age"],
    "Tenure": general_tenure,
    "EthnicGroup": detailed_ethnicities,
}

# Reduce the lookup table and the CSV data to the filtered categories.
reduced_lookup_tenure, datasets_reduced_tenure, reduced_csv_tenure = (
    evaluation_helpers.get_reduced_data(
        dfs_tenure["lookup_df"], filter_dict_tenure, dfs_tenure["csv_df"]
    )
)
# Map the reduced data onto wards via the geo lookup.
df_ward_tenure = ward.get_ward(reduced_csv_tenure, geo_lookup)

# Filtered tenure data for the ward codes in ward_codes; input to the
# indicator helpers and to get_csv_dp_dict below.
df_tenure = evaluation_helpers.get_filtered_df_ward_dict(
    p, ward, df_ward_tenure, ward_codes, reduced_lookup_tenure, filter_dict_tenure
)

In [None]:
# Update the baseline indicator dict (keyed by ward code) with the tenure
# indicator computed from the unperturbed data.
indicators = evaluation_helpers.calculate_indicators_tenure(df_tenure, indicators)

In [None]:
""" path_tenure_laplace = os.path.join(os.path.join(path_tenure, "laplace"))
path_tenure_laplace_rounding = os.path.join(path_tenure_laplace, "rounding")
path_tenure_laplace_clipping = os.path.join(path_tenure_laplace, "clipping")
path_tenure_laplace_rounding_clipping = os.path.join(
    path_tenure_laplace, "clipping_rounding"
) """

In [None]:
# Output folders for the geometric-mechanism runs on the tenure table.
path_tenure_geometric = os.path.join(path_tenure, "geometric")
path_tenure_geometric_clipping = os.path.join(path_tenure_geometric, "clipping")

In [None]:
# Output folder for the tenure table with data error applied.
path_tenure_data_error = os.path.join(path_tenure, "data_error")

In [None]:
# Apply data error to the reduced tenure counts using the confidence-interval
# sheet. The helper's return value is not used: the perturbed table is read
# back from "<table>_data_error.csv" in path_tenure_data_error, so this call
# must run (and write that file) before the read below.
data_error.apply_data_error_to_dataframe(
    reduced_csv_tenure,
    sheet_cl,
    path_tenure_data_error,
    table_name_tenure,
    geo_lookup,
    ward,
)
reduced_csv_tenure_data_error = pd.read_csv(
    os.path.join(path_tenure_data_error, f"{table_name_tenure}_data_error.csv")
)
# Same ward mapping and filtering as for the unperturbed tenure data.
df_ward_tenure_data_error = ward.get_ward(reduced_csv_tenure_data_error, geo_lookup)
df_tenure_data_error = evaluation_helpers.get_filtered_df_ward_dict(
    p,
    ward,
    df_ward_tenure_data_error,
    ward_codes,
    reduced_lookup_tenure,
    filter_dict_tenure,
)

In [None]:
# Tenure indicator computed from the data-error-perturbed counts.
indicators_data_error = evaluation_helpers.calculate_indicators_tenure(
    df_tenure_data_error, indicators_data_error
)

In [None]:
""" diff_priv_dataframe.apply_geometric_to_dataframe(
    path_tenure_geometric,
    reduced_csv_tenure,
    datasets_reduced_tenure,
    ward,
    geo_lookup,
    table_name_tenure,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
)
diff_priv_dataframe.apply_geometric_to_dataframe(
    path_tenure_geometric_clipping,
    reduced_csv_tenure,
    datasets_reduced_tenure,
    ward,
    geo_lookup,
    table_name_tenure,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
) """

In [None]:
""" diff_priv_dataframe.apply_geometric_to_dataframe_seed(
    path_tenure_geometric,
    reduced_csv_tenure,
    ward,
    geo_lookup,
    table_name_tenure,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
) """
# Seeded geometric mechanism with clipping on the reduced tenure counts, for
# every epsilon in epsilons_short; output folder path_tenure_geometric_clipping.
diff_priv_dataframe.apply_geometric_to_dataframe_seed(
    path_tenure_geometric_clipping,
    reduced_csv_tenure,
    ward,
    geo_lookup,
    table_name_tenure,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
)

In [None]:
""" diff_priv_dataframe.apply_laplace_to_dataframe(
    path_tenure_laplace,
    reduced_csv_tenure,
    datasets_reduced_tenure,
    ward,
    geo_lookup,
    table_name_tenure,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_tenure_laplace_rounding,
    reduced_csv_tenure,
    datasets_reduced_tenure,
    ward,
    geo_lookup,
    table_name_tenure,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
    rounding=True,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_tenure_laplace_clipping,
    reduced_csv_tenure,
    datasets_reduced_tenure,
    ward,
    geo_lookup,
    table_name_tenure,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_tenure_laplace_rounding_clipping,
    reduced_csv_tenure,
    datasets_reduced_tenure,
    ward,
    geo_lookup,
    table_name_tenure,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
    rounding=True,
) """

In [None]:
""" diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_tenure_laplace,
    reduced_csv_tenure,
    ward,
    geo_lookup,
    table_name_tenure,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=False,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_tenure_laplace_rounding,
    reduced_csv_tenure,
    ward,
    geo_lookup,
    table_name_tenure,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=False,
    rounding=True,
)
diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_tenure_laplace_clipping,
    reduced_csv_tenure,
    ward,
    geo_lookup,
    table_name_tenure,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=True,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_tenure_laplace_rounding_clipping,
    reduced_csv_tenure,
    ward,
    geo_lookup,
    table_name_tenure,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=True,
    rounding=True,
) """

In [None]:
""" diff_priv_dataframe.apply_laplace_to_dataframe(
    path_tenure_data_error,
    reduced_csv_tenure_data_error,
    datasets_reduced_tenure,
    ward,
    geo_lookup,
    table_name_tenure + "_laplace_clipping",
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_tenure_data_error,
    reduced_csv_tenure_data_error,
    datasets_reduced_tenure,
    ward,
    geo_lookup,
    table_name_tenure + "_laplace_clipping_rounding",
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
    rounding=True,
) """

In [None]:
# Read the clipped geometric-mechanism output for the tenure table back from
# path_tenure_geometric_clipping (presumably one result per epsilon — confirm
# in evaluation_helpers.get_csv_dp_dict) ...
df_tenure_geometric_clipping = evaluation_helpers.get_csv_dp_dict(
    df_tenure, epsilons_short, path_tenure_geometric_clipping, table_name_tenure
)
# ... and compute the tenure indicator on it.
indicators_geometric_clipping = evaluation_helpers.calculate_indicators_tenure_dp(
    df_tenure_geometric_clipping, indicators_geometric_clipping, epsilons_short
)

In [None]:
# Data error + differential privacy (tenure).
# Apply the clipped geometric mechanism to the data-error-perturbed tenure
# counts. Output goes to path_tenure_data_error under the table name
# "<table>_geometric_clipping".
# NOTE(review): the unseeded and the seeded variant are called with the same
# output folder and table name, so the second run presumably overwrites the
# first — confirm which one is intended and drop the other.
diff_priv_dataframe.apply_geometric_to_dataframe(
    path_tenure_data_error,
    reduced_csv_tenure_data_error,
    datasets_reduced_tenure,
    ward,
    geo_lookup,
    table_name_tenure + "_geometric_clipping",
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
)
diff_priv_dataframe.apply_geometric_to_dataframe_seed(
    path_tenure_data_error,
    reduced_csv_tenure_data_error,
    ward,
    geo_lookup,
    table_name_tenure + "_geometric_clipping",
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
)
# Fix: read back the files written above (data-error folder, table name with
# the "_geometric_clipping" suffix). This previously re-read
# path_tenure_geometric_clipping / table_name_tenure, i.e. the DP-only output,
# so the "data error + DP" indicators never contained any data error.
# df_tenure stays the first argument, as in the DP-only cell.
# NOTE(review): confirm get_csv_dp_dict only uses it as a structural template.
df_tenure_data_error_geometric_clipping = evaluation_helpers.get_csv_dp_dict(
    df_tenure,
    epsilons_short,
    path_tenure_data_error,
    table_name_tenure + "_geometric_clipping",
)
indicators_data_error_geometric_clipping = (
    evaluation_helpers.calculate_indicators_tenure_dp(
        df_tenure_data_error_geometric_clipping,
        indicators_data_error_geometric_clipping,
        epsilons_short,
    )
)

In [None]:
# Load the ward-level occupancy table (DC4206EW) through the main ProcessBulk
# reader; the result is used below via its "lookup_df" and "csv_df" entries.
# NOTE(review): nrows=20 and start_sheet=2 presumably select one sub-table of
# the layout sheet, and num_nested_category=5 matches the five labels in
# all_occupancy — confirm against ProcessBulk.set_up_sub_table.

column_names_occ = ["Heating", "Occupancy", "EthnicGroup", "Dataset"]
dfs_occ = p.set_up_sub_table(
    table_name=table_name_occupancy,
    df_type="nested",
    column_names=column_names_occ,
    num_nested_category=5,
    nrows=20,
    start_sheet=2,
    subfolder=ward_folder_name_occupancy,
    level=6,
)

In [None]:
# Overcrowding: extract the counts needed for the percentage of overcrowded
# households; an occupancy rating of -1 or less counts as overcrowded.
# The filter keeps all heating types combined, the four occupancy ratings and
# the detailed ethnic groups.

filter_dict_occ = {
    "Heating": ["All categories: Type of central heating in household"],
    "Occupancy": detailed_occupancy,
    "EthnicGroup": detailed_ethnicities,
}

# Reduce the lookup table and the CSV data to the filtered categories.
reduced_lookup_occ, datasets_reduced_occ, reduced_csv_occ = (
    evaluation_helpers.get_reduced_data(
        dfs_occ["lookup_df"], filter_dict_occ, dfs_occ["csv_df"]
    )
)
# Map the reduced data onto wards via the geo lookup.
df_ward_occ = ward.get_ward(reduced_csv_occ, geo_lookup)

df_occ = evaluation_helpers.get_filtered_df_ward_dict(
    p, ward, df_ward_occ, ward_codes, reduced_lookup_occ, filter_dict_occ
)

# Update the baseline indicator dict with the occupancy indicator.
indicators = evaluation_helpers.calculate_indicators_occ(df_occ, indicators)

In [None]:
""" path_occupancy_laplace = os.path.join(os.path.join(path_occupancy, "laplace"))
path_occupancy_laplace_rounding = os.path.join(path_occupancy_laplace, "rounding")
path_occupancy_laplace_clipping = os.path.join(path_occupancy_laplace, "clipping")
path_occupancy_laplace_rounding_clipping = os.path.join(
    path_occupancy_laplace, "clipping_rounding"
) """

# Output folders for the geometric-mechanism runs on the occupancy table.
path_occupancy_geometric = os.path.join(path_occupancy, "geometric")
path_occupancy_geometric_clipping = os.path.join(path_occupancy_geometric, "clipping")

In [None]:
# Output folder for the occupancy table with data error applied.
path_occupancy_data_error = os.path.join(path_occupancy, "data_error")

In [None]:
# Apply data error to the reduced occupancy counts using the
# confidence-interval sheet. The helper's return value is not used: the
# perturbed table is read back from "<table>_data_error.csv" in
# path_occupancy_data_error, so this call must run before the read below.
data_error.apply_data_error_to_dataframe(
    reduced_csv_occ,
    sheet_cl,
    path_occupancy_data_error,
    table_name_occupancy,
    geo_lookup,
    ward,
)
reduced_csv_occ_data_error = pd.read_csv(
    os.path.join(path_occupancy_data_error, f"{table_name_occupancy}_data_error.csv")
)
# Same ward mapping and filtering as for the unperturbed occupancy data.
df_ward_occ_data_error = ward.get_ward(reduced_csv_occ_data_error, geo_lookup)
df_occ_data_error = evaluation_helpers.get_filtered_df_ward_dict(
    p, ward, df_ward_occ_data_error, ward_codes, reduced_lookup_occ, filter_dict_occ
)

In [None]:
# Occupancy indicator computed from the data-error-perturbed counts.
indicators_data_error = evaluation_helpers.calculate_indicators_occ(
    df_occ_data_error, indicators_data_error
)

In [None]:
""" diff_priv_dataframe.apply_geometric_to_dataframe(
    path_occupancy_geometric,
    reduced_csv_occ,
    datasets_reduced_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
)
diff_priv_dataframe.apply_geometric_to_dataframe(
    path_occupancy_geometric_clipping,
    reduced_csv_occ,
    datasets_reduced_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
) """

In [None]:
""" diff_priv_dataframe.apply_geometric_to_dataframe_seed(
    path_occupancy_geometric,
    reduced_csv_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
)
diff_priv_dataframe.apply_geometric_to_dataframe_seed(
    path_occupancy_geometric_clipping,
    reduced_csv_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
) """

In [None]:
""" diff_priv_dataframe.apply_laplace_to_dataframe(
    path_occupancy_laplace,
    reduced_csv_occ,
    datasets_reduced_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_occupancy_laplace_rounding,
    reduced_csv_occ,
    datasets_reduced_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
    rounding=True,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_occupancy_laplace_clipping,
    reduced_csv_occ,
    datasets_reduced_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_occupancy_laplace_rounding_clipping,
    reduced_csv_occ,
    datasets_reduced_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
    rounding=True,
) """

In [None]:
""" diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_occupancy_laplace,
    reduced_csv_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=False,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_occupancy_laplace_rounding,
    reduced_csv_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=False,
    rounding=True,
)
diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_occupancy_laplace_clipping,
    reduced_csv_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=True,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_occupancy_laplace_rounding_clipping,
    reduced_csv_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=True,
    rounding=True,
) """

In [None]:
# Clipped geometric mechanism on the reduced occupancy counts, for every
# epsilon in epsilons_short; output folder path_occupancy_geometric_clipping.
# NOTE(review): this is the unseeded variant, whereas the tenure, car and
# economic tables use apply_geometric_to_dataframe_seed for the same step —
# confirm whether the difference is intentional.
diff_priv_dataframe.apply_geometric_to_dataframe(
    path_occupancy_geometric_clipping,
    reduced_csv_occ,
    datasets_reduced_occ,
    ward,
    geo_lookup,
    table_name_occupancy,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
)
# Read the output written above back in ...
df_occ_geometric_clipping = evaluation_helpers.get_csv_dp_dict(
    df_occ, epsilons_short, path_occupancy_geometric_clipping, table_name_occupancy
)
# ... and compute the occupancy indicator on it.
indicators_geometric_clipping = evaluation_helpers.calculate_indicators_occ_dp(
    df_occ_geometric_clipping, indicators_geometric_clipping, epsilons_short
)
# indicators_geometric_clipping

In [None]:
# Data error + differential privacy (occupancy).
# Apply the clipped geometric mechanism to the data-error-perturbed occupancy
# counts. Output goes to path_occupancy_data_error under the table name
# "<table>_geometric_clipping".
# NOTE(review): the unseeded and the seeded variant are called with the same
# output folder and table name, so the second run presumably overwrites the
# first — confirm which one is intended and drop the other.
diff_priv_dataframe.apply_geometric_to_dataframe(
    path_occupancy_data_error,
    reduced_csv_occ_data_error,
    datasets_reduced_occ,
    ward,
    geo_lookup,
    table_name_occupancy + "_geometric_clipping",
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
)
# Fix: the seeded run now perturbs the data-error table as well. It was given
# reduced_csv_occ (no data error), unlike the matching tenure and car cells.
diff_priv_dataframe.apply_geometric_to_dataframe_seed(
    path_occupancy_data_error,
    reduced_csv_occ_data_error,
    ward,
    geo_lookup,
    table_name_occupancy + "_geometric_clipping",
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
)
# Fix: read back the files written above (data-error folder, table name with
# the "_geometric_clipping" suffix). This previously re-read
# path_occupancy_geometric_clipping / table_name_occupancy, i.e. the DP-only
# output, so the "data error + DP" indicators never contained any data error.
# df_occ stays the first argument, as in the DP-only cell.
# NOTE(review): confirm get_csv_dp_dict only uses it as a structural template.
df_occ_data_error_geometric_clipping = evaluation_helpers.get_csv_dp_dict(
    df_occ,
    epsilons_short,
    path_occupancy_data_error,
    table_name_occupancy + "_geometric_clipping",
)
indicators_data_error_geometric_clipping = (
    evaluation_helpers.calculate_indicators_occ_dp(
        df_occ_data_error_geometric_clipping,
        indicators_data_error_geometric_clipping,
        epsilons_short,
    )
)

In [None]:
# Load the ward-level car or van availability table (DC4202EW) through the
# main ProcessBulk reader; the result is used below via its "lookup_df" and
# "csv_df" entries.
# NOTE(review): num_nested_category=4 presumably covers an "all categories"
# label plus the three labels in detailed_cars — confirm against
# ProcessBulk.set_up.
column_names_car = ["Tenure", "Car", "EthnicGroup", "Dataset"]
dfs_car = p.set_up(
    table_name=table_name_car,
    df_type="nested",
    column_names=column_names_car,
    num_nested_category=4,
    subfolder=ward_folder_name_car,
    level=6,
)

In [None]:
# Car availability: extract the counts needed for the percentage of
# households without a car or van.
# The filter keeps all tenures combined, the three car-availability
# categories and the detailed ethnic groups.

filter_dict_car = {
    "Tenure": ["All categories: Tenure"],
    "Car": detailed_cars,
    "EthnicGroup": detailed_ethnicities,
}

# Reduce the lookup table and the CSV data to the filtered categories.
reduced_lookup_car, datasets_reduced_car, reduced_csv_car = (
    evaluation_helpers.get_reduced_data(
        dfs_car["lookup_df"], filter_dict_car, dfs_car["csv_df"]
    )
)
# Map the reduced data onto wards via the geo lookup.
df_ward_car = ward.get_ward(reduced_csv_car, geo_lookup)

df_car = evaluation_helpers.get_filtered_df_ward_dict(
    p, ward, df_ward_car, ward_codes, reduced_lookup_car, filter_dict_car
)

# Update the baseline indicator dict with the car-availability indicator.
indicators = evaluation_helpers.calculate_indicators_car(df_car, indicators)
# indicators

In [None]:
""" path_car_laplace = os.path.join(os.path.join(path_car, "laplace"))
path_car_laplace_rounding = os.path.join(path_car_laplace, "rounding")
path_car_laplace_clipping = os.path.join(path_car_laplace, "clipping")
path_car_laplace_rounding_clipping = os.path.join(path_car_laplace, "clipping_rounding") """

# Output folders for the geometric-mechanism runs on the car table.
path_car_geometric = os.path.join(path_car, "geometric")
path_car_geometric_clipping = os.path.join(path_car_geometric, "clipping")

In [None]:
# Output folder for the car table with data error applied.
path_car_data_error = os.path.join(path_car, "data_error")

In [None]:
# Apply data error to the reduced car counts using the confidence-interval
# sheet. The helper's return value is not used: the perturbed table is read
# back from "<table>_data_error.csv" in path_car_data_error, so this call
# must run before the read below.
data_error.apply_data_error_to_dataframe(
    reduced_csv_car, sheet_cl, path_car_data_error, table_name_car, geo_lookup, ward
)
reduced_csv_car_data_error = pd.read_csv(
    os.path.join(path_car_data_error, f"{table_name_car}_data_error.csv")
)
# Same ward mapping and filtering as for the unperturbed car data.
df_ward_car_data_error = ward.get_ward(reduced_csv_car_data_error, geo_lookup)
df_car_data_error = evaluation_helpers.get_filtered_df_ward_dict(
    p, ward, df_ward_car_data_error, ward_codes, reduced_lookup_car, filter_dict_car
)

In [None]:
# Car-availability indicator computed from the data-error-perturbed counts.
indicators_data_error = evaluation_helpers.calculate_indicators_car(
    df_car_data_error, indicators_data_error
)

In [None]:
""" diff_priv_dataframe.apply_geometric_to_dataframe(
    path_car_geometric,
    reduced_csv_car,
    datasets_reduced_car,
    ward,
    geo_lookup,
    table_name_car,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
)
diff_priv_dataframe.apply_geometric_to_dataframe(
    path_car_geometric_clipping,
    reduced_csv_car,
    datasets_reduced_car,
    ward,
    geo_lookup,
    table_name_car,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
) """

In [None]:
""" diff_priv_dataframe.apply_geometric_to_dataframe_seed(
    path_car_geometric,
    reduced_csv_car,
    ward,
    geo_lookup,
    table_name_car,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
) """
# Seeded geometric mechanism with clipping on the reduced car counts, for
# every epsilon in epsilons_short; output folder path_car_geometric_clipping.
diff_priv_dataframe.apply_geometric_to_dataframe_seed(
    path_car_geometric_clipping,
    reduced_csv_car,
    ward,
    geo_lookup,
    table_name_car,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
)

In [None]:
""" diff_priv_dataframe.apply_laplace_to_dataframe(
    path_car_laplace,
    reduced_csv_car,
    datasets_reduced_car,
    ward,
    geo_lookup,
    table_name_car,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_car_laplace_rounding,
    reduced_csv_car,
    datasets_reduced_car,
    ward,
    geo_lookup,
    table_name_car,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
    rounding=True,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_car_laplace_clipping,
    reduced_csv_car,
    datasets_reduced_car,
    ward,
    geo_lookup,
    table_name_car,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_car_laplace_rounding_clipping,
    reduced_csv_car,
    datasets_reduced_car,
    ward,
    geo_lookup,
    table_name_car,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
    rounding=True,
) """

In [None]:
""" diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_car_laplace,
    reduced_csv_car,
    ward,
    geo_lookup,
    table_name_car,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=False,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_car_laplace_clipping,
    reduced_csv_car,
    ward,
    geo_lookup,
    table_name_car,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=True,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_car_laplace_rounding,
    reduced_csv_car,
    ward,
    geo_lookup,
    table_name_car,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=False,
    rounding=True,
)
diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_car_laplace_rounding_clipping,
    reduced_csv_car,
    ward,
    geo_lookup,
    table_name_car,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=True,
    rounding=True,
) """

In [None]:
# Read the clipped geometric-mechanism output for the car table back from
# path_car_geometric_clipping ...
df_car_geometric_clipping = evaluation_helpers.get_csv_dp_dict(
    df_car, epsilons_short, path_car_geometric_clipping, table_name_car
)
# ... and compute the car-availability indicator on it.
indicators_geometric_clipping = evaluation_helpers.calculate_indicators_car_dp(
    df_car_geometric_clipping, indicators_geometric_clipping, epsilons_short
)

In [None]:
# Data error + differential privacy (car or van availability).
# Apply the clipped geometric mechanism to the data-error-perturbed car
# counts. Output goes to path_car_data_error under the table name
# "<table>_geometric_clipping".
# NOTE(review): the unseeded and the seeded variant are called with the same
# output folder and table name, so the second run presumably overwrites the
# first — confirm which one is intended and drop the other.
diff_priv_dataframe.apply_geometric_to_dataframe(
    path_car_data_error,
    reduced_csv_car_data_error,
    datasets_reduced_car,
    ward,
    geo_lookup,
    table_name_car + "_geometric_clipping",
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
)
diff_priv_dataframe.apply_geometric_to_dataframe_seed(
    path_car_data_error,
    reduced_csv_car_data_error,
    ward,
    geo_lookup,
    table_name_car + "_geometric_clipping",
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
)
# Fix: read back the files written above (data-error folder, table name with
# the "_geometric_clipping" suffix). This previously re-read
# path_car_geometric_clipping / table_name_car, i.e. the DP-only output, so
# the "data error + DP" indicators never contained any data error.
# df_car stays the first argument, as in the DP-only cell.
# NOTE(review): confirm get_csv_dp_dict only uses it as a structural template.
df_car_data_error_geometric_clipping = evaluation_helpers.get_csv_dp_dict(
    df_car,
    epsilons_short,
    path_car_data_error,
    table_name_car + "_geometric_clipping",
)
indicators_data_error_geometric_clipping = (
    evaluation_helpers.calculate_indicators_car_dp(
        df_car_data_error_geometric_clipping,
        indicators_data_error_geometric_clipping,
        epsilons_short,
    )
)

In [None]:
# Load the ward-level economic activity table (DC6201EW) through the
# p_economic reader, which is built on the 3.1 index workbook.
# Currently loaded for all sex categories; could also be loaded for male and
# female separately.
# NOTE(review): num_nested_category=19 presumably matches the 19 labels in
# all_economic; nrows=82 and start_sheet=2 presumably select the "all sexes"
# sub-table — confirm against ProcessBulk.set_up_sub_table.
column_names_economic = ["Age", "EconomicActivity", "EthnicGroup", "Dataset"]
dfs_economic = p_economic.set_up_sub_table(
    table_name=table_name_economic,
    df_type="nested",
    column_names=column_names_economic,
    num_nested_category=19,
    nrows=82,
    start_sheet=2,
    subfolder=ward_folder_name_economic,
    level=6,
)

In [None]:
# Unemployment: extract the counts needed to relate unemployed people
# ('Economically active: Unemployed: Unemployed (excluding full time students)')
# to the economically active population.
# The filter keeps ages 16 and over combined, the twelve leaf
# economic-activity categories and the detailed ethnic groups.

filter_dict_economic = {
    "Age": ["All categories: Age 16 and over"],
    "EconomicActivity": economic_detailed,
    "EthnicGroup": detailed_ethnicities,
}

# Reduce the lookup table and the CSV data to the filtered categories.
reduced_lookup_economic, datasets_reduced_economic, reduced_csv_economic = (
    evaluation_helpers.get_reduced_data(
        dfs_economic["lookup_df"], filter_dict_economic, dfs_economic["csv_df"]
    )
)
# Map the reduced data onto wards via the geo lookup.
df_ward_economic = ward.get_ward(reduced_csv_economic, geo_lookup)

# NOTE(review): `p` is passed here although this table was loaded with
# `p_economic` — confirm get_filtered_df_ward_dict does not depend on the
# reader's index workbook.
df_economic = evaluation_helpers.get_filtered_df_ward_dict(
    p, ward, df_ward_economic, ward_codes, reduced_lookup_economic, filter_dict_economic
)

# Update the baseline indicator dict with the economic-activity indicator.
indicators = evaluation_helpers.calculate_indicators_economic(df_economic, indicators)

In [None]:
""" path_economic_laplace = os.path.join(os.path.join(path_economic, "laplace"))
path_economic_laplace_rounding = os.path.join(path_economic_laplace, "rounding")
path_economic_laplace_clipping = os.path.join(path_economic_laplace, "clipping")
path_economic_laplace_rounding_clipping = os.path.join(
    path_economic_laplace, "clipping_rounding"
)
 """
# Output folders for the geometric-mechanism runs on the economic table.
path_economic_geometric = os.path.join(path_economic, "geometric")
path_economic_geometric_clipping = os.path.join(path_economic_geometric, "clipping")

In [None]:
# Output folder for the economic table with data error applied.
path_economic_data_error = os.path.join(path_economic, "data_error")

In [None]:
# Apply data error to the reduced economic-activity counts using the
# confidence-interval sheet. The helper's return value is not used: the
# perturbed table is read back from "<table>_data_error.csv" in
# path_economic_data_error, so this call must run before the read below.
data_error.apply_data_error_to_dataframe(
    reduced_csv_economic,
    sheet_cl,
    path_economic_data_error,
    table_name_economic,
    geo_lookup,
    ward,
)
reduced_csv_economic_data_error = pd.read_csv(
    os.path.join(path_economic_data_error, f"{table_name_economic}_data_error.csv")
)
# Same ward mapping and filtering as for the unperturbed economic data.
df_ward_economic_data_error = ward.get_ward(reduced_csv_economic_data_error, geo_lookup)
# NOTE(review): `p` is passed here although this table was loaded with
# `p_economic` — confirm get_filtered_df_ward_dict does not depend on the
# reader's index workbook.
df_economic_data_error = evaluation_helpers.get_filtered_df_ward_dict(
    p,
    ward,
    df_ward_economic_data_error,
    ward_codes,
    reduced_lookup_economic,
    filter_dict_economic,
)

In [None]:
# Update the data-error indicators with the economic (unemployment) component.
# `indicators_data_error` is both input and output, so it must already exist
# from an earlier cell before this one runs.
indicators_data_error = evaluation_helpers.calculate_indicators_economic(
    df_economic_data_error, indicators_data_error
)

In [None]:
# Disabled: the whole cell is a single string literal, so nothing below is
# executed. It holds the unseeded geometric-mechanism runs (without and with
# clipping) for the economic table.
""" diff_priv_dataframe.apply_geometric_to_dataframe(
    path_economic_geometric,
    reduced_csv_economic,
    datasets_reduced_economic,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
)
diff_priv_dataframe.apply_geometric_to_dataframe(
    path_economic_geometric_clipping,
    reduced_csv_economic,
    datasets_reduced_economic,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
) """

In [None]:
# Disabled (kept as a string literal, not executed): the seeded geometric run
# without clipping.
""" diff_priv_dataframe.apply_geometric_to_dataframe_seed(
    path_economic_geometric,
    reduced_csv_economic,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
) """
# Active: seeded geometric run with clipping on the reduced economic table,
# targeting path_economic_geometric_clipping for every epsilon in epsilons_short.
diff_priv_dataframe.apply_geometric_to_dataframe_seed(
    path_economic_geometric_clipping,
    reduced_csv_economic,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
)

In [None]:
# Disabled: the whole cell is a single string literal, so nothing below is
# executed. It holds the unseeded Laplace runs for all four clipping/rounding
# combinations. The path_economic_laplace* names it uses are themselves only
# defined inside a disabled string in the paths cell, so both would need to be
# restored together.
""" diff_priv_dataframe.apply_laplace_to_dataframe(
    path_economic_laplace,
    reduced_csv_economic,
    datasets_reduced_economic,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_economic_laplace_rounding,
    reduced_csv_economic,
    datasets_reduced_economic,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=False,
    rounding=True,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_economic_laplace_clipping,
    reduced_csv_economic,
    datasets_reduced_economic,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe(
    path_economic_laplace_rounding_clipping,
    reduced_csv_economic,
    datasets_reduced_economic,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
    rounding=True,
) """

In [None]:
# Disabled: the whole cell is a single string literal, so nothing below is
# executed. It holds the seeded Laplace runs (delta=0) for all four
# clipping/rounding combinations. The path_economic_laplace* names it uses are
# themselves only defined inside a disabled string in the paths cell.
""" diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_economic_laplace,
    reduced_csv_economic,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=False,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_economic_laplace_rounding,
    reduced_csv_economic,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=False,
    rounding=True,
)
diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_economic_laplace_clipping,
    reduced_csv_economic,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=True,
    rounding=False,
)
diff_priv_dataframe.apply_laplace_to_dataframe_seed(
    path_economic_laplace_rounding_clipping,
    reduced_csv_economic,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    delta=0,
    epsilons=epsilons_short,
    clipping=True,
    rounding=True,
) """

In [None]:
# Collect the geometric-clipping outputs for the economic table from
# path_economic_geometric_clipping, for each epsilon in epsilons_short.
df_economic_geometric_clipping = evaluation_helpers.get_csv_dp_dict(
    df_economic, epsilons_short, path_economic_geometric_clipping, table_name_economic
)
# Update the geometric-clipping indicators with the economic component;
# `indicators_geometric_clipping` is both input and output here.
indicators_geometric_clipping = evaluation_helpers.calculate_indicators_economic_dp(
    df_economic_geometric_clipping, indicators_geometric_clipping, epsilons_short
)

In [None]:
# Geometric mechanism with clipping applied on top of the data-error table.
# Both runs take path_economic_data_error as their target folder; the unseeded
# run uses a "_geometric_clipping" suffix on the table name, the seeded run uses
# the plain table name.
diff_priv_dataframe.apply_geometric_to_dataframe(
    path_economic_data_error,
    reduced_csv_economic_data_error,
    datasets_reduced_economic,
    ward,
    geo_lookup,
    table_name_economic + "_geometric_clipping",
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
)
diff_priv_dataframe.apply_geometric_to_dataframe_seed(
    path_economic_data_error,
    reduced_csv_economic_data_error,
    ward,
    geo_lookup,
    table_name_economic,
    sensitivity=2,
    epsilons=epsilons_short,
    clipping=True,
)
# Read the results back from the data-error folder, using the data-error ward
# dict. Reading from path_economic_geometric_clipping with df_economic would
# merely reproduce df_economic_geometric_clipping and ignore the data-error
# runs above.
df_economic_data_error_geometric_clipping = evaluation_helpers.get_csv_dp_dict(
    df_economic_data_error,
    epsilons_short,
    path_economic_data_error,
    table_name_economic,
)
# Update the combined data-error + geometric-clipping indicators with the
# economic component; the indicator dict is both input and output.
indicators_data_error_geometric_clipping = (
    evaluation_helpers.calculate_indicators_economic_dp(
        df_economic_data_error_geometric_clipping,
        indicators_data_error_geometric_clipping,
        epsilons_short,
    )
)

In [None]:
# Persist the baseline indicators. The target file name carries no extension
# ("indicators_baseline"); index=True writes the DataFrame index as well.
indicators_csv = pd.DataFrame(indicators)
indicators_csv.to_csv(os.path.join(path_indicators, "indicators_baseline"), index=True)

# Derive the deprivation indices from the baseline indicators and hand them,
# together with df_tenure, to set_up_deprivation_df under the label "baseline".
deprivation_indices = evaluation_helpers.calculate_deprivation_indices(indicators)
evaluation_helpers.set_up_deprivation_df(
    deprivation_indices, df_tenure, path_deprivation_scores, "baseline"
)

In [None]:
# Persist the data-error indicators (file name without extension, index kept).
indicators_data_error_csv = pd.DataFrame(indicators_data_error)
indicators_data_error_csv.to_csv(
    os.path.join(path_indicators, "indicators_data_error"), index=True
)

# Deprivation indices for the data-error variant; uses the data-error tenure
# frame and the label "data_error".
deprivation_indices_data_error = evaluation_helpers.calculate_deprivation_indices(
    indicators_data_error
)
evaluation_helpers.set_up_deprivation_df(
    deprivation_indices_data_error,
    df_tenure_data_error,
    path_deprivation_scores,
    "data_error",
)

In [None]:
# Persist the geometric-clipping indicators (file name without extension,
# index kept).
indicators_geometric_clipping_csv = pd.DataFrame(indicators_geometric_clipping)
indicators_geometric_clipping_csv.to_csv(
    os.path.join(path_indicators, "indicators_geometric_clipping"), index=True
)


# Deprivation indices for the geometric-clipping variant, computed with the
# epsilon-aware (_dp) helpers over epsilons_short and labelled
# "geometric_clipping".
deprivation_indices_geometric_clipping = (
    evaluation_helpers.calculate_deprivation_indices_dp(
        indicators_geometric_clipping, epsilons_short
    )
)
evaluation_helpers.set_up_deprivation_df_dp(
    deprivation_indices_geometric_clipping,
    df_tenure,
    epsilons_short,
    path_deprivation_scores,
    "geometric_clipping",
)

In [None]:
# Show the geometric-clipping indicators table as this cell's output.
indicators_geometric_clipping_csv

In [None]:
# Persist the combined data-error + geometric-clipping indicators (file name
# without extension, index kept).
indicators_data_error_geometric_clipping_csv = pd.DataFrame(
    indicators_data_error_geometric_clipping
)
indicators_data_error_geometric_clipping_csv.to_csv(
    os.path.join(path_indicators, "indicators_data_error_geometric_clipping"),
    index=True,
)

# Deprivation indices for the combined variant, computed with the epsilon-aware
# (_dp) helpers over epsilons_short and labelled "data_error_geometric_clipping".
deprivation_indices_data_error_geometric_clipping = (
    evaluation_helpers.calculate_deprivation_indices_dp(
        indicators_data_error_geometric_clipping, epsilons_short
    )
)
# NOTE(review): the plain data-error cell passes df_tenure_data_error to
# set_up_deprivation_df, whereas this call passes df_tenure - confirm which
# frame is intended for the data-error variant.
evaluation_helpers.set_up_deprivation_df_dp(
    deprivation_indices_data_error_geometric_clipping,
    df_tenure,
    epsilons_short,
    path_deprivation_scores,
    "data_error_geometric_clipping",
)