## This notebook contains the code to create dynamic and interactive maps of Ghana in HTML.

# 0. Import Packages

In [None]:
import pandas as pd
import shapely.wkt
import matplotlib
import geopandas as gpd
import folium
import matplotlib.cm as cm
import branca.colormap as cmp

# Do not truncate rows or columns when a dataframe is displayed.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
# Turn off the chained-assignment warning (pandas default is 'warn').
pd.set_option('mode.chained_assignment', None)

# Coordinates every folium map is initially centred on.
STARTING_LATITUDE, STARTING_LONGITUDE = 8.161477, -1.170247
cmap = cm.viridis

# 1. Load Data

In [None]:
# Micro-data sample from the Ghana census 2021, with covariates averaged across spatial units.
# The 'spatial_unit' column is stored as WKT text, so it is parsed into geometry objects.
df_population = pd.read_csv(
    '../data/outputs/df_stratification_frame_unit.csv'
).assign(
    spatial_unit=lambda frame: frame['spatial_unit'].apply(shapely.wkt.loads)
)
df_population.head()

In [None]:
# Synthetic subjects from 7 urban districts
df_accra = pd.read_csv(filepath_or_buffer='../data/synthetic_subjects_urban.csv')
df_accra.head()

In [None]:
# Synthetic subjects from 12 rural districts
df_rural = pd.read_csv(filepath_or_buffer='../data/synthetic_subjects_rural.csv')
df_rural.head()

In [None]:
# Subjects from the RCT
df_ghana = pd.read_csv(filepath_or_buffer='../data/df_ghana_complete.csv')
df_ghana.head()

In [None]:
# Subjects from CANDOUR Wave II
df_candour = pd.read_csv(filepath_or_buffer='../data/df_candour_final.csv')
df_candour.head()

In [None]:
# Subdistrict to spatial unit mapping.
# NOTE: unpickling can execute arbitrary code, so only load this file from a trusted source.
with open("../data/subdistrict_to_spatial_unit_dict.pickle", "rb") as f:
    # Deliberately not named `object`: that would shadow the builtin for every later cell.
    subdistrict_mapping = pd.read_pickle(f)

df_district = pd.DataFrame(subdistrict_mapping)
df_district.head()

# 2. Calculate average demographic features across the lowest-level geographic space

In [None]:
def calculate_grouped_average(data: pd.DataFrame, groupby: str, features: list) -> pd.DataFrame:
    """
    Average a set of demographic features within each lowest-level geographical
    space (e.g., spatial unit, district).

    Parameters:
        data: A dataframe containing the grouping column and the feature columns.
        groupby: Name of the column identifying the lowest-level geographical space.
        features: A list of column names whose values are averaged per group.

    Returns:
        A dataframe with one row per group: the grouping column (kept as a regular
        column, not as the index) followed by the mean of each requested feature.
    """
    per_group = data.groupby(groupby, as_index=False)
    return per_group[features].mean()


In [None]:
# Average the RCT covariates and reported treatment effects per spatial unit.
df_ghana_averaged = calculate_grouped_average(
    df_ghana,
    'spatial_unit',
    [
        'Age',
        'Female',
        'SchoolMiddleSchoolOrGreater',
        'Reported_ITECDC',
        'Reported_ITEHighCash',
        'Reported_ITELowCash',
        'RWI',
        'NPP_%',
        'NDC_%',
    ],
)
# The grouping key is WKT text; parse it into geometry objects for mapping.
df_ghana_averaged['spatial_unit'] = df_ghana_averaged['spatial_unit'].apply(shapely.wkt.loads)
df_ghana_averaged.head()

In [None]:
# Average the CANDOUR covariates per district.
df_candour_averaged = calculate_grouped_average(
    df_candour,
    'District',
    [
        'Age',
        'Female',
        'SchoolMiddleSchoolOrGreater',
        'Dictator',
        'RWI',
        'NPP_%',
        'NDC_%',
    ],
)
df_candour_averaged.head()

In [None]:
# Average the urban synthetic-subject covariates per district.
df_accra_averaged = calculate_grouped_average(
    df_accra,
    'District',
    [
        'Age',
        'Female',
        'SchoolMiddleSchoolOrGreater',
        'RWI',
        'NPP_%',
        'NDC_%',
    ],
)
df_accra_averaged.head()

In [None]:
# Average the rural synthetic-subject covariates per district.
df_rural_averaged = calculate_grouped_average(
    df_rural,
    'District',
    [
        'Age',
        'Female',
        'SchoolMiddleSchoolOrGreater',
        'RWI',
        'NPP_%',
        'NDC_%',
    ],
)
df_rural_averaged.head()

# 3. Map Grouped Data to Spatial Units

In [None]:
def map_data_to_spatial_units(spatial_units: pd.DataFrame, grouped_data: pd.DataFrame) -> pd.DataFrame:
    """
    Attach district-level averages to the spatial units belonging to each district.

    Every row of the subdistrict/spatial-unit lookup is left-joined to the grouped data
    (``subdistrict`` matched against ``District``). The ``subdistrict`` column is then
    dropped and only rows with a non-missing ``Female`` value are kept, which removes
    the spatial units that found no matching district.

    Parameters:
        spatial_units: A dataframe with ``spatial_unit`` and ``subdistrict`` columns.
        grouped_data: A dataframe of averaged demographic data with a ``District``
            column and a ``Female`` column.

    Returns:
        A dataframe with the ``spatial_unit`` column followed by the columns of
        ``grouped_data``, restricted to rows where ``Female`` is present.
    """
    unit_lookup = spatial_units[['spatial_unit', 'subdistrict']]
    merged = unit_lookup.merge(
        grouped_data,
        how='left',
        left_on='subdistrict',
        right_on='District',
    )
    merged = merged.drop(columns=['subdistrict'])

    has_female = merged['Female'].notna()
    return merged[has_female]

In [None]:
# Attach the CANDOUR district averages to their spatial units.
df_candour_units = map_data_to_spatial_units(
    spatial_units=df_district,
    grouped_data=df_candour_averaged,
)
df_candour_units.head()

In [None]:
# Attach the urban district averages to their spatial units.
df_accra_units = map_data_to_spatial_units(
    spatial_units=df_district,
    grouped_data=df_accra_averaged,
)
df_accra_units.head()

In [None]:
# Attach the rural district averages to their spatial units.
df_rural_units = map_data_to_spatial_units(
    spatial_units=df_district,
    grouped_data=df_rural_averaged,
)
df_rural_units.head()

# 4. Create Spatial Maps

In [None]:
# Build and save one interactive HTML map for each variable in the given list
def _float_to_hex_color(val, norm) -> str:
    """Convert a value to the '#rrggbb' viridis colour at its position under ``norm``."""
    red, green, blue = cm.viridis(norm(val))[:3]
    return "#" + '%02x%02x%02x' % (int(red * 255), int(green * 255), int(blue * 255))


def create_spatial_map(var: list, df: pd.DataFrame, data_source: str, scaled: bool = False, cov_extremes=None, boundary_polygon=False) -> None:
    """
    Generates and saves one interactive HTML map per demographic variable.

    For every variable in ``var``, rows with a missing value are dropped, each remaining
    ``spatial_unit`` geometry is drawn filled with a viridis colour encoding its value,
    a popup showing the value (rounded to 4 decimals) is attached, a colour legend is
    added, and the map is written to ``../maps/{data_source}_{variable}.html``.

    Parameters:
        var: A list of demographic variables (column names of ``df``) to be visualised.
        df: Dataframe containing the survey data. It must hold a ``spatial_unit`` column
            of geometry objects alongside the columns named in ``var``.
        data_source: A string indicating the data source; used as the output file prefix.
        scaled: If False (default) the scale is map-specific: its limits are the minimum
            and maximum of that map's own data, rounded to 3 decimals. If True the scale
            is covariate-specific and its limits are read from ``cov_extremes``.
        cov_extremes: Required when ``scaled`` is True. A mapping whose values are
            ``(min, max)`` pairs, looked up by the part of the variable name before the
            first underscore (e.g. ``'NPP_%'`` is looked up as ``'NPP'``).
        boundary_polygon: Optional iterable of geometries exposing ``__geo_interface__``,
            each drawn as a red outline on every map. ``False`` (default) or ``None``
            disables the overlay.

    Returns:
        None

    Raises:
        ValueError: If ``scaled`` is True but ``cov_extremes`` is not provided.
    """
    if scaled and cov_extremes is None:
        raise ValueError("cov_extremes must be provided when scaled=True")

    # Compare against the "disabled" sentinels explicitly: truth-testing an array-like
    # input (e.g. a GeoSeries or ndarray of polygons) raises instead of returning a bool.
    draw_boundaries = boundary_polygon is not None and boundary_polygon is not False

    for v in var:

        df_i = df[df[v].notna()]  # drop all rows that have NaN for the variable

        if scaled:
            # scale is covariate-specific
            extremes = cov_extremes[v.split("_")[0]]
            mini = extremes[0]
            maxi = extremes[1]
        else:
            # scale is map-specific
            mini = round(df_i[v].min(), 3)
            maxi = round(df_i[v].max(), 3)

        print(v + " Minimum: {}, Maximum: {}".format(mini, maxi))

        # The normaliser depends only on the scale limits, so build it once per
        # variable rather than once per row.
        norm = matplotlib.colors.Normalize(vmin=mini, vmax=maxi)

        # create map
        m = folium.Map(location=[STARTING_LATITUDE, STARTING_LONGITUDE], zoom_start=6.5, tiles='CartoDB positron')
        for _, r in df_i[[v, 'spatial_unit']].iterrows():
            unit_geojson = gpd.GeoSeries(r['spatial_unit']).to_json()
            fill_color = _float_to_hex_color(r[v], norm)
            layer = folium.GeoJson(
                data=unit_geojson,
                # Bind the colour as a default argument so each layer keeps its own
                # colour instead of the last one computed in the loop.
                style_function=lambda feature, fillColor=fill_color: {
                    'fillColor': fillColor,
                    'fillOpacity': .8,
                    "color": "black",
                    'weight': 0,
                },
            )
            folium.Popup(html=v + ": " + str(round(r[v], 4))).add_to(layer)
            layer.add_to(m)

        # Add the district boundary to the map (if provided)
        if draw_boundaries:
            for bp in boundary_polygon:
                folium.GeoJson(
                    data=bp.__geo_interface__,
                    name='District Boundary',
                    style_function=lambda x: {
                        'color': 'red',
                        'weight': 2,
                        'opacity': 1,
                    },
                ).add_to(m)

        # add color scale to top right
        colormap = cmp.linear.viridis.scale(vmin=mini, vmax=maxi)
        colormap.caption = v
        colormap.add_to(m)

        # save map
        m.save(outfile=f"../maps/{data_source}_{v}.html")

In [None]:
# CENSUS: write one HTML map per covariate
cols = [
    'Age',
    'Female',
    'SchoolMiddleSchoolOrGreater',
    'RWI',
    'NPP_%',
    'NDC_%',
]
create_spatial_map(var=cols, df=df_population, data_source="CENSUS")

In [None]:
# GHANA: write one HTML map per covariate and per reported treatment effect
cols = [
    'Age',
    'Female',
    'SchoolMiddleSchoolOrGreater',
    'RWI',
    'NPP_%',
    'NDC_%',
    'Reported_ITECDC',
    'Reported_ITEHighCash',
    'Reported_ITELowCash',
]
create_spatial_map(var=cols, df=df_ghana_averaged, data_source="GHANA")

In [None]:
# CANDOUR: write one HTML map per covariate
cols = [
    'Age',
    'Female',
    'SchoolMiddleSchoolOrGreater',
    'Dictator',
    'RWI',
    'NPP_%',
    'NDC_%',
]
create_spatial_map(var=cols, df=df_candour_units, data_source="CANDOUR")

In [None]:
# ACCRA: write one HTML map per covariate
cols = [
    'Age',
    'Female',
    'SchoolMiddleSchoolOrGreater',
    'RWI',
    'NPP_%',
    'NDC_%',
]
create_spatial_map(var=cols, df=df_accra_units, data_source="ACCRA")

In [None]:
# RURAL: write one HTML map per covariate
cols = [
    'Age',
    'Female',
    'SchoolMiddleSchoolOrGreater',
    'RWI',
    'NPP_%',
    'NDC_%',
]
create_spatial_map(var=cols, df=df_rural_units, data_source="RURAL")