In [1]:
import pandas as pd
pd.set_option('mode.chained_assignment','raise')
import geopandas as gpd
import numpy as np
import math
import os
import re
from fastprogress.fastprogress import master_bar, progress_bar



In [2]:
# Model parameters: one row per parameter, looked up by the 'variable' column.
parameters = pd.read_csv('parameter.csv').set_index('variable')

# Upper and lower bounds on travel time, taken from the 'value' column.
max_travel_t, min_travel_t = (parameters.loc[name, 'value']
                              for name in ('max_travel_t', 'min_travel_t'))

In [3]:
def load_sociodemo(path_geometry,sociodemo):
    '''
    This function loads the zones of one city and attaches the sociodemographic data.

    The zones read from path_geometry are inner-joined with sociodemo on 'Postcode'.
    The group counts 'N_NL', 'N_WE' and 'N_NW' are multiplied by 'area_ratio' and
    rounded; 'pop' is their sum and 'P_NL', 'P_WE', 'P_NW' are the group shares
    (0 where 'pop' is not positive).

            Parameters:
                    path_geometry (str): path where the geographic data is stored.
                    sociodemo (pd.DataFrame): sociodemographic data for all zones in the Netherlands.
                                              It is not modified. After the join, the columns
                                              'N_NL', 'N_WE', 'N_NW' and 'area_ratio' must be present.

            Returns:
                    sociodemo (pd.DataFrame): sociodemographic data
                                              for the city of interest.
    '''
    groups = ('NL', 'WE', 'NW')

    zones = gpd.read_file(path_geometry)
    # The municipality column is optional in the geometry files and not needed here.
    zones = zones.drop(columns = 'gemeente', errors = 'ignore')

    # The merge returns a new table, so the caller's sociodemo is left untouched.
    sociodemo = zones.merge(sociodemo, on = 'Postcode')

    # Scale the counts by the area ratio of each zone.
    for group in groups:
        count = 'N_' + group
        sociodemo[count] = (sociodemo[count] * sociodemo['area_ratio']).round()

    sociodemo['pop'] = sociodemo['N_NL'] + sociodemo['N_WE'] + sociodemo['N_NW']

    # Shares are built directly as float columns: writing floats into a column
    # initialised with the integer 0 is a deprecated upcast in recent pandas.
    populated = sociodemo['pop'] > 0
    for group in groups:
        share = sociodemo['N_' + group] / sociodemo['pop']
        sociodemo['P_' + group] = share.where(populated, 0.0)

    return sociodemo

In [4]:
def compute_exposure(sociodemo, shortest_path, min_travel_t):
    '''
    This function computes the exposure to each of the social group per neighborhood.

    Every origin zone contributes its counts to a destination zone, weighted by
    proximity = min_travel_t**2 / walk_t**2 (walk_t is first raised to at least
    min_travel_t, so the weight never exceeds 1). The weighted counts are summed
    per destination zone and the exposure to a group is its weighted count divided
    by the weighted total population (0 where that total is not positive).

           Parameters:
                    sociodemo (pd.DataFrame): sociodemographic data per zone, with the columns
                                              'id_unit', 'pop', 'N_NL', 'N_WE', 'N_NW',
                                              'P_NL', 'P_WE' and 'P_NW'.
                    shortest_path (pd.DataFrame): walking times from zones to zones, with the
                                                  columns 'from_id_unit', 'to_id_unit' and 'walk_t'.
                    min_travel_t (float): minimum travel time in seconds.
                                          (if the travel time between two zones is lower,
                                           it is set to min_travel_t).

            Returns:
                    zones_loc_env (pd.DataFrame): exposure to each social group per zone.
                                                  'pop', 'N_NL', 'N_WE', 'N_NW' hold the
                                                  proximity-weighted sums, 'expos_*' the exposures
                                                  and the '*_res' columns the zone's own
                                                  (residential) values from sociodemo.
    '''
    count_cols = ['N_NL', 'N_WE', 'N_NW', 'pop']
    residential_cols = ['pop', 'N_NW', 'N_NL', 'N_WE', 'P_NW', 'P_NL', 'P_WE']

    # Amsterdam has 2 pieces that we want to look at separately.
    # However, the shortest paths have the info for both pieces.
    # We filter out the unwanted information.
    unit_ids = sociodemo['id_unit']
    within_area = (shortest_path['from_id_unit'].isin(unit_ids)
                   & shortest_path['to_id_unit'].isin(unit_ids))
    paths = shortest_path.loc[within_area, ['from_id_unit', 'to_id_unit']]

    # Travel times below the minimum are raised to the minimum.
    walk_t = shortest_path.loc[within_area, 'walk_t'].clip(lower = min_travel_t)
    paths = paths.assign(proximity = min_travel_t**2 / walk_t**2)

    # Maybe consider a joint accessibility metric.
    # Only the count columns are joined; the other columns of sociodemo
    # (e.g. geometry) are not needed on every origin-destination pair.
    weighted = pd.merge(sociodemo[['id_unit'] + count_cols],
                        paths,
                        left_on = 'id_unit',
                        right_on = 'from_id_unit')

    zones_loc_env = (weighted[count_cols]
                     .multiply(weighted['proximity'], axis = 'index')
                     .groupby(weighted['to_id_unit'])
                     .sum()
                     .reset_index())

    # Exposures are created as float columns from the start: writing floats into a
    # column initialised with the integer 0 is a deprecated upcast in recent pandas.
    reachable = zones_loc_env['pop'] > 0
    for group in ('NW', 'NL', 'WE'):
        exposure = zones_loc_env['N_' + group] / zones_loc_env['pop']
        zones_loc_env['expos_' + group] = exposure.where(reachable, 0.0)

    zones_loc_env = zones_loc_env.rename(columns = {'to_id_unit':'id_unit'})

    # Attach the zone's own values next to the proximity-weighted ones.
    residential = sociodemo[['id_unit'] + residential_cols].rename(
        columns = {col: col + '_res' for col in residential_cols})

    zones_loc_env = pd.merge(zones_loc_env,
                             residential,
                             on = 'id_unit',
                             how = 'left')

    return zones_loc_env

In [5]:
# Input and output locations (all paths end with '/', file names are appended directly).
dir_sociodemo = '../data/processed_data/sociodemographics/'
dir_geometry_edited = '../data/processed_data/zones_delineation/edited/'
dir_geometry = '../data/processed_data/zones_delineation/'
dir_shortest_path = '../data/processed_data/shortest_path/'
dir_exposure = '../data/results/exposure/'
dir_gemeente = '../data/processed_data/city_boundary/'

# Not read anywhere in this cell; kept so the name stays defined for other cells.
parameter = min_travel_t**2

sociodemo_raw = pd.read_csv(dir_sociodemo + 'sociodemographics.csv')

# Column order of the combined output file.
result_columns = ['id_unit',
                  'access_tot', 'access_NL', 'access_NW', 'access_WE',
                  'pop_res', 'N_NW_res', 'N_NL_res', 'N_WE_res',
                  'P_NW_res', 'P_NL_res', 'P_WE_res',
                  'expos_NL', 'expos_WE', 'expos_NW',
                  'share_NL_c', 'share_WE_c', 'share_NW_c',
                  'city']

# compute_exposure returns the proximity-weighted sums under the names of the
# count columns; in the output files they are reported as accessibility.
access_names = {'pop':'access_tot',
                'N_NW':'access_NW',
                'N_NL':'access_NL',
                'N_WE':'access_WE'}

# One boundary file per city. The README is filtered out rather than removed,
# so a missing README does not stop the run.
list_files = [name for name in os.listdir(dir_gemeente)
              if name != 'README_city_boundary.mkd']

# Per-city results are collected here and concatenated once after the loop.
city_results = []

pb = progress_bar(list_files)

for file in pb:
    # The city name is the file name without its last five characters (e.g. '.gpkg').
    city = file[:-5]
    pb.comment = city

    # Decide which zone files describe the city and where the results go.
    if city == 'Amsterdam':
        # Amsterdam has two pieces, each with its own zones and output file.
        geometry_paths = [dir_geometry_edited + 'PC_' + city + '_1.gpkg',
                          dir_geometry_edited + 'PC_' + city + '_2.gpkg']
        output_paths = [dir_exposure + city + '_exposure_1.csv',
                        dir_exposure + city + '_exposure_2.csv']
        overwrite = True
    elif os.path.isfile(dir_geometry_edited + 'PC_' + file):
        geometry_paths = [dir_geometry_edited + 'PC_' + file]
        output_paths = [dir_exposure + city + '_exposure.csv']
        overwrite = True
    elif os.path.isfile(dir_geometry + 'PC_' + file):
        geometry_paths = [dir_geometry + 'PC_' + file]
        output_paths = [dir_exposure + city + '_exposure.csv']
        # With the unedited zones an existing per-city file is left in place.
        overwrite = False
    else:
        # No zone file for this entry: nothing to compute.
        continue

    pieces = [load_sociodemo(path, sociodemo_raw) for path in geometry_paths]
    sociodemo = pd.concat(pieces, ignore_index=True)
    shortest_path = pd.read_csv(dir_shortest_path + city + '_walk_t_unit_to_unit.csv')

    # City-wide share of each group, computed over all pieces together.
    city_mix = sociodemo.loc[:,['N_NL','N_WE','N_NW']].sum() / sociodemo['pop'].sum()

    # The exposure itself is computed per piece.
    for piece, output_path in zip(pieces, output_paths):
        result = compute_exposure(piece, shortest_path, min_travel_t)
        result = result.assign(share_NL_c = city_mix['N_NL'],
                               share_WE_c = city_mix['N_WE'],
                               share_NW_c = city_mix['N_NW'],
                               city = city)
        result = result.rename(columns = access_names)

        if overwrite or not os.path.isfile(output_path):
            result.to_csv(output_path, index = False)
        city_results.append(result)


if city_results:
    result_per_PC = pd.concat(city_results, ignore_index=True)
    # Expected columns first, in the documented order; anything else is kept after them.
    extra_columns = [col for col in result_per_PC.columns if col not in result_columns]
    result_per_PC = result_per_PC.reindex(columns = result_columns + extra_columns)
else:
    # No city could be processed: write the header only.
    result_per_PC = pd.DataFrame(columns = result_columns)

result_per_PC.to_csv(dir_exposure + 'exposure_by_spatial_unit.csv', index = False)