In [2]:
import geopandas as gpd
import pandas as pd
import os
import numpy as np


continents = ['asia','africa','central-america','europe','north-america','oceania','south-america']

for continent in continents:
    df = pd.read_csv('/Users/guilhermeiablonovski/Dropbox (SDSN)/SDG Geospatial Indicators Project/sdg-accessibility/data/final_by_continent/'+continent+'_raw.csv')
    gdf = gpd.GeoDataFrame(df, geometry=gpd.GeoSeries.from_xy(df['lng'], df['lat']), crs=4326)
    
    #TO GO FROM METERS TO MINUTES JUST MULTIPLY BY 0.0125 (WALKING ENGLISHMEN, CONSIDERING 3MPH)
    #1200m - 15min
    #1m - x
    #1min = 1m * 0.0125

    gdf['health_wellbeing'] = gdf['health_wellbeing']*0.0125
    gdf['community_space'] = gdf['community_space']*0.0125
    gdf['education'] = gdf['education']*0.0125
    gdf['food_choices'] = gdf['food_choices']*0.0125
    gdf['nightlife'] = gdf['nightlife']*0.0125
    gdf['mobility'] = gdf['mobility']*0.0125
    gdf['active_living'] = gdf['active_living']*0.0125
    gdf = gdf.fillna(62.5)
    #Mathematical weights
    gdf['pois'] = (gdf['health_wellbeing']*0.1424558/0.8621979 + 
                   gdf['community_space']*0.1324206/0.8621979 + 
                   gdf['education']*0.1525164/0.8621979 + 
                   gdf['food_choices']*0.1494957/0.8621979 + 
                   gdf['mobility']*0.1350267/0.8621979 + 
                   gdf['active_living']*0.1502826/0.8621979)
    
    w_health = gdf.groupby('ID_HDC_G0').apply(lambda x: np.average(x.health_wellbeing, weights=x.POP_2015)).rename("w_health")
    w_community = gdf.groupby('ID_HDC_G0').apply(lambda x: np.average(x.community_space, weights=x.POP_2015)).rename("w_community")
    w_education = gdf.groupby('ID_HDC_G0').apply(lambda x: np.average(x.education, weights=x.POP_2015)).rename("w_education")
    w_food = gdf.groupby('ID_HDC_G0').apply(lambda x: np.average(x.food_choices, weights=x.POP_2015)).rename("w_food")
    w_nightlife = gdf.groupby('ID_HDC_G0').apply(lambda x: np.average(x.nightlife, weights=x.POP_2015)).rename("w_nightlife")
    w_mobility = gdf.groupby('ID_HDC_G0').apply(lambda x: np.average(x.mobility, weights=x.POP_2015)).rename("w_mobility")
    w_active = gdf.groupby('ID_HDC_G0').apply(lambda x: np.average(x.active_living, weights=x.POP_2015)).rename("w_active")

    weighted = pd.concat([w_health, w_community, w_education, w_food, w_nightlife, w_mobility, w_active], axis=1)
    weighted['w_pois'] = (weighted['w_health']*0.1424558/0.8621979 + 
                          weighted['w_community']*0.1324206/0.8621979 + 
                          weighted['w_education']*0.1525164/0.8621979 + 
                          weighted['w_food']*0.1494957/0.8621979 +
                          weighted['w_mobility']*0.1350267/0.8621979 + 
                          weighted['w_active']*0.1502826/0.8621979)
    
    pop2015 = gdf.groupby('ID_HDC_G0').apply(lambda x: np.sum(x.POP_2015)).rename("POP_2015")
    p_community = gdf[gdf['community_space']<=15].groupby('ID_HDC_G0').apply(lambda x: np.sum(x.POP_2015)).rename("p_community")
    p_health = gdf[gdf['health_wellbeing']<=15].groupby('ID_HDC_G0').apply(lambda x: np.sum(x.POP_2015)).rename("p_health")
    p_education = gdf[gdf['education']<=15].groupby('ID_HDC_G0').apply(lambda x: np.sum(x.POP_2015)).rename("p_education")
    p_food = gdf[gdf['food_choices']<=15].groupby('ID_HDC_G0').apply(lambda x: np.sum(x.POP_2015)).rename("p_food")
    p_nightlife = gdf[gdf['nightlife']<=15].groupby('ID_HDC_G0').apply(lambda x: np.sum(x.POP_2015)).rename("p_nightlife")
    p_mobility = gdf[gdf['mobility']<=15].groupby('ID_HDC_G0').apply(lambda x: np.sum(x.POP_2015)).rename("p_mobility")
    p_active = gdf[gdf['active_living']<=15].groupby('ID_HDC_G0').apply(lambda x: np.sum(x.POP_2015)).rename("p_active")
    p_pois = gdf[gdf['pois']<=15].groupby('ID_HDC_G0').apply(lambda x: np.sum(x.POP_2015)).rename("p_pois")

    people = pd.concat([pop2015, p_health, p_community, p_education, p_food, p_nightlife, p_mobility, p_active, p_pois], axis=1)
    people = people.fillna(0)
    
    weighted = weighted.merge(people, on='ID_HDC_G0')
    
    weighted['c_health'] = weighted['p_health']/weighted['POP_2015']
    weighted['c_community'] = weighted['p_community']/weighted['POP_2015']
    weighted['c_education'] = weighted['p_education']/weighted['POP_2015']
    weighted['c_food'] = weighted['p_food']/weighted['POP_2015']
    weighted['c_nightlife'] = weighted['p_nightlife']/weighted['POP_2015']
    weighted['c_mobility'] = weighted['p_mobility']/weighted['POP_2015']
    weighted['c_active'] = weighted['p_active']/weighted['POP_2015']
    weighted['c_pois'] = weighted['p_pois']/weighted['POP_2015']
    
    
    polygons = gpd.read_file('/Users/guilhermeiablonovski/Dropbox (SDSN)/SDG Geospatial Indicators Project/sdg-accessibility/data/GHS_STAT_UCDB2015MT_GLOBE_R2019A/subset-'+continent+'.gpkg')
    #polygons = gpd.read_file('/Users/guilhermeiablonovski/Dropbox (SDSN)/SDG Geospatial Indicators Project/sdg-accessibility/data/GHS_STAT_UCDB2015MT_GLOBE_R2019A/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2_2.gpkg')
    
    cities = polygons.merge(weighted, left_on='ID_HDC_G0', right_index=True)
    cities.geometry = cities.geometry.representative_point()
    cities.drop_duplicates()
    
    cities.to_file('data/final_by_continent/'+continent+'_cities_weightedpoi.gpkg')
    
    


Now for the country index (Not needed, see script 6)

In [14]:
world = gpd.read_file('/Users/guilhermeiablonovski/Dropbox (SDSN)/SDG Geospatial Indicators Project/sdg-accessibility/data/world-administrative-boundaries-sdsn/world-administrative-boundaries-sdsn.shp')


In [15]:
#DON'T
#gdf = gdf.sjoin(world, how="inner")


In [16]:
gdf = gdf.drop(['index_right'], axis=1).sjoin(polygons, how="inner")

In [17]:
gdf = gdf[gdf['AREA']>40]

In [18]:
gdf

Unnamed: 0,field_1,POP_2015,community_space,education,food_choices,health_wellbeing,nightlife,ID_HDC_G0_left,lng,lat,...,EX_SS_P90,EX_SS_P00,EX_SS_P15,EX_EQ19PGA,EX_EQ19MMI,EX_EQ19_Q,EX_HW_IDX,SDG_LUE9015,SDG_A2G14,SDG_OS15MX
61201,0,82,14.3750,14.3750,21.4750,18.6500,15.4000,5329,39.699505,-3.987246,...,0.0,0.0,0.0,0.069613,4.0,available,6.09711,0.71031,0.105604,67.71
61202,1,69,13.4375,13.4375,20.5375,17.7125,14.4625,5329,39.698530,-3.988055,...,0.0,0.0,0.0,0.069613,4.0,available,6.09711,0.71031,0.105604,67.71
61203,2,97,14.3750,14.3750,21.4750,18.6500,15.4000,5329,39.699529,-3.988055,...,0.0,0.0,0.0,0.069613,4.0,available,6.09711,0.71031,0.105604,67.71
61204,3,109,27.0375,25.5625,22.0000,10.3250,25.3250,5329,39.703526,-3.988055,...,0.0,0.0,0.0,0.069613,4.0,available,6.09711,0.71031,0.105604,67.71
61205,4,10,27.0375,25.5625,22.0000,10.3250,25.3250,5329,39.704525,-3.988055,...,0.0,0.0,0.0,0.069613,4.0,available,6.09711,0.71031,0.105604,67.71
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1993146,3551,136,39.2250,34.0375,5.1375,6.5625,47.2125,5685,44.072796,9.521039,...,0.0,0.0,0.0,0.031344,3.0,available,6.85601,1.3033,0.000000,62.96
1993147,3552,172,42.6625,37.4625,3.5500,10.1125,50.6500,5685,44.074809,9.521039,...,0.0,0.0,0.0,0.031344,3.0,available,6.85601,1.3033,0.000000,62.96
1993148,3553,50,34.8750,35.9250,10.7875,3.5625,46.8625,5685,44.068706,9.520226,...,0.0,0.0,0.0,0.031344,3.0,available,6.85601,1.3033,0.000000,62.96
1993149,3554,63,36.1375,35.0625,9.4250,2.2750,48.1250,5685,44.069713,9.520226,...,0.0,0.0,0.0,0.031344,3.0,available,6.85601,1.3033,0.000000,62.96


In [19]:
w_health = gdf.groupby('ISO_CC').apply(lambda x: np.average(x.health_wellbeing, weights=x.POP_2015)).rename("w_health")
w_community = gdf.groupby('ISO_CC').apply(lambda x: np.average(x.community_space, weights=x.POP_2015)).rename("w_community")
w_education = gdf.groupby('ISO_CC').apply(lambda x: np.average(x.education, weights=x.POP_2015)).rename("w_education")
w_food = gdf.groupby('ISO_CC').apply(lambda x: np.average(x.food_choices, weights=x.POP_2015)).rename("w_food")
w_nightlife = gdf.groupby('ISO_CC').apply(lambda x: np.average(x.nightlife, weights=x.POP_2015)).rename("w_nightlife")
w_mobility = gdf.groupby('ISO_CC').apply(lambda x: np.average(x.mobility, weights=x.POP_2015)).rename("w_mobility")
w_active = gdf.groupby('ISO_CC').apply(lambda x: np.average(x.active_living, weights=x.POP_2015)).rename("w_active")

weighted = pd.concat([w_health, w_community, w_education, w_food, w_nightlife, w_mobility, w_active], axis=1)
weighted['w_pois'] = (weighted['w_health'] + weighted['w_community'] + weighted['w_education'] + weighted['w_food'] + weighted['w_mobility'] + weighted['w_active'])/6
weighted


Unnamed: 0_level_0,w_health,w_community,w_education,w_food,w_nightlife,w_mobility,w_active,w_pois
ISO_CC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AGO,19.755746,24.079357,20.761499,18.014189,30.480077,41.438317,33.348926,26.233006
BDI,6.264715,11.730022,10.90575,11.817904,13.252029,49.473806,26.437,19.4382
BEN,11.391471,12.811083,9.262321,11.532299,17.350277,32.205317,29.196902,17.733232
BFA,14.952786,15.382569,9.440019,13.803821,18.665358,11.359876,32.985751,16.320804
BWA,15.392905,12.643455,12.140788,11.781897,19.728207,42.855797,26.243597,20.176406
CAF,12.30133,12.960476,15.949503,27.952722,33.593581,53.828993,43.65405,27.774512
CIV,6.757671,7.41372,6.171476,5.8627,11.036442,8.481874,21.538544,9.370998
CMR,9.618562,11.990729,7.05134,14.631687,19.586456,35.609379,25.443189,17.390814
COD,9.633997,10.687688,9.187893,17.154046,25.768786,44.902894,34.456182,21.003783
COG,11.529302,12.658598,8.102906,11.290993,15.810612,41.013498,28.264124,18.809904


In [20]:
pop2015 = gdf.groupby('ISO_CC').apply(lambda x: np.sum(x.POP_2015)).rename("POP_2015")
p_community = gdf[gdf['community_space']<=15].groupby('ISO_CC').apply(lambda x: np.sum(x.POP_2015)).rename("p_community")
p_health = gdf[gdf['health_wellbeing']<=15].groupby('ISO_CC').apply(lambda x: np.sum(x.POP_2015)).rename("p_health")
p_education = gdf[gdf['education']<=15].groupby('ISO_CC').apply(lambda x: np.sum(x.POP_2015)).rename("p_education")
p_food = gdf[gdf['food_choices']<=15].groupby('ISO_CC').apply(lambda x: np.sum(x.POP_2015)).rename("p_food")
p_nightlife = gdf[gdf['nightlife']<=15].groupby('ISO_CC').apply(lambda x: np.sum(x.POP_2015)).rename("p_nightlife")
p_mobility = gdf[gdf['mobility']<=15].groupby('ISO_CC').apply(lambda x: np.sum(x.POP_2015)).rename("p_mobility")
p_active = gdf[gdf['active_living']<=15].groupby('ISO_CC').apply(lambda x: np.sum(x.POP_2015)).rename("p_active")
p_pois = gdf[gdf['pois']<=15].groupby('ISO_CC').apply(lambda x: np.sum(x.POP_2015)).rename("p_pois")

people = pd.concat([pop2015, p_health, p_community, p_education, p_food, p_nightlife, p_mobility, p_active, p_pois], axis=1)
people = people.fillna(0)
people

Unnamed: 0_level_0,POP_2015,p_health,p_community,p_education,p_food,p_nightlife,p_mobility,p_active,p_pois
ISO_CC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AGO,10848426,5406943,4397784,5238683,6039058,2988040,1164045,2416273,2399349
BDI,1371582,1282258,1020799,1109707,1012328,908085,18585,328551,517397
BEN,2930630,2177669,2038168,2477814,2136494,1662901,759624,775179,1271755
BFA,3829103,2400438,2253941,3084542,2562220,1985196,2753360,851621,1916595
BWA,99046,51794,67150,67813,72332,37947,5944,34674,30785
CAF,667784,473239,451748,374789,187249,100776,4002,39909,86806
CIV,5579610,4988957,4876038,5194382,5098546,4195394,4753724,2601465,4553714
CMR,9765117,7881950,7079598,8774561,6122118,4565563,1717635,3675073,4529123
COD,9293999,7569230,7132049,7664923,5152555,3264835,1012232,1800370,2627947
COG,2897461,2240630,2185026,2526481,2278821,2085735,486184,1044741,1472622


In [21]:
weighted = weighted.merge(people, on='ISO_CC')

In [22]:
weighted['c_health'] = weighted['p_health']/weighted['POP_2015']
weighted['c_community'] = weighted['p_community']/weighted['POP_2015']
weighted['c_education'] = weighted['p_education']/weighted['POP_2015']
weighted['c_food'] = weighted['p_food']/weighted['POP_2015']
weighted['c_nightlife'] = weighted['p_nightlife']/weighted['POP_2015']
weighted['c_mobility'] = weighted['p_mobility']/weighted['POP_2015']
weighted['c_active'] = weighted['p_active']/weighted['POP_2015']
weighted['c_pois'] = weighted['p_pois']/weighted['POP_2015']

In [23]:
countries = world.merge(weighted, left_on='ISO_CC', right_index=True)
countries.to_file('data/final_by_continent/'+continent+'_countries.gpkg')

In [24]:
countries.to_file('data/final_by_continent/'+continent+'_countries.shp')

  """Entry point for launching an IPython kernel.
