# Prague Pedestrian Accessibility for Children (age 10-16). Part 3
## Modelling

In [None]:
!conda install -c conda-forge basemap

In [None]:
!conda install -c conda-forge pandana

In [None]:
# Ask the browser front-end to restart the kernel by injecting a small script.
# NOTE(review): presumably done so that the freshly installed packages become importable;
# it relies on the front-end exposing a global `Jupyter` JavaScript object — confirm for the target UI.
from IPython.display import display_html
display_html("<script>Jupyter.notebook.kernel.restart()</script>",raw=True)

In [1]:
!conda update --all #update all other package. Restart kernel if needed

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    pyrsistent-0.15.4          |   py36h7b6447c_0          92 KB
    pytables-3.4.4             |   py36ha205bf6_0         1.5 MB
    pycurl-7.43.0.2            |   py36hb7f436b_0          60 KB
    nltk-3.4.5                 |           py36_0         2.1 MB
    openssl-1.0.2t             |       h7b6447c_1         3.1 MB
    zipp-0.6.0                 |             py_0           9 KB
    scikit-learn-0.21.3        |   py36hd81dba3_0         6.8 MB
    spyder-kernels-0.5.2       |           py36_0          69 KB
    jinja2-2.10.3              |             py_0          95 KB
    tensorflow-base-2.0.0      |eigen_py36h4ed9498_0        68.9 MB
    lazy-object-proxy-1.4.2    |   py36h7b6447c_0          30 KB
    libspatialindex-1.8.5      |       h

In [1]:
import io,time, os
import pandas as pd
import numpy as np
import pandana
from pandana.loaders import osm
from bokeh.palettes import brewer
%matplotlib inline

ModuleNotFoundError: No module named 'pandana'

In [3]:
# @hidden_cell
# Connection settings for the object-storage bucket used by upload_file()/download_file().
# The API key is a secret, so it is read from the COS_APIKEY environment variable
# instead of being stored in the notebook.
# NOTE(review): the key that used to be hardcoded here should be treated as exposed and rotated.
_cos_apikey = os.environ.get('COS_APIKEY')
if not _cos_apikey:
    print('WARNING: environment variable COS_APIKEY is not set; storage transfers will fail.')

storage_creds = {'apikey' : _cos_apikey,
                 'iam_serviceid_crn' : 'crn:v1:bluemix:public:cloud-object-storage:global:a/8aa0fa0d4ad4480b8bfdf1c4d79f9442:021a8d33-89af-44aa-b548-e6f14a067d79:bucket:prague-data-set',
                 'auth_ep': 'https://iam.cloud.ibm.com/identity/token',
                 'ep': 'https://s3.private.eu-de.cloud-object-storage.appdomain.cloud',
                 'bucket' : 'prague-data-set'}

In [4]:
import sys
from ibm_botocore.client import Config
import ibm_boto3

def _storage_client(credentials):
    """Build the S3-style object-storage client shared by upload_file() and download_file().

    `credentials` is a dict providing the keys 'apikey', 'iam_serviceid_crn',
    'auth_ep' and 'ep'.
    """
    return ibm_boto3.client(service_name='s3',
                            ibm_api_key_id=credentials['apikey'],
                            ibm_service_instance_id=credentials['iam_serviceid_crn'],
                            ibm_auth_endpoint=credentials['auth_ep'],
                            config=Config(signature_version='oauth'),
                            endpoint_url=credentials['ep'])


def upload_file(credentials,local_file_name,key):
    """Upload a local file to the bucket named in credentials['bucket'].

    Parameters:
        credentials: dict with the connection settings (see _storage_client) plus 'bucket'.
        local_file_name: path of the file to send.
        key: object name to store the file under.

    Returns None. A failed transfer is reported on stdout instead of being raised,
    so that the notebook keeps running (best effort).
    """
    storage = _storage_client(credentials)

    try:
        storage.upload_file(Filename=local_file_name, Bucket=credentials['bucket'],Key=key)
    except Exception as e:
        # Report the actual error type, not the generic Exception class.
        print(type(e).__name__, e)
    else:
        print('File {} Uploaded'.format(local_file_name))


def download_file(credentials,local_file_name,key):
    """Download object `key` from the bucket named in credentials['bucket'].

    Parameters:
        credentials: dict with the connection settings (see _storage_client) plus 'bucket'.
        local_file_name: path the object is written to.
        key: object name to fetch.

    Returns True when the transfer succeeded and False otherwise; a failure is
    reported on stdout instead of being raised.
    """
    storage = _storage_client(credentials)
    try:
        storage.download_file(Bucket=credentials['bucket'],Key=key,Filename=local_file_name)
    except Exception as e:
        print(type(e).__name__, e)
        return False
    # Reached only on success (previously an unreachable `else:` after a `return`).
    print('File {} Downloaded'.format(local_file_name))
    return True


ModuleNotFoundError: No module named 'ibm_botocore'

In [5]:
from shapely.geometry import Polygon

def read_coordinates_from_str(input_values):
    """Parse text such as '[[x1, y1], [x2, y2]]' into [[x1, y1], [x2, y2]] with float values.

    The outermost pair of brackets is cut off, the remainder is split on '],',
    and the first two comma-separated numbers of every piece form one pair.
    """
    def to_pair(chunk):
        # Drop brackets and blanks so that only 'a,b[,...]' remains.
        fields = chunk.replace('[', '').replace(' ', '').replace(']', '').split(',')
        return [float(fields[0]), float(fields[1])]

    return [to_pair(chunk) for chunk in input_values[1:-1].split('],')]

In [6]:
def get_bounding_box(polygons):
    """Return the box enclosing the exterior rings of all given polygons.

    Parameters:
        polygons: iterable of objects exposing `exterior.coords.xy` as a pair
            (x values, y values), as used here with shapely polygons.

    Returns:
        Tuple (min_x, min_y, max_x, max_y).

    Raises:
        ValueError: if `polygons` yields no element (this used to surface as an
            UnboundLocalError on the return statement).
    """
    bounds = None
    for poly in polygons:
        xs, ys = poly.exterior.coords.xy
        current = (min(xs), min(ys), max(xs), max(ys))
        if bounds is None:
            bounds = current
        else:
            # Widen the running box so that it also covers this polygon.
            bounds = (min(bounds[0], current[0]),
                      min(bounds[1], current[1]),
                      max(bounds[2], current[2]),
                      max(bounds[3], current[3]))

    if bounds is None:
        raise ValueError('get_bounding_box() needs at least one polygon')
    return bounds

In [7]:
# Define constants
# District names and POI types used to filter the POI table.
districts = ['praha 4']
amenities = ['school','sport']

# Passed as max_dist / distance to the network POI queries (the plot title reports metres).
distance = 2000
# Passed as max_pois / num_pois: how many nearest POIs are looked up per category.
num_pois = 5
# NOTE(review): one more than the number of amenity types; the reason for the +1 is not visible here — confirm.
num_categories = len(amenities) + 1 


# Extra keyword arguments handed to network.plot() for the colour bar (none).
cbar_kwargs = {}

# Extra keyword arguments handed to network.plot() for the base map (none).
bmap_kwargs = {}

# Not referenced by the visible cells.
bgcolor = '#014636'

# keyword arguments to pass for hex bin plots
# (not referenced by the visible cells)
hex_plot_kwargs = {'gridsize':40,
                   'alpha':0.9, 
                   'cmap':'PuBuGn_r', 
                   'edgecolor':'none'}

# Object names in storage; the same names are used for the local copies.
files = {'districts':'prague_district_population.csv', 'poi':'prague_pois.csv'}

Load the datasets collected in step 1.

In [8]:
# Points of interest: fetch the CSV from object storage, then read the local copy.
poi_file_name = files['poi']
download_file(storage_creds, poi_file_name, poi_file_name)
df_parague_poi = pd.read_csv(poi_file_name)

# District table: same two steps.
population_file_name = files['districts']
download_file(storage_creds, population_file_name, population_file_name)
population_raw = pd.read_csv(population_file_name)

# Swap the textual 'Geometry' column for parsed shapely polygons. Dropping the
# column first and assigning it afterwards keeps it as the last column.
district_polygons = population_raw['Geometry'].apply(
    lambda text: Polygon(read_coordinates_from_str(text)))
df_prague_population = (population_raw
                        .drop(columns=['Geometry'])
                        .assign(Geometry=district_polygons))

## Pedestrian accessibility map

In [9]:
# Start from every POI, then narrow down by district and by POI type.
# An empty filter list leaves the corresponding dimension unfiltered.
selected_pois = df_parague_poi
if districts:
    print('Getting POIs for: {}'.format(','.join(map(str, districts))))
    selected_pois = selected_pois.loc[selected_pois.District_Name.isin(districts)]

if amenities:
    print('Getting POIs types: {}'.format(','.join(map(str, amenities))))
    selected_pois = selected_pois.loc[selected_pois.Type.isin(amenities)]

Getting POIs for: praha 4
Getting POIs types: school,sport


In [10]:
# Bounding box (min_x, min_y, max_x, max_y) over all district polygons; it also
# names the cached network file.
bbox = get_bounding_box(df_prague_population['Geometry'])
bbox_string = '_'.join([str(x) for x in bbox])
net_filename = 'network_{}.h5'.format(bbox_string)

print('Selected region bounding box is {}'.format(','.join([str(x) for x in bbox])) )

# x span divided by y span of the box, i.e. width over height.
bbox_aspect_ratio = (bbox[2] - bbox[0]) / (bbox[3] - bbox[1])

if download_file(storage_creds,net_filename,net_filename):
    # A cached network exists in object storage: reuse it.
    print("Load network from storage")
    network = pandana.network.Network.from_hdf5(net_filename)
else:
    # No cached copy: build the walking network from OpenStreetMap data.
    print("Build new network from OpenStreetMap")
    osm_network = osm.pdna_network_from_bbox(bbox[3], bbox[2], bbox[1], bbox[0],network_type='walk')
    print ('Remove low-connectivity nodes and save to h5')
    lcn = osm_network.low_connectivity_nodes(impedance=1000, count=10, imp_name='distance')
    osm_network.save_hdf5(net_filename, rm_nodes=lcn)
    upload_file(storage_creds,net_filename,net_filename)
    # rm_nodes only prunes what is written to the file, so reload the saved
    # network: a first run then analyses the same graph as later cached runs.
    network = pandana.network.Network.from_hdf5(net_filename)

Selected region bounding box is 14.224437012000067,49.94190007000003,14.706787572000053,50.17742967400005
Build new network form storage
Requesting network data within bounding box from Overpass API in 1 request(s)
Posting to http://www.overpass-api.de/api/interpreter with timeout=180, "{'data': '[out:json][timeout:180];(way["highway"]["highway"!~"motor|proposed|construction|abandoned|platform|raceway"]["foot"!~"no"]["pedestrians"!~"no"](49.94190007,14.22443701,50.17742967,14.70678757);>;);out;'}"
Downloaded 64,544.4KB from www.overpass-api.de in 4.32 seconds
Downloaded OSM network data within bounding box from Overpass API in 1 request(s) and 6.19 seconds
Returning OSM data with 426,565 nodes and 110,645 ways...
Edge node pairs completed. Took 219.23 seconds
Returning processed graph with 140,822 nodes and 204,575 edges...
Completed OSM data download and Pandana node and edge table creation in 238.46 seconds
Remove low-connectivity nodes and save to h5
File network_14.224437012000067_

In [None]:
# Precompute the network for a horizon of `distance` + 1.
# NOTE(review): the +1 presumably keeps queries at exactly `distance` inside the precomputed horizon — confirm against the pandana documentation.
network.precompute(distance + 1)

In [None]:
# Prepare the POI engine, then register the coordinates of each category.
network.init_pois(num_categories=num_categories, max_dist=distance, max_pois=num_pois)

shools = selected_pois.loc[selected_pois['Type'].eq('school')]
network.set_pois(category='school', x_col=shools['longitude'], y_col=shools['latitude'])

sport = selected_pois.loc[selected_pois['Type'].eq('sport')]
network.set_pois(category='sport', x_col=sport['longitude'], y_col=sport['latitude'])

# Query the `num_pois` nearest schools within `distance` for the network nodes.
school_access = network.nearest_pois(distance=distance, category='school', num_pois=num_pois)

node_count = len(school_access)
print('{:,} nodes'.format(node_count))
school_access.head()

In [None]:
# Same nearest-POI query as for schools, this time for the 'sport' category.
sport_access = network.nearest_pois(distance=distance, category='sport', num_pois=num_pois)
sport_access.head()

In [None]:
# Join both result tables on their index; overlapping column names get a suffix.
# NOTE(review): the left suffix is spelled '_shool'. It becomes part of the exported
# column names, so rename it only together with whatever consumes the exported file.
df_access = school_access.join(sport_access,lsuffix='_shool', rsuffix='_sport')

In [None]:
# Preview the first rows of the joined table.
df_access.head()

In [None]:
# Write the joined table to a CSV named after the bounding box and upload it
# to object storage under the same name.
pois_filename = 'pois_{}.csv'.format(bbox_string)
df_access.to_csv(pois_filename)
upload_file(storage_creds, pois_filename, pois_filename)

In [None]:
# bbox_aspect_ratio is width / height of the bounding box, so the figure height
# is the width divided by it (multiplying gave a tall figure for a wide region).
fig_width = 14
fig_kwargs = {'facecolor':'w',
              'figsize':(fig_width, fig_width / bbox_aspect_ratio)}

plot_kwargs = {'s':5,
               'alpha':1,
               'cmap':'PuBuGn_r',
               'edgecolor':'none'}

# Column n of school_access is plotted; the title names that rank so it
# matches the data shown.
n = 3
rank_label = {1: 'nearest', 2: '2nd nearest', 3: '3rd nearest'}.get(n, '{}th nearest'.format(n))
bmp, fig, ax = network.plot(school_access[n],plot_kwargs=plot_kwargs,  fig_kwargs=fig_kwargs, bmap_kwargs=bmap_kwargs, cbar_kwargs=cbar_kwargs)

ax.set_title('Walking distance (m) to {} school around Praha 4'.format(rank_label), fontsize=15)
fig.savefig('accessibility-schhol-4.png', dpi=200, bbox_inches='tight')
upload_file(storage_creds,'accessibility-schhol-4.png','accessibility-schhol-4.png')

## Clustering

In [None]:
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score

df_edjes = df_access
# NOTE(review): `df_coorimates` was used here and in the following cells without
# ever being defined. It is taken to be the accessibility table; a copy is used
# because a later cell adds a label column to it. Confirm this is the intended data.
df_coorimates = df_access.copy()
sil_coeffs = []

# The silhouette coefficient needs at least 2 clusters, so the scan starts at 2;
# sil_coeffs[i] therefore belongs to n_clusters = i + 2.
# NOTE(review): after the loop `clu` holds the labels of the last n_clusters tried
# (9), not those of the best-scoring run.
for n_cluster in range(2, 10):
    alg = AgglomerativeClustering(n_clusters=n_cluster, affinity='euclidean', linkage='ward')
    clu = alg.fit_predict(df_coorimates.values)
    sil_coeff = silhouette_score(df_edjes, clu, metric='euclidean')
    sil_coeffs.append(sil_coeff)
    print("For n_clusters={}, The Silhouette Coefficient is {}".format(n_cluster, sil_coeff))
    


In [None]:
# Store the labels as the first column. On a re-run the column already exists and
# insert() would raise, so it is overwritten in place instead.
if 'Cluster Labels' in df_coorimates.columns:
    df_coorimates['Cluster Labels'] = clu
else:
    df_coorimates.insert(0, 'Cluster Labels', clu)

In [None]:
# Mean of every column per cluster label.
df_coorimates.groupby('Cluster Labels').mean()

In [None]:
# Colour the network nodes by their cluster label.
bmp, fig, ax = network.plot(df_coorimates['Cluster Labels'],plot_kwargs=plot_kwargs,  fig_kwargs=fig_kwargs, bmap_kwargs=bmap_kwargs, cbar_kwargs=cbar_kwargs)

ax.set_title('Clusters', fontsize=15)
# Use a file name of its own: the copied name overwrote the school-accessibility
# figure both locally and in object storage.
clusters_png = 'accessibility-clusters-4.png'
fig.savefig(clusters_png, dpi=200, bbox_inches='tight')
upload_file(storage_creds, clusters_png, clusters_png)