# Pedestrian Accessibility of Prague
## Children of age 10-16
### Part 2. Data acquisition and cleaning

### 1. Environment

In [2]:
#!conda install -c conda-forge folium #Uncomment this cell to install folium package if it is not yet installed

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.3.1               |             py_0          25 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    altair-3.2.0               |           py36_0         770 KB  conda-forge
    folium-0.10.0              |             py_0          59 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         882 KB

The following NEW packages will be INSTALLED:

    altair:  3.2.0-py36_0 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.10.0-py_0  conda-forge
    vincent: 0.4.4-py_1   conda-forge


Downloading and Extracting Packages
branca-0.3.1         | 25 KB     | ###

In [5]:
#!conda install -c conda-forge geopandas #Uncomment this cell to install geopandas package if it is not yet installed

Solving environment: done

# All requested packages already installed.



In [None]:
from IPython.display import display_html

# Emit a small script through the notebook front end that asks it to restart
# the kernel (useful after installing or updating packages above).
restart_script = "<script>Jupyter.notebook.kernel.restart()</script>"
display_html(restart_script, raw=True)

In [1]:
#!conda update --all #Uncomment this cell to update all other packages. Restart the kernel afterwards if needed

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    libstdcxx-ng-9.1.0         |       hdf63c60_0         4.0 MB
    boto3-1.9.234              |             py_0          91 KB
    geopandas-0.4.1            |             py_0         891 KB
    libssh2-1.8.2              |       h1ba5d50_0         250 KB
    jupyterlab_server-1.0.6    |             py_0          26 KB
    lazy-object-proxy-1.4.2    |   py36h7b6447c_0          30 KB
    distributed-2.6.0          |             py_0         393 KB
    astropy-3.2.3              |   py36h7b6447c_0         7.2 MB
    opt_einsum-3.1.0           |             py_0          54 KB
    rope-0.14.0                |             py_0         113 KB
    cloudpickle-1.2.2          |             py_0          29 KB
    libgcc-ng-9.1.0            |       hdf6

In [3]:
import pandas as pd
import numpy as np
import geopandas as gpd
import folium as f
from folium import Choropleth
import json
from shapely.geometry import Polygon


In [4]:
# @hidden_cell
# Connection settings for the IBM Cloud Object Storage bucket used by this notebook.
#
# The API key is a secret and must not be stored in the notebook itself, so it is
# read from the COS_API_KEY environment variable. Export it before starting the
# kernel (or set os.environ['COS_API_KEY'] in a cell that is never saved).
# NOTE(review): a key was previously embedded in this cell and has therefore been
# exposed to everyone with access to the notebook - it should be revoked/rotated.
import os

storage_creds = {'apikey' : os.environ.get('COS_API_KEY', ''),
                 'iam_serviceid_crn' : 'crn:v1:bluemix:public:cloud-object-storage:global:a/8aa0fa0d4ad4480b8bfdf1c4d79f9442:021a8d33-89af-44aa-b548-e6f14a067d79:bucket:prague-data-set',
                 'auth_ep': 'https://iam.cloud.ibm.com/identity/token',
                 'ep': 'https://s3.private.eu-de.cloud-object-storage.appdomain.cloud',
                 'bucket' : 'prague-data-set'}

# Fail loudly (but without crashing the cell) so a later authentication error
# is easy to trace back to the missing variable.
if not storage_creds['apikey']:
    print('COS_API_KEY is not set: uploads and downloads will fail to authenticate')

In [5]:
import sys
from ibm_botocore.client import Config
import ibm_boto3

def _make_storage_client(credentials):
    """Build the S3-compatible object storage client described by ``credentials``.

    ``credentials`` must provide the keys 'apikey', 'iam_serviceid_crn',
    'auth_ep' and 'ep'.
    """
    return ibm_boto3.client(service_name='s3',
                            ibm_api_key_id=credentials['apikey'],
                            ibm_service_instance_id=credentials['iam_serviceid_crn'],
                            ibm_auth_endpoint=credentials['auth_ep'],
                            config=Config(signature_version='oauth'),
                            endpoint_url=credentials['ep'])


def upload_file(credentials, local_file_name, key):
    """Upload ``local_file_name`` to ``credentials['bucket']`` under object name ``key``.

    Transfer errors are reported on stdout instead of being raised, so a failed
    transfer does not abort the cell.

    Returns:
        True when the upload succeeded, False when it failed.
    """
    storage = _make_storage_client(credentials)

    try:
        storage.upload_file(Filename=local_file_name, Bucket=credentials['bucket'], Key=key)
    except Exception as e:
        # Report the concrete exception type, not the generic Exception class.
        print('File {} upload failed: {}: {}'.format(local_file_name, type(e).__name__, e))
        return False

    print('File {} Uploaded'.format(local_file_name))
    return True


def download_file(credentials, local_file_name, key):
    """Download object ``key`` from ``credentials['bucket']`` into ``local_file_name``.

    Transfer errors are reported on stdout instead of being raised, so a failed
    transfer does not abort the cell.

    Returns:
        True when the download succeeded, False when it failed.
    """
    storage = _make_storage_client(credentials)

    try:
        storage.download_file(Bucket=credentials['bucket'], Key=key, Filename=local_file_name)
    except Exception as e:
        # Report the concrete exception type, not the generic Exception class.
        print('File {} download failed: {}: {}'.format(local_file_name, type(e).__name__, e))
        return False

    print('File {} Downloaded'.format(local_file_name))
    return True

from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer
from bokeh.layouts import column

def roundup(x):
    """Round ``x`` up to the next multiple of 10; multiples of 10 are returned unchanged."""
    remainder = x % 10
    if remainder == 0:
        return x
    return x + 10 - remainder

def draw_map(gdf_data, palette, field, tick_labels, title):
    """Build a Bokeh choropleth of ``gdf_data`` coloured by the column ``field``.

    Args:
        gdf_data: frame exposing ``to_json()`` and column access by name.
        palette: colour sequence; it is applied in reversed order.
        field: name of the column that drives the fill colour.
        tick_labels: mapping passed to the colour bar as ``major_label_overrides``.
        title: figure title.

    Returns:
        The configured figure (not yet shown).
    """
    # Serialise the frame to GeoJSON text, normalised through the json module.
    geojson_text = json.dumps(json.loads(gdf_data.to_json()))
    source = GeoJSONDataSource(geojson=geojson_text)

    # Colour scale spans the integer-truncated minimum and maximum of the column.
    column_values = gdf_data[field]
    mapper = LinearColorMapper(
        palette=palette[::-1],
        low=int(column_values.min()),
        high=int(column_values.max()),
    )
    legend = ColorBar(
        color_mapper=mapper,
        label_standoff=8,
        width=600,
        height=20,
        border_line_color=None,
        location=(200, 0),
        orientation='horizontal',
        major_label_overrides=tick_labels,
    )

    chart = figure(title=title, plot_height=600, plot_width=800, toolbar_location=None)
    # A map needs neither grid lines nor axes.
    chart.xgrid.grid_line_color = None
    chart.ygrid.grid_line_color = None
    chart.axis.visible = False
    chart.patches(
        'xs', 'ys',
        source=source,
        fill_color={'field': field, 'transform': mapper},
        line_color='black',
        line_width=0.25,
        fill_alpha=1,
    )
    chart.add_layout(legend, 'above')

    return chart

def draw_bar(labels, values, fill_collor, border_color, title):
    """Build a Bokeh vertical bar chart with one bar per entry of ``labels``.

    Args:
        labels: categorical x values; also used as the figure's ``x_range``.
        values: bar heights, in the same order as ``labels``.
        fill_collor: fill colour of the bars.
        border_color: outline colour of the bars.
        title: figure title.

    Returns:
        The configured figure (not yet shown).
    """
    chart = figure(
        x_range=labels,
        plot_height=250,
        plot_width=800,
        title=title,
        toolbar_location=None,
        tools="",
    )
    chart.vbar(x=labels, top=values, width=1,
               fill_color=fill_collor, line_color=border_color)

    # NOTE(review): Bokeh appears to treat a numeric orientation as radians, so 45
    # may not mean 45 degrees - confirm the intended angle.
    chart.xaxis.major_label_orientation = 45

    # No grid lines on either axis.
    for grid in (chart.xgrid, chart.ygrid):
        grid.grid_line_color = None

    return chart

def read_coordinates_from_str(input_values):
    """Parse a textual list of coordinate pairs into a list of ``[float, float]``.

    The expected form is ``'[[a, b], [c, d], ...]'``. Only the first two numbers
    of every pair are used. An empty list (``'[]'`` or an empty string) yields
    ``[]`` instead of raising.

    Args:
        input_values: string representation of the coordinate list.

    Returns:
        List of two-element lists of floats, in input order.

    Raises:
        ValueError: if a component is not a valid float.
        IndexError: if a non-empty entry has fewer than two components.
    """
    result = []
    # Drop the outer brackets, then cut between pairs at every '],'.
    for chunk in input_values[1:-1].split('],'):
        cleaned = chunk.replace('[', '').replace(']', '').replace(' ', '')
        if not cleaned.strip():
            # Nothing between the brackets (empty list or trailing separator).
            continue
        parts = cleaned.split(',')
        result.append([float(parts[0]), float(parts[1])])

    return result

from math import log, cos, sin , pi

# Ellipsoid radii in metres; the values match the WGS 84 semi-major (sm_a) and
# semi-minor (sm_b) axes. Only sm_a is used by the projection below.
sm_a = 6378137.0 
sm_b = 6356752.314

def projLatLonToWorldMercator(lat,lon,isDeg=False):
    """Project a latitude/longitude pair to Mercator x/y coordinates.

    Computes ``x = sm_a * (lon - lon0)`` with a central meridian ``lon0`` of 0
    and ``y = sm_a * ln((sin(lat) + 1) / cos(lat))``.

    Args:
        lat: latitude, in radians unless ``isDeg`` is true.
        lon: longitude, in radians unless ``isDeg`` is true.
        isDeg: when true, both inputs are given in degrees and are converted
            with ``projDegToRad`` first.

    Returns:
        Tuple ``(x, y)`` in the units of ``sm_a``.
    """
    lon0 = 0
    if isDeg:
        lat = projDegToRad(lat)
        lon = projDegToRad(lon)

    x = sm_a*(lon-lon0)
    # Use the names imported from math; the module itself is never imported,
    # so the previous math.log/math.sin/math.cos references raised NameError.
    y = sm_a*log((sin(lat)+1)/cos(lat))

    return  x,y 

def projDegToRad(deg):
    """Convert an angle from degrees to radians."""
    half_turns = deg / 180.0
    return half_turns * pi

def projRadToDeg(rad):
    """Convert an angle from radians to degrees."""
    half_turns = rad / pi
    return half_turns * 180.0

### 2. Download the data prepared in Part 1

In [6]:
# District population table: fetch it from object storage, then load it.
population_file_name = 'prague_district_population.csv'
download_file(storage_creds, population_file_name, population_file_name)
df_prague_population = pd.read_csv(population_file_name)

# Points of interest: same procedure, then index the rows by the 'index' column.
poi_file_name = 'prague_poi.csv'
download_file(storage_creds, poi_file_name, poi_file_name)
poi_raw = pd.read_csv(poi_file_name)
df_parague_poi = poi_raw.set_index('index')

File prague_district_population.csv Downloaded
File prague_poi.csv Downloaded


In [7]:
# Index the districts by name.
df_prague_population = df_prague_population.set_index('Name')

# Turn the textual coordinate list of every district into a shapely Polygon.
district_polygons = df_prague_population['Geometry'].apply(
    lambda coords_text: Polygon(read_coordinates_from_str(coords_text))
)

# Append the parsed polygons, drop the raw text and let the new column take over
# the 'Geometry' name (it therefore ends up as the last column).
df_prague_population = (
    df_prague_population
    .assign(Polygon=district_polygons)
    .drop(columns=['Geometry'])
    .rename(columns={'Polygon': 'Geometry'})
)

In [8]:
# Keep only the columns needed for the children analysis, largest districts first.
child_columns = ['Kids', 'Kids_per_1000', 'Total', 'Geometry', 'latitude', 'longitude']
df_children = df_prague_population[child_columns].sort_values(by=['Kids'], ascending=False)

# Children as a percentage of the district's own population.
df_children['Kids_percent'] = df_children['Kids'] * 100 / df_children['Total']

# Children as a percentage of all children counted in the table.
total_kids = df_children['Kids'].sum(axis=0, skipna=True)
df_children['Kids_from_total'] = df_children['Kids'] * 100 / total_kids

# Districts holding more than 0.5 % of all children, ordered by that share.
holds_enough_kids = df_children['Kids_from_total'] > 0.5
df_children_t = df_children.loc[holds_enough_kids].sort_values(
    axis=0, by=['Kids_from_total'], ascending=False
)
df_children_t.head()

Unnamed: 0_level_0,Kids,Kids_per_1000,Total,Geometry,latitude,longitude,Kids_percent,Kids_from_total
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
praha 4,13793.0,155.603439,131793.0,"POLYGON ((14.48827570700007 50.04455542200003,...",50.04231,14.44805,10.465654,8.978532
praha 8,12485.0,169.787714,104918.0,"POLYGON ((14.44549218800006 50.11342461800007,...",50.12692,14.45672,11.899769,8.127091
praha 10,12213.0,159.386623,113200.0,"POLYGON ((14.53132108600005 50.07224028800005,...",50.06762,14.46016,10.788869,7.950033
praha 6,11990.0,169.402922,104185.0,"POLYGON ((14.29320690800006 50.07751405400006,...",50.10106,14.39981,11.508375,7.804872
praha 5,10571.0,172.463863,83968.0,"POLYGON ((14.41022472400005 50.04464234000005,...",50.07167,14.40098,12.58932,6.881176


In [9]:
# Make the POI type categorical, then add one indicator column per type
# (named 'category_<type>') next to the original columns.
poi_types = pd.Categorical(df_parague_poi['Type'])
df_parague_poi['Type'] = poi_types
dfDummies = pd.get_dummies(df_parague_poi['Type'], prefix='category')
df_parague_poi = pd.concat(
    [df_parague_poi, dfDummies],
    axis=1,
)

In [10]:
# Sum the columns per district, then discard the columns whose sums are not counts.
non_count_columns = ['latitude', 'longitude', 'Unnamed: 0']
df_parague_poi_sum = (
    df_parague_poi
    .groupby('District_Name')
    .sum()
    .drop(columns=non_count_columns)
)
df_parague_poi_sum.head()

Unnamed: 0_level_0,category_educatioanal center,category_library,category_playground,category_school,category_sport
District_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
praha 1,3,2,10,35,65
praha 10,3,5,10,61,96
praha 11,0,0,6,0,8
praha 12,0,0,2,0,9
praha 13,0,0,8,0,11


In [11]:
devider = 1000  # rates below are expressed per this many children

# Attach the children statistics to the POI counts (joined on the district index),
# order by share of children and drop districts with any missing value.
df_poi_kids = df_parague_poi_sum.join(df_children_t)
df_poi_kids = df_poi_kids.sort_values(axis=0, by=['Kids_from_total'], ascending=False)
df_poi_kids.dropna(inplace=True)

# Number of children in units of `devider`, shared by every rate below.
kids_per_unit = df_poi_kids['Kids'] / devider

# One '<category>_kids' rate column per POI category.
for category_column in ['category_library', 'category_playground', 'category_school',
                        'category_sport', 'category_educatioanal center']:
    df_poi_kids[category_column + '_kids'] = df_poi_kids[category_column] / kids_per_unit

# NOTE(review): the total leaves out 'category_sport' - confirm this is intended.
df_poi_kids['category_total'] = (
    df_poi_kids['category_library']
    + df_poi_kids['category_playground']
    + df_poi_kids['category_school']
    + df_poi_kids['category_educatioanal center']
)
df_poi_kids['category_total_kids'] = df_poi_kids['category_total'] / kids_per_unit

df_poi_kids.head()

Unnamed: 0_level_0,category_educatioanal center,category_library,category_playground,category_school,category_sport,Kids,Kids_per_1000,Total,Geometry,latitude,longitude,Kids_percent,Kids_from_total,category_library_kids,category_playground_kids,category_school_kids,category_sport_kids,category_educatioanal center_kids,category_total,category_total_kids
District_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
praha 4,8,9,11,106,148,13793.0,155.603439,131793.0,"POLYGON ((14.48827570700007 50.04455542200003,...",50.04231,14.44805,10.465654,8.978532,0.652505,0.797506,7.685058,10.73008,0.580004,134,9.715073
praha 8,5,3,9,45,76,12485.0,169.787714,104918.0,"POLYGON ((14.44549218800006 50.11342461800007,...",50.12692,14.45672,11.899769,8.127091,0.240288,0.720865,3.604325,6.087305,0.400481,62,4.965959
praha 10,3,5,10,61,96,12213.0,159.386623,113200.0,"POLYGON ((14.53132108600005 50.07224028800005,...",50.06762,14.46016,10.788869,7.950033,0.4094,0.8188,4.994678,7.860477,0.24564,79,6.468517
praha 6,4,4,8,45,93,11990.0,169.402922,104185.0,"POLYGON ((14.29320690800006 50.07751405400006,...",50.10106,14.39981,11.508375,7.804872,0.333611,0.667223,3.753128,7.756464,0.333611,61,5.087573
praha 5,3,7,10,68,113,10571.0,172.463863,83968.0,"POLYGON ((14.41022472400005 50.04464234000005,...",50.07167,14.40098,12.58932,6.881176,0.662189,0.945984,6.432693,10.689623,0.283795,88,8.324662


In [12]:
# Move the district name out of the index into a regular 'District_Name' column.
df_poi_kids = df_poi_kids.reset_index()

### 3. Data visualization
### 3.1 Children population in Prague

In [13]:
# Wrap the per-district table in a GeoDataFrame that uses the 'Geometry' column
# as its geometry.
poi_gdf = gpd.GeoDataFrame(df_poi_kids, geometry='Geometry')
# Declare the coordinate reference system as EPSG:4326. It is set after
# construction rather than passed to the constructor.
poi_gdf.crs= {'init':'epsg:4326'} 
# NOTE(review): df_poi_kids already had its index reset in the previous cell, so
# this presumably only adds an extra integer 'index' column, and re-running the
# cell may fail because that column already exists - confirm it is needed.
poi_gdf.reset_index(inplace = True)

In [14]:
# Map centre as [latitude, longitude].
Prague_coordinates = [50.083333, 14.416667]

# GeoJSON text of the district polygons and their properties.
merged_json = json.loads(poi_gdf.to_json())
json_data = json.dumps(merged_json)

map_prague = f.Map(location=Prague_coordinates, width=800, height=800, zoom_start = 11, max_zoom = 11, min_zoom = 1 ,tiles = 'stamentoner' , prefer_canvas = True)

# Colour every district by its number of children. The layer is titled
# 'Children population', so it must plot the 'Kids' column; it previously
# plotted 'Total', i.e. the whole population of the district.
c = Choropleth(
    name='Children population in Prague',
    geo_data=json_data,
    data = df_poi_kids,
    columns = ['District_Name','Kids'],
    fill_color='PuBuGn',
    # Districts are matched to rows of `data` through this GeoJSON property.
    key_on='feature.properties.District_Name'
)
c.add_to(map_prague)
map_prague

In [15]:
pallete = brewer['PuBuGn'][9]

# Quantile edges of children per 1000, rounded up to tens, become the tick labels.
quantile_cuts = [.3, .4, .5, .6, .7, .8, .9, 1]
results, bin_edges = pd.qcut(df_children_t['Kids_per_1000'], labels=False, retbins=True, q=quantile_cuts)
tick_texts = [str(roundup(edge)) for edge in bin_edges]
map_ticks = dict(zip(tick_texts, tick_texts))

map_palette = pallete[::-1]
fig_map = draw_map(poi_gdf, pallete, 'Kids_per_1000', map_ticks, 'Children per 1000 adults')

# Bar chart: share of all children living in each district.
labels = list(df_poi_kids['District_Name'])
values = list(df_poi_kids['Kids_from_total'])
fig_bar = draw_bar(
    labels,
    values,
    pallete[7],
    pallete[6],
    'Percents of children from overall children population',
)

output_notebook()
children_layout = column(fig_map, fig_bar)
show(children_layout)


### 3.2 Points of interest

In [16]:
pallete = brewer['PuBuGn'][9]

# Quantile edges of the POI totals, rounded up to tens, become the tick labels.
quantile_cuts = [.3, .4, .5, .6, .7, .8, .9, 1]
results, bin_edges = pd.qcut(
    df_poi_kids['category_total'],
    labels=False,
    retbins=True,
    q=quantile_cuts,
    duplicates='drop',
)
tick_texts = [str(roundup(edge)) for edge in bin_edges]
map_ticks = dict(zip(tick_texts, tick_texts))

fig_map = draw_map(poi_gdf, pallete, 'category_total', map_ticks, 'Total number of POI')

In [17]:
# Quantile edges of POI per 1000 children, rounded up to tens, become the tick labels.
quantile_cuts = [.3, .4, .5, .6, .7, .8, .9, 1]
results, bin_edges = pd.qcut(
    df_poi_kids['category_total_kids'],
    labels=False,
    retbins=True,
    q=quantile_cuts,
    duplicates='drop',
)
tick_texts = [str(roundup(edge)) for edge in bin_edges]
map_ticks = dict(zip(tick_texts, tick_texts))

fig_map_2 = draw_map(
    poi_gdf,
    pallete,
    'category_total_kids',
    map_ticks,
    'Total number of POI per 1000 children',
)

In [18]:
# Rank the districts by schools per 1000 children, highest first.
rate_column = 'category_school_kids'
ranked = df_poi_kids[['District_Name', rate_column]].sort_values(rate_column, ascending=False)
labels = list(ranked['District_Name'])
values = list(ranked[rate_column])
fig_bar_school = draw_bar(labels, values, pallete[7], pallete[6], 'Schools per 1000')

In [19]:
# Rank the districts by educational centres per 1000 children, highest first.
rate_column = 'category_educatioanal center_kids'
ranked = df_poi_kids[['District_Name', rate_column]].sort_values(rate_column, ascending=False)
labels = list(ranked['District_Name'])
values = list(ranked[rate_column])
fig_bar_edu = draw_bar(labels, values, pallete[7], pallete[6], 'Educational and hobby centers per 1000')

In [20]:
# Rank the districts by sport facilities per 1000 children, highest first.
rate_column = 'category_sport_kids'
ranked = df_poi_kids[['District_Name', rate_column]].sort_values(rate_column, ascending=False)
labels = list(ranked['District_Name'])
values = list(ranked[rate_column])
fig_bar_sport = draw_bar(labels, values, pallete[7], pallete[6], 'Sport facilities per 1000')

In [21]:
# Rank the districts by libraries per 1000 children, highest first.
rate_column = 'category_library_kids'
ranked = df_poi_kids[['District_Name', rate_column]].sort_values(rate_column, ascending=False)
labels = list(ranked['District_Name'])
values = list(ranked[rate_column])
fig_bar_lib = draw_bar(labels, values, pallete[7], pallete[6], 'Libraries per 1000')

In [22]:
# Rank the districts by outdoor playgrounds per 1000 children, highest first.
values = df_poi_kids[['District_Name', 'category_playground_kids']].sort_values('category_playground_kids', ascending=False)
labels = list(values['District_Name'])
values = list(values['category_playground_kids'])
# Title fixed to read "per 1000", matching the other per-category charts.
fig_bar_play = draw_bar(labels, values, pallete[7], pallete[6], 'Outdoor playgrounds per 1000')

In [23]:
# Render inline in the notebook: both maps first, then one bar chart per POI category.
output_notebook()
poi_layout = column(
    fig_map,
    fig_map_2,
    fig_bar_school,
    fig_bar_edu,
    fig_bar_sport,
    fig_bar_lib,
    fig_bar_play,
)
show(poi_layout)