# Prague Pedestrian Accessibility for Children (age 10-16)

Importing all nessecary libraries

## Preparing the envieroment 

In [7]:
import io
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
import json
import urllib.request
import requests

Some operation for preparing and cleanning data can consume a lot of computation resourses and time. For optimization purpose we will upload our prepared datasets to IBM Cloud Storage. Further in "Data analysis" and "Modeling" sections will will use this uploaded data. 

In [5]:
# @hidden_cell
storage_creds = {'apikey' : 'rqrlKTO277J6k4N_5X_wpI62WqwslFDxspY7o2Nb6s0A',
                 'iam_serviceid_crn' : 'crn:v1:bluemix:public:cloud-object-storage:global:a/8aa0fa0d4ad4480b8bfdf1c4d79f9442:021a8d33-89af-44aa-b548-e6f14a067d79:bucket:prague-data-set',
                 'auth_ep': 'https://iam.cloud.ibm.com/identity/token',
                 'ep': 'https://s3.private.eu-de.cloud-object-storage.appdomain.cloud',
                 'bucket' : 'prague-data-set'}

Define upload and download functions.

In [6]:
import sys
from ibm_botocore.client import Config
import ibm_boto3

def upload_file(credentials,local_file_name,key): 
    storage = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=credentials['apikey'],
    ibm_service_instance_id=credentials['iam_serviceid_crn'],
    ibm_auth_endpoint=credentials['auth_ep'],
    config=Config(signature_version='oauth'),
    endpoint_url=credentials['ep'])
    
    try:
        res=storage.upload_file(Filename=local_file_name, Bucket=credentials['bucket'],Key=key)
    except Exception as e:
        print(Exception, e)
    else:
        print('File {} Uploaded'.format(local_file_name))
        
def download_file(credentials,local_file_name,key):  
    storage = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=credentials['apikey'],
    ibm_service_instance_id=credentials['iam_serviceid_crn'],
    ibm_auth_endpoint=credentials['auth_ep'],
    config=Config(signature_version='oauth'),
    endpoint_url=credentials['ep'])
    try:
        res= storage.download_file(Bucket=credentials['bucket'],Key=key,Filename=local_file_name)
    except Exception as e:
        print(Exception, e)
    else:
        print('File {} Downloaded'.format(local_file_name))

For building network toology will need geo coordinates of collected point of interests (POI). For retriving coordinates we will use geocoder package with Arcgis provider.

In [4]:
#!conda install -c conda-forge geocoder #Uncomment this cell to install geocoder package if it is not yet installed

Define coordinates retrivial function. As an input parameters it takes DataFrame and name of row with address string

In [5]:
import geocoder

def get_coordinates(dataFrame, index_row):
    dict_coordinates = {}
    total_count = len(dataFrame.index)
    current = 0
    errors = 0
    for index, row in dataFrame.iterrows():
        try:
            g = geocoder.arcgis(row[index_row])
            lat = g.json['lat']
            lng = g.json['lng']
            dict_coordinates[index] = [lat, lng]
            current+=1
            print('Completed {} of {}'.format(current, total_count))
        except:
            errors+=1
            print ('Failed to get coordinates for {}: {}'.format(index_row, sys.exc_info()[0]))
    
    dataFrame['latitude'] = 0.0
    dataFrame['longitude'] = 0.0
    
    for k, v in dict_coordinates.items():
        dataFrame.loc[k,'latitude']=v[0]
        dataFrame.loc[k,'longitude']=v[1]
        
    print('Done: Total: {} Success: {} Error {}'.format(total_count, current, errors))

## Data acquisition and cleaning

As main data source I sellected <a>http://opendata.praha.eu</a>.  This is a big database of data of different types amd it contains data from different fileds: transport, society, ecology, population e.t.c  
This data sets were mainly colleted and structured by Prague Institute of Planning and Development <a>www.iprpraha.cz</a> For my project I mainly intrested in:
* Shape and location of Prague administrative districs
* Districts population
* Geo location different types of social infrastructure 

### Districts borders and population.

First step is to determine the shape and location of the administrative disctrics of Prague.

In [6]:
mestky_casty_url = 'http://opendata.iprpraha.cz/CUR/DTMP/TMMESTSKECASTI_P/WGS_84/TMMESTSKECASTI_P.json'
results = requests.get(mestky_casty_url).json(encoding = "utf8")
mestky_casty = json_normalize(results['features']) 
mestky_casty.head()

Unnamed: 0,geometry.coordinates,geometry.type,properties.DAT_VZNIK,properties.DAT_ZMENA,properties.ID,properties.ID_POSKYT,properties.KOD_MC,properties.KOD_MO,properties.KOD_SO,properties.NAZEV_1,properties.NAZEV_MC,properties.OBJECTID,properties.PLOCHA,properties.POSKYT,properties.STAV_ZMENA,properties.Shape_Area,properties.Shape_Length,properties.TID_TMMESTSKECASTI_P,type
0,"[[[14.533725418000074, 50.16223134300003], [14...",Polygon,20181106141412,20190423111436,25,43,547310,94,221,Čakovice,Praha-Čakovice,1,10183715.88,HMP-IPR,U,10183720.0,0.213162,25,Feature
1,"[[[14.293206908000059, 50.07751405400006], [14...",Polygon,20181106141412,20181106164427,52,43,547174,60,213,Praha 17,Praha 17,2,3253142.41,HMP-IPR,U,3253142.0,0.095029,52,Feature
2,"[[[14.483934895000061, 49.99241857800007], [14...",Polygon,20181009145125,20190821104230,19,43,547051,43,124,Libuš,Praha-Libuš,3,5234736.54,HMP-IPR,U,5234737.0,0.200404,19,Feature
3,"[[[14.506905018000054, 50.17143575600005], [14...",Polygon,20170817145228,20170818091113,35,43,538124,86,86,Březiněves,Praha-Březiněves,4,3380681.9,HMP-IPR,U,3380682.0,0.127235,35,Feature
4,"[[[14.43852135000003, 50.06691477800007], [14....",Polygon,20180910110223,20180910113234,30,43,500089,27,27,Praha 2,Praha 2,5,4184937.95,HMP-IPR,U,4184938.0,0.134652,30,Feature


In [7]:
mestky_casty.shape

(57, 19)

In [8]:
geo_unique = np.array(mestky_casty['properties.NAZEV_MC'].apply(lambda x: x.lower()).unique())
geo_unique

array(['praha-čakovice', 'praha 17', 'praha-libuš', 'praha-březiněves',
       'praha 2', 'praha 1', 'praha 11', 'praha-zbraslav', 'praha 15',
       'praha 4', 'praha 5', 'praha 20', 'praha-dolní měcholupy',
       'praha 6', 'praha 9', 'praha 10', 'praha 14', 'praha 12',
       'praha-kolovraty', 'praha-újezd', 'praha 13', 'praha-řeporyje',
       'praha-suchdol', 'praha-ďáblice', 'praha-šeberov',
       'praha-dolní chabry', 'praha 19', 'praha-koloděje',
       'praha-satalice', 'praha-petrovice', 'praha 3',
       'praha-velká chuchle', 'praha-dolní počernice',
       'praha-přední kopanina', 'praha-královice', 'praha-kunratice',
       'praha-slivenec', 'praha-vinoř', 'praha-lochkov', 'praha-nebušice',
       'praha-benice', 'praha 18', 'praha-křeslice', 'praha-troja',
       'praha 7', 'praha-nedvězí', 'praha 21', 'praha-běchovice',
       'praha-štěrboholy', 'praha-dubeč', 'praha-lysolaje',
       'praha-lipence', 'praha 8', 'praha 22', 'praha-zličín', 'praha 16',
       'praha-

Districts population

In [9]:
url_population =  'https://www.czso.cz/documents/10180/25233177/sldb_zv.csv'
df_population = pd.read_csv(url_population,encoding = "ISO 8859-2")
df_population.head()

Unnamed: 0,typuz_naz,nazev,uzcis,uzkod,u01,u02,u03,u04,u05,u06,u07,u08,u09,u10,u11
0,kraj,Hlavní město Praha,100,3018,1268796.0,613738.0,655058.0,153622.0,908321.0,201029.0,644643.0,600730.0,92927.0,542168.0,579509.0
1,kraj,Středočeský kraj,100,3026,1289211.0,637252.0,651959.0,199300.0,895024.0,190911.0,639851.0,587539.0,286780.0,482860.0,523045.0
2,kraj,Jihočeský kraj,100,3034,628336.0,308296.0,320040.0,91119.0,435187.0,100000.0,307130.0,280844.0,123048.0,247608.0,262692.0
3,kraj,Plzeňský kraj,100,3042,570401.0,282137.0,288264.0,79469.0,396468.0,92734.0,278674.0,255278.0,105835.0,226298.0,242397.0
4,kraj,Karlovarský kraj,100,3051,295595.0,145483.0,150112.0,42159.0,207480.0,44538.0,139871.0,123100.0,39845.0,119403.0,128904.0


Cleaning population data set. For my project only make sence data 

In [10]:
df_population = df_population[(df_population.uzcis == 44)& (df_population.nazev.str.find('Praha') != -1)][['nazev','u01','u04', 'u05', 'u06']]
df_population.rename(columns={'nazev':'Name','u01':'Total', 'u04':'Kids', 'u05':'Middle', 'u06':'Senior'}, inplace = True)
df_population['Name'] = df_population['Name'].map(lambda x: x.lower())
df_population.shape

(57, 5)

Quick analizing of distrcit population

In [11]:
population_unique = df_population['Name'].unique()
population_unique

array(['praha 1', 'praha 2', 'praha 3', 'praha 4', 'praha 5', 'praha 6',
       'praha 7', 'praha 8', 'praha 9', 'praha 10', 'praha-běchovice',
       'praha-benice', 'praha-březiněves', 'praha-dolní počernice',
       'praha-dubeč', 'praha 20', 'praha-klánovice', 'praha-koloděje',
       'praha-kolovraty', 'praha-královice', 'praha-křeslice',
       'praha-nedvězí', 'praha-satalice', 'praha 22', 'praha 21',
       'praha-vinoř', 'praha-lipence', 'praha-lochkov',
       'praha-přední kopanina', 'praha 16', 'praha-řeporyje',
       'praha-slivenec', 'praha 13', 'praha-\x8aeberov', 'praha-újezd',
       'praha-zbraslav', 'praha-zličín', 'praha 11', 'praha-kunratice',
       'praha-libu\x9a', 'praha 12', 'praha-velká chuchle',
       'praha-lysolaje', 'praha-nebu\x9aice', 'praha 17', 'praha-suchdol',
       'praha-ďáblice', 'praha-dolní chabry', 'praha-čakovice',
       'praha-troja', 'praha 19', 'praha 14', 'praha-dolní měcholupy',
       'praha 15', 'praha-petrovice', 'praha-\x8atěrboho

Checking the difference between to datasets

In [12]:
districts_diff_geo = list(set(geo_unique)-set(population_unique))
districts_diff_geo

['praha-štěrboholy', 'praha-libuš', 'praha-nebušice', 'praha-šeberov']

In [13]:
districts_diff_pop = list(set(population_unique)-set(geo_unique))
districts_diff_pop

['praha-libu\x9a',
 'praha-\x8aeberov',
 'praha-nebu\x9aice',
 'praha-\x8atěrboholy']

Population data set has encoding errors. Lets fix it

In [14]:
df_population.loc[df_population.Name == 'praha-libu\x9a', 'Name'] = 'praha-libuš'
df_population.loc[df_population.Name == 'praha-\x8aeberov', 'Name'] = 'praha-šeberov'
df_population.loc[df_population.Name == 'praha-nebu\x9aice', 'Name'] = 'praha-nebušice'
df_population.loc[df_population.Name == 'praha-\x8atěrboholy', 'Name'] = 'praha-štěrboholy'

In [15]:
population_unique = df_population['Name'].unique()
districts_diff_pop = list(set(population_unique)-set(geo_unique))
print('Data set is equal: {}'.format(len(districts_diff_pop)==0)) 

Data set is equal: True


Districts dataset and population dataset have the same districs. Lets make a join of two data sets

In [16]:
result = []

result.append([
    v['properties']['NAZEV_MC'].lower(),
    v['geometry']['coordinates'][0],
    v['properties']['PLOCHA']] for v in results['features'])
    
df_prague_districts = pd.DataFrame([item for result in result for item in result])
df_prague_districts.columns = ['Name', 'Geometry', 'Area']

In [17]:
df_prague = df_prague_districts.set_index('Name').join(df_population.set_index('Name'))
quotient = df_prague['Middle']/1000
df_prague['Kids_per_1000'] = df_prague['Kids']/quotient
df_prague.sort_values('Name', inplace = True)
df_prague.reset_index(inplace=True)

In [18]:
df_prague.shape

(57, 8)

In [19]:
get_coordinates(df_prague, 'Name')

Completed 1 of 57
Completed 2 of 57
Completed 3 of 57
Completed 4 of 57
Completed 5 of 57
Completed 6 of 57
Completed 7 of 57
Completed 8 of 57
Completed 9 of 57
Completed 10 of 57
Completed 11 of 57
Completed 12 of 57
Completed 13 of 57
Completed 14 of 57
Completed 15 of 57
Completed 16 of 57
Completed 17 of 57
Completed 18 of 57
Completed 19 of 57
Completed 20 of 57
Completed 21 of 57
Completed 22 of 57
Completed 23 of 57
Completed 24 of 57
Completed 25 of 57
Completed 26 of 57
Completed 27 of 57
Completed 28 of 57
Completed 29 of 57
Completed 30 of 57
Completed 31 of 57
Completed 32 of 57
Completed 33 of 57
Completed 34 of 57
Completed 35 of 57
Completed 36 of 57
Completed 37 of 57
Completed 38 of 57
Completed 39 of 57
Completed 40 of 57
Completed 41 of 57
Completed 42 of 57
Completed 43 of 57
Completed 44 of 57
Completed 45 of 57
Completed 46 of 57
Completed 47 of 57
Completed 48 of 57
Completed 49 of 57
Completed 50 of 57
Completed 51 of 57
Completed 52 of 57
Completed 53 of 57
Co

In [20]:
df_prague.head()

Unnamed: 0,Name,Geometry,Area,Total,Kids,Middle,Senior,Kids_per_1000,latitude,longitude
0,praha 1,"[[14.410891049000043, 50.078674687000046], [14...",5538443.86,30561.0,2391.0,22963.0,4594.0,104.124026,50.08728,14.41742
1,praha 10,"[[14.531321086000048, 50.072240288000046], [14...",18599366.98,113200.0,12213.0,76625.0,23937.0,159.386623,50.06762,14.46016
2,praha 11,"[[14.54355294800007, 50.03618763800006], [14.5...",9793679.84,75741.0,8688.0,54983.0,11816.0,158.012477,50.03178,14.50719
3,praha 12,"[[14.450632163000023, 50.01452735600003], [14....",23317909.06,53515.0,6156.0,39699.0,7480.0,155.066878,50.00564,14.40462
4,praha 13,"[[14.320621949000042, 50.04010680700003], [14....",13196802.19,59906.0,7985.0,46514.0,5109.0,171.668745,50.05163,14.34231


Saving data set to stroage for later use

In [21]:
file_name = 'prague_district_population.csv'
df_prague.to_csv(file_name)
upload_file(storage_creds,file_name,file_name)

File prague_district_population.csv Uploaded


Explore children popupualtion in Prague

### Points of interest

#### Playgrounds
Data from Hřiště Praha 2014 - 2016 <a>http://www.hristepraha.cz</a> Last update 19.01.2018

In [22]:
url_playgrounds = 'http://opendata.praha.eu/dataset/3c3ca9ca-fbc0-4f97-b624-ed967f5d9a24/resource/e19c2e29-5e33-4449-8847-5dc8f5b8a2f2/download/db144c03-1a0f-456f-a32b-9c48ccfc0813-playgrounds.json'
results = requests.get(url_playgrounds).json(encoding = "utf8")
df_playgrounds = json_normalize(results['features']) 
df_playgrounds.head()

Unnamed: 0,geometry.coordinates,geometry.type,properties.address,properties.content,properties.district,properties.id,properties.image.url,properties.name,properties.perex,properties.properties,properties.url,type
0,"[14.56323719, 50.038024902]",Point,"Rezlerova 278, 109 00 Praha-Praha-Petrovice, Č...",Za panelovým domem v Rezlerově ulici se rozklá...,praha-petrovice,101,http://www.hristepraha.cz/images/img/41f5da50e...,Sídliště Petrovice - Rezlerova,"Lokalita nabízí několik pěkných menších hřišť,...",[],http://www.hristepraha.cz/hriste/mapa/sidliste...,Feature
1,"[14.438850403, 50.13401413]",Point,"Skálova 545/24, Čimice, 181 00 Praha-Praha 8, ...","Větší hřiště se rozkládá, mezi ulicemi Toruňsk...",praha-8,43,http://www.hristepraha.cz/images/img/b07bef69a...,Bohnice a Čimice - Čimice,Nedaleko od sebe leží 2 pěkná hřiště.,[],http://www.hristepraha.cz/hriste/mapa/bohnice-...,Feature
2,"[14.476410866, 50.094387054]",Point,"Za Žižkovskou vozovnou 2716/19, Žižkov, 130 00...",Dětské hřiště (60.A) najdete na konci ulice St...,praha-3,100,http://www.hristepraha.cz/images/img/a0cad32d8...,Na Krejcárku - hřiště 60.B,Lokalita se skvěle hodí pro rodiny s dětmi růz...,[],http://www.hristepraha.cz/hriste/mapa/na-krejc...,Feature
3,"[14.593131065, 50.036453247]",Point,"V Bytovkách 754/30, Uhříněves, 104 00 Praha-Pr...","Cestu doporučujeme zahájit na Novém náměstí, k...",praha-22,131,http://www.hristepraha.cz/images/img/3063cb73f...,Uhříněves - hřiště 82.B,"Hřiště, lesopark a další zajímavá místa, to je...",[],http://www.hristepraha.cz/hriste/mapa/uhrineve...,Feature
4,"[14.539891243, 50.043731689]",Point,"U Břehu 1111, Hostivař, 102 00 Praha-Praha 15,...",Popis: Asi 300 m od prodejny Lidl v Hornoměcho...,praha-15,72,http://www.hristepraha.cz/images/img/2d73f6832...,Hostivařský lesopark (východní část) - hřiště ...,Trasa je vhodným polodenním rodinným výletem.,[],http://www.hristepraha.cz/hriste/mapa/hostivar...,Feature


In [23]:
poi_type = 'playground'
result = []

result.append([
    poi_type,
    v['properties']['district'].lower(),
    v['geometry']['coordinates'][0],
    v['geometry']['coordinates'][1]] for v in results['features'])
    
df_prague_poi = pd.DataFrame([item for result in result for item in result])
df_prague_poi.columns = ['Type', 'District_Name', 'latitude','longitude']
df_prague_poi.head()

Unnamed: 0,Type,District_Name,latitude,longitude
0,playground,praha-petrovice,14.563237,50.038025
1,playground,praha-8,14.43885,50.134014
2,playground,praha-3,14.476411,50.094387
3,playground,praha-22,14.593131,50.036453
4,playground,praha-15,14.539891,50.043732


In [24]:
df_prague_poi.shape

(145, 4)

In [25]:
indeces=df_prague_poi.loc[df_prague_poi.District_Name.str.contains('[-][0-9]', regex=True)].index
df_prague_poi.loc[indeces, 'District_Name'] = df_prague_poi.loc[indeces, 'District_Name'].str.replace('-', ' ')
df_prague_poi.head()

Unnamed: 0,Type,District_Name,latitude,longitude
0,playground,praha-petrovice,14.563237,50.038025
1,playground,praha 8,14.43885,50.134014
2,playground,praha 3,14.476411,50.094387
3,playground,praha 22,14.593131,50.036453
4,playground,praha 15,14.539891,50.043732


### Sport facilities
Magistrát hl. m. Prahy 	1. duben 2019, 0:00 (UTC+02:00)

In [26]:
url_sport = 'http://opendata.praha.eu/datastore/dump/5d1ee13f-f6e9-4ee9-a1bd-48d5ca2bb867?format=json'
results = requests.get(url_sport).json(encoding = "utf8")
result = []

result.append([
    'sport',
    'praha {}'.format(v[6]),
     v[2]] for v in results['records'])
    
df_sport = pd.DataFrame([item for result in result for item in result])
df_sport.columns = ['Type', 'District_Name', 'Address']
df_sport.head()

Unnamed: 0,Type,District_Name,Address
0,sport,praha 5,"Butovická 837/41, Praha 5"
1,sport,praha 5,"Zahradníčkova, Praha 5, 150 00"
2,sport,praha 1,"Senovážné náměstí 6, Praha 1, 110 00"
3,sport,praha 12,"Zelenkova 3/530, Praha 12, 142 00"
4,sport,praha 7,"Štvanice 38, Praha 7, 170 00"


In [27]:
df_sport.isnull().values.any()

False

In [28]:
df_sport.District_Name.unique()

array(['praha 5', 'praha 1', 'praha 12', 'praha 7', 'praha 4', 'praha 9',
       'praha 8', 'praha 6', 'praha 11', 'praha 10', 'praha 3', 'praha 2',
       'praha 15', 'praha 22', 'praha 13', 'praha 16', 'praha 17',
       'praha 18', 'praha 20', 'praha 14', 'praha 21', 'praha 19'],
      dtype=object)

In [29]:
get_coordinates(df_sport, 'Address')
df_sport.head()    

Completed 1 of 877
Completed 2 of 877
Completed 3 of 877
Completed 4 of 877
Completed 5 of 877
Completed 6 of 877
Completed 7 of 877
Completed 8 of 877
Completed 9 of 877
Completed 10 of 877
Completed 11 of 877
Completed 12 of 877
Completed 13 of 877
Completed 14 of 877
Completed 15 of 877
Completed 16 of 877
Completed 17 of 877
Completed 18 of 877
Completed 19 of 877
Completed 20 of 877
Completed 21 of 877
Completed 22 of 877
Completed 23 of 877
Completed 24 of 877
Completed 25 of 877
Completed 26 of 877
Completed 27 of 877
Completed 28 of 877
Completed 29 of 877
Completed 30 of 877
Completed 31 of 877
Completed 32 of 877
Completed 33 of 877
Completed 34 of 877
Completed 35 of 877
Completed 36 of 877
Completed 37 of 877
Completed 38 of 877
Completed 39 of 877
Completed 40 of 877
Completed 41 of 877
Completed 42 of 877
Completed 43 of 877
Completed 44 of 877
Completed 45 of 877
Completed 46 of 877
Completed 47 of 877
Completed 48 of 877
Completed 49 of 877
Completed 50 of 877
Completed

Unnamed: 0,Type,District_Name,Address,latitude,longitude
0,sport,praha 5,"Butovická 837/41, Praha 5",50.052154,14.360772
1,sport,praha 5,"Zahradníčkova, Praha 5, 150 00",50.068925,14.345478
2,sport,praha 1,"Senovážné náměstí 6, Praha 1, 110 00",50.085924,14.431106
3,sport,praha 12,"Zelenkova 3/530, Praha 12, 142 00",50.009011,14.447198
4,sport,praha 7,"Štvanice 38, Praha 7, 170 00",50.09669,14.44014


In [30]:
df_sport.drop(columns=['Address'], inplace = True)

In [31]:
df_sport.shape

(877, 4)

### Libraries

In [32]:
url_libs = 'https://cs.wikipedia.org/wiki/M%C4%9Bstsk%C3%A1_knihovna_v_Praze'

f = urllib.request.urlopen(url_libs)
html = f.read()

try: 
    from BeautifulSoup import BeautifulSoup
except ImportError:
    from bs4 import BeautifulSoup

parsed_html = BeautifulSoup(html)
tag_header = parsed_html.find_all('h4')
district_tags = []

for tag in tag_header:
    tag_match = False
    district =''
    for child in tag.children:
        if child.get("class")[0] == 'mw-headline':
            district = child.get_text().lower()
            tag_match = True
    if tag_match == True:
        nextsibling = tag.next_sibling
        while  True:
            if nextsibling.find('ul') != -1 :
                lists = nextsibling.find_all('li')
                for lib in  lists:
                    district_tags.append(['library',district,lib.get_text()])
                break
            else:
                nextsibling = nextsibling.next_sibling

df_libs = pd.DataFrame(data=district_tags)
df_libs.columns = ['Type', 'District_Name', 'Address']
df_libs.head()

Unnamed: 0,Type,District_Name,Address
0,library,praha 1,"„Školská“, Nové Město, Školská 1267/30"
1,library,praha 1,"„Hradčany“, Hradčany, Pohořelec 111/25"
2,library,praha 2,"„Záhřebská“, Vinohrady, Záhřebská 158/20"
3,library,praha 2,"„Dittrrichova“, Nové Město, Dittrichova 1543/2"
4,library,praha 2,"„Ostrčilovo náměstí“, Nusle, Ostrčilovo náměst..."


In [33]:
df_libs.shape

(41, 3)

In [34]:
df_libs.District_Name.unique()

array(['praha 1', 'praha 2', 'praha 3', 'praha 4', 'praha 5', 'praha 6',
       'praha 7', 'praha 8', 'praha 9', 'praha 10'], dtype=object)

In [35]:
get_coordinates(df_libs, 'Address')
df_libs.head()

Completed 1 of 41
Completed 2 of 41
Completed 3 of 41
Completed 4 of 41
Completed 5 of 41
Completed 6 of 41
Completed 7 of 41
Completed 8 of 41
Completed 9 of 41
Completed 10 of 41
Completed 11 of 41
Completed 12 of 41
Completed 13 of 41
Completed 14 of 41
Completed 15 of 41
Completed 16 of 41
Completed 17 of 41
Completed 18 of 41
Completed 19 of 41
Completed 20 of 41
Completed 21 of 41
Completed 22 of 41
Completed 23 of 41
Completed 24 of 41
Completed 25 of 41
Completed 26 of 41
Completed 27 of 41
Completed 28 of 41
Completed 29 of 41
Completed 30 of 41
Completed 31 of 41
Completed 32 of 41
Completed 33 of 41
Completed 34 of 41
Completed 35 of 41
Completed 36 of 41
Completed 37 of 41
Completed 38 of 41
Completed 39 of 41
Completed 40 of 41
Completed 41 of 41
Done: Total: 41 Success: 41 Error 0


Unnamed: 0,Type,District_Name,Address,latitude,longitude
0,library,praha 1,"„Školská“, Nové Město, Školská 1267/30",50.079501,14.424045
1,library,praha 1,"„Hradčany“, Hradčany, Pohořelec 111/25",50.087778,14.389993
2,library,praha 2,"„Záhřebská“, Vinohrady, Záhřebská 158/20",50.07191,14.43697
3,library,praha 2,"„Dittrrichova“, Nové Město, Dittrichova 1543/2",50.07362,14.41641
4,library,praha 2,"„Ostrčilovo náměstí“, Nusle, Ostrčilovo náměst...",50.06579,14.42468


In [36]:
df_libs.drop(columns='Address', inplace = True)
df_libs.shape

(41, 4)

Union the results

In [37]:
df_prague_poi = pd.concat([df_prague_poi,df_sport], sort=True)
df_prague_poi = pd.concat([df_prague_poi, df_libs], sort=True)
df_prague_poi.shape

(1063, 4)

### Schools and educational centers 

Rejstřík škol a školských zařízení - Hl. m. Praha
Aktuální data rejstříku škol a školských zařízení - Hl. m. Praha
MŠMT 14.10.2019

In [38]:
url_schools = 'https://rejstriky.msmt.cz/opendata/vrejcz010.xml'
file_schools = 'schools.xml'
results = requests.get(url_schools)
results.content
with open(file_schools, 'w') as file:
    file.write(results.text)
print('Done') 

Done


In [39]:
import xml.etree.ElementTree as et 
xtree = et.parse(file_schools)
xroot = xtree.getroot()

In [40]:
dic_scools = []
try:
    for entry in xroot.findall('PravniSubjekt'):
        place_group = entry.find('SkolyZarizeni')
        if(place_group is None):
            continue
        for place in place_group.findall('SkolaZarizeni'):
            s_id = place.find('IZO').text
            s_type = place.find('SkolaDruhTyp').text
            s_name = place.find('SkolaPlnyNazev').text
            s_capasity = place.find('SkolaKapacita').text
            s_adress = place.find('SkolaMistaVykonuCinnosti')
            s_actual_add = s_adress.find('SkolaMistoVykonuCinnosti')
            s_addres1 =  s_actual_add.find('MistoAdresa1').text
            s_addres2 =  s_actual_add.find('MistoAdresa2').text
            s_addres3 =  s_actual_add.find('MistoAdresa3').text
            print(s_id, s_name,  s_type, s_capasity, '{} {} {}'.format(s_addres1, s_addres2, s_addres3))
            dic_scools.append([s_id, s_name,  s_type, s_capasity, '{} {} {}'.format(s_addres1, s_addres2, s_addres3)])
except:
    print ('Exception', sys.exc_info()[0])     

049625918 Mateřská škola A00 52 Ostrovní 139/11 Nové Město 110 00 Praha 1
102413096 Školní jídelna L11 90 Ostrovní 139/11 Nové Město 110 00 Praha 1
107500884 Mateřská škola A00 70 Ke Kamýku 686/2 Kamýk 142 00 Praha 4
161102263 Školní jídelna - výdejna L13 70 Ke Kamýku 686/2 Kamýk 142 00 Praha 4
110034384 Mateřská škola A00 6 Smolkova 567/2 Kamýk 142 00 Praha 4
110380169 Základní škola B00 30 Smolkova 567/2 Kamýk 142 00 Praha 4
110034392 Přípravný stupeň základní školy speciální M60 6 Smolkova 567/2 Kamýk 142 00 Praha 4
060437171 Mateřská škola A00 130 Podpěrova 1879/2 Stodůlky 155 00 Praha 5
102449244 Školní jídelna L11 148 Podpěrova 1879/2 Stodůlky 155 00 Praha 5
110035585 Mateřská škola A00 18 Hábova 1571/22 Stodůlky 155 00 Praha 5
049370782 Mateřská škola A00 86 Žabovřeská 1227 Zbraslav 156 00 Praha 5
102449597 Školní jídelna L11 90 Žabovřeská 1227 Zbraslav 156 00 Praha 5
110020766 Mateřská škola A00 131 Klausova 2448/6 Stodůlky 155 00 Praha 5
110020774 Školní jídelna L11 131 Klauso

In [41]:
columns = ['id', 'name', 'type', 'capacity', 'address']
df_education = pd.DataFrame(dic_scools, columns = columns)
df_education.head()

Unnamed: 0,id,name,type,capacity,address
0,49625918,Mateřská škola,A00,52,Ostrovní 139/11 Nové Město 110 00 Praha 1
1,102413096,Školní jídelna,L11,90,Ostrovní 139/11 Nové Město 110 00 Praha 1
2,107500884,Mateřská škola,A00,70,Ke Kamýku 686/2 Kamýk 142 00 Praha 4
3,161102263,Školní jídelna - výdejna,L13,70,Ke Kamýku 686/2 Kamýk 142 00 Praha 4
4,110034384,Mateřská škola,A00,6,Smolkova 567/2 Kamýk 142 00 Praha 4


In [42]:
types = df_education['type'].unique()
for t in types:
    print(t,df_education[df_education.type == t].iloc[0,1])

A00 Mateřská škola
L11 Školní jídelna
L13 Školní jídelna - výdejna
B00 Základní škola
M60 Přípravný stupeň základní školy speciální
G21 Školní družina
G22 Školní klub
F10 Základní umělecká škola
C00 Střední škola
D00 Konzervatoř
M20 Školní knihovna
E00 Vyšší odborná škola
M79 Jiné účelové zařízení
H22 Domov mládeže
G11 Dům dětí a mládeže
G40 Zařízení pro další vzdělávání pedagogických pracovníků
M40 Středisko praktického vyučování
H21 Internát
K20 Speciálně pedagogické centrum
G12 Stanice zájmových činností
K10 Pedagogicko-psychologická poradna
F20 Jazyková škola s právem státní jazykové zkoušky
J12 Dětský domov se školou
J21 Středisko výchovné péče
J14 Diagnostický ústav
J11 Dětský domov
J13 Výchovný ústav
L12 Školní jídelna - vývařovna
H10 Škola v přírodě
A15 Mateřská škola (lesní mateřská škola)


In [43]:
types = ['B00', 'F10', 'C00','H22', 'G11']
types_shu = types[0:3]
df_education_selected = df_education.loc[df_education.type.isin(types)]
df_education_selected.loc[df_education_selected['type'].isin(types_shu), 'Type'] = 'school'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [44]:
df_education_selected.fillna('educatioanal center', inplace = True)
df_education_selected['Type'].unique()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


array(['school', 'educatioanal center'], dtype=object)

In [45]:
df_education_selected.loc[0:, 'District_Name'] = df_education_selected.loc[0:,'address'].apply(lambda x: ' '.join(x.split()[-2:]).lower())

In [46]:
columns_to_drop = ['id','name','capacity']
df_education_selected.drop(columns = columns_to_drop, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [47]:
get_coordinates(df_education_selected, 'address')
df_education_selected.head()

Completed 1 of 560
Completed 2 of 560
Completed 3 of 560
Completed 4 of 560
Completed 5 of 560
Completed 6 of 560
Completed 7 of 560
Completed 8 of 560
Completed 9 of 560
Completed 10 of 560
Completed 11 of 560
Completed 12 of 560
Completed 13 of 560
Completed 14 of 560
Completed 15 of 560
Completed 16 of 560
Completed 17 of 560
Completed 18 of 560
Completed 19 of 560
Completed 20 of 560
Completed 21 of 560
Completed 22 of 560
Completed 23 of 560
Completed 24 of 560
Completed 25 of 560
Completed 26 of 560
Completed 27 of 560
Completed 28 of 560
Completed 29 of 560
Completed 30 of 560
Completed 31 of 560
Completed 32 of 560
Completed 33 of 560
Completed 34 of 560
Completed 35 of 560
Completed 36 of 560
Completed 37 of 560
Completed 38 of 560
Completed 39 of 560
Completed 40 of 560
Completed 41 of 560
Completed 42 of 560
Completed 43 of 560
Completed 44 of 560
Completed 45 of 560
Completed 46 of 560
Completed 47 of 560
Completed 48 of 560
Completed 49 of 560
Completed 50 of 560
Completed

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Done: Total: 560 Success: 560 Error 0


Unnamed: 0,type,address,Type,District_Name,latitude,longitude
5,B00,Smolkova 567/2 Kamýk 142 00 Praha 4,school,praha 4,50.00862,14.448992
22,B00,Ostrovní 2070/9 Nové Město 110 00 Praha 1,school,praha 1,50.080344,14.415264
25,B00,Soukenická 1088/10 Nové Město 110 00 Praha 1,school,praha 1,50.090834,14.428853
26,F10,Soukenická 1088/10 Nové Město 110 00 Praha 1,school,praha 1,50.090834,14.428853
30,F10,Písková 126/27 Modřany 143 00 Praha 4,school,praha 4,50.001684,14.413886


In [48]:
df_education_selected.drop(columns=['type', 'address'], inplace=True)
df_education_selected.shape

(560, 4)

In [49]:
df_prague_poi = pd.concat([df_prague_poi, df_education_selected] , sort = True)
df_prague_poi.shape

(1623, 4)

In [50]:
df_prague_poi.reset_index(inplace = True)

In [58]:
df_prague_poi['District_Name'].unique()

array(['praha-petrovice', 'praha 8', 'praha 3', 'praha 22', 'praha 15',
       'praha-dubec', 'praha 5', 'praha 2', 'praha 4', 'praha 13',
       'praha 11', 'praha 20', 'praha 6', 'praha 1', 'praha 19',
       'praha-kunratice', 'praha 7', 'praha-velka-chuchle', 'praha 10',
       'praha 9', 'praha 17', 'praha-suchdol', 'praha 14',
       'praha-dablice', 'praha-klanovice', 'praha-zbraslav', 'praha 18',
       'praha 12', 'praha-seberov', 'praha 21', 'praha 16', '4 none',
       'roztoky none', '00 ostrava', '1 none', 'praha none'], dtype=object)

In [78]:
bad_rows = df_prague_poi.loc[df_prague_poi['District_Name'].isin(['roztoky none', '00 ostrava', '1 none','praha none','4 none'])].index

In [79]:
df_prague_poi.drop(bad_rows, inplace= True)

In [80]:
df_prague_poi.shape

(1618, 5)

In [81]:
poi_file_name = 'prague_poi.csv'
df_prague_poi.to_csv(poi_file_name)
upload_file(storage_creds,poi_file_name,poi_file_name)

File prague_poi.csv Uploaded


## Exploratory Data Analysis

In [4]:
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import gridspec
import seaborn as sns

import warnings; warnings.filterwarnings(action='once')

large = 18; med = 12; small = 10
params = {'axes.titlesize': small,
          'legend.fontsize': small,
          'figure.figsize': (13, 13),
          'axes.labelsize': small,
          'axes.titlesize': small,
          'xtick.labelsize': small,
          'ytick.labelsize': small,
          'figure.titlesize': large}
plt.rcParams.update(params)
plt.style.use('seaborn-whitegrid')
sns.set_style("white")
%matplotlib inline

### Prague population

In [5]:
!conda install -c conda-forge folium #uncomment this if folium is not installed yet

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    folium-0.10.0              |             py_0          59 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    altair-3.2.0               |           py36_0         770 KB  conda-forge
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.3 MB

The following NEW packages will be INSTAL

In [6]:
!conda install -c conda-forge/label/gcc7 osmnx

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - osmnx


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    thrift-cpp-0.11.0          |    h23e226f_1003         2.3 MB  conda-forge/label/gcc7
    geopy-1.17.0               |             py_0          49 KB  conda-forge/label/gcc7
    boost-cpp-1.68.0           |    h11c811c_1000        20.5 MB  conda-forge/label/gcc7
    h5py-2.8.0                 |py36h3010b51_1003         1.1 MB  conda-forge/label/gcc7
    giflib-5.1.4               |    h14c3975_1001          79 KB  conda-forge/label/gcc7
    cryptography-2.3.1         |py36hb7f436b_1000         593 KB  conda-forge/label/gcc7
    libgdal-2.3.2              |       h9d4a965_0        17.8 MB
    descartes-1.1.0            |             py_2           6 KB  conda-forge/label/gcc7
    freetds-1.00.97            

In [6]:
#!conda install -c conda-forge geopandas #uncomment this if geopandas is not installed yet

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopandas


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    kealib-1.4.10              |    h1978553_1003         172 KB  conda-forge
    libspatialite-4.3.0a       |    hb5ec416_1026         3.1 MB  conda-forge
    pyproj-1.9.6               |py36h516909a_1002          75 KB  conda-forge
    shapely-1.6.4              |py36h06cd6f9_1005         332 KB  conda-forge
    poppler-0.65.0             |       h581218d_1         1.6 MB
    freexl-1.0.5               |    h14c3975_1002          43 KB  conda-forge
    hdf4-4.2.13                |                0         969 KB  conda-forge
    geos-3.7.1                 |    hf484d3e_1000         1.6 MB  conda-forge
    giflib-5.1.9               |       h516909a_0         108 KB  conda-forge
    click-plugins-1.1.1   

In [7]:
!conda update --all #update all other package. Restart kernel if needed

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    cytoolz-0.10.0             |   py36h7b6447c_0         439 KB
    bokeh-1.3.4                |           py36_0         4.0 MB
    notebook-6.0.1             |           py36_0         6.0 MB
    qtpy-1.9.0                 |             py_0          39 KB
    matplotlib-3.1.1           |   py36h5429711_0         6.7 MB
    imageio-2.6.1              |           py36_0         3.4 MB
    jinja2-2.10.3              |             py_0          95 KB
    packaging-19.2             |             py_0          30 KB
    absl-py-0.8.0              |           py36_0         161 KB
    widgetsnbextension-3.5.1   |           py36_0         1.8 MB
    jmespath-0.9.4             |             py_0          22 KB
    nbconvert-5.6.0            |           

Importing all nessessary dependencies

In [2]:
import geopandas as gpd
from shapely.geometry import Polygon

from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer

import folium as f

As coordinates stored as string array lets define function to convert it into array of floats 

In [3]:
def read_coordinates_from_str(input_values):
    splitted  = input_values[1:-1].split('],')
    result = []
    for i in splitted:
        i = i.replace('[', '')
        i = i.replace(' ', '')
        i = i.replace(']', '')
        v = i.split(',')
        v1 = float(v[0])
        v2 = float(v[1])
        result.append([v1,v2])
    
    return result

Downloading dataset of of Prague poppulation created on the first step

In [8]:
population_file_name = 'prague_district_population.csv'
download_file(storage_creds, population_file_name, population_file_name)
df_prague_population = pd.read_csv(population_file_name)
df_prague_population.head()

File prague_district_population.csv Downloaded


Unnamed: 0.1,Unnamed: 0,Name,Geometry,Area,Total,Kids,Middle,Senior,Kids_per_1000,latitude,longitude
0,0,praha 1,"[[14.410891049000043, 50.078674687000046], [14...",5538443.86,30561.0,2391.0,22963.0,4594.0,104.124026,50.08728,14.41742
1,1,praha 10,"[[14.531321086000048, 50.072240288000046], [14...",18599366.98,113200.0,12213.0,76625.0,23937.0,159.386623,50.06762,14.46016
2,2,praha 11,"[[14.54355294800007, 50.03618763800006], [14.5...",9793679.84,75741.0,8688.0,54983.0,11816.0,158.012477,50.03178,14.50719
3,3,praha 12,"[[14.450632163000023, 50.01452735600003], [14....",23317909.06,53515.0,6156.0,39699.0,7480.0,155.066878,50.00564,14.40462
4,4,praha 13,"[[14.320621949000042, 50.04010680700003], [14....",13196802.19,59906.0,7985.0,46514.0,5109.0,171.668745,50.05163,14.34231


Set district names as index.

In [9]:
df_prague_population.set_index('Name', inplace= True)

Convert string representation of the districts polygons to floats.

In [10]:
df_prague_population.loc[:,'Polygon'] = df_prague_population.loc[:,'Geometry'].apply(lambda x: Polygon(read_coordinates_from_str(x))) 

Drop unnessesary collumns

In [11]:
df_prague_population.drop(columns = ['Geometry'], inplace=True)
df_prague_population.rename(columns ={'Polygon':'Geometry'}, inplace=True)

Select data particular to children.

In [12]:
df_children = df_prague_population[['Kids', 'Kids_per_1000', 'Total', 'Geometry', 'latitude', 'longitude']]
df_children.head()

Unnamed: 0_level_0,Kids,Kids_per_1000,Total,Geometry,latitude,longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
praha 1,2391.0,104.124026,30561.0,"POLYGON ((14.41089104900004 50.07867468700005,...",50.08728,14.41742
praha 10,12213.0,159.386623,113200.0,"POLYGON ((14.53132108600005 50.07224028800005,...",50.06762,14.46016
praha 11,8688.0,158.012477,75741.0,"POLYGON ((14.54355294800007 50.03618763800006,...",50.03178,14.50719
praha 12,6156.0,155.066878,53515.0,"POLYGON ((14.45063216300002 50.01452735600003,...",50.00564,14.40462
praha 13,7985.0,171.668745,59906.0,"POLYGON ((14.32062194900004 50.04010680700003,...",50.05163,14.34231


Calculate the percentage of children from total population and percentage from children population

In [13]:
df_children = df_children.sort_values(by=['Kids'], ascending=False)
df_children['Kids_percent'] = df_children['Kids'] * 100 / df_children['Total']
total_kids  = df_children['Kids'].sum(axis = 0, skipna = True) 
df_children['Kids_from_total'] = df_children['Kids'] * 100 / total_kids

Select only those district where children population is more the 1% from overall population

In [95]:
df_children_t = df_children.loc[df_children['Kids_from_total']>0.5].sort_values(axis=0,by=['Kids_from_total'], ascending=False)

Make geo data frame from existing population dataset

In [96]:
gdf = gpd.GeoDataFrame(df_children_t, geometry='Geometry')
gdf.crs= {'init':'epsg:4326'} 
gdf.reset_index(inplace = True)
gdf.head()

Unnamed: 0,Name,Kids,Kids_per_1000,Total,Geometry,latitude,longitude,Kids_percent,Kids_from_total
0,praha 4,13793.0,155.603439,131793.0,"POLYGON ((14.48827570700007 50.04455542200003,...",50.04231,14.44805,10.465654,8.978532
1,praha 8,12485.0,169.787714,104918.0,"POLYGON ((14.44549218800006 50.11342461800007,...",50.12692,14.45672,11.899769,8.127091
2,praha 10,12213.0,159.386623,113200.0,"POLYGON ((14.53132108600005 50.07224028800005,...",50.06762,14.46016,10.788869,7.950033
3,praha 6,11990.0,169.402922,104185.0,"POLYGON ((14.29320690800006 50.07751405400006,...",50.10106,14.39981,11.508375,7.804872
4,praha 5,10571.0,172.463863,83968.0,"POLYGON ((14.41022472400005 50.04464234000005,...",50.07167,14.40098,12.58932,6.881176


In [97]:
merged_json = json.loads(gdf.to_json())
json_data = json.dumps(merged_json)

In [98]:
Prague_coordinates = [50.083333, 14.416667]
map_prague = f.Map(location=Prague_coordinates, width=800, height=800, zoom_start = 11, max_zoom = 11, min_zoom = 1 ,tiles = 'stamentoner' , prefer_canvas = True)
map_prague.choropleth(
    name='Children population in Prague',
    geo_data=json_data,
    data = df_children_t,
    columns = ['Name','Total'],
    fill_color='PuBuGn',
    key_on='feature.properties.Name'
) 

map_prague

In [99]:
def roundup(x):
    return x if x % 10 == 0 else x + 10 - x % 10

In [100]:
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer
from bokeh.layouts import column

def draw_map(gdf_data, palette, field, tick_labels, title):
    merged_json = json.loads(gdf_data.to_json())
    json_data = json.dumps(merged_json)
    
    geosource = GeoJSONDataSource(geojson = json_data)
    palette = palette[::-1]
    min_t = int(gdf_data[field].min())
    max_t = int(gdf_data[field].max())
    color_mapper = LinearColorMapper(palette = palette, low = min_t , high = max_t)
    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8, width = 600, height = 20,
    border_line_color=None, location = (200,0), orientation = 'horizontal', major_label_overrides = tick_labels)
    fig = figure(title = title, plot_height = 600 , plot_width = 800, toolbar_location = None)
    fig.xgrid.grid_line_color = None
    fig.ygrid.grid_line_color = None
    fig.axis.visible = False
    fig.patches('xs','ys', source = geosource,fill_color = {'field' : field, 'transform' : color_mapper},
          line_color = 'black', line_width = 0.25, fill_alpha = 1)
    fig.add_layout(color_bar, 'above')
    
    return fig

def draw_bar(labels, values, fill_collor, border_color, title):
    fig = figure(x_range=labels, plot_height=250,plot_width = 800, title = title,
           toolbar_location=None, tools="")
    fig.vbar(x=labels, top=values, width=1, fill_color = fill_collor, line_color=border_color)
    fig.xaxis.major_label_orientation = 45
    fig.xgrid.grid_line_color = None
    fig.ygrid.grid_line_color = None
    
    return fig

Display overall children population in Prague

In [101]:
pallete = brewer['PuBuGn'][9]

results, bin_edges = pd.qcut(df_children_t['Kids_per_1000'], labels=False, retbins=True, q=[.3,.4,.5, .6,.7, .8,.9, 1])
map_ticks  = {}
for b in bin_edges:
    rb = roundup(b)
    map_ticks[str(rb)] = str(rb)

map_palette  = pallete[::-1]
fig_map = draw_map(gdf, pallete ,'Kids_per_1000', map_ticks, 'Children per 1000 adults')

labels = list(df_children_t['Name'])
values = list(df_children_t['Kids_from_total'])
fig_bar = draw_bar(labels, values, pallete[7], pallete[6], 'Percents of children from overall children population')

output_notebook()
show(column(fig_map ,fig_bar))


Download POI dataset that have been prepared at data acuqisition step

In [102]:
poi_file_name = 'prague_poi.csv'
download_file(storage_creds, poi_file_name,poi_file_name)
df_parague_poi = pd.read_csv(poi_file_name).set_index('index')
df_parague_poi.head()

File prague_poi.csv Downloaded


Unnamed: 0_level_0,Unnamed: 0,District_Name,Type,latitude,longitude
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0,praha-petrovice,playground,14.563237,50.038025
1,1,praha 8,playground,14.43885,50.134014
2,2,praha 3,playground,14.476411,50.094387
3,3,praha 22,playground,14.593131,50.036453
4,4,praha 15,playground,14.539891,50.043732


In [103]:
df_parague_poi['Type'] = pd.Categorical(df_parague_poi['Type'])
dfDummies = pd.get_dummies(df_parague_poi['Type'], prefix = 'category')
df_parague_poi = pd.concat([df_parague_poi, dfDummies], axis=1)

In [104]:
df_parague_poi.head()

Unnamed: 0_level_0,Unnamed: 0,District_Name,Type,latitude,longitude,category_educatioanal center,category_library,category_playground,category_school,category_sport
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0,praha-petrovice,playground,14.563237,50.038025,0,0,1,0,0
1,1,praha 8,playground,14.43885,50.134014,0,0,1,0,0
2,2,praha 3,playground,14.476411,50.094387,0,0,1,0,0
3,3,praha 22,playground,14.593131,50.036453,0,0,1,0,0
4,4,praha 15,playground,14.539891,50.043732,0,0,1,0,0


Calculate basic statistical inforamation over Prague districts

In [105]:
df_parague_poi_sum = df_parague_poi.groupby('District_Name').sum()
df_parague_poi_sum

Unnamed: 0_level_0,Unnamed: 0,latitude,longitude,category_educatioanal center,category_library,category_playground,category_school,category_sport
District_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
praha 1,85030,5386.996581,2087.507715,3,2,10,35,65
praha 10,154222,8331.697294,2904.016233,3,5,10,61,96
praha 11,4467,487.263515,416.301109,0,0,6,0,8
praha 12,5563,478.864484,229.921625,0,0,2,0,9
praha 13,7295,665.077336,558.115322,0,0,8,0,11
praha 14,1249,129.360393,129.351774,0,0,2,0,2
praha 15,4056,402.082006,437.524473,0,0,7,0,6
praha 16,4205,349.881977,100.524422,0,0,0,0,7
praha 17,417,78.693599,114.445297,0,0,2,0,1
praha 18,1477,164.912173,93.650889,0,0,1,0,3


In [106]:
df_parague_poi_sum.describe()

Unnamed: 0.1,Unnamed: 0,latitude,longitude,category_educatioanal center,category_library,category_playground,category_school,category_sport
count,31.0,31.0,31.0,31.0,31.0,31.0,31.0,31.0
mean,42250.290323,2441.847756,922.377386,1.354839,1.322581,4.677419,16.548387,28.290323
std,68247.559541,3745.975973,1218.94014,2.415897,2.329336,3.986536,28.373507,41.907989
min,0.0,14.372725,50.010201,0.0,0.0,0.0,0.0,0.0
25%,224.5,43.293815,100.23148,0.0,0.0,2.0,0.0,0.0
50%,4056.0,343.935762,150.071194,0.0,0.0,3.0,0.0,6.0
75%,71026.5,4161.489986,1653.413241,2.0,2.0,8.0,29.5,47.5
max,252977.0,13649.482173,4423.122131,8.0,9.0,14.0,106.0,148.0


Join population dataset with POI statistic, and calulate additional statistics

In [107]:
df_parague_poi_sum.drop(columns=['latitude','longitude','Unnamed: 0'], inplace= True)

In [117]:
df_poi_kids = df_parague_poi_sum.join(df_children).sort_values(axis=0,by=['Kids_from_total'], ascending=False)
df_poi_kids.dropna(inplace=True)
df_poi_kids['category_library_kids'] = df_poi_kids['category_library']/(df_poi_kids['Kids']/1000)
df_poi_kids['category_playground_kids'] = df_poi_kids['category_playground']/(df_poi_kids['Kids']/1000)
df_poi_kids['category_school_kids'] = df_poi_kids['category_school']/(df_poi_kids['Kids']/1000)
df_poi_kids['category_sport_kids'] = df_poi_kids['category_sport']/(df_poi_kids['Kids']/1000)
df_poi_kids['category_educatioanal center_kids'] = df_poi_kids['category_educatioanal center']/(df_poi_kids['Kids']/1000)
df_poi_kids['category_total'] = df_poi_kids['category_library'] +  df_poi_kids['category_playground'] + df_poi_kids['category_school'] + df_poi_kids['category_educatioanal center']
df_poi_kids['category_total_kids'] = df_poi_kids['category_total']/(df_poi_kids['Kids']/1000)
df_poi_kids.head()

Unnamed: 0_level_0,category_educatioanal center,category_library,category_playground,category_school,category_sport,Kids,Kids_per_1000,Total,Geometry,latitude,longitude,Kids_percent,Kids_from_total,category_library_kids,category_playground_kids,category_school_kids,category_sport_kids,category_educatioanal center_kids,category_total,category_total_kids
District_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
praha-kunratice,0,0,3,0,0,1529.0,216.082533,9654.0,"POLYGON ((14.48393489500006 49.99241857800007,...",50.01371,14.48528,15.837995,0.9953,0.0,1.962067,0.0,0.0,0.0,3,1.962067
praha 16,0,0,0,0,7,1089.0,179.70297,8486.0,"POLYGON ((14.36232016600007 49.98091841900003,...",49.98357,14.3611,12.832901,0.708883,0.0,0.0,0.0,6.427916,0.0,0,0.0
praha 19,0,0,1,0,1,1086.0,233.347658,6768.0,"POLYGON ((14.52177091000004 50.12617608700003,...",50.13422,14.54744,16.046099,0.70693,0.0,0.92081,0.0,0.92081,0.0,1,0.92081
praha-suchdol,0,0,1,0,0,942.0,189.156627,6982.0,"POLYGON ((14.38913739200007 50.12445784700003,...",50.13701,14.36932,13.491836,0.613193,0.0,1.061571,0.0,0.0,0.0,1,1.061571
praha-petrovice,0,0,1,0,0,839.0,178.967577,6126.0,"POLYGON ((14.54928140500004 50.02752104500007,...",50.03611,14.56242,13.695723,0.546146,0.0,1.191895,0.0,0.0,0.0,1,1.191895


In [118]:
df_poi_kids.reset_index(inplace = True)

In [119]:
poi_gdf = gpd.GeoDataFrame(df_poi_kids, geometry='Geometry')
poi_gdf.crs= {'init':'epsg:4326'} 
poi_gdf.reset_index(inplace = True)
poi_gdf.head()

Unnamed: 0,index,District_Name,category_educatioanal center,category_library,category_playground,category_school,category_sport,Kids,Kids_per_1000,Total,...,longitude,Kids_percent,Kids_from_total,category_library_kids,category_playground_kids,category_school_kids,category_sport_kids,category_educatioanal center_kids,category_total,category_total_kids
0,0,praha 4,8,9,11,106,148,13793.0,155.603439,131793.0,...,14.44805,10.465654,8.978532,0.652505,0.797506,7.685058,10.73008,0.580004,134,9.715073
1,1,praha 8,5,3,9,45,76,12485.0,169.787714,104918.0,...,14.45672,11.899769,8.127091,0.240288,0.720865,3.604325,6.087305,0.400481,62,4.965959
2,2,praha 10,3,5,10,61,96,12213.0,159.386623,113200.0,...,14.46016,10.788869,7.950033,0.4094,0.8188,4.994678,7.860477,0.24564,79,6.468517
3,3,praha 6,4,4,8,45,93,11990.0,169.402922,104185.0,...,14.39981,11.508375,7.804872,0.333611,0.667223,3.753128,7.756464,0.333611,61,5.087573
4,4,praha 5,3,7,10,68,113,10571.0,172.463863,83968.0,...,14.40098,12.58932,6.881176,0.662189,0.945984,6.432693,10.689623,0.283795,88,8.324662


In [127]:
pallete = brewer['PuBuGn'][9]

results, bin_edges = pd.qcut(df_poi_kids['category_total'], labels=False, retbins=True, q=[.3,.4,.5, .6,.7, .8,.9, 1],duplicates ='drop')
map_ticks  = {}
for b in bin_edges:
    rb = roundup(b)
    map_ticks[str(rb)] = str(rb)

map_palette  = pallete[::-1]
fig_map = draw_map(poi_gdf, pallete ,'category_total', map_ticks, 'Total number of POI')

In [158]:
results, bin_edges = pd.qcut(df_poi_kids['category_total_kids'], labels=False, retbins=True, q=[.3,.4,.5, .6,.7, .8,.9, 1],duplicates ='drop')
map_ticks  = {}
for b in bin_edges:
    rb = roundup(b)
    map_ticks[str(rb)] = str(rb)

map_palette  = pallete[::-1]
fig_map_2 = draw_map(poi_gdf, pallete ,'category_total_kids', map_ticks, 'Total number of POI per 1000 children')

In [152]:
values = df_poi_kids[['District_Name', 'category_school_kids']].sort_values('category_school_kids', ascending=False)
labels = list(values['District_Name'])
values = list(values['category_school_kids'])
fig_bar_school = draw_bar(labels, values, pallete[7], pallete[6], 'Schools per 1000')

In [153]:
values = df_poi_kids[['District_Name', 'category_educatioanal center_kids']].sort_values('category_educatioanal center_kids', ascending=False)
labels = list(values['District_Name'])
values = list(values['category_educatioanal center_kids'])
fig_bar_edu = draw_bar(labels, values, pallete[7], pallete[6], 'Educational and hobby centers per 1000')

In [154]:
values = df_poi_kids[['District_Name', 'category_sport_kids']].sort_values('category_sport_kids', ascending=False)
labels = list(values['District_Name'])
values = list(values['category_sport_kids'])
fig_bar_sport = draw_bar(labels, values, pallete[7], pallete[6], 'Sport facilities per 1000')

In [155]:
values = df_poi_kids[['District_Name', 'category_library_kids']].sort_values('category_library_kids', ascending=False)
labels = list(values['District_Name'])
values = list(values['category_library_kids'])
fig_bar_lib = draw_bar(labels, values, pallete[7], pallete[6], 'Libraries per 1000')

In [156]:
values = df_poi_kids[['District_Name', 'category_playground_kids']].sort_values('category_playground_kids', ascending=False)
labels = list(values['District_Name'])
values = list(values['category_playground_kids'])
fig_bar_play = draw_bar(labels, values, pallete[7], pallete[6], 'Outdoor playgrounds 1000')

In [159]:
output_notebook()
show(column(fig_map ,fig_map_2,fig_bar_school, fig_bar_edu, fig_bar_sport, fig_bar_lib, fig_bar_play))

I will use OSMnx - a Python package by Geoff Boeing to work with street networks: retrieve, construct, analyze, and visualize street networks (and more) from OpenStreetMap.
<a>https://github.com/gboeing/osmnx</a>.

In [None]:
#!conda install -c conda-forge/label/gcc7 osmnx

Installing gecoder. A simple and consistent geocoding library