# Distance to nearest compost units ('worm hotels') for all addresses in De Wittenbuurt

Using open data from the municipality, we will find the nearest plastic container for each address in De Wittenbuurt, and the distance to it in meters.

Findings:

* The largest distance is X meters. This seems reasonable!
* The distance is in straight meters. It would be cool to calculate the distance over the road, via openstreetmap.


Data:

* [Addresses in Amsterdam](https://data.amsterdam.nl/index.html#?dsd=bag&dsp=1&dsv=TABLE&mpb=topografie&mpz=11&mpv=52.3731081:4.8932945)
* [Neighbourhood compost](https://maps.amsterdam.nl/open_geodata/?LANG=en)


In [28]:
import numpy as np
import pandas as pd

# set interactivity to 'all' so we can easily print more than 1 outputs in 1 cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Cleaning the compost units dataset

In [29]:
# Load afvalcontainers from Amsterdam site
df_buurtcompost = pd.read_csv('http://maps.amsterdam.nl/open_geodata/excel.php?KAARTLAAG=BUURTCOMPOST&THEMA=buurtcompost', encoding='latin-1', sep=";")
df_buurtcompost.head()

Unnamed: 0,OBJECTNUMMER,Straatnaam,Initiatiefnemer,Email,Aantal_bewoners,Soort_afval,Gebruik,Startjaar,Foto,COORDS,LNG,LAT,Unnamed: 12
0,4,Frans Halsstraat,Peter Jan Brouwer,amsterdam+C001@wormenhotel.nl,20,GF,Toestemming van initiatiefnemer,2015,frans hals .jpeg,"POINT(4.888706,52.356007)",4888706,52356007,
1,5,hoek saffierstraat jozef israelkade,christien & anneke,amsterdam+c002@wormenhotel.nl,7,GF,Toestemming van initiatiefnemer,2016,,"POINT(4.908527,52.349857)",4908527,52349857,
2,6,zaanstraat t/o 300,soeptuinen bredius,www.soeptuinen.nl,0,GFT,Vrij inleveren,2016,,"POINT(4.871422,52.391292)",4871422,52391292,
3,7,kramatweg 51,oost indisch groen,info@oostindischgroen.nl,0,GFT,Vrij inleveren,2016,,"POINT(4.945789,52.36213)",4945789,5236213,
4,8,IJplein,buurtbak voedseltuin,ireen@balkonton.nl,6,GF,Toestemming van initiatiefnemer,2016,,"POINT(4.910984,52.382154)",4910984,52382154,


In [30]:
# Clean up the coordinates column. We need a string formatted as '{lng},{lat}'
df_buurtcompost['locatie_cleaned'] = df_buurtcompost['COORDS'].str.replace('POINT\(','').str.replace('\)','')
df_buurtcompost['lng'] = df_buurtcompost['LNG'].str.replace(',','.').astype(float)
df_buurtcompost['lat'] = df_buurtcompost['LAT'].str.replace(',','.').astype(float)

# Show frame
df_buurtcompost.head()

Unnamed: 0,OBJECTNUMMER,Straatnaam,Initiatiefnemer,Email,Aantal_bewoners,Soort_afval,Gebruik,Startjaar,Foto,COORDS,LNG,LAT,Unnamed: 12,locatie_cleaned,lng,lat
0,4,Frans Halsstraat,Peter Jan Brouwer,amsterdam+C001@wormenhotel.nl,20,GF,Toestemming van initiatiefnemer,2015,frans hals .jpeg,"POINT(4.888706,52.356007)",4888706,52356007,,"4.888706,52.356007",4.888706,52.356007
1,5,hoek saffierstraat jozef israelkade,christien & anneke,amsterdam+c002@wormenhotel.nl,7,GF,Toestemming van initiatiefnemer,2016,,"POINT(4.908527,52.349857)",4908527,52349857,,"4.908527,52.349857",4.908527,52.349857
2,6,zaanstraat t/o 300,soeptuinen bredius,www.soeptuinen.nl,0,GFT,Vrij inleveren,2016,,"POINT(4.871422,52.391292)",4871422,52391292,,"4.871422,52.391292",4.871422,52.391292
3,7,kramatweg 51,oost indisch groen,info@oostindischgroen.nl,0,GFT,Vrij inleveren,2016,,"POINT(4.945789,52.36213)",4945789,5236213,,"4.945789,52.36213",4.945789,52.36213
4,8,IJplein,buurtbak voedseltuin,ireen@balkonton.nl,6,GF,Toestemming van initiatiefnemer,2016,,"POINT(4.910984,52.382154)",4910984,52382154,,"4.910984,52.382154",4.910984,52.382154


## Cleaning the addresses dataset

In [34]:
# Loading from disk because file is 17mb, loading from site takes too long
df_adressen = pd.read_csv('../data/export_20180227_122333.csv', encoding='latin-1', sep=";")
df_adressen.head()
df_adressen.keys()
# Convert lat and long values to float
df_adressen['lng'] = df_adressen['Longitude (WGS84)'].str.replace(',', '.').astype(float)
df_adressen['lat'] = df_adressen['Latitude (WGS84)'].str.replace(',', '.').astype(float)
print(len(df_adressen))
# A few rows have NaN as longtitude or lattitude, let's remove those
df_adressen = df_adressen[pd.notnull(df_adressen['lng'])]
df_adressen = df_adressen[pd.notnull(df_adressen['lat'])]
print(len(df_adressen))
# Now combine the coordinates into a string
df_adressen['locatie_cleaned'] = df_adressen['lng'].map(str) + "," + df_adressen['lat'].map(str)
df_adressen['locatie_cleaned'].describe()



Unnamed: 0,Naam openbare ruimte,Huisnummer,Huisletter,Huisnummertoevoeging,Postcode,Woonplaats,Naam stadsdeel,Code stadsdeel,Naam gebiedsgerichtwerkengebied,Code gebiedsgerichtwerkengebied,...,Feitelijk gebruik,Oppervlakte (m2),Objecttype,Verblijfsobjectstatus,Openbareruimte-identificatie,Pandidentificatie,Verblijfsobjectidentificatie,Ligplaatsidentificatie,Standplaatsidentificatie,Nummeraanduidingidentificatie
0,Eerste Constantijn Huygensstraat,15,,1,1054BP,,West,E,Oud West / De Baarsjes,DX05,...,woning,66.0,Verblijfsobject,Verblijfsobject in gebruik,363300000002537,['0363100012159829'],363010000000000.0,,,363200000007060
1,Eerste Helmersstraat,188,,3,1054EL,,West,E,Oud West / De Baarsjes,DX05,...,woning,62.0,Verblijfsobject,Verblijfsobject in gebruik,363300000002541,['0363100012120031'],363010000000000.0,,,363200000007846
2,Eerste Anjeliersdwarsstraat,1,,2,1015NR,,Centrum,A,Centrum-West,DX01,...,woning,48.0,Verblijfsobject,Verblijfsobject in gebruik,363300000002529,['0363100012174784'],363010000000000.0,,,363200000006112
3,Eerste Constantijn Huygensstraat,19,,2,1054BP,,West,E,Oud West / De Baarsjes,DX05,...,woning,130.0,Verblijfsobject,Verblijfsobject in gebruik,363300000002537,['0363100012157570'],363010000000000.0,,,363200000007066
4,Eerste Helmersstraat,190,,3,1054EL,,West,E,Oud West / De Baarsjes,DX05,...,woning,88.0,Verblijfsobject,Verblijfsobject in gebruik,363300000002541,['0363100012120432'],363010000000000.0,,,363200000007852


Index(['Naam openbare ruimte', 'Huisnummer', 'Huisletter',
       'Huisnummertoevoeging', 'Postcode', 'Woonplaats', 'Naam stadsdeel',
       'Code stadsdeel', 'Naam gebiedsgerichtwerkengebied',
       'Code gebiedsgerichtwerkengebied', 'Naam Wijk', 'Code Wijk',
       'Naam buurt', 'Code buurt', 'Code bouwblok', 'X-coordinaat (RD)',
       'Y-coordinaat (RD)', 'Latitude (WGS84)', 'Longitude (WGS84)',
       'Indicatie hoofdadres', 'Gebruiksdoel', 'Feitelijk gebruik',
       'Oppervlakte (m2)', 'Objecttype', 'Verblijfsobjectstatus',
       'Openbareruimte-identificatie', 'Pandidentificatie',
       'Verblijfsobjectidentificatie', 'Ligplaatsidentificatie',
       'Standplaatsidentificatie', 'Nummeraanduidingidentificatie'],
      dtype='object')

510488
510436


count                   510436
unique                  314880
top       4.8507211,52.3574849
freq                       145
Name: locatie_cleaned, dtype: object

In [36]:
df_adressen['Feitelijk gebruik'].unique()
df_adressen['Naam buurt'].unique()
df_buurten_bevolking = pd.read_excel('https://www.ois.amsterdam.nl/download/11-bevolking-buurten-1-januari-2012-2017',skiprows=3,usecols=[0,5])
df_buurten_bevolking = df_buurten_bevolking.dropna()
df_buurten_bevolking

array(['woning', '(detail)handel / winkel',
       'beroepsonderwijs  (LBO / MBO)', nan, 'kantoor (hoofdcode)',
       'winkel', 'woning + winkel', 'cafÃ© / bar / restaurant', 'horeca',
       'bedrijf', 'garage',
       'parkeergarage geÃ«xploiteerd voor kortparkeren', 'praktijkruimte',
       'opslag / distributie', 'crÃ¨che / peuterspeelzaal',
       'overig cultureel', 'woning + horeca', 'onderwijs', 'kerk',
       'hogeschool / universiteit', 'dagverblijf',
       'woning + (detail)handel / winkel', 'bijzondere woonfunctie',
       'woning + kantoor', 'hotel / motel', 'woning + praktijkruimte',
       'dienstverlening openbaar nut', 'atelier / werkruimte met woning',
       'overig medisch', 'woning + bedrijf',
       'woning + kantoor (hoofdcode)', 'basisschool', 'benzinestation',
       'cultuur', 'trafo', 'showroom / werkplaats / garage', 'speeltuin',
       'sport / recreatie', 'verblijf voor verstandelijk gehandicapten',
       'bibliotheek', 'praktijkruimte (tandarts / fysio

array(['Helmersbuurt Oost', 'Cremerbuurt West', 'Anjeliersbuurt Zuid',
       'WG-terrein', 'Noordwestkwadrant Indische buurt Zuid',
       'Vondelparkbuurt Oost', 'Cremerbuurt Oost', 'Van der Pekbuurt',
       'Driehoekbuurt', 'Frederik Hendrikbuurt Zuidoost',
       'Frederik Hendrikbuurt Noord', 'Swammerdambuurt',
       'Anjeliersbuurt Noord', 'Westelijke eilanden', 'Frans Halsbuurt',
       'Noordoostkwadrant Indische buurt', 'Czaar Peterbuurt',
       'Bloemgrachtbuurt', 'Osdorp Midden Noord', 'Hercules Seghersbuurt',
       'Sarphatiparkbuurt', 'Elandsgrachtbuurt', 'De Wittenbuurt Noord',
       'Weesperzijde Midden/Zuid', 'Oosterparkbuurt Noordwest',
       'Oosterparkbuurt Zuidwest', 'Fannius Scholtenbuurt',
       'Westerstaatsman', 'Buyskade e.o.', 'Passeerdersgrachtbuurt',
       'Schinkelbuurt Noord', 'Planciusbuurt Noord', 'Gerard Doubuurt',
       'Dapperbuurt Zuid', 'Dapperbuurt Noord', 'Vogelbuurt Noord',
       'Weteringbuurt', 'De Eenhoorn', 'Frederik Hendrikbuurt Zu

Unnamed: 0,buurt,totaal
1,A00a Kop Zeedijk,1024
2,A00b Oude Kerk e.o.,664
3,A00c Burgwallen Oost,1580
4,A00d Nes e.o.,360
5,A00e BG-terrein e.o.,618
6,A01a Stationsplein e.o.,3
7,A01b Hemelrijk,533
8,A01c Nieuwendijk Noord,457
9,A01d Spuistraat Noord,799
10,A01e Nieuwe Kerk e.o.,833


### Zooming in on my neighbourhood

I live in De Wittenstraat. Let's see if I can find my neighbourhood. Let's search through the neighbourhood names with Python instead of cmd+f, for fun.


In [37]:

buurten = df_adressen['Naam buurt'].dropna().unique()
indexes_witten = [i for i, s in enumerate(buurten) if 'Witten' in s]
wittenbuurten = [buurten[i] for i in indexes_witten]
wittenbuurten


['De Wittenbuurt Noord', 'De Wittenbuurt Zuid', 'Wittenburg']

In [38]:
# We get 3 results, of which we only need te first 2 (Wittenburg is a different neighbourhood in centrum-east)
# Let's focus on De Wittenbuurt Noord & Zuid for this demo. I live there, and it's only 1454 rows

df_adressen_witten = df_adressen[(df_adressen['Naam buurt'] == "De Wittenbuurt Noord") 
                                 | (df_adressen['Naam buurt'] == "De Wittenbuurt Zuid")]
df_adressen_witten.index.size

# Let's also create a dataframe for the larger area of where I live, de Staatsliedenbuurt. 8295 entries.
df_adressen['Naam Wijk'].dropna().values
df_staatsliedenbuurt = df_adressen[(df_adressen['Naam Wijk'] == "Staatsliedenbuurt")]
# df_staatsliedenbuurt.head()
print("adressen: " + str(len(df_staatsliedenbuurt.index)))

1454

array(['Helmersbuurt', 'Overtoomse Sluis', 'Jordaan', ...,
       'Noordelijke IJ-oevers West', 'Noordelijke IJ-oevers West',
       'Noordelijke IJ-oevers West'], dtype=object)

adressen: 8295


## Now for some routing

To get walking distances, we need a routing service. I installed the excellent OSRM project locally with docker via http://router.project-osrm.org/ and used the latest information for
The Netherlands via http://download.geofabrik.de.

OSRM lets you create a matrix that shows the walking duration for a series of points. We can create a 1 by many matrix for each address, and pick out the worm hotel with the shortest walking duration. The OSMR [doesn't return distance](https://github.com/Project-OSRM/osrm-backend/issues/1353), but since walking speed is set at 5 km/h by default and walking penalties are only given for sandy terrain and the like, we can reasonably calculate the distance in meters from the duration information.

I'll first explore how to get the right information from the API, and then create a function which picks the nearest worm hotel for an address, and also prints the walking duration in minutes and distance in meters.


## First some exploration by hand

In [39]:
close_to_my_address = "4.877951499999995,52.3824856"
buurtcompostlocaties = df_buurtcompost.locatie_cleaned.values
buurtcompostlocaties_string = ";".join(buurtcompostlocaties)

profile = "foot"

# URL format for a htpp API call for getting a matrix is http://0.0.0.0:5000/table/v1/walking/
# 52.3548300687;4.7856975691585,52.349415837928;4.7941351619763,52.357223885252?sources=0
# 'sources=0' indicates that we only want the distances from the first set of coordinates
# to all the others. As a test I construct a URL that gives me the walking duration from my
# address to all the worm hotels.
 
testURL_wormhotel_distance_from_my_address = "http://0.0.0.0:5000/table/v1/" + profile + "/" \
                                        + close_to_my_address + ";" \
                                        + buurtcompostlocaties_string \
                                        + "?sources=0"
                      
testURL_wormhotel_distance_from_my_address


# Let's run a request and only select the duration array
import requests
distances_from_testURL = requests.get(testURL_wormhotel_distance_from_my_address).json()["durations"][0]

# This gives us a durations array. The first is 0 and the duration to my own address, so we can remove that:
distances_from_testURL.pop(0)
distances_from_testURL

# Give us the duration and the index of the wormhotel that's closest
print("Seconds I have to walk: " + str(min(distances_from_testURL)))
print("Index of the wormhotel in the wormhotel dataset: " + str(distances_from_testURL.index(min(distances_from_testURL))))

# Let's find out where the nearest wormhotel to my house is! 🎉
df_buurtcompost.iloc[33]

'http://0.0.0.0:5000/table/v1/foot/4.877951499999995,52.3824856;4.888706,52.356007;4.908527,52.349857;4.871422,52.391292;4.945789,52.36213;4.910984,52.382154;4.864346,52.3687925;4.926176,52.3370046;4.971355,52.37154;5.004663,52.345872;4.861766,52.376636;4.923331,52.366302;4.9177,52.351515;4.887118,52.356278;4.887075,52.35694;5.000672,52.353723;4.9758081,52.3612775;4.9457027,52.3681734;4.9939396,52.3572812;4.9520167,52.3340925;4.877403,52.313325;4.9121,52.3511739;4.8247244,52.3604001;4.8815228,52.3514622;4.9442745,52.3920048;4.9464001,52.3773338;4.8999979,52.4053634;4.9392225,52.3594959;4.7711392,52.3800813;4.9461426,52.3742948;4.8538532,52.3676329;4.8515358,52.2839112;4.9365725,52.3703089;4.8761048,52.3900331;4.8761638,52.3813059;4.8487945,52.3403283;4.8980131,52.355381;4.9840798,52.2981711;4.9370124,52.3571305;4.9071004,52.3491685?sources=0'

0

[2499.2,
 3645.8,
 1048.8,
 4231.6,
 2169.1,
 1483.8,
 5273.8,
 7029.6,
 7985.1,
 1387.7,
 2994.6,
 3918.9,
 2450.6,
 2402.2,
 7914.4,
 5714,
 4101.7,
 8331.7,
 6517.4,
 7247.1,
 3579.1,
 3995.3,
 2849.9,
 4062,
 3992,
 4135.3,
 4136.9,
 5867.5,
 3821.1,
 2157.7,
 9035.3,
 3539.6,
 821.1,
 162.2,
 4528,
 2895,
 10753,
 4293,
 3623.6]

Seconds I have to walk: 162.2
Index of the wormhotel in the wormhotel dataset: 33


OBJECTNUMMER                                          42
Straatnaam         hoek jacob catskade - de wittenstraat
Initiatiefnemer                                    hilde
Email                      amsterdam+a209@wormenhotel.nl
Aantal_bewoners                                       15
Soort_afval                                           GF
Gebruik                  Toestemming van initiatiefnemer
Startjaar                                           2017
Foto                                                 NaN
COORDS                       POINT(4.8761638,52.3813059)
LNG                                            4,8761638
LAT                                           52,3813059
Unnamed: 12                                          NaN
locatie_cleaned                     4.8761638,52.3813059
lng                                              4.87616
lat                                              52.3813
Name: 33, dtype: object

^^^^^^

Super cool! Turns out I only have to walk 2.7 minutes to get to a worm hotel! Let's wrap this up in a function so we can encode the whole dataset with this information

## Creating a function

In [40]:
import requests
buurtcompostlocaties = df_buurtcompost.locatie_cleaned.values
buurtcompostlocaties_string = ";".join(buurtcompostlocaties)

def compute_nearest_target_information(cell):
    current_coordinates = cell
    profile = "foot"
    requestURL = "http://0.0.0.0:5000/table/v1/" + profile + "/" \
                                        + current_coordinates + ";" \
                                        + buurtcompostlocaties_string \
                                        + "?sources=0"
    distances = requests.get(requestURL).json()["durations"][0]
    distances.pop(0)
    seconds = min(distances)
    meters = seconds * 1.388888889
    index = distances.index(min(distances))
    buurtcompost_values = df_buurtcompost.iloc[index].values    
    return (seconds, meters, index, buurtcompost_values)




In [41]:
# Test output
compute_nearest_target_information("4.877951499999995,52.3824856")

compute_nearest_target_information("4.8796727,52.3712739")

(162.2,
 225.27777779579998,
 33,
 array([42, 'hoek jacob catskade - de wittenstraat', 'hilde',
        'amsterdam+a209@wormenhotel.nl', 15, 'GF',
        'Toestemming van initiatiefnemer', 2017, nan,
        'POINT(4.8761638,52.3813059)', '4,8761638', '52,3813059', nan,
        '4.8761638,52.3813059', 4.8761637999999996, 52.381305900000001], dtype=object))

(949.3, 1318.4722223277, 5, array([9, 'schimmelstraat 44',
        'stadsboerderij zimmerhoeve, annelijn van amsterdam',
        'info@zimmerhoeve.nl', 0, 'GF', 'Toestemming van initiatiefnemer',
        0, nan, 'POINT(4.864346,52.3687925)', '4,864346', '52,3687925', nan,
        '4.864346,52.3687925', 4.8643460000000003, 52.368792499999998], dtype=object))

## Success!

Neat! Now we can apply this function to the addresses in Amsterdam. First, lets apply this to small amount of people and also time it, so we can see how long our computer needs to work on the whole dataset.

In [14]:
%%time
df_adressen_witten.is_copy = False
df_adressen_witten['Nearest wormhotel information'] = df_adressen_witten['locatie_cleaned'].apply(np.vectorize(compute_nearest_target_information))
df_adressen_witten

NameError: name 'df_adressen_witten' is not defined

Ok, that's 87 seconds, which we need to multiply by 351 to get to the seconds it will take us for the whole set. This would be 8.5 hours.

Let's also convert and time the Staatsliedenbuurt set.

In [15]:
%%time
import numpy as np
df_staatsliedenbuurt.is_copy = False # To surpress SettingWithCopyWarning, see https://stackoverflow.com/questions/38809796/pandas-still-getting-settingwithcopywarning-even-after-using-loc/38810015#38810015
df_staatsliedenbuurt['Nearest wormhotel information'] = df_staatsliedenbuurt['locatie_cleaned'].apply(np.vectorize(compute_nearest_target_information))


NameError: name 'df_staatsliedenbuurt' is not defined

In [16]:
df_staatsliedenbuurt

NameError: name 'df_staatsliedenbuurt' is not defined

It ran 8:34 minutes. Let's get a sample and then run the whole thing at night :)

In [None]:
%%time
df_adressen_sample = df_adressen.sample(n=3000)
df_adressen_sample.is_copy = False
df_adressen_sample['Nearest wormhotel information'] = df_adressen_sample['locatie_cleaned'].apply(
    np.vectorize(compute_nearest_target_information))
df_adressen_sample.to_csv('df_adressen_sample.csv')

In [None]:
%%time
df_adressen.is_copy = False
df_adressen['Nearest wormhotel information'] = df_adressen['locatie_cleaned'].apply(
    np.vectorize(compute_nearest_target_information))
df_adressen.to_csv('df_adressen.csv')
df_adressen.to_excel('df_adressen.xls')


## Add the hotel information as seperate columns

In [None]:
len(df_adressen['nearest_hotel_values'][45])
df_adressen['nearest_hotel_values'][45]

df_buurtcompost.columns

In [17]:
# Let's split it! Value inside df_adressen['Nearest wormhotel information'] are
# 'seconds, meters, index, buurtcompost_values', and buurtcompostvalues are
#  'OBJECTNUMMER', 'Straatnaam', 'Initiatiefnemer', 'Email', 'Aantal_bewoners', 'Soort_afval',
# 'Gebruik', 'Startjaar', 'Foto', 'COORDS', 'LNG', 'LAT', 'Unnamed: 12', 'locatie_cleaned''

df_adressen['nearest_hotel_duration'], df_adressen['nearest_hotel_meters'], df_adressen[
    'nearest_hotel_index'], df_adressen['nearest_hotel_values'] = zip(*df_adressen['Nearest wormhotel information'])

df_adressen['hotel_OBJECTNUMMER'], df_adressen['hotel_Straatnaam'], df_adressen['hotel_Initiatiefnemer'], \
    df_adressen['hotel_Email'], df_adressen['hotel_Aantal_bewoners'], df_adressen['hotel_Soort_afval', ], \
    df_adressen['hotel_Gebruik'], df_adressen['hotel_Startjaar'], df_adressen['hotel_Foto'], \
    df_adressen['hotel_COORDS'], df_adressen['hotel_LNG'], df_adressen['hotel_LAT'], df_adressen['hotel_Unnamed: 12'], \
    df_adressen['hotel_locatie_cleaned'], df_adressen['hotel_lng'], df_adressen['hotel_lat'] = zip(*df_adressen['nearest_hotel_values'])


NameError: name 'df_adressen' is not defined

# Trying things out: let's plot a heatmap

In [18]:
InteractiveShell.ast_node_interactivity = "all"
# df_adressen[df_adressen['Naam gebiedsgerichtwerkengebied'] == "Westerpark"]
df_active_dataset = df_adressen[df_adressen['Naam gebiedsgerichtwerkengebied'] == "Westerpark"]

NameError: name 'df_adressen' is not defined

In [19]:
df_active_dataset

NameError: name 'df_active_dataset' is not defined

In [20]:
import folium
from folium.plugins import HeatMap

In [21]:
max_amount = float(df_active_dataset['nearest_hotel_duration'].max())
hmap = folium.Map(location=[52.3824856,4.877951499999995], zoom_start=16, tiles='stamentoner',)
data = list(zip(df_active_dataset.lat.values, df_active_dataset.lng.values, df_active_dataset['Seconds to nearest wormhotel'].values))

hm_wide = HeatMap(data,
                  min_opacity=0.1,
                  max_val=max_amount,
                  radius=15, blur=12,
                  max_zoom=1,
                  ).add_to(hmap)


hmap



NameError: name 'df_active_dataset' is not defined

Pretty, but not that useful! It seems that a lot of people are far from the hotels. However, zooming in, you can see that the houseboats and other addresses which most likely have one household have very little color. This probably means that many addresses can share the same coordinate! This skews the results, and increases the calculation time. So it's probably a good idea to only keep unique coordinates for our calculation.

## Back to the drawing board: adjusting the addresses dataset to only include unique coordinates


In [None]:
df_adressen_witten['locatie_cleaned'].describe()

Ah! Out of the 1454 in my small neighbourhood set, only 664 rows have a unique coordinate.

In [None]:
df_adressen_witten_deduped = df_adressen_witten.drop_duplicates(subset="locatie_cleaned")
len(df_adressen_witten_deduped)

In [None]:
df_staatsliedenbuurt_deduped = df_staatsliedenbuurt.drop_duplicates(subset="locatie_cleaned")
len(df_staatsliedenbuurt)
len(df_staatsliedenbuurt_deduped)

In [22]:
df_adressen_deduped = df_adressen.drop_duplicates(subset="locatie_cleaned")
len(df_adressen)
len(df_adressen_deduped)

NameError: name 'df_adressen' is not defined

In [23]:
df_active_dataset = df_adressen_deduped[df_adressen_deduped['Naam gebiedsgerichtwerkengebied'] == "Westerpark"]
max_amount = float(df_active_dataset['Seconds to nearest wormhotel'].max())
hmap = folium.Map(location=[52.3824856,4.877951499999995], zoom_start=16, tiles='stamentoner',)
data = list(zip(df_active_dataset.lat.values, df_active_dataset.lng.values, df_active_dataset['Seconds to nearest wormhotel'].values))

hm_wide = HeatMap(data,
                  min_opacity=0.1,
                  max_val=max_amount,
                  radius=15, blur=12,
                  max_zoom=1,
                  ).add_to(hmap)


hmap

NameError: name 'df_adressen_deduped' is not defined

A bit better, but still not ideal. If there are no houses in a certain place, it looks like it's closeby to a hotel. I think we need markers and bins for this. Maybe calculate an average score per neighbourhood and cloropleth it!

In [24]:
# Example of a single cell
df_adressen_witten['Nearest wormhotel information'].iloc[0]
# Reach one of the arrays like this
df_adressen_witten['Nearest wormhotel information'].iloc[0][0]
# Reach inside the values of the hotel like this
df_adressen_witten['Nearest wormhotel information'].iloc[0][3][1]

NameError: name 'df_adressen_witten' is not defined

In [25]:
# Let's see how many hotels there are nearby
df_staatsliedenbuurt.nearest_hotel_index.unique()

NameError: name 'df_staatsliedenbuurt' is not defined

# Another approach: map Isochrones around the hotels
By looking up how far people can walk around all hotels, we can compute where it's most optimal to place a wormhotel.

In [26]:
InteractiveShell.ast_node_interactivity = "last_expr"

buurtcompostlocaties_markers = list(zip(df_buurtcompost.lat.values, df_buurtcompost.lng.values, df_buurtcompost.Straatnaam))
hmap = folium.Map(location=[52.3824856,4.877951499999995], zoom_start=13, tiles='stamentoner',)

feature_group = folium.FeatureGroup("Locations")
for lat, lon, name in buurtcompostlocaties_markers:
    feature_group.add_child(folium.Marker(location=[lat,lon],popup=name))

my_house = folium.FeatureGroup("Locations")
my_house.add_child(folium.Marker([52.381994, 4.876480], popup='My house (approx.)', icon=folium.Icon(color='red',icon='info-sign')))    
    
hmap.add_child(my_house)
hmap.add_child(feature_group)



<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75400>

<folium.map.FeatureGroup at 0x110c75be0>

I have not found a good way to plot Isochrones with OSRM, so I'll use the [Openrouteservice](https://openrouteservice.org/) instead. It only allows 5 calls at a time for their free tier, so this will be a bit clunky

In [27]:
buurtcompostlocaties_arrays
InteractiveShell.ast_node_interactivity = "all"
len(buurtcompostlocaties_arrays) / 5 # means I got to run this 8 times
buurtcompostlocaties_arrays_5 = [buurtcompostlocaties_arrays[i:i+5] for i  in range(0, len(buurtcompostlocaties_arrays), 5)]
len(buurtcompostlocaties_arrays_5) # This gives me 8 arrays of 3-5 entries which I can use to request the URLs
buurtcompostlocaties_arrays_5

NameError: name 'buurtcompostlocaties_arrays' is not defined

In [None]:
import geojson


def create_isochrone_json(range_seconds, filename):

    isochrone_features = []

    for i in range(len(buurtcompostlocaties_arrays_5)):
        coords = '|'.join(buurtcompostlocaties_arrays_5[i].flatten())
        requestURL = 'https://private-anon-e7e15c7342-openrouteservice.apiary-proxy.com/isochrones' + \
        '?api_key=58d904a497c67e00015b45fc892c214578cd4fae9ea5f36e23e8cb2f&profile=foot-walking' + \
        '&range=' + range_seconds + '&location_type=destination&locations=' + coords
        features = requests.get(requestURL).json()['features']
        isochrone_features.append(features)

        isochrone_featurecollection = {
            "type": "FeatureCollection",
            "features": []
        }

    for i in range(len(isochrone_features)):
                   isochrone_featurecollection['features'].extend(isochrone_features[i])

    with open(filename, 'w') as file:
        geojson.dump(isochrone_featurecollection, file, indent=4, sort_keys=False)

create_isochrone_json('300', 'isochrone_featurecollection_300.geojson')
create_isochrone_json('600', 'isochrone_featurecollection_600.geojson')

Cool! Let's plot it on the map!

In [None]:
InteractiveShell.ast_node_interactivity = "last_expr"

buurtcompostlocaties_markers = list(zip(df_buurtcompost.lat.values, df_buurtcompost.lng.values, df_buurtcompost.Straatnaam))
hmap = folium.Map(location=[52.3824856,4.877951499999995], zoom_start=13, tiles='stamentoner',)

feature_group = folium.FeatureGroup("Locations")
for lat, lon, name in buurtcompostlocaties_markers:
    feature_group.add_child(folium.Marker(location=[lat,lon],popup=name))
    hmap.add_child(feature_group)

folium.GeoJson(open("isochrone_featurecollection_300.geojson",encoding = "utf-8-sig").read(), name='geojson').add_to(hmap)

hmap

How about a ten minute walk?

In [None]:
InteractiveShell.ast_node_interactivity = "last_expr"

buurtcompostlocaties_markers = list(zip(df_buurtcompost.lat.values, df_buurtcompost.lng.values, df_buurtcompost.Straatnaam))
hmap = folium.Map(location=[52.3824856,4.877951499999995], zoom_start=13, tiles='stamentoner',)

feature_group = folium.FeatureGroup("Locations")
for lat, lon, name in buurtcompostlocaties_markers:
    feature_group.add_child(folium.Marker(location=[lat,lon],popup=name))
    hmap.add_child(feature_group)

folium.GeoJson(open("isochrone_featurecollection_600.geojson",encoding = "utf-8-sig").read(), name='geojson').add_to(hmap)

hmap

In [None]:
df_adressen