Bachelor thesis - Richard Borschke - 7337876 - University of Cologne - A spatio-temporal analysis of usage patterns in free-floating shared mobility


# Data Supplementation

This notebook supplements the prepared trip data with the additional attributes needed for the analysis. The following steps are performed:
* Data loading of prepared trip files
* Assignment of time bucket of trip
* Calculating distance to city center
* POI type supplementation (TODO: decide which types; the candidates are listed in the "POI Type Supplementation" section below)


Observation period: 18.11.2019 to 29.02.2020. Data for 05.11.2019 also exists, but that day is excluded.

The data set is missing 2 days (15.01. and 16.01.).

The car data is missing 4 days (06.12. to 09.12.).

### Imports

In [28]:
import pandas as pd
import glob
from haversine import haversine 

import numpy as np
from sklearn.neighbors import BallTree

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

import osmnx as ox
ox.config(log_console=True, use_cache=True)
ox.__version__

#import seaborn as sns

'0.16.2'

### Load Data

In [2]:
# load monthly data of mode based on selected months
def load_data(mode, months):
    """Load the prepared monthly trip files of one mode into a single frame.

    Parameters
    ----------
    mode : str
        Mode name as used in the file names (the notebook passes 'car',
        'bicycle' and 'scooter').
    months : iterable of str
        Month identifiers as used in the file names (the notebook uses
        'YYYYMM' strings).

    Returns
    -------
    pandas.DataFrame
        The rows of all matching files, stacked in the order of `months`,
        with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If `months` is empty or if no file matches one of the months.
    """
    frames = []
    for month in months:
        pattern = 'Data/Modes/data_prepared_{}_{}.csv'.format(mode, month)
        # sort the matches so the row order does not depend on the file system
        files = sorted(glob.glob(pattern))
        if not files:
            raise ValueError('No prepared data file found for pattern {!r}'.format(pattern))
        frames.extend(pd.read_csv(file) for file in files)
    if not frames:
        raise ValueError('No months given, nothing to load')
    # concatenate once instead of re-copying the growing frame for every month
    return pd.concat(frames, ignore_index=True)

# load full data of mode
def load_data_mode_full(mode):
    """Read the single prepared CSV file that holds all trips of `mode`.

    The file is expected at 'Data/Modes/data_prepared_full_<mode>.csv',
    relative to the working directory; the parsed frame is returned as is.
    """
    file_path = 'Data/Modes/data_prepared_full_{}.csv'.format(mode)
    return pd.read_csv(file_path)

In [3]:
# months to load, each given as a 'YYYYMM' string
months = ['201911', '201912', '202001', '202002']
# switch: set to True to build the data set from the monthly files
load_monthly_data = False
if load_monthly_data:
    data_car = load_data('car', months)
    data_bicycle = load_data('bicycle', months)
    data_scooter = load_data('scooter', months)
    # stack the three modes into one frame with a fresh index
    data_full = pd.concat([data_car, data_bicycle, data_scooter], ignore_index=True)

In [33]:
# switch: set to True to load the full per-mode files
load_full_data = True
if load_full_data:
    data_car = load_data_mode_full('car')
    data_bicycle = load_data_mode_full('bicycle')
    data_scooter = load_data_mode_full('scooter')
    # stack the three modes into one frame with a fresh index
    data_full = pd.concat([data_car, data_bicycle, data_scooter], ignore_index=True)

### Time Bucket Assignment

### Calculating Distance to City Center

In [5]:
# calculate haversine distance to city center in m
def calculate_distance_to_city_center(data_full, center=(50.941724380890186, 6.958446824087053)):
    """Add the haversine distance from each trip's origin and destination to a reference point.

    The distances are computed for all rows at once with numpy instead of a
    row-wise `apply`, which also makes the function work on an empty frame.

    Parameters
    ----------
    data_full : pandas.DataFrame
        Trip data with the columns 'latitude_start', 'longitude_start',
        'latitude_end' and 'longitude_end' (degrees).
    center : tuple of float, optional
        (latitude, longitude) of the reference point in degrees. The default
        is the point the notebook uses as the city center.

    Returns
    -------
    pandas.DataFrame
        A copy of `data_full` with the integer columns 'dist_center_start'
        and 'dist_center_end' (metres, truncated towards zero). The input
        frame is not modified.

    Raises
    ------
    ValueError
        If a coordinate is missing (NaN), since no integer distance exists.
    """
    # mean Earth radius in km; intended to match the value used by the
    # `haversine` package so results stay comparable -- TODO confirm
    earth_radius_km = 6371.0088
    center_lat = np.radians(center[0])
    center_lon = np.radians(center[1])

    def _distance_m(lat_deg, lon_deg):
        # haversine formula, evaluated for the whole column at once
        lat = np.radians(lat_deg.to_numpy(dtype=float))
        lon = np.radians(lon_deg.to_numpy(dtype=float))
        a = (np.sin((center_lat - lat) * 0.5) ** 2
             + np.cos(lat) * np.cos(center_lat) * np.sin((center_lon - lon) * 0.5) ** 2)
        metres = 2 * earth_radius_km * np.arcsin(np.sqrt(a)) * 1000  # convert km to metres
        if np.isnan(metres).any():
            raise ValueError('cannot convert float NaN to integer')
        # astype truncates towards zero, like int() on a float
        return metres.astype(np.int64)

    data_full_d = data_full.copy()
    data_full_d['dist_center_start'] = _distance_m(data_full_d['latitude_start'], data_full_d['longitude_start'])
    data_full_d['dist_center_end'] = _distance_m(data_full_d['latitude_end'], data_full_d['longitude_end'])
    return data_full_d

In [6]:
# calculate distance to city center (Dom) for both origin and destination;
# the result is a copy of data_full with the two distance columns added
data_full_dist = calculate_distance_to_city_center(data_full)

In [7]:
# inspect the columns, non-null counts and dtypes of the supplemented frame
data_full_dist.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 410295 entries, 0 to 134387
Data columns (total 23 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   id                 410295 non-null  object 
 1   provider           410295 non-null  object 
 2   vehicleType        410295 non-null  object 
 3   date_start         410295 non-null  object 
 4   time_start         410295 non-null  int64  
 5   date_end           410295 non-null  object 
 6   time_end           410295 non-null  int64  
 7   year               410295 non-null  int64  
 8   month              410295 non-null  int64  
 9   weekday            410295 non-null  int64  
 10  hour               410295 non-null  int64  
 11  weekend            410295 non-null  int64  
 12  longitude_start    410295 non-null  float64
 13  latitude_start     410295 non-null  float64
 14  longitude_end      410295 non-null  float64
 15  latitude_end       410295 non-null  float64
 16  co

### POI Type Supplementation

* Aeroway
* Arts, culture and entertainment
* Education
* Finance
* Food and drink
* Healthcare
* History
* Leisure
* Office
* Sport
* Shop
* Tourism
* Transportation

In [27]:
# prepare POI data for assignment to trip data
def prepare_POI_data(POI_data):
    """Reduce a raw POI frame to its node features and the columns used for the trip assignment.

    Parameters
    ----------
    POI_data : GeoDataFrame
        POI features as returned by the osmnx POI query used in this
        notebook. It must contain an 'element_type' column and a geometry
        whose `.x` / `.y` give the coordinates.

    Returns
    -------
    GeoDataFrame
        A new frame (the input is left untouched) holding only the rows with
        element_type == 'node' and, where present, the columns 'unique_id',
        'osmid', 'geometry', 'name' and 'amenity' in their original order,
        plus:
        - 'longitude' / 'latitude': coordinates taken from the geometry,
        - 'used': initialised to 0; meant to flag whether the POI is used as
          neighbor of a trip origin or destination.
    """
    keep_columns = {'unique_id', 'osmid', 'geometry', 'name', 'amenity'}
    # keep only rows whose element type is 'node'
    is_node = POI_data['element_type'] == 'node'
    # select by name instead of drop(labels, 1): the positional axis argument
    # is no longer accepted by recent pandas versions
    selected_columns = [column for column in POI_data.columns if column in keep_columns]
    # copy so that the caller's frame is not modified by the assignments below
    POI_nodes = POI_data.loc[is_node, selected_columns].copy()
    # create longitude and latitude columns
    POI_nodes['longitude'] = POI_nodes.geometry.x
    POI_nodes['latitude'] = POI_nodes.geometry.y
    # create used column (check if POI is used as neighbor of trip origin or destination)
    POI_nodes['used'] = 0
    return POI_nodes

# supplement data with POI types
def supplement_data(data_mode):
    """Placeholder for the POI supplementation: currently returns `data_mode` unchanged."""
    return data_mode

In [9]:
# place name passed to the OpenStreetMap POI queries in the cells below
place = 'Köln, Germany'

In [12]:
# aeroway POI: belongs to transportation, but uses a different radius for the assignment (handled separately)
tags={'aeroway': 'terminal'}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_aero = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_aero

  and should_run_async(code)


1


Unnamed: 0,unique_id,osmid,name,geometry,longitude,latitude,used
0,node/27296045,27296045,Terminal 2 Fluggastbereich D,POINT (7.11970 50.88054),7.1197,50.8805,0


In [41]:
# NOTE(review): POI_education is only created in the later "education POI" cell;
# on a fresh kernel that cell has to be run before this one.
# Adds the columns 'latitude_rad' and 'longitude_rad' to POI_education, holding
# the coordinates converted from degrees to radians.
for column in POI_education[["latitude", "longitude"]]:
    rad = np.deg2rad(POI_education[column].values)
    POI_education[f'{column}_rad'] = rad
    
POI_education

Unnamed: 0,unique_id,osmid,geometry,amenity,name,longitude,latitude,used,latitude_rad,longitude_rad
0,node/75874741,75874741,POINT (6.92635 50.95670),school,Lauder-Morijah-Grundschule,6.9263,50.9567,0,0.889362,0.120887
1,node/160695366,160695366,POINT (6.95199 50.94475),library,Erzbischöfliche Diözesan- und Dombibliothek,6.9520,50.9448,0,0.889154,0.121335
2,node/215423659,215423659,POINT (6.90924 50.96505),school,Montessori-Grundschule Ossendorf,6.9092,50.9651,0,0.889509,0.120588
3,node/221175135,221175135,POINT (6.89374 50.97834),school,Bildungszentrum Butzweiler Hof,6.8937,50.9783,0,0.889739,0.120318
4,node/223292416,223292416,POINT (6.92708 50.97106),kindergarten,,6.9271,50.9711,0,0.889614,0.120901
...,...,...,...,...,...,...,...,...,...,...
303,node/8126692679,8126692679,POINT (6.87622 50.94512),school,Anna-Freud-Förderschule,6.8762,50.9451,0,0.889160,0.120012
304,node/8156001437,8156001437,POINT (6.94488 50.93418),music_school,drummer's focus,6.9449,50.9342,0,0.888969,0.121211
305,node/8188016679,8188016679,POINT (6.88193 50.95509),kindergarten,Kindergruppe Sonnenstrahlen e.V.,6.8819,50.9551,0,0.889334,0.120112
306,node/8190632569,8190632569,POINT (6.94920 50.93439),library,Stadtbibliothek Köln,6.9492,50.9344,0,0.888973,0.121286


In [34]:
# Work on a copy of the trip data and add the columns 'latitude_start_rad' and
# 'longitude_start_rad': the trip origins converted from degrees to radians.
locations_b = data_full.copy()
for column in locations_b[["latitude_start", "longitude_start"]]:
    rad = np.deg2rad(locations_b[column].values)
    locations_b[f'{column}_rad'] = rad
locations_b

Unnamed: 0,id,provider,vehicleType,date_start,time_start,date_end,time_end,year,month,weekday,...,latitude_start,longitude_end,latitude_end,coordinates_start,coordinates_end,distance,duration,speed,latitude_start_rad,longitude_start_rad
0,ZnJlZWZsb2F0OjpjYXIyZ286OldNRTQ1MzM0MjFLMzE5MTQy,car2go,car,2019-11-18,1300,2019-11-18,1310,2019,11,0,...,50.947880,6.890660,50.947170,"(50.9479, 6.887)","(50.9472, 6.8907)",271.635363,595.0,1.643508,0.889208,0.120200
1,ZnJlZWZsb2F0OjpjYXIyZ286OldNRTQ1MzM0MjFLMzIxNTM2,car2go,car,2019-11-18,1300,2019-11-18,1320,2019,11,0,...,50.938210,6.944940,50.941190,"(50.9382, 6.9543)","(50.9412, 6.9449)",734.795721,1197.0,2.209912,0.889039,0.121375
2,ZnJlZWZsb2F0OjpjYXIyZ286OldNRTQ1MzM0MjFLMzcyNDA3,car2go,car,2019-11-18,1300,2019-11-18,1320,2019,11,0,...,50.949150,7.014890,50.966520,"(50.9492, 6.9852)","(50.9665, 7.0149)",2837.091541,1197.0,8.532606,0.889230,0.121915
3,ZnJlZWZsb2F0OjpjYXIyZ286OldNRTQ1MzM0MjFLMjkwNTc5,car2go,car,2019-11-18,1300,2019-11-18,1325,2019,11,0,...,50.946330,6.907470,50.951340,"(50.9463, 6.9171)","(50.9513, 6.9075)",873.828450,1496.0,2.102796,0.889181,0.120726
4,ZnJlZWZsb2F0OjpjYXIyZ286OldNRTQ1MzM0MjFLMjkzNjQy,car2go,car,2019-11-18,1300,2019-11-18,1325,2019,11,0,...,50.970100,6.996920,50.942590,"(50.9701, 7.0187)","(50.9426, 6.9969)",3417.963314,1496.0,8.225045,0.889596,0.122499
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
410290,YmlrZTo6dGllcjo6YTFlMmYzZjEtNjkyZS00NjA4LWE5YT...,tier,scooter,2020-02-29,2340,2020-02-29,2350,2020,2,5,...,50.940467,6.956911,50.938427,"(50.9405, 6.9601)","(50.9384, 6.9569)",316.302763,600.0,1.897817,0.889079,0.121476
410291,YmlrZTo6dGllcjo6OGUxYjAwNDUtMDY3NS00Mzc4LTk3Yz...,tier,scooter,2020-02-29,2340,2020-02-29,2355,2020,2,5,...,50.915908,6.944665,50.904921,"(50.9159, 6.9274)","(50.9049, 6.9447)",1720.132594,902.0,6.865274,0.888650,0.120906
410292,YmlrZTo6dGllcjo6ZGE1NmNkMDgtN2Q5ZS00YzE0LWI2OT...,tier,scooter,2020-02-29,2340,2020-02-29,2355,2020,2,5,...,50.928810,6.928539,50.920578,"(50.9288, 6.9384)","(50.9206, 6.9285)",1148.006312,902.0,4.581843,0.888875,0.121098
410293,YmlrZTo6dGllcjo6NzRlY2JjZTYtZWQ3Yi00ZGY3LWJmMG...,tier,scooter,2020-02-29,2345,2020-02-29,2350,2020,2,5,...,50.964634,6.969438,50.967416,"(50.9646, 6.9751)","(50.9674, 6.9694)",501.354847,300.0,6.016258,0.889501,0.121738


In [43]:
# Builds the ball tree from the latitude and longitude values (in radians)
# of the education POI.
# NOTE(review): POI_education and its *_rad columns come from other cells
# (the "education POI" cell is defined further down) -- run those first.
ball = BallTree(POI_education[["latitude_rad", "longitude_rad"]].values, metric='haversine')
# The number of neighbors to return per query point.
k = 1
# Queries the tree with the trip origins. This returns two arrays: the
# distances to the k nearest POI and their row positions in the tree data.
# NOTE(review): with metric='haversine' the distances appear to be in radians
# (values around 1e-5 below); multiply by the Earth radius to get a length --
# confirm against the scikit-learn documentation.
distances, indices = ball.query(locations_b[["latitude_start_rad", "longitude_start_rad"]].values, k = k)

len(distances)
distances


array([[1.54472109e-05],
       [2.93378719e-05],
       [1.25208218e-04],
       ...,
       [2.60971898e-05],
       [2.38431841e-05],
       [2.70311393e-05]])

In [13]:
# arts, culture and entertainment POI
# OSM `amenity` values that make up this POI type
tags = {'amenity': ['arts_centre','cinema', 'brothel', 'casino', 'community_centre', 'gambling', 'love_hotel', 'nightclub', 'planetarium', 'public_bookcase', 'social_centre', 'stripclub', 'studio', 'swingerclub', 'theatre']}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_art_culture_entertainment = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_art_culture_entertainment

  and should_run_async(code)


205


Unnamed: 0,unique_id,osmid,amenity,name,geometry,longitude,latitude,used
0,node/54020150,54020150,theatre,Atelier Theater,POINT (6.93548 50.93259),6.9355,50.9326,0
1,node/247382429,247382429,planetarium,Planetarium und Sternwarte Köln,POINT (6.95649 50.96660),6.9565,50.9666,0
2,node/256221801,256221801,cinema,Metropolis,POINT (6.95821 50.95108),6.9582,50.9511,0
3,node/257905592,257905592,theatre,Gloria Theater,POINT (6.94495 50.93749),6.9449,50.9375,0
4,node/258183467,258183467,community_centre,Don-Bosco-Club Köln-Mülheim,POINT (7.01081 50.97438),7.0108,50.9744,0


In [15]:
# education POI
# OSM `amenity` values that make up this POI type
tags = {'amenity': ['college','driving_school', 'kindergarten', 'language_school', 'library', 'music_school', 'school', 'university']}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_education = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_education

  and should_run_async(code)


308


Unnamed: 0,unique_id,osmid,geometry,amenity,name,longitude,latitude,used
0,node/75874741,75874741,POINT (6.92635 50.95670),school,Lauder-Morijah-Grundschule,6.9263,50.9567,0
1,node/160695366,160695366,POINT (6.95199 50.94475),library,Erzbischöfliche Diözesan- und Dombibliothek,6.9520,50.9448,0
2,node/215423659,215423659,POINT (6.90924 50.96505),school,Montessori-Grundschule Ossendorf,6.9092,50.9651,0
3,node/221175135,221175135,POINT (6.89374 50.97834),school,Bildungszentrum Butzweiler Hof,6.8937,50.9783,0
4,node/223292416,223292416,POINT (6.92708 50.97106),kindergarten,,6.9271,50.9711,0
...,...,...,...,...,...,...,...,...
303,node/8126692679,8126692679,POINT (6.87622 50.94512),school,Anna-Freud-Förderschule,6.8762,50.9451,0
304,node/8156001437,8156001437,POINT (6.94488 50.93418),music_school,drummer's focus,6.9449,50.9342,0
305,node/8188016679,8188016679,POINT (6.88193 50.95509),kindergarten,Kindergruppe Sonnenstrahlen e.V.,6.8819,50.9551,0
306,node/8190632569,8190632569,POINT (6.94920 50.93439),library,Stadtbibliothek Köln,6.9492,50.9344,0


In [16]:
# finance POI
# OSM `amenity` values that make up this POI type
tags = {'amenity': ['atm','bank', 'bureau_de_change']}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_finance = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_finance

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,amenity,name,geometry,longitude,latitude,used
0,node/54749669,54749669,bank,Sparkasse KölnBonn,POINT (6.94095 50.93541),6.9409,50.9354,0
1,node/196170706,196170706,bank,Sparkasse,POINT (6.91989 50.95370),6.9199,50.9537,0
2,node/224445220,224445220,bank,Sparkasse,POINT (6.90095 50.95752),6.9009,50.9575,0
3,node/230226555,230226555,bank,Sparkasse,POINT (6.89975 50.96789),6.8998,50.9679,0
4,node/232284761,232284761,atm,Sparkasse KölnBonn,POINT (6.95012 50.96001),6.9501,50.9600,0
...,...,...,...,...,...,...,...,...
358,node/8131880605,8131880605,atm,,POINT (7.00520 50.96093),7.0052,50.9609,0
359,node/8132374205,8132374205,atm,,POINT (6.93897 50.93838),6.9390,50.9384,0
360,node/8132374206,8132374206,atm,,POINT (6.93907 50.93876),6.9391,50.9388,0
361,node/8132435066,8132435066,atm,,POINT (6.94059 50.93533),6.9406,50.9353,0


In [17]:
# food and drink POI
# OSM `amenity` values that make up this POI type
tags = {'amenity': ['bar','biergarten','cafe','drinking_water','fast_food','food_court','ice_cream','pub','restaurant', 'internet_cafe']}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_food_drink = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_food_drink

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,amenity,name,geometry,longitude,latitude,used
0,node/359460,359460,fast_food,Papa-Pizza,POINT (6.94125 50.91559),6.9412,50.9156,0
1,node/359832,359832,restaurant,Campus,POINT (6.96375 50.90516),6.9638,50.9052,0
2,node/39606803,39606803,fast_food,Burger King,POINT (6.91508 50.94915),6.9151,50.9492,0
3,node/54020544,54020544,biergarten,Biergarten Rathenauplatz Veedelstreff,POINT (6.93697 50.93134),6.9370,50.9313,0
4,node/55441368,55441368,restaurant,Blauer König,POINT (7.00649 50.94040),7.0065,50.9404,0
...,...,...,...,...,...,...,...,...
2840,node/8143715869,8143715869,restaurant,Jonny Turista,POINT (6.94539 50.93330),6.9454,50.9333,0
2841,node/8182655788,8182655788,restaurant,alla Pappa,POINT (6.95854 50.92428),6.9585,50.9243,0
2842,node/8235340508,8235340508,restaurant,DOWN TO EARTH NOODLES,POINT (6.92774 50.92020),6.9277,50.9202,0
2843,node/8254554285,8254554285,pub,Kupferkanne,POINT (6.96652 50.97757),6.9665,50.9776,0


In [18]:
# healthcare POI
# combines OSM `amenity` values with one `emergency` value; features matched
# only through `emergency` show no amenity value in the output below
tags = {'amenity': ['clinic','dentist', 'doctors', 'hospital', 'nursing_home', 'pharmacy', 'social_facility', 'veterinary'],
       'emergency': ['emergency_ward_entrance']}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_healthcare = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_healthcare

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,amenity,name,geometry,longitude,latitude,used
0,node/242516129,242516129,pharmacy,Severin-Apotheke,POINT (6.95884 50.92396),6.9588,50.9240,0
1,node/245988507,245988507,doctors,Andreas Koch,POINT (6.92525 50.95527),6.9253,50.9553,0
2,node/246271893,246271893,pharmacy,Apotheke am Bilderstöckchen,POINT (6.92849 50.97136),6.9285,50.9714,0
3,node/249759181,249759181,pharmacy,Linden Apotheke,POINT (6.85879 51.06506),6.8588,51.0651,0
4,node/259198454,259198454,pharmacy,Gürzenich-Apotheke,POINT (6.95712 50.93615),6.9571,50.9362,0
...,...,...,...,...,...,...,...,...
698,node/8241405142,8241405142,,,POINT (6.92405 50.93183),6.9240,50.9318,0
699,node/8241405144,8241405144,,,POINT (6.92419 50.93169),6.9242,50.9317,0
700,node/8241405145,8241405145,,,POINT (6.92413 50.93184),6.9241,50.9318,0
701,node/8241405146,8241405146,,,POINT (6.92418 50.93174),6.9242,50.9317,0


In [19]:
# history POI
# NOTE(review): `True` presumably selects every feature carrying a `historic`
# tag, whatever its value -- confirm against the osmnx documentation
tags = {'historic': True}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_history = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_history

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,geometry,name,amenity,longitude,latitude,used
0,node/28121883,28121883,POINT (6.95713 50.94129),,,6.9571,50.9413,0
1,node/177488956,177488956,POINT (6.97035 50.93787),Römisches Osttor,,6.9703,50.9379,0
2,node/267352592,267352592,POINT (6.95923 50.94143),St. Maria ad Gradus,,6.9592,50.9414,0
3,node/292400378,292400378,POINT (6.96873 50.93823),Kürassier Denkmal,,6.9687,50.9382,0
4,node/298083386,298083386,POINT (6.96928 50.92064),Hafenkran 31a,,6.9693,50.9206,0
...,...,...,...,...,...,...,...,...
2052,node/8141400510,8141400510,POINT (6.91091 50.93676),Alexander Weinberg,,6.9109,50.9368,0
2053,node/8141400511,8141400511,POINT (6.91091 50.93676),Johanna Weinberg,,6.9109,50.9368,0
2054,node/8141400512,8141400512,POINT (6.91091 50.93676),Ernst Jacob,,6.9109,50.9368,0
2055,node/8158308957,8158308957,POINT (6.98733 50.95379),Miriam,,6.9873,50.9538,0


In [20]:
# leisure POI
# NOTE(review): `True` presumably selects every feature carrying a `leisure`
# tag, whatever its value -- confirm against the osmnx documentation
tags = {'leisure': True}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_leisure = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_leisure

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,geometry,name,amenity,longitude,latitude,used
0,node/28122271,28122271,POINT (6.96900 50.94101),Kletteranlage Hohenzollernbrücke,,6.9690,50.9410,0
1,node/97753701,97753701,POINT (6.92603 50.96285),,,6.9260,50.9628,0
2,node/98180411,98180411,POINT (6.92586 50.96427),,,6.9259,50.9643,0
3,node/137028641,137028641,POINT (6.93278 50.94508),,,6.9328,50.9451,0
4,node/238028510,238028510,POINT (6.90642 50.97715),,,6.9064,50.9771,0
...,...,...,...,...,...,...,...,...
722,node/8158366351,8158366351,POINT (6.91755 50.94653),,,6.9175,50.9465,0
723,node/8175168217,8175168217,POINT (6.90674 50.96552),,,6.9067,50.9655,0
724,node/8187767537,8187767537,POINT (6.93928 50.95491),Bogenlust GbR Eventlocation,,6.9393,50.9549,0
725,node/8238174660,8238174660,POINT (6.94805 50.97986),Außenterrasse,,6.9481,50.9799,0


In [21]:
# office POI
# NOTE(review): `True` presumably selects every feature carrying an `office`
# tag, whatever its value -- confirm against the osmnx documentation
tags = {'office': True}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_office = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_office

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,name,geometry,amenity,longitude,latitude,used
0,node/285569148,285569148,ampido,POINT (6.93751 50.94191),,6.9375,50.9419,0
1,node/297870881,297870881,Edelgrün,POINT (6.92405 50.94663),cafe,6.9241,50.9466,0
2,node/304561103,304561103,align media,POINT (6.92214 50.95597),,6.9221,50.9560,0
3,node/306136792,306136792,GIGATRONIK Köln GmbH,POINT (6.92347 50.97676),,6.9235,50.9768,0
4,node/332418979,332418979,Lieferando,POINT (6.91732 50.95049),,6.9173,50.9505,0
...,...,...,...,...,...,...,...,...
771,node/8219835707,8219835707,Leidens & Effert,POINT (7.09764 50.88226),,7.0976,50.8823,0
772,node/8219835708,8219835708,XNC GmbH,POINT (7.09746 50.88227),,7.0975,50.8823,0
773,node/8242684660,8242684660,ms Immobilien,POINT (6.88192 50.95550),,6.8819,50.9555,0
774,node/8242684664,8242684664,AXA - Butz&Hauke,POINT (6.88138 50.95542),,6.8814,50.9554,0


In [22]:
# sport POI
# NOTE(review): `True` presumably selects every feature carrying a `sport`
# tag, whatever its value -- confirm against the osmnx documentation
tags = {'sport': True}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_sport = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_sport

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,name,geometry,amenity,longitude,latitude,used
0,node/28122271,28122271,Kletteranlage Hohenzollernbrücke,POINT (6.96900 50.94101),,6.9690,50.9410,0
1,node/98180411,98180411,,POINT (6.92586 50.96427),,6.9259,50.9643,0
2,node/246271888,246271888,Olympia Sporthalle,POINT (6.94539 50.96193),,6.9454,50.9619,0
3,node/256542878,256542878,,POINT (6.95317 50.96213),,6.9532,50.9621,0
4,node/256742960,256742960,,POINT (6.95839 50.95928),,6.9584,50.9593,0
...,...,...,...,...,...,...,...,...
274,node/8137886073,8137886073,,POINT (6.87505 50.95417),,6.8750,50.9542,0
275,node/8185140561,8185140561,Bogenschule Köln,POINT (6.87429 50.98442),,6.8743,50.9844,0
276,node/8187767537,8187767537,Bogenlust GbR Eventlocation,POINT (6.93928 50.95491),,6.9393,50.9549,0
277,node/8233414558,8233414558,Cologne Dartshop,POINT (6.95101 50.97378),,6.9510,50.9738,0


In [23]:
# shop POI
# NOTE(review): `True` presumably selects every feature carrying a `shop`
# tag, whatever its value -- confirm against the osmnx documentation
tags = {'shop': True}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_shop = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_shop

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,name,geometry,amenity,longitude,latitude,used
0,node/28123308,28123308,Eckert,POINT (6.97405 50.94046),,6.9741,50.9405,0
1,node/53980042,53980042,JET-Tankstelle,POINT (6.90746 50.94506),fuel,6.9075,50.9451,0
2,node/55441040,55441040,Netto,POINT (7.00814 50.94087),,7.0081,50.9409,0
3,node/55448627,55448627,,POINT (7.00658 50.94038),,7.0066,50.9404,0
4,node/95083415,95083415,Kamps,POINT (6.92143 50.95448),,6.9214,50.9545,0
...,...,...,...,...,...,...,...,...
5241,node/8233414558,8233414558,Cologne Dartshop,POINT (6.95101 50.97378),,6.9510,50.9738,0
5242,node/8242491620,8242491620,Doña pelos,POINT (6.95789 50.95305),,6.9579,50.9531,0
5243,node/8242684654,8242684654,Mercato Olio e Vino,POINT (6.88569 50.95537),,6.8857,50.9554,0
5244,node/8253864159,8253864159,Cut World,POINT (6.92404 50.94690),,6.9240,50.9469,0


In [24]:
# tourism POI
# NOTE(review): `True` presumably selects every feature carrying a `tourism`
# tag, whatever its value -- confirm against the osmnx documentation
tags = {'tourism': True}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_tourism = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_tourism

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,geometry,name,amenity,longitude,latitude,used
0,node/28121900,28121900,POINT (6.96862 50.94108),,,6.9686,50.9411,0
1,node/36752479,36752479,POINT (6.90365 50.93283),,,6.9036,50.9328,0
2,node/78605265,78605265,POINT (6.95024 50.94056),NS-Dokumentationszentrum,,6.9502,50.9406,0
3,node/215210722,215210722,POINT (6.89459 50.98739),Coloneum,,6.8946,50.9874,0
4,node/246466024,246466024,POINT (6.97824 50.96344),,,6.9782,50.9634,0
...,...,...,...,...,...,...,...,...
707,node/8197223417,8197223417,POINT (6.83737 50.93542),,,6.8374,50.9354,0
708,node/8220385917,8220385917,POINT (6.83292 50.96557),,,6.8329,50.9656,0
709,node/8231513224,8231513224,POINT (6.96884 50.95722),,,6.9688,50.9572,0
710,node/8231513225,8231513225,POINT (6.97382 50.95592),,,6.9738,50.9559,0


In [25]:
# transportation POI
# combines values of three OSM keys: `amenity`, `public_transport` and `railway`
tags = {'amenity': ['bicycle_rental','boat_rental', 'boat_sharing', 'bus_station', 'car_rental', 'car_sharing', 'ferry_terminal', 'taxi'],
       'public_transport':['station'],
       'railway':['platform', 'station', 'tram_stop', 'subway_entrance']}
# retrieve the matching OpenStreetMap features for `place` and prepare them with prepare_POI_data
POI_transport = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
POI_transport

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,amenity,name,geometry,longitude,latitude,used
0,node/359829,359829,car_rental,Starcar Autovermietung,POINT (6.96395 50.90492),6.9640,50.9049,0
1,node/359831,359831,car_sharing,,POINT (6.96358 50.90565),6.9636,50.9057,0
2,node/361716,361716,,Eifelplatz,POINT (6.94351 50.92335),6.9435,50.9233,0
3,node/28122005,28122005,,Heumarkt,POINT (6.95992 50.93570),6.9599,50.9357,0
4,node/28122013,28122013,,Deutzer Freiheit,POINT (6.97137 50.93809),6.9714,50.9381,0
...,...,...,...,...,...,...,...,...
833,node/7022705866,7022705866,,Poststraße,POINT (6.94992 50.93205),6.9499,50.9320,0
834,node/7110567529,7110567529,car_rental,RKG Autovermietung,POINT (6.98132 50.92308),6.9813,50.9231,0
835,node/7334512627,7334512627,bicycle_rental,Charles-de-Gaulle Platz,POINT (6.97234 50.94165),6.9723,50.9417,0
836,node/7673526493,7673526493,,Arnoldshöhe,POINT (6.96700 50.89463),6.9670,50.8946,0


In [12]:
# load POI type data sets

### Save data sets

In [None]:
# save data by mode and months
def save_supplemented_data(months, data_supplemented):
    """Write one CSV file per requested month for the given trip data.

    `months` holds 'YYYYMM' strings. For each of them, the rows whose 'year'
    and 'month' columns match are written (without the index) to
    'Data/Modes/data_supplemented_<mode>_<YYYYMM>.csv', where <mode> is the
    'vehicleType' value of the first row of `data_supplemented`.
    """
    file_template = 'Data/Modes/data_supplemented_{}_{}.csv'
    for month in months:
        # split the 'YYYYMM' identifier into its numeric parts
        month_number = int(month[4:])
        year_number = int(month[:4])
        # the file is named after the mode found in the first row
        first_mode = data_supplemented['vehicleType'].iloc[0]
        # boolean mask selecting the trips of this year and month
        in_month = (data_supplemented['month'] == month_number) & (data_supplemented['year'] == year_number)
        monthly_rows = data_supplemented[in_month]
        monthly_rows.to_csv(file_template.format(first_mode, month), index = False)

# save data by mode only
def save_supplemented_data_full(data_supplemented):
    """Write the whole frame to a single CSV file.

    The target is 'Data/Modes/data_supplemented_full_<mode>.csv', where
    <mode> is the 'vehicleType' value of the first row; the index is not
    written.
    """
    first_mode = data_supplemented['vehicleType'].iloc[0]
    target_path = 'Data/Modes/data_supplemented_full_{}.csv'.format(first_mode)
    data_supplemented.to_csv(target_path, index = False)

In [24]:
# write the data sets of all modes, one file per selected month
# (monthly files avoid file sizes > 100 MB, which GitHub rules do not allow)
# switch: set to True to write the files
save_data = False
if save_data:
    save_supplemented_data(months, data_car)
    save_supplemented_data(months, data_bicycle)
    save_supplemented_data(months, data_scooter)

In [25]:
# write the data sets of all modes, one file per mode
# switch: set to True to write the files
save_data_full = False
if save_data_full:
    save_supplemented_data_full(data_car)
    save_supplemented_data_full(data_bicycle)
    save_supplemented_data_full(data_scooter)

### Descriptive statistics

### Test

In [26]:
# sanity check: does the bicycle data contain any missing value?
data_bicycle.isnull().values.any()

False

In [27]:
# number of car trips with weekday == 0 and hour == 2
len(data_car[(data_car['weekday'] == 0) & (data_car['hour'] == 2)])

229

In [28]:
# check which Python type holds the time_start column
type(data_full["time_start"])

  and should_run_async(code)


pandas.core.series.Series

In [29]:
# distinct providers present in the car data
data_car['provider'].unique()

array(['car2go'], dtype=object)

In [30]:
# smallest value of the duration column in the car data
data_car['duration'].min()

234.0

In [31]:
# first coordinates_start entry of the car data (displayed as a string below)
data_car['coordinates_start'][0]

'(50.9479, 6.887)'