Bachelor thesis - 7337876 - University of Cologne - A spatio-temporal analysis of usage patterns in free-floating shared mobility


# Data Supplementation

This notebook supplements the prepared trip data with the additional features needed for the analysis. The following steps are performed:
* Data loading of prepared trip files
* Assignment of time basket of trip (NOT needed)
* Calculating distance to city center
* POI type supplementation
* Save supplemented data and POI data


### Imports

In [1]:
import pandas as pd
import glob
from haversine import haversine 

import numpy as np
from sklearn.neighbors import BallTree

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

import osmnx as ox
ox.config(log_console=True, use_cache=True)
ox.__version__

'0.16.2'

### Load Data

In [2]:
# load monthly data of mode based on selected months
def load_data(mode, months):
    """Load and stack the prepared monthly trip files of one mode.

    For every entry of ``months`` the file
    ``Data/Modes/data_prepared_<mode>_<month>.csv`` is read; all months are
    concatenated in the given order and the result gets a fresh 0..n-1 index.

    Parameters
    ----------
    mode : str
        Mode name as it appears in the file names.
    months : iterable of str
        Month identifiers as they appear in the file names.

    Returns
    -------
    pandas.DataFrame
        All rows of the requested months. An empty frame is returned when
        ``months`` is empty.

    Raises
    ------
    ValueError
        If no file is found for one of the requested months.
    """
    frames = []
    for month in months:
        pattern = 'Data/Modes/data_prepared_{}_{}.csv'.format(mode, month)
        # sorted() keeps the row order reproducible should the pattern ever match several files
        matches = sorted(glob.glob(pattern))
        if not matches:
            raise ValueError('No prepared data file found for {!r}'.format(pattern))
        frames.extend(pd.read_csv(path) for path in matches)

    if not frames:
        # no months requested: nothing to load
        return pd.DataFrame()

    # a single concat at the end avoids re-copying the growing frame for every month
    return pd.concat(frames, ignore_index=True)

# load full data of mode
def load_data_mode_full(mode):
    """Read the single prepared CSV that holds all rows of one mode.

    The file ``Data/Modes/data_prepared_full_<mode>.csv`` is read with
    ``pandas.read_csv`` and returned unchanged.
    """
    csv_path = 'Data/Modes/data_prepared_full_{}.csv'.format(mode)
    return pd.read_csv(csv_path)

In [3]:
# months to load, written as 'YYYYMM' strings (reused further down when saving per-month files)
months = ['201911', '201912', '202001', '202002']

# switch: set True to assemble the data set from the monthly files
load_monthly_data = False

if load_monthly_data:
    # one frame per mode, loaded in the order car, bicycle, scooter
    data_car, data_bicycle, data_scooter = [
        load_data(mode_name, months) for mode_name in ('car', 'bicycle', 'scooter')
    ]
    # stack all modes into one frame with a fresh index
    data_full = pd.concat([data_car, data_bicycle, data_scooter], ignore_index=True)

In [4]:
# load the complete (all-months) file of every mode
# switch: set True if the full data set load is desired
load_full_data = True

if load_full_data:
    # one frame per mode, loaded in the order car, bicycle, scooter
    data_car, data_bicycle, data_scooter = [
        load_data_mode_full(mode_name) for mode_name in ('car', 'bicycle', 'scooter')
    ]
    # stack all modes into one frame with a fresh index
    data_full = pd.concat([data_car, data_bicycle, data_scooter], ignore_index=True)

### Time Basket Assignment

Time baskets / Day of Week / Time of Day
* Morning peak time / Mon - Fri / 5:00 - 8:59
* Weekday daytime / Mon - Fri / 9:00 - 15:59
* Evening peak time / Mon - Fri / 16:00 - 18:59
* Weekday evening / Mon - Fri / 19:00 - 22:59
* Weekday nighttime / Mon - Fri / 23:00 - 04:59


* Weekend daytime / Sat & Sun / 6:00 - 17:59
* Weekend evening / Sat & Sun / 18:00 - 22:59
* Weekend nighttime / Sat & Sun / 23:00 - 05:59

In [5]:
def create_time_baskets(df):
    """Label every row with its time basket, based on 'weekend' and 'hour'.

    The frame is modified in place (column 'time_basket' is added or
    overwritten) and also returned. Rows that match none of the rules keep
    the placeholder string '0'.
    """
    hour = df['hour']
    on_weekday = df['weekend'] == 0
    on_weekend = df['weekend'] == 1

    # (label, mask) pairs; the masks are mutually exclusive, so their order is irrelevant
    basket_rules = [
        ('morning_peak_time', on_weekday & (hour >= 5) & (hour < 9)),
        ('weekday_daytime', on_weekday & (hour >= 9) & (hour < 16)),
        ('evening_peak_time', on_weekday & (hour >= 16) & (hour < 19)),
        ('weekday_evening', on_weekday & (hour >= 19) & (hour < 23)),
        ('weekday_nighttime', on_weekday & ((hour >= 23) | (hour < 5))),
        ('weekend_daytime', on_weekend & (hour >= 6) & (hour < 18)),
        ('weekend_evening', on_weekend & (hour >= 18) & (hour < 23)),
        ('weekend_nighttime', on_weekend & ((hour >= 23) | (hour < 6))),
    ]
    labels = [label for label, _ in basket_rules]
    masks = [mask.to_numpy() for _, mask in basket_rules]

    # unmatched rows fall back to '0'
    df['time_basket'] = np.select(masks, labels, default='0')

    return df

In [6]:
# add the 'time_basket' column to the trip data
data_full = create_time_baskets(data_full)
# sanity check: list the distinct basket labels that were assigned
data_full['time_basket'].unique()

array(['weekday_daytime', 'evening_peak_time', 'weekday_evening',
       'weekday_nighttime', 'morning_peak_time', 'weekend_nighttime',
       'weekend_daytime', 'weekend_evening'], dtype=object)

### Calculating Distance to City Center

* Distance to city center from trip start
* Distance to city center from trip end

In [7]:
# calculate haversine distance to city center in m
def _haversine_to_point_m(lat_deg, lon_deg, point):
    """Return the truncated great-circle distance in metres from each (lat, lon) to ``point``."""
    # mean earth radius in km
    # NOTE(review): chosen to equal the default radius of the `haversine` package
    # so results stay comparable with the earlier row-wise version - confirm
    earth_radius_km = 6371.0088

    lat = np.radians(lat_deg.to_numpy(dtype=float))
    lon = np.radians(lon_deg.to_numpy(dtype=float))
    if np.isnan(lat).any() or np.isnan(lon).any():
        # a missing coordinate cannot be converted to an integer distance
        raise ValueError('cannot compute distance to city center: coordinates contain NaN')

    point_lat, point_lon = np.radians(point)
    # haversine formula; the clip guards arcsin against rounding slightly above 1
    h = (np.sin((point_lat - lat) * 0.5) ** 2
         + np.cos(lat) * np.cos(point_lat) * np.sin((point_lon - lon) * 0.5) ** 2)
    km = 2 * earth_radius_km * np.arcsin(np.sqrt(np.clip(h, 0.0, 1.0)))

    # convert km to metres and truncate to whole metres
    return (km * 1000).astype(np.int64)


# calculate haversine distance to city center in m
def calculate_distance_to_city_center(data_full, city_center=(50.941724380890186, 6.958446824087053)):
    """Add the distance to the city center for trip start and trip end.

    Parameters
    ----------
    data_full : pandas.DataFrame
        Trip data with the columns 'latitude_start', 'longitude_start',
        'latitude_end' and 'longitude_end' in decimal degrees.
    city_center : tuple of float, optional
        (latitude, longitude) of the reference point in decimal degrees.
        The default is the coordinate previously hard-coded in this function.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data_full`` with the integer columns 'dist_center_start'
        and 'dist_center_end' (whole metres, truncated). The input frame is
        not modified.

    Raises
    ------
    ValueError
        If any of the four coordinate columns contains NaN.
    """
    data_full_d = data_full.copy()
    # vectorised over all rows instead of one Python call per trip
    data_full_d['dist_center_start'] = _haversine_to_point_m(
        data_full_d['latitude_start'], data_full_d['longitude_start'], city_center)
    data_full_d['dist_center_end'] = _haversine_to_point_m(
        data_full_d['latitude_end'], data_full_d['longitude_end'], city_center)

    return data_full_d

In [8]:
# calculate distance to city center (Dom) for both origin and destination
# (adds the columns 'dist_center_start' and 'dist_center_end')
data_full = calculate_distance_to_city_center(data_full)

### POI Type Supplementation

* Aeroway
* Arts, culture and entertainment
* Education
* Finance
* Food and drink
* Healthcare
* History
* Leisure
* Office
* Sport
* Shop
* Tourism
* Transportation

In [9]:
# prepare POI data for assignment to trip data
def prepare_POI_data(POI_data):
    """Reduce a raw OSM POI table to node features with the columns used later on.

    The frame is modified in place and also returned.

    Parameters
    ----------
    POI_data : geopandas.GeoDataFrame
        POI table with an 'element_type' column and a point geometry
        (``.geometry.x`` / ``.geometry.y`` are read).

    Returns
    -------
    geopandas.GeoDataFrame
        The same object, restricted to rows whose 'element_type' is 'node' and
        to the columns 'unique_id', 'osmid', 'geometry', 'name' and 'amenity'
        (where present), with the added columns 'longitude', 'latitude' and
        'usage_count' (initialised to 0) and a fresh 0..n-1 index.
    """
    keep_columns = ['unique_id', 'osmid', 'geometry', 'name', 'amenity']

    # drop every row that is not a node
    non_node_rows = POI_data.index[POI_data['element_type'] != 'node']
    POI_data.drop(index=non_node_rows, inplace=True)

    # drop all columns except the ones listed above
    # (keyword form: recent pandas no longer accepts the axis as a positional argument)
    POI_data.drop(columns=POI_data.columns.difference(keep_columns), inplace=True)

    # renumber the rows so that the row label equals the row position, which
    # the positional neighbour lookup on this table relies on
    POI_data.reset_index(drop=True, inplace=True)

    # create longitude and latitude columns
    POI_data['longitude'] = POI_data.geometry.x
    POI_data['latitude'] = POI_data.geometry.y

    # usage counter: how often the POI turns up as a neighbour of a trip origin or destination
    POI_data['usage_count'] = 0

    return POI_data

# supplement data with POI types
def supplement_trip_data_with_POI_data(data_trips, trips_start_radians, trips_end_radians, POI_data, POI_name, distance_in_meters=300):
    """Count the POIs around every trip start and end and track how often each POI is hit.

    Both ``data_trips`` and ``POI_data`` are modified in place and returned.

    Parameters
    ----------
    data_trips : pandas.DataFrame
        Trip table; receives the count columns '<POI_name>_start' and
        '<POI_name>_end'.
    trips_start_radians, trips_end_radians : array-like of shape (n_trips, 2)
        (latitude, longitude) of the trip starts / ends in radians, in the
        same row order as ``data_trips``.
    POI_data : pandas.DataFrame
        POI table with the columns 'latitude', 'longitude' (degrees) and
        'usage_count'.
    POI_name : str
        Prefix of the new trip columns and value of the 'POI_type' column.
    distance_in_meters : float, optional
        Search radius around each trip start / end. Defaults to 300.

    Returns
    -------
    tuple
        ``(data_trips, POI_data)``: the trip table with the two new integer
        count columns, and the POI table with 'usage_count' increased by the
        number of trip starts and ends that have the POI within the radius,
        plus the 'POI_type' identifier column.
    """
    # approx. mean earth radius; turns the metric radius into an angle on the unit sphere
    earth_radius_in_meters = 6371000
    radius = distance_in_meters / earth_radius_in_meters

    poi_total = len(POI_data)
    if poi_total == 0:
        # a ball tree cannot be built without points: every trip simply has zero neighbours
        count_start = np.zeros(len(trips_start_radians), dtype=np.int64)
        count_end = np.zeros(len(trips_end_radians), dtype=np.int64)
        usage_increment = np.zeros(0, dtype=np.int64)
    else:
        # create the ball tree with haversine metric on the POI coordinates (in radians)
        POI_radians = np.radians(POI_data[['latitude', 'longitude']].to_numpy())
        tree = BallTree(POI_radians, metric='haversine')

        # per trip: array with the row positions of all POIs inside the radius
        # (only the indices are needed, so no distances are requested)
        is_within_start = tree.query_radius(trips_start_radians, r=radius)
        is_within_end = tree.query_radius(trips_end_radians, r=radius)

        count_start = np.array([len(hits) for hits in is_within_start], dtype=np.int64)
        count_end = np.array([len(hits) for hits in is_within_end], dtype=np.int64)

        # the tree reports row POSITIONS, so hits are tallied by position
        # rather than looked up through the frame's index labels
        hit_arrays = list(is_within_start) + list(is_within_end)
        if hit_arrays:
            all_hits = np.concatenate(hit_arrays).astype(np.intp, copy=False)
            usage_increment = np.bincount(all_hits, minlength=poi_total)
        else:
            usage_increment = np.zeros(poi_total, dtype=np.int64)

    # create count columns for POI for trip data start and end
    data_trips['{}_start'.format(POI_name)] = count_start
    data_trips['{}_end'.format(POI_name)] = count_end

    # add the new hits to whatever usage count the POI table already carries
    POI_data['usage_count'] = POI_data['usage_count'].to_numpy() + usage_increment

    # add identifier column
    POI_data['POI_type'] = POI_name

    return data_trips, POI_data

In [10]:
# extract lat/long pairs as numpy array for trip data start and end
# (column order is latitude, longitude; rows follow the order of data_full)
trips_start_gps = data_full[["latitude_start", "longitude_start"]].values
trips_end_gps = data_full[["latitude_end", "longitude_end"]].values

# transform lat/long pairs to radians
# computed once here and passed to every POI supplementation call below
trips_start_radians = np.radians(trips_start_gps)
trips_end_radians = np.radians(trips_end_gps)

In [11]:
# define place for data mining
# (passed as `place` to every OpenStreetMap POI query below)
place = 'Köln, Germany'

In [12]:
import time

# wall-clock timing of this first POI category
start = time.time()

# aeroway POI (elements tagged aeroway=terminal): thematically part of transportation, kept as its own category
# NOTE(review): the original comment announced a different assignment radius for this category,
# but this call has the same form as the calls for all other categories - confirm which is intended
tags={'aeroway': 'terminal'}
# retrieving openstreetmap data and preparing data
POI_aero = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_aero_start' / 'POI_aero_end' to data_full)
data_full, POI_aero = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_aero, 'POI_aero')  

end = time.time()
# elapsed seconds
print(end - start)

# notebook display of the prepared POI table
POI_aero



4.867861270904541


Unnamed: 0,unique_id,osmid,name,geometry,longitude,latitude,usage_count,POI_type
0,node/27296045,27296045,Terminal 2 Fluggastbereich D,POINT (7.11970 50.88054),7.119703,50.880543,3035,POI_aero


In [13]:
# arts, culture and entertainment POI: selected by the listed 'amenity' values
tags = {'amenity': ['arts_centre','cinema', 'brothel', 'casino', 'community_centre', 'gambling', 'love_hotel', 'nightclub', 'planetarium', 'public_bookcase', 'social_centre', 'stripclub', 'studio', 'swingerclub', 'theatre']}
# retrieve the matching OpenStreetMap elements for `place` and prepare them
POI_art_culture_entertainment = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_art_culture_entertainment_start' / '_end' to data_full)
data_full, POI_art_culture_entertainment = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_art_culture_entertainment, 'POI_art_culture_entertainment')  
# notebook display of the prepared POI table
POI_art_culture_entertainment

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,amenity,name,geometry,longitude,latitude,usage_count,POI_type
0,node/54020150,54020150,theatre,Atelier Theater,POINT (6.93548 50.93259),6.935482,50.932594,11058,POI_art_culture_entertainment
1,node/247382429,247382429,planetarium,Planetarium und Sternwarte Köln,POINT (6.95649 50.96660),6.956487,50.966600,4909,POI_art_culture_entertainment
2,node/256221801,256221801,cinema,Metropolis,POINT (6.95821 50.95108),6.958213,50.951083,9091,POI_art_culture_entertainment
3,node/257905592,257905592,theatre,Gloria Theater,POINT (6.94495 50.93749),6.944948,50.937488,15074,POI_art_culture_entertainment
4,node/258183467,258183467,community_centre,Don-Bosco-Club Köln-Mülheim,POINT (7.01081 50.97438),7.010811,50.974384,1784,POI_art_culture_entertainment
...,...,...,...,...,...,...,...,...,...
200,node/7704871144,7704871144,public_bookcase,,POINT (6.90917 50.94801),6.909171,50.948012,4798,POI_art_culture_entertainment
201,node/7812771513,7812771513,gambling,Lido Spielhalle,POINT (7.06945 50.97841),7.069447,50.978415,0,POI_art_culture_entertainment
202,node/7861149238,7861149238,studio,Filmproduktion Peter Schüttemeyer,POINT (6.93241 50.94004),6.932407,50.940044,7769,POI_art_culture_entertainment
203,node/8097066616,8097066616,community_centre,Bürgerzentrum Nippes - Turmstraße,POINT (6.95050 50.96453),6.950498,50.964528,5972,POI_art_culture_entertainment


In [14]:
# education POI: selected by the listed 'amenity' values
tags = {'amenity': ['college','driving_school', 'kindergarten', 'language_school', 'library', 'music_school', 'school', 'university']}
# retrieve the matching OpenStreetMap elements for `place` and prepare them
POI_education = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_education_start' / 'POI_education_end' to data_full)
data_full, POI_education = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_education, 'POI_education')  
# notebook display of the prepared POI table
POI_education

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,geometry,amenity,name,longitude,latitude,usage_count,POI_type
0,node/75874741,75874741,POINT (6.92635 50.95670),school,Lauder-Morijah-Grundschule,6.926346,50.956699,2956,POI_education
1,node/160695366,160695366,POINT (6.95199 50.94475),library,Erzbischöfliche Diözesan- und Dombibliothek,6.951991,50.944755,6210,POI_education
2,node/215423659,215423659,POINT (6.90924 50.96505),school,Montessori-Grundschule Ossendorf,6.909241,50.965055,1690,POI_education
3,node/221175135,221175135,POINT (6.89374 50.97834),school,Bildungszentrum Butzweiler Hof,6.893744,50.978343,419,POI_education
4,node/223292416,223292416,POINT (6.92708 50.97106),kindergarten,,6.927081,50.971057,1281,POI_education
...,...,...,...,...,...,...,...,...,...
303,node/8126692679,8126692679,POINT (6.87622 50.94512),school,Anna-Freud-Förderschule,6.876224,50.945117,504,POI_education
304,node/8156001437,8156001437,POINT (6.94488 50.93418),music_school,drummer's focus,6.944881,50.934182,10570,POI_education
305,node/8188016679,8188016679,POINT (6.88193 50.95509),kindergarten,Kindergruppe Sonnenstrahlen e.V.,6.881928,50.955092,226,POI_education
306,node/8190632569,8190632569,POINT (6.94920 50.93439),library,Stadtbibliothek Köln,6.949196,50.934391,11800,POI_education


In [15]:
# finance POI: selected by the listed 'amenity' values
tags = {'amenity': ['atm','bank', 'bureau_de_change']}
# retrieve the matching OpenStreetMap elements for `place` and prepare them
POI_finance = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_finance_start' / 'POI_finance_end' to data_full)
data_full, POI_finance = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_finance, 'POI_finance')  
# notebook display of the prepared POI table
POI_finance

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,amenity,name,geometry,longitude,latitude,usage_count,POI_type
0,node/54749669,54749669,bank,Sparkasse KölnBonn,POINT (6.94095 50.93541),6.940948,50.935411,17593,POI_finance
1,node/196170706,196170706,bank,Sparkasse,POINT (6.91989 50.95370),6.919886,50.953704,5862,POI_finance
2,node/224445220,224445220,bank,Sparkasse,POINT (6.90095 50.95752),6.900947,50.957519,3482,POI_finance
3,node/230226555,230226555,bank,Sparkasse,POINT (6.89975 50.96789),6.899752,50.967888,1022,POI_finance
4,node/232284761,232284761,atm,Sparkasse KölnBonn,POINT (6.95012 50.96001),6.950118,50.960011,4329,POI_finance
...,...,...,...,...,...,...,...,...,...
358,node/8131880605,8131880605,atm,,POINT (7.00520 50.96093),7.005200,50.960926,2244,POI_finance
359,node/8132374205,8132374205,atm,,POINT (6.93897 50.93838),6.938973,50.938383,19911,POI_finance
360,node/8132374206,8132374206,atm,,POINT (6.93907 50.93876),6.939065,50.938760,19862,POI_finance
361,node/8132435066,8132435066,atm,,POINT (6.94059 50.93533),6.940585,50.935334,17957,POI_finance


In [16]:
# food and drink POI: selected by the listed 'amenity' values
tags = {'amenity': ['bar','biergarten','cafe','drinking_water','fast_food','food_court','ice_cream','pub','restaurant', 'internet_cafe']}
# retrieve the matching OpenStreetMap elements for `place` and prepare them
POI_food_drink = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_food_drink_start' / 'POI_food_drink_end' to data_full)
data_full, POI_food_drink = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_food_drink, 'POI_food_drink')  
# notebook display of the prepared POI table
POI_food_drink

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,amenity,name,geometry,longitude,latitude,usage_count,POI_type
0,node/359460,359460,fast_food,Papa-Pizza,POINT (6.94125 50.91559),6.941247,50.915591,4393,POI_food_drink
1,node/359832,359832,restaurant,Campus,POINT (6.96375 50.90516),6.963755,50.905156,3065,POI_food_drink
2,node/39606803,39606803,fast_food,Burger King,POINT (6.91508 50.94915),6.915085,50.949151,7630,POI_food_drink
3,node/54020544,54020544,biergarten,Biergarten Rathenauplatz Veedelstreff,POINT (6.93697 50.93134),6.936973,50.931336,13363,POI_food_drink
4,node/55441368,55441368,restaurant,Blauer König,POINT (7.00649 50.94040),7.006490,50.940398,2721,POI_food_drink
...,...,...,...,...,...,...,...,...,...
2840,node/8143715869,8143715869,restaurant,Jonny Turista,POINT (6.94539 50.93330),6.945393,50.933296,7910,POI_food_drink
2841,node/8182655788,8182655788,restaurant,alla Pappa,POINT (6.95854 50.92428),6.958543,50.924280,7978,POI_food_drink
2842,node/8235340508,8235340508,restaurant,DOWN TO EARTH NOODLES,POINT (6.92774 50.92020),6.927742,50.920196,5574,POI_food_drink
2843,node/8254554285,8254554285,pub,Kupferkanne,POINT (6.96652 50.97757),6.966516,50.977573,903,POI_food_drink


In [17]:
# healthcare POI: selected by the listed 'amenity' values plus emergency ward entrances
tags = {'amenity': ['clinic','dentist', 'doctors', 'hospital', 'nursing_home', 'pharmacy', 'social_facility', 'veterinary'],
       'emergency': ['emergency_ward_entrance']}
# retrieve the matching OpenStreetMap elements for `place` and prepare them
POI_healthcare = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_healthcare_start' / 'POI_healthcare_end' to data_full)
data_full, POI_healthcare = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_healthcare, 'POI_healthcare')  
# notebook display of the prepared POI table
POI_healthcare

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,amenity,name,geometry,longitude,latitude,usage_count,POI_type
0,node/242516129,242516129,pharmacy,Severin-Apotheke,POINT (6.95884 50.92396),6.958838,50.923955,8551,POI_healthcare
1,node/245988507,245988507,doctors,Andreas Koch,POINT (6.92525 50.95527),6.925252,50.955274,3534,POI_healthcare
2,node/246271893,246271893,pharmacy,Apotheke am Bilderstöckchen,POINT (6.92849 50.97136),6.928493,50.971362,1286,POI_healthcare
3,node/249759181,249759181,pharmacy,Linden Apotheke,POINT (6.85879 51.06506),6.858786,51.065064,0,POI_healthcare
4,node/259198454,259198454,pharmacy,Gürzenich-Apotheke,POINT (6.95712 50.93615),6.957118,50.936152,15388,POI_healthcare
...,...,...,...,...,...,...,...,...,...
698,node/8241405142,8241405142,,,POINT (6.92405 50.93183),6.924046,50.931834,5368,POI_healthcare
699,node/8241405144,8241405144,,,POINT (6.92419 50.93169),6.924193,50.931687,5368,POI_healthcare
700,node/8241405145,8241405145,,,POINT (6.92413 50.93184),6.924130,50.931844,5305,POI_healthcare
701,node/8241405146,8241405146,,,POINT (6.92418 50.93174),6.924180,50.931738,5365,POI_healthcare


In [18]:
# history POI: selected by the 'historic' tag (True = no restriction on the tag's value)
tags = {'historic': True}
# retrieve the matching OpenStreetMap elements for `place` and prepare them
POI_history = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_history_start' / 'POI_history_end' to data_full)
data_full, POI_history = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_history, 'POI_history')  
# notebook display of the prepared POI table
POI_history

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,geometry,name,amenity,longitude,latitude,usage_count,POI_type
0,node/28121883,28121883,POINT (6.95713 50.94129),,,6.957128,50.941291,17269,POI_history
1,node/177488956,177488956,POINT (6.97035 50.93787),Römisches Osttor,,6.970346,50.937874,4470,POI_history
2,node/267352592,267352592,POINT (6.95923 50.94143),St. Maria ad Gradus,,6.959232,50.941433,18761,POI_history
3,node/292400378,292400378,POINT (6.96873 50.93823),Kürassier Denkmal,,6.968729,50.938228,2507,POI_history
4,node/298083386,298083386,POINT (6.96928 50.92064),Hafenkran 31a,,6.969281,50.920639,2204,POI_history
...,...,...,...,...,...,...,...,...,...
2052,node/8141400510,8141400510,POINT (6.91091 50.93676),Alexander Weinberg,,6.910912,50.936760,4006,POI_history
2053,node/8141400511,8141400511,POINT (6.91091 50.93676),Johanna Weinberg,,6.910911,50.936762,4003,POI_history
2054,node/8141400512,8141400512,POINT (6.91091 50.93676),Ernst Jacob,,6.910909,50.936764,4002,POI_history
2055,node/8158308957,8158308957,POINT (6.98733 50.95379),Miriam,,6.987335,50.953793,365,POI_history


In [19]:
# leisure POI: selected by the 'leisure' tag (True = no restriction on the tag's value)
tags = {'leisure': True}
# retrieve the matching OpenStreetMap elements for `place` and prepare them
POI_leisure = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_leisure_start' / 'POI_leisure_end' to data_full)
data_full, POI_leisure = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_leisure, 'POI_leisure')  
# notebook display of the prepared POI table
POI_leisure

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,geometry,name,amenity,longitude,latitude,usage_count,POI_type
0,node/28122271,28122271,POINT (6.96900 50.94101),Kletteranlage Hohenzollernbrücke,,6.969004,50.941007,4990,POI_leisure
1,node/97753701,97753701,POINT (6.92603 50.96285),,,6.926035,50.962850,341,POI_leisure
2,node/98180411,98180411,POINT (6.92586 50.96427),,,6.925857,50.964272,163,POI_leisure
3,node/137028641,137028641,POINT (6.93278 50.94508),,,6.932783,50.945077,3815,POI_leisure
4,node/238028510,238028510,POINT (6.90642 50.97715),,,6.906418,50.977150,11,POI_leisure
...,...,...,...,...,...,...,...,...,...
722,node/8158366351,8158366351,POINT (6.91755 50.94653),,,6.917550,50.946532,6105,POI_leisure
723,node/8175168217,8175168217,POINT (6.90674 50.96552),,,6.906745,50.965521,1274,POI_leisure
724,node/8187767537,8187767537,POINT (6.93928 50.95491),Bogenlust GbR Eventlocation,,6.939280,50.954912,2189,POI_leisure
725,node/8238174660,8238174660,POINT (6.94805 50.97986),Außenterrasse,,6.948053,50.979856,1357,POI_leisure


In [20]:
# office POI: selected by the 'office' tag (True = no restriction on the tag's value)
tags = {'office': True}
# retrieve the matching OpenStreetMap elements for `place` and prepare them
POI_office = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_office_start' / 'POI_office_end' to data_full)
data_full, POI_office = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_office, 'POI_office')  
# notebook display of the prepared POI table
POI_office

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,name,geometry,amenity,longitude,latitude,usage_count,POI_type
0,node/285569148,285569148,ampido,POINT (6.93751 50.94191),,6.937511,50.941909,13172,POI_office
1,node/297870881,297870881,Edelgrün,POINT (6.92405 50.94663),cafe,6.924052,50.946627,8543,POI_office
2,node/304561103,304561103,align media,POINT (6.92214 50.95597),,6.922139,50.955971,4975,POI_office
3,node/306136792,306136792,GIGATRONIK Köln GmbH,POINT (6.92347 50.97676),,6.923469,50.976762,396,POI_office
4,node/332418979,332418979,Lieferando,POINT (6.91732 50.95049),,6.917324,50.950488,8935,POI_office
...,...,...,...,...,...,...,...,...,...
771,node/8219835707,8219835707,Leidens & Effert,POINT (7.09764 50.88226),,7.097643,50.882261,0,POI_office
772,node/8219835708,8219835708,XNC GmbH,POINT (7.09746 50.88227),,7.097463,50.882268,0,POI_office
773,node/8242684660,8242684660,ms Immobilien,POINT (6.88192 50.95550),,6.881925,50.955504,231,POI_office
774,node/8242684664,8242684664,AXA - Butz&Hauke,POINT (6.88138 50.95542),,6.881377,50.955418,228,POI_office


In [21]:
# sport POI: selected by the 'sport' tag (True = no restriction on the tag's value)
tags = {'sport': True}
# retrieve the matching OpenStreetMap elements for `place` and prepare them
POI_sport = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_sport_start' / 'POI_sport_end' to data_full)
data_full, POI_sport = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_sport, 'POI_sport')  
# notebook display of the prepared POI table
POI_sport

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,name,geometry,amenity,longitude,latitude,usage_count,POI_type
0,node/28122271,28122271,Kletteranlage Hohenzollernbrücke,POINT (6.96900 50.94101),,6.969004,50.941007,4990,POI_sport
1,node/98180411,98180411,,POINT (6.92586 50.96427),,6.925857,50.964272,163,POI_sport
2,node/246271888,246271888,Olympia Sporthalle,POINT (6.94539 50.96193),,6.945391,50.961930,2200,POI_sport
3,node/256542878,256542878,,POINT (6.95317 50.96213),,6.953172,50.962133,6767,POI_sport
4,node/256742960,256742960,,POINT (6.95839 50.95928),,6.958392,50.959285,2654,POI_sport
...,...,...,...,...,...,...,...,...,...
274,node/8137886073,8137886073,,POINT (6.87505 50.95417),,6.875049,50.954168,130,POI_sport
275,node/8185140561,8185140561,Bogenschule Köln,POINT (6.87429 50.98442),,6.874288,50.984417,0,POI_sport
276,node/8187767537,8187767537,Bogenlust GbR Eventlocation,POINT (6.93928 50.95491),,6.939280,50.954912,2189,POI_sport
277,node/8233414558,8233414558,Cologne Dartshop,POINT (6.95101 50.97378),,6.951015,50.973782,2628,POI_sport


In [22]:
# shop POI: selected by the 'shop' tag (True = no restriction on the tag's value)
tags = {'shop': True}
# retrieve the matching OpenStreetMap elements for `place` and prepare them
POI_shop = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_shop_start' / 'POI_shop_end' to data_full)
data_full, POI_shop = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_shop, 'POI_shop')  
# notebook display of the prepared POI table
POI_shop

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,name,geometry,amenity,longitude,latitude,usage_count,POI_type
0,node/28123308,28123308,Eckert,POINT (6.97405 50.94046),,6.974051,50.940458,11162,POI_shop
1,node/53980042,53980042,JET-Tankstelle,POINT (6.90746 50.94506),fuel,6.907464,50.945061,4402,POI_shop
2,node/55441040,55441040,Netto,POINT (7.00814 50.94087),,7.008140,50.940872,2581,POI_shop
3,node/55448627,55448627,,POINT (7.00658 50.94038),,7.006583,50.940379,2758,POI_shop
4,node/95083415,95083415,Kamps,POINT (6.92143 50.95448),,6.921427,50.954484,4822,POI_shop
...,...,...,...,...,...,...,...,...,...
5241,node/8233414558,8233414558,Cologne Dartshop,POINT (6.95101 50.97378),,6.951015,50.973782,2628,POI_shop
5242,node/8242491620,8242491620,Doña pelos,POINT (6.95789 50.95305),,6.957891,50.953053,8374,POI_shop
5243,node/8242684654,8242684654,Mercato Olio e Vino,POINT (6.88569 50.95537),,6.885687,50.955375,798,POI_shop
5244,node/8253864159,8253864159,Cut World,POINT (6.92404 50.94690),,6.924045,50.946896,8129,POI_shop


In [23]:
# tourism POI: selected by the 'tourism' tag (True = no restriction on the tag's value)
tags = {'tourism': True}
# retrieve the matching OpenStreetMap elements for `place` and prepare them
POI_tourism = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_tourism_start' / 'POI_tourism_end' to data_full)
data_full, POI_tourism = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_tourism, 'POI_tourism')  
# notebook display of the prepared POI table
POI_tourism

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,geometry,name,amenity,longitude,latitude,usage_count,POI_type
0,node/28121900,28121900,POINT (6.96862 50.94108),,,6.968617,50.941085,4338,POI_tourism
1,node/36752479,36752479,POINT (6.90365 50.93283),,,6.903648,50.932828,801,POI_tourism
2,node/78605265,78605265,POINT (6.95024 50.94056),NS-Dokumentationszentrum,,6.950238,50.940565,10738,POI_tourism
3,node/215210722,215210722,POINT (6.89459 50.98739),Coloneum,,6.894589,50.987394,532,POI_tourism
4,node/246466024,246466024,POINT (6.97824 50.96344),,,6.978237,50.963439,1987,POI_tourism
...,...,...,...,...,...,...,...,...,...
707,node/8197223417,8197223417,POINT (6.83737 50.93542),,,6.837369,50.935416,840,POI_tourism
708,node/8220385917,8220385917,POINT (6.83292 50.96557),,,6.832921,50.965575,0,POI_tourism
709,node/8231513224,8231513224,POINT (6.96884 50.95722),,,6.968843,50.957217,1661,POI_tourism
710,node/8231513225,8231513225,POINT (6.97382 50.95592),,,6.973823,50.955919,977,POI_tourism


In [24]:
# transportation POI: selected by the listed 'amenity', 'public_transport' and 'railway' values
tags = {'amenity': ['bicycle_rental','boat_rental', 'boat_sharing', 'bus_station', 'car_rental', 'car_sharing', 'ferry_terminal', 'taxi'],
       'public_transport':['station'],
       'railway':['platform', 'station', 'tram_stop', 'subway_entrance']}
# retrieve the matching OpenStreetMap elements for `place` and prepare them
POI_transport = prepare_POI_data(ox.pois.pois_from_place(place=place, tags=tags))
# supplement trip data with POI data selection
# (adds 'POI_transport_start' / 'POI_transport_end' to data_full)
data_full, POI_transport = supplement_trip_data_with_POI_data(data_full, trips_start_radians, trips_end_radians, POI_transport, 'POI_transport')  
# notebook display of the prepared POI table
POI_transport

  and should_run_async(code)


Unnamed: 0,unique_id,osmid,amenity,name,geometry,longitude,latitude,usage_count,POI_type
0,node/359829,359829,car_rental,Starcar Autovermietung,POINT (6.96395 50.90492),6.963953,50.904916,2907,POI_transport
1,node/359831,359831,car_sharing,,POINT (6.96358 50.90565),6.963581,50.905654,3340,POI_transport
2,node/361716,361716,,Eifelplatz,POINT (6.94351 50.92335),6.943506,50.923349,3398,POI_transport
3,node/28122005,28122005,,Heumarkt,POINT (6.95992 50.93570),6.959924,50.935698,15665,POI_transport
4,node/28122013,28122013,,Deutzer Freiheit,POINT (6.97137 50.93809),6.971368,50.938090,6434,POI_transport
...,...,...,...,...,...,...,...,...,...
833,node/7022705866,7022705866,,Poststraße,POINT (6.94992 50.93205),6.949917,50.932046,5804,POI_transport
834,node/7110567529,7110567529,car_rental,RKG Autovermietung,POINT (6.98132 50.92308),6.981321,50.923081,2092,POI_transport
835,node/7334512627,7334512627,bicycle_rental,Charles-de-Gaulle Platz,POINT (6.97234 50.94165),6.972337,50.941653,8513,POI_transport
836,node/7673526493,7673526493,,Arnoldshöhe,POINT (6.96700 50.89463),6.967003,50.894625,984,POI_transport


In [25]:
# show supplemented trip data
# lift the column display limit so that no column is elided in the output
pd.options.display.max_columns = None
display(data_full)

  and should_run_async(code)


Unnamed: 0,id,provider,vehicleType,date_start,time_start,date_end,time_end,year,month,weekday,hour,weekend,longitude_start,latitude_start,longitude_end,latitude_end,coordinates_start,coordinates_end,distance,duration,speed,time_basket,dist_center_start,dist_center_end,POI_aero_start,POI_aero_end,POI_art_culture_entertainment_start,POI_art_culture_entertainment_end,POI_education_start,POI_education_end,POI_finance_start,POI_finance_end,POI_food_drink_start,POI_food_drink_end,POI_healthcare_start,POI_healthcare_end,POI_history_start,POI_history_end,POI_leisure_start,POI_leisure_end,POI_office_start,POI_office_end,POI_sport_start,POI_sport_end,POI_shop_start,POI_shop_end,POI_tourism_start,POI_tourism_end,POI_transport_start,POI_transport_end
0,ZnJlZWZsb2F0OjpjYXIyZ286OldNRTQ1MzM0MjFLMzE5MTQy,car2go,car,2019-11-18,1300,2019-11-18,1310,2019,11,0,13,0,6.886950,50.947880,6.890660,50.947170,"(50.9479, 6.887)","(50.9472, 6.8907)",271.635363,595.0,1.643508,weekday_daytime,5055,4787,0,0,0,0,2,1,1,0,7,5,0,0,0,0,1,1,4,2,0,0,9,2,1,2,3,3
1,ZnJlZWZsb2F0OjpjYXIyZ286OldNRTQ1MzM0MjFLMzIxNTM2,car2go,car,2019-11-18,1300,2019-11-18,1320,2019,11,0,13,0,6.954300,50.938210,6.944940,50.941190,"(50.9382, 6.9543)","(50.9412, 6.9449)",734.795721,1197.0,2.209912,weekday_daytime,486,948,0,0,5,7,2,1,16,4,63,75,4,2,35,38,4,3,14,22,2,0,277,131,14,8,7,4
2,ZnJlZWZsb2F0OjpjYXIyZ286OldNRTQ1MzM0MjFLMzcyNDA3,car2go,car,2019-11-18,1300,2019-11-18,1320,2019,11,0,13,0,6.985220,50.949150,7.014890,50.966520,"(50.9492, 6.9852)","(50.9665, 7.0149)",2837.091541,1197.0,8.532606,weekday_daytime,2049,4820,0,0,1,2,0,1,0,1,2,5,0,1,0,0,0,1,0,4,0,1,5,2,0,0,1,0
3,ZnJlZWZsb2F0OjpjYXIyZ286OldNRTQ1MzM0MjFLMjkwNTc5,car2go,car,2019-11-18,1300,2019-11-18,1325,2019,11,0,13,0,6.917080,50.946330,6.907470,50.951340,"(50.9463, 6.9171)","(50.9513, 6.9075)",873.828450,1496.0,2.102796,weekday_daytime,2943,3727,0,0,0,1,2,0,0,0,18,4,3,0,12,1,7,0,7,3,4,0,38,5,0,1,1,0
4,ZnJlZWZsb2F0OjpjYXIyZ286OldNRTQ1MzM0MjFLMjkzNjQy,car2go,car,2019-11-18,1300,2019-11-18,1325,2019,11,0,13,0,7.018690,50.970100,6.996920,50.942590,"(50.9701, 7.0187)","(50.9426, 6.9969)",3417.963314,1496.0,8.225045,weekday_daytime,5268,2697,0,0,0,0,1,0,0,0,1,3,0,1,0,0,0,4,4,0,0,1,0,6,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
404858,YmlrZTo6dGllcjo6YTFlMmYzZjEtNjkyZS00NjA4LWE5YT...,tier,scooter,2020-02-29,2340,2020-02-29,2350,2020,2,5,23,1,6.960057,50.940467,6.956911,50.938427,"(50.9405, 6.9601)","(50.9384, 6.9569)",316.302763,600.0,1.897817,weekend_nighttime,179,382,0,0,8,7,2,1,19,21,85,109,7,10,25,37,6,9,10,17,2,2,97,278,30,23,18,6
404859,YmlrZTo6dGllcjo6OGUxYjAwNDUtMDY3NS00Mzc4LTk3Yz...,tier,scooter,2020-02-29,2340,2020-02-29,2355,2020,2,5,23,1,6.927394,50.915908,6.944665,50.904921,"(50.9159, 6.9274)","(50.9049, 6.9447)",1720.132594,902.0,6.865274,weekend_nighttime,3602,4204,0,0,1,0,1,0,4,0,26,2,6,1,15,0,2,0,2,0,1,0,67,3,1,0,5,2
404860,YmlrZTo6dGllcjo6ZGE1NmNkMDgtN2Q5ZS00YzE0LWI2OT...,tier,scooter,2020-02-29,2340,2020-02-29,2355,2020,2,5,23,1,6.938424,50.928810,6.928539,50.920578,"(50.9288, 6.9384)","(50.9206, 6.9285)",1148.006312,902.0,4.581843,weekend_nighttime,2007,3149,0,0,5,1,6,3,12,1,120,17,5,4,129,8,6,3,3,4,3,1,70,26,8,0,12,2
404861,YmlrZTo6dGllcjo6NzRlY2JjZTYtZWQ3Yi00ZGY3LWJmMG...,tier,scooter,2020-02-29,2345,2020-02-29,2350,2020,2,5,23,1,6.975072,50.964634,6.969438,50.967416,"(50.9646, 6.9751)","(50.9674, 6.9694)",501.354847,300.0,6.016258,weekend_nighttime,2801,2958,0,0,0,0,2,1,2,0,9,0,6,0,0,0,2,2,1,0,2,1,9,2,2,1,1,0


In [26]:
# column overview (non-null counts and dtypes) of the supplemented trip data
data_full.info()

  and should_run_async(code)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 404863 entries, 0 to 404862
Data columns (total 50 columns):
 #   Column                               Non-Null Count   Dtype  
---  ------                               --------------   -----  
 0   id                                   404863 non-null  object 
 1   provider                             404863 non-null  object 
 2   vehicleType                          404863 non-null  object 
 3   date_start                           404863 non-null  object 
 4   time_start                           404863 non-null  int64  
 5   date_end                             404863 non-null  object 
 6   time_end                             404863 non-null  int64  
 7   year                                 404863 non-null  int64  
 8   month                                404863 non-null  int64  
 9   weekday                              404863 non-null  int64  
 10  hour                                 404863 non-null  int64  
 11  weekend      

### Save data sets

In [27]:
# save data by mode and months
def save_supplemented_data(months, data_supplemented):
    """Write one CSV per requested month for a single-mode trip table.

    The files are named ``Data/Modes/data_supplemented_<mode>_<month>.csv``,
    where ``<mode>`` is the 'vehicleType' value of the first row. A month
    without matching rows still produces a header-only file.

    Parameters
    ----------
    months : iterable of str
        Month identifiers in the form 'YYYYMM'.
    data_supplemented : pandas.DataFrame
        Trip table with the columns 'vehicleType', 'year' and 'month'.
        It is expected to hold one mode only, since only the first row is
        consulted for the file name.

    Returns
    -------
    None
        Nothing is written when ``data_supplemented`` is empty.
    """
    if data_supplemented.empty:
        # no first row to take the mode name from, and no trips to save
        return

    # the mode is the same for every month, so look it up once
    mode_name_file = data_supplemented['vehicleType'].iloc[0]

    for month in months:
        # split 'YYYYMM' into its numeric year and month parts
        only_year = int(month[:4])
        only_month = int(month[4:])

        # select the trips of that month and save them as csv file
        in_month = (data_supplemented['month'] == only_month) & (data_supplemented['year'] == only_year)
        data_supplemented[in_month].to_csv('Data/Modes/data_supplemented_{}_{}.csv'.format(mode_name_file, month), index=False)

# save data by mode only
def save_supplemented_data_full(data_supplemented):
    """Write the trip table to one CSV per mode (car, bicycle, scooter).

    Rows are selected by their 'vehicleType' value; each file is written
    without the index column.
    """
    # (vehicleType value, target file) in the order the files are written
    targets = (
        ('car', 'Data/Modes/data_supplemented_full_car.csv'),
        ('bicycle', 'Data/Modes/data_supplemented_full_bicycle.csv'),
        ('scooter', 'Data/Modes/data_supplemented_full_scooter.csv'),
    )
    vehicle_type = data_supplemented['vehicleType']
    for mode_value, csv_path in targets:
        rows_of_mode = data_supplemented[vehicle_type == mode_value]
        rows_of_mode.to_csv(csv_path, index=False)

In [31]:
# save POI datasets
# set True if saving is desired
save_data_POI = False
if (save_data_POI):
    # stack the POI tables of all categories into one table with a fresh index
    POI_data = pd.concat([POI_aero, POI_art_culture_entertainment, POI_education, POI_finance, 
                          POI_food_drink, POI_healthcare, POI_history, POI_leisure, POI_office,
                          POI_sport, POI_shop, POI_tourism, POI_transport], ignore_index=True)
    # written without the index column
    POI_data.to_csv('Data/POI_data.csv', index = False)

In [29]:
# save supplemented data sets based on selected months
# split data by month to avoid file size > 100 MB due to GitHub rules
# set True if saving is desired
save_data = False
if (save_data):
    # one call per mode: each call receives only the rows of that vehicleType
    save_supplemented_data(months, data_full[data_full['vehicleType']=='car'])
    save_supplemented_data(months, data_full[data_full['vehicleType']=='bicycle'])
    save_supplemented_data(months, data_full[data_full['vehicleType']=='scooter'])

In [30]:
# save supplemented data sets in one file per mode
# set True if saving is desired
save_data_full = False
if (save_data_full):
    # the split by vehicleType happens inside save_supplemented_data_full
    save_supplemented_data_full(data_full)

  and should_run_async(code)
