In [13]:
import requests
import pandas as pd
import csv

#### Get geoJSON world cities polygons
Getting polygon lat-long bounding data for world cities from the json file linked below. This will be used for the geometry bounding polygons on the OpenStreetMap ohsome API calls for the bike lane and bike dock data. 

URL for geoJSON with geometry polygons for world cities: https://raw.githubusercontent.com/drei01/geojson-world-cities/master/cities.geojson

In [14]:
# url with geoJSON lat long polygons for major world cities
geoJSON_URL = 'https://raw.githubusercontent.com/drei01/geojson-world-cities/master/cities.geojson'

# extract data into json 
r = requests.get(geoJSON_URL)
data = r.json()

# convert to dataframe with nested keys
df = pd.json_normalize(data, record_path=['features'])

In [23]:
# only look at cities within the European continent (eg. there will be multiple cities named Paris throughout the world, only want the one in France)

# lat long "bounding box" to filter for subest of cities - usa specific
# http://en.wikipedia.org/wiki/Extreme_points_of_the_United_States#Westernmost
top = 49.3457868 # north lat
left = -124.7844079 # west long
right = -66.9513812 # east long
bottom =  24.7433195 # south lat

# check to see if first lat long pair is within the bounding box and append to new dataframe
cities =[]
for i in range(0,len(df)):
    if ((df['geometry.coordinates'][i][0][0][0] > left) & (df['geometry.coordinates'][i][0][0][0] < right) 
        & (df['geometry.coordinates'][i][0][0][1] > bottom) & (df['geometry.coordinates'][i][0][0][1] < top)): # looking at first lat long pair for filtering
        cities.append(df.iloc[i])

df_cities = pd.DataFrame(cities)
df_cities['properties.NAME'] = df_cities['properties.NAME'].str.lower()

df_cities

Unnamed: 0,type,properties.NAME,geometry.type,geometry.coordinates
18069,Feature,parksville,Polygon,"[[[-124.27, 49.32745], [-124.26833, 49.32522],..."
18070,Feature,north vancouver,Polygon,"[[[-123.22696, 49.33821], [-123.23352, 49.337]..."
18071,Feature,lantzville bay,Polygon,"[[[-124.0724, 49.25216], [-124.06677, 49.25056..."
18072,Feature,coquitlam,Polygon,"[[[-122.76819, 49.23072], [-122.76998, 49.2297..."
18073,Feature,vancouver,Polygon,"[[[-123.13201, 49.30216], [-123.12675, 49.2992..."
...,...,...,...,...
25008,Feature,homestead,Polygon,"[[[-80.48573, 25.44627], [-80.49386, 25.44847]..."
25009,Feature,nassau,Polygon,"[[[-77.34328, 25.08439], [-77.33479, 25.08429]..."
25010,Feature,nassau,Polygon,"[[[-77.3219, 25.07376], [-77.30672, 25.07204],..."
25108,Feature,new orleans,Polygon,"[[[-90.0, 29.95589], [-90.0, 29.97137], [-89.9..."


In [24]:
# note - sometimes there are still multiple cities with the same name. However, the geometry coordinates are very similar. Assumed to be the same place and use the first entry in the dataframe
df_cities[df_cities['properties.NAME'] == 'new york']

Unnamed: 0,type,properties.NAME,geometry.type,geometry.coordinates
24264,Feature,new york,Polygon,"[[[-73.65578, 40.99863], [-73.65729, 41.00147]..."
24289,Feature,new york,Polygon,"[[[-73.93192, 40.85145], [-73.93395, 40.84948]..."
24297,Feature,new york,Polygon,"[[[-73.741, 40.65503], [-73.74154, 40.65577], ..."
24308,Feature,new york,Polygon,"[[[-74.0821, 40.6455], [-74.07949, 40.63943], ..."
24309,Feature,new york,Polygon,"[[[-73.72982, 40.61447], [-73.73421, 40.61395]..."
24315,Feature,new york,Polygon,"[[[-73.88618, 40.57328], [-73.87991, 40.57655]..."


In [31]:
city_list = [
    'new york',
    'los angeles',
    'chicago',
    'houston',
    'phoenix',
    'philadelphia',
    'pittsburgh',
    'las vegas',
    'atlanta',
    'austin',
    'san francisco',
    'portland',
    'washington',
    'boston',
]

df_subset = df_cities[df_cities['properties.NAME'].isin(city_list)]
df_subset['properties.NAME'].unique()

array(['portland', 'phoenix', 'san francisco', 'washington', 'las vegas',
       'los angeles', 'austin', 'houston', 'atlanta', 'philadelphia',
       'chicago', 'pittsburgh', 'boston', 'new york'], dtype=object)

In [33]:
# for each city we are analyzing, get lat long geometry polygon for ohsome API input

# list of cities in our analysis
# note - needed to change san francisco bay area to san francisco
city_list = [
    'new york',
    'los angeles',
    'chicago',
    'houston',
    'phoenix',
    'philadelphia',
    'pittsburgh',
    'las vegas',
    'atlanta',
    'austin',
    'san francisco',
    'portland',
    'washington',
    'boston',
]
city_geom = []

# for each city, get the geometry polygon and convert it to a string in the format for the ohsome API
for city in city_list: # loop through list of cities

    row = df_cities[df_cities['properties.NAME'] == city].index[0] # getting first row where the city name matches 
    city_geom_string = ''

    # for that city, loop through the coordinates column and convert to string
    for i in range(0,len(df['geometry.coordinates'][row][0])):
        s = ', '.join(str(x) for x in df['geometry.coordinates'][row][0][i])
        city_geom_string += s + ','
    
    city_geom.append(city_geom_string) # append geometry strings to list

# create dataframe of cities and their geometry strings
city_geom_df = pd.DataFrame({'Location': city_list, 'Geometry String': city_geom})
city_geom_df

Unnamed: 0,Location,Geometry String
0,new york,"-73.65578, 40.99863,-73.65729, 41.00147,-73.65..."
1,los angeles,"-118.60927, 34.23723,-118.60826, 34.2381,-118...."
2,chicago,"-87.80556, 42.23299,-87.80475, 42.23193,-87.80..."
3,houston,"-91.95609, 37.31612,-91.96182, 37.31662,-91.96..."
4,phoenix,"-122.80705, 42.27825,-122.81886, 42.28494,-122..."
5,philadelphia,"-75.73521, 44.15348,-75.7362, 44.15438,-75.724..."
6,pittsburgh,"-80.10685, 40.50806,-80.10519, 40.51473,-80.09..."
7,las vegas,"-115.05851, 36.08611,-115.06244, 36.08667,-115..."
8,atlanta,"-94.17622, 33.10941,-94.18225, 33.11163,-94.18..."
9,austin,"-92.94917, 43.64862,-92.96111, 43.64773,-92.97..."


#### API calls for ohsome OpenStreetMap bike lane and bike dock data

##### Bike Lane Length Extraction 
Do not use because to many different filter parameters we need to account for. Use next section based on detailed filter from paper.

In [6]:
# function for getting the total length of roadway in a city
def roadwayLength_extraction(city, geometry, groupByKey, time, filterKey='type:way'):
    '''Function that creates a dataframe with total length of roadway type
    city - city name
    geometry - boundary area from which to pull that data for
    filterKey - how to filter the results; can use multiple attributes; ex geometry type (type:way)
    grouByKey - how to seperate results from within the filter tag (ex. cycleway)
    time - the end point timestamp from when the data for OpenStreetMap is pulled (ex. 2010-01-01 will pull data existing at Jan 1 2010)
    Function specifically use for length extraction with a groupBy tag. Other API calls will have different URLs
    '''
    URL = 'https://api.ohsome.org/v1/elements/length/groupBy/tag'
    data = {"bpolys": geometry, "format": 'csv', "filter": filterKey, "groupByKey": groupByKey, "time": time}
    response = requests.post(URL, data=data)
    decoded_content = response.content.decode('utf-8')
    
    # create and format dataframe from csv file
    cr = csv.reader(decoded_content.splitlines(), delimiter=';')
    data_list = list(cr)
    df = pd.DataFrame({'Road Type': data_list[3], city: data_list[4]})
    df = df[1:] # remove timestamp row
    df[city] = df[city].astype(float) # convert length column to float
    
    return df

In [7]:
# create dataframe of summation of bike lanes, seperated by type, for each city

groupByKey = 'bicycle' # key for grouping roadway type
time = '2010-01-01' # will need to update this for each year we're pulling data

# make dataframe for one city as starter df for merge - figure out how to create df without doing this??
df_bike = roadwayLength_extraction('London', city_geom_df['Geometry String'][0], groupByKey, time) 

# iterate through list of cities
for index, row in city_geom_df.iterrows():
    city = row['Location']
    geom_str = row['Geometry String']
    df = roadwayLength_extraction(city, geom_str, groupByKey, time) # pull roadway data for specific city
    df_bike = pd.merge(df_bike, df, how='outer') # merge city data into one dataframe 

df_bike

Unnamed: 0,Road Type,London,LONDON,AMSTERDAM,PARIS,BERLIN,MUNICH,BARCELONA,ROMA,LISBOA
0,remainder,16593343.37,16593343.37,2016253.37,8683088.36,10637693.45,4335702.57,1573272.91,44999.22,1389895.52
1,bicycle=yes,147009.56,147009.56,10205.39,79815.04,426983.01,190654.95,8255.36,93.03,1052.29
2,bicycle=no,72175.36,72175.36,52444.55,3081.25,47867.38,116068.94,,,
3,bicycle=designated,803.38,803.38,228.8,,46778.47,80344.07,,,240.03
4,bicycle=dismount,632.62,632.62,,,,,,,
5,bicycle=permissive,37358.67,37358.67,474.35,10955.03,7290.09,14512.46,,,
6,bicycle=unknown,1615.54,1615.54,,,891.21,,,,
7,bicycle=opposite,774.4,774.4,,,,1311.66,,,
8,bicycle=opposite_lane,156.52,156.52,,,,,,,
9,bicycle=true,,,,392.24,,,,,


In [8]:
# create dataframe of summation of cycle lanes, seperated by type, for each city
groupByKey = 'cycleway'
time = '2010-01-01' # will need to update this for each year we're pulling data

df_cycle = roadwayLength_extraction('London', city_geom_df['Geometry String'][0], groupByKey, time) # make dataframe for one city as starter df for merge

# iterate through list of cities
for index, row in city_geom_df.iterrows():
    city = row['Location']
    geom_str = row['Geometry String']
    df = roadwayLength_extraction(city, geom_str, groupByKey, time) # pull roadway data for specific city
    df_cycle = pd.merge(df_cycle, df, how='outer') # merge city data into one dataframe 

df_cycle

Unnamed: 0,Road Type,London,LONDON,AMSTERDAM,PARIS,BERLIN,MUNICH,BARCELONA,ROMA,LISBOA
0,remainder,16714933.12,16714933.12,1956805.91,8536313.07,10756573.18,4437578.81,1566662.59,45092.24,1391187.84
1,cycleway=lane,33299.6,33299.6,5686.06,57817.79,95349.79,11323.64,13386.94,,
2,cycleway=track,68172.88,68172.88,86526.25,95914.82,285357.76,273326.58,1222.97,,
3,cycleway=opposite,3862.7,3862.7,27808.99,14403.93,22219.8,12196.02,,,
4,cycleway=shared,6846.57,6846.57,,,,,,,
5,cycleway=opposite_lane,14363.31,14363.31,1185.72,22638.61,,1720.39,,,
6,cycleway=yes,1340.41,1340.41,,287.47,5205.69,3.97,,,
7,cycleway=opposite_track,3864.01,3864.01,1593.54,7132.99,1166.24,1264.04,255.78,,
8,cycleway=segregated,7005.3,7005.3,,,,,,,
9,cycleway=lane_right;opposite_track,30.14,30.14,,,,,,,


#### Bike Lane length extraction - Filter
Based on detailed filter for bike/cycle ways, since they can be tagged differently in OpenStreetMap. 
The filter is based on Hochmair, Zielstra, and Neis’s paper "Assessing the completeness of bicycle trails and designated lane features in OpenStreetMap for the United States and Europe". 
In their study they explored the cycling features in the United States and Europe. The filter for this layer consists of their filter combination and is extended with other tags of cycling infrastructure mapping methods listed on the corresponding OSM wiki pages.

Filter is taken from here: 
https://hex.ohsome.org/#/cycleways_w/2020-06-01T00:00:00Z/4/50.28251213872028/16.31249248981477

In [9]:
# filterKey = 'type:way and sidewalk:left:bicycle=yes or cycleway:left=shared_lane or cyclestreet=yes or cycleway:left=shared_busway or cycleway:right=shared_busway or cycleway=shared_busway 
# or cycleway=opposite_lane or highway=bridleway and bicycle=no or highway=track and bicycle=designated and motor_vehicle=no or bicycle=use_sidepath or cycleway=opposite and oneway:bicycle=no 
# or sidewalk:right:bicycle=yes or cycleway:right=shared_lane or cycleway:left=track or cycleway:right=track or highway=track and bicycle=designated and motor_vehicle=no
# or highway=path and bicycle=yes or highway=path and bicycle=designated or bicycle=official or highway=service and bicycle=designated or motor_vehicle=no or highway=pedestrian and bicycle=yes 
# or bicycle=official or highway=footway and bicycle=yes or bicycle=official or highway=cycleway 
# or cycleway in lane, opposite_lane, shared_busway, track, opposite_track or cycleway:left in lane, shared_busway or cycleway:right in lane, shared_busway # didn;t include this data
# or cycleway:both=lane or bicycle_road=yes and motor_vehicle=no or bicycle=designated or cyclestreet=yes'

# cycleway=lane or cycleway=opposite_lane or cycleway=shared_busway or cycleway=track or cycleway=opposite_track or cycleway:left=lane or cycleway:left=shared_busway or cycleway:right=lane or cycleway:right=shared_busway

##### Length or count extraction

In [34]:
# function for getting the total count or length of a parameter in a city
def data_extraction(method, city, geometry, filterKey):
    '''Function that creates a dataframe with count of bicycle amenity 
    method - how to agregrate the data (count or length)
    city - city name
    geometry - boundary area from which to pull that data for
    filterKey - how to filter the results; can use multiple attributes; ex geometry type (type:way)
    '''
    URL = 'https://api.ohsome.org/v1/elements/' + method
    data = {"bpolys": geometry, "format": 'csv', "filter": filterKey, "time": "2008-01-01/2023-01-01/P1Y"}
    
    response = requests.post(URL, data=data)
    decoded_content = response.content.decode('utf-8')
    
    # create and format dataframe from csv file
    cr = csv.reader(decoded_content.splitlines(), delimiter=';')
    data_list = list(cr)
    df = pd.DataFrame(data_list[4:], columns=['Date', city])
    df[city] = df[city].astype(float) # convert length column to float
    
    return df

In [47]:
# create a dataframe summing amount of bike lanes for each city for each year from 2008 to 2023

# filter parameters (can remove each clause after and or or, but keep type:way)
filterKey = 'type:way and sidewalk:left:bicycle=yes or cycleway:left=shared_lane or cyclestreet=yes or cycleway:left=shared_busway or cycleway:right=shared_busway or cycleway=shared_busway or cycleway=opposite_lane or highway=bridleway and bicycle=no or highway=track and bicycle=designated and motor_vehicle=no or bicycle=use_sidepath or cycleway=opposite and oneway:bicycle=no or sidewalk:right:bicycle=yes or cycleway:right=shared_lane or cycleway:left=track or cycleway:right=track or highway=track and bicycle=designated and motor_vehicle=no or highway=path and bicycle=yes or highway=path and bicycle=designated or bicycle=official or highway=service and bicycle=designated or motor_vehicle=no or highway=pedestrian and bicycle=yes or bicycle=official or highway=footway and bicycle=yes or bicycle=official or highway=cycleway or cycleway:both=lane or bicycle_road=yes and motor_vehicle=no or bicycle=designated or cyclestreet=yes or cycleway=lane or cycleway=opposite_lane or cycleway=shared_busway or cycleway=track or cycleway=opposite_track or cycleway:left=lane or cycleway:left=shared_busway or cycleway:right=lane or cycleway:right=shared_busway'

city = "London" # this will need to change for each city
df_lanes = data_extraction('length', city, city_geom_df['Geometry String'][0], filterKey) # make dataframe for one city as starter df for merge

# iterate through list of cities
for index, row in city_geom_df.iterrows():
    city = row['Location']
    geom_str = row['Geometry String']
    df = data_extraction('length', city, geom_str, filterKey) # pull data for specific city
    df_lanes = pd.merge(df_lanes, df, how='outer') # merge city data into one dataframe 

df_lanes = df_lanes.drop(df_lanes.columns[1], axis=1)
df_lanes

Unnamed: 0,Date,new york,los angeles,chicago,houston,phoenix,philadelphia,pittsburgh,las vegas,atlanta,austin,san francisco,portland,washington,boston
0,2008-01-01T00:00:00Z,0.0,51103.98,3571.88,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2009-01-01T00:00:00Z,0.0,207159.88,161962.29,0.0,0.0,0.0,4180.79,2361.55,0.0,0.0,54568.02,145.48,0.0,0.0
2,2010-01-01T00:00:00Z,558.92,457107.97,389798.46,0.0,0.0,0.0,4180.79,2361.55,0.0,0.0,89324.36,106211.92,0.0,0.0
3,2011-01-01T00:00:00Z,21719.48,583660.4,525484.97,0.0,0.0,0.0,4180.68,2361.55,0.0,0.0,127772.24,133931.22,0.0,0.0
4,2012-01-01T00:00:00Z,43544.43,1447138.77,635546.87,0.0,0.0,0.0,10800.82,3314.07,0.0,14767.18,128606.01,506982.45,263.96,0.0
5,2013-01-01T00:00:00Z,52912.76,1622377.49,1271084.12,0.0,893.19,0.0,11136.82,3337.65,0.0,14986.45,132933.97,537532.93,263.96,0.0
6,2014-01-01T00:00:00Z,134747.85,1718126.39,1284102.44,0.0,893.19,0.0,14178.93,3337.65,0.0,14986.45,138649.47,540927.32,263.96,0.0
7,2015-01-01T00:00:00Z,138097.11,1830833.92,1388615.19,0.0,893.19,0.0,16843.77,5361.18,0.0,26947.02,154933.81,544226.0,263.96,0.0
8,2016-01-01T00:00:00Z,170285.72,2014559.78,1397237.99,0.0,893.19,0.0,29161.87,15110.63,0.0,26947.02,178528.12,544649.14,310.45,0.0
9,2017-01-01T00:00:00Z,188269.65,2206374.71,1481719.83,0.0,893.19,0.0,114031.39,37366.41,0.0,26947.02,195044.42,548927.37,310.45,0.0


In [45]:
# create a dataframe on the total number of bicycle rental locations for each city for each year from 2008 to 2023

# options for filters: (can remove each clause after and, but keep type:node)
# filterKey = 'amenity=bicycle_rental and type:node'
filterKey = 'amenity=bicycle_parking and type:node'

city = "London" # this will need to change for each city
df_docks = data_extraction('count', city, city_geom_df['Geometry String'][0], filterKey) # make dataframe for one city as starter df for merge

# iterate through list of cities
for index, row in city_geom_df.iterrows():
    city = row['Location']
    geom_str = row['Geometry String']
    df = data_extraction('count', city, geom_str, filterKey) # pull data for specific city
    df_docks = pd.merge(df_docks, df, how='outer') # merge city data into one dataframe 

df_docks = df_docks.drop(df_docks.columns[1], axis=1)
df_docks

Unnamed: 0,Date,new york,los angeles,chicago,houston,phoenix,philadelphia,pittsburgh,las vegas,atlanta,austin,san francisco,portland,washington,boston
0,2008-01-01T00:00:00Z,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2009-01-01T00:00:00Z,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0
2,2010-01-01T00:00:00Z,242.0,8.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,82.0,4.0,0.0,0.0
3,2011-01-01T00:00:00Z,242.0,8.0,22.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,93.0,7.0,0.0,0.0
4,2012-01-01T00:00:00Z,242.0,25.0,27.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,93.0,9.0,0.0,0.0
5,2013-01-01T00:00:00Z,242.0,18.0,27.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,95.0,18.0,0.0,0.0
6,2014-01-01T00:00:00Z,242.0,39.0,38.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,112.0,62.0,0.0,0.0
7,2015-01-01T00:00:00Z,242.0,62.0,41.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,218.0,69.0,0.0,0.0
8,2016-01-01T00:00:00Z,261.0,87.0,60.0,0.0,0.0,0.0,11.0,1.0,0.0,2.0,231.0,807.0,0.0,0.0
9,2017-01-01T00:00:00Z,261.0,213.0,101.0,0.0,0.0,0.0,12.0,2.0,0.0,9.0,256.0,1340.0,0.0,0.0


In [46]:
# create a dataframe on the total number of bicycle rental locations for each city for each year from 2008 to 2023

# options for filters: (can remove each clause after and, but keep type:node)
filterKey = 'amenity=bicycle_rental and type:node'
# filterKey = 'amenity=bicycle_parking and type:node'

city = "London" # this will need to change for each city
df_docks = data_extraction('count', city, city_geom_df['Geometry String'][0], filterKey) # make dataframe for one city as starter df for merge

# iterate through list of cities
for index, row in city_geom_df.iterrows():
    city = row['Location']
    geom_str = row['Geometry String']
    df = data_extraction('count', city, geom_str, filterKey) # pull data for specific city
    df_docks = pd.merge(df_docks, df, how='outer') # merge city data into one dataframe 

df_docks = df_docks.drop(df_docks.columns[1], axis=1)
df_docks

Unnamed: 0,Date,new york,los angeles,chicago,houston,phoenix,philadelphia,pittsburgh,las vegas,atlanta,austin,san francisco,portland,washington,boston
0,2008-01-01T00:00:00Z,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2009-01-01T00:00:00Z,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,2010-01-01T00:00:00Z,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0
3,2011-01-01T00:00:00Z,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0
4,2012-01-01T00:00:00Z,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0
5,2013-01-01T00:00:00Z,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0
6,2014-01-01T00:00:00Z,0.0,3.0,300.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0
7,2015-01-01T00:00:00Z,0.0,6.0,300.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.0,0.0,0.0,0.0
8,2016-01-01T00:00:00Z,0.0,5.0,319.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.0,0.0,0.0,0.0
9,2017-01-01T00:00:00Z,0.0,132.0,327.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45.0,28.0,0.0,0.0
