In [1]:
import pandas as pd
import geopandas as gpd
import osmnx as ox
import folium
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.ops import nearest_points
from itertools import chain


In [2]:
## INPUTS
# Define region and radius of interest
# place = 'Glasgow City, Scotland, UK' # Glasgow: [55.8642,-4.2518]
# place = 'San Francisco, California, USA' 
# place = 'Chicago, Illinois, USA' 
place = 'New York City, New York, USA'
root_name = 'new_york'

filename_cleaned_df = root_name + '_feature_target_table.geojson'
filename_grid = root_name + '_grid.geojson'
filename_amenities = root_name + '_amenities.geojson'
filename_bike_rentals = root_name + '_bike_rentals.geojson'


In [3]:
# Workaround to fix chrome issue where folium won't plot maps with a large number of layers
# See comment by dstein64 at: https://github.com/python-visualization/folium/issues/812

import base64
def _repr_html_(self, **kwargs):
    html = base64.b64encode(self.render(**kwargs).encode('utf8')).decode('utf8')
    onload = (
        'this.contentDocument.open();'
        'this.contentDocument.write(atob(this.getAttribute(\'data-html\')));'
        'this.contentDocument.close();'
    )
    if self.height is None:
        iframe = (
            '<div style="width:{width};">'
            '<div style="position:relative;width:100%;height:0;padding-bottom:{ratio};">'
            '<iframe src="about:blank" style="position:absolute;width:100%;height:100%;left:0;top:0;'
            'border:none !important;" '
            'data-html={html} onload="{onload}" '
            'allowfullscreen webkitallowfullscreen mozallowfullscreen>'
            '</iframe>'
            '</div></div>').format
        iframe = iframe(html=html, onload=onload, width=self.width, ratio=self.ratio)
    else:
        iframe = ('<iframe src="about:blank" width="{width}" height="{height}"'
                  'style="border:none !important;" '
                  'data-html={html} onload="{onload}" '
                  '"allowfullscreen" "webkitallowfullscreen" "mozallowfullscreen">'
                  '</iframe>').format
        iframe = iframe(html=html, onload=onload, width=self.width, height=self.height)
    return iframe

folium.branca.element.Figure._repr_html_ = _repr_html_


In [4]:
def gridify_polygon(poly,grid_spacing):
    # creates a cartesian grid inside polygon with the input grid_spacing
    # poly: polygon which we want a grid inside
    # grid_spacing: spaceing in lattitude/longitude degrees
    poly_xmin,poly_ymin,poly_xmax,poly_ymax = poly.geometry.total_bounds

    cols = list(np.arange(poly_xmin,poly_xmax+grid_spacing,grid_spacing))
    rows = list(np.arange(poly_ymin,poly_ymax+grid_spacing,grid_spacing))
    rows.reverse()

    polygons = []
    for x in cols:
        for y in rows:
            polygons.append( Polygon([(x,y), (x+grid_spacing, y), (x+grid_spacing, y-grid_spacing), (x, y-grid_spacing)]) )

    grid = gpd.GeoDataFrame({'geometry':polygons})

    grid['isin_poly'] = grid.apply(lambda row: row['geometry'].centroid.within(poly.geometry[0]), axis=1)
    poly_grid = grid[grid.isin_poly == True]
    poly_grid.crs = {'init': 'epsg:4326', 'no_defs': True}
    poly_grid = poly_grid.drop(['isin_poly'], axis = 1)
    
    # Calculate the polygon areas in km
    poly_grid_cart = poly_grid.copy()
    poly_grid_cart = poly_grid_cart.to_crs({'init': 'epsg:3857'})
    poly_grid_cart['poly_area_km'] = poly_grid_cart['geometry'].area/ 10**6
    # Store polygon area
    poly_grid['poly_area_km'] = poly_grid_cart['poly_area_km']
    
    # 
    poly_grid = poly_grid.reset_index()
    return poly_grid

def amenity_in_polygon(amenity_points,poly):
    # returns the amenities that are inside the given polygon
    # When there are zero amenities within the interrogation region, the function returns an empty dataframe as
    # as expected, but also prints out a lot of errors. not a huge issue but annoying.
    # Maybe implement a test for if empty, return 0
    # Example use:
    #         amenity_in_polygon(food_amenities,city_grid.geometry.iloc[38])
    
    # Generate boolean list of whether amenity is in polygon
    indices = amenity_points.apply(lambda row: row['geometry'].within(poly), axis=1)
    if not any(indices): # If all indices are false
        return pd.DataFrame(columns=['A']) # return empty dataframe (not sure what is best to output here )
    else:
        return amenity_points[amenity_points.apply(lambda row: row['geometry'].within(poly), axis=1)]

def avg_dist_to_amenities(interrogation_point,amenity_df,n):
    # calculates the mean distance of the n nearest amenities to the interrogation point
    # If there are less than n amenities in the search it'll just return the average of the known amenities.
    # Example: avg_dist_to_amenities(city_grid.geometry.iloc[39],food_amenities,5)
    dist_to_amenity = amenity_df['geometry'].apply(lambda x: x.distance(interrogation_point))
    dist_to_amenity.sort_values(inplace=True)
    dist_to_amenity[:5]
    if len(dist_to_amenity) >= n:
        return dist_to_amenity[:n].mean()
    elif len(dist_to_amenity) == 0:
        return np.nan
    else:
        return dist_to_amenity.mean()

In [5]:
# Get map network for bike only
# graph = ox.graph_from_point(latlon, distance=dist, network_type='bike')
# graph = ox.graph_from_place(place, network_type='bike')

# Get intersection and streets from graph
# nodes, streets = ox.graph_to_gdfs(graph)

In [6]:
# Generate city info
city = ox.gdf_from_place(place)

# generate city grid
city_grid = gridify_polygon(city,0.01)


In [7]:
# get target locations for training
bike_rentals = ox.pois_from_place(place, amenities=['bicycle_rental'])


In [8]:
# get all amenities in place
amenity_names = ['animal_shelter', 'archive', 'arts_centre', 'atm', 'bank', 'bar', 'bench', 'bench;waste_basket',
                 'bicycle_parking', 'bicycle_repair_station', 'biergarten', 'bureau_de_change', 'bus_station', 'cafe',
                 'car_rental', 'car_sharing', 'car_wash', 'casino', 'charging_station', 'childcare', 'cinema',
                 'circus_school', 'clinic', 'clock', 'club', 'college', 'community_centre', 'compressed_air',
                 'conference_centre', 'courthouse', 'crematorium', 'dentist', 'device_charging_station', 'doctors',
                 'doctors_offices', 'drinking_water', 'embassy', 'events_venue', 'fast_food', 'ferry_terminal',
                 'fire_station', 'fountain', 'fuel', 'gallery', 'gambling', 'garden', 'grave_yard', 'grit_bin',
                 'hospital', 'ice_cream', 'jobcentre', 'kindergarten', 'language_school', 'left_luggage',
                 'library', 'life_boats', 'luggage_locker', 'marketplace', 'monastery', 'money_transfer',
                 'money_transfer; post_office', 'motorcycle_parking', 'music_school', 'music_venue', 'nightclub',
                 'nursing_home', 'parcel_lockers', 'parking', 'parking_entrance', 'parking_space', 'pharmacy',
                 'photo_booth', 'place_of_worship', 'place_of_worship;monastery', 'police', 'post_box', 'post_depot',
                 'post_office', 'prep_school', 'preschool', 'prison', 'pub', 'public_bath', 'public_bookcase',
                 'public_building', 'recycling', 'restaurant', 'restaurant;cafe', 'school', 'shelter',
                 'social_centre', 'social_facility', 'sport', 'stripclub', 'studio', 'swimming_pool', 'swingerclub',
                 'taxi', 'telephone', 'theatre', 'toilets', 'townhall', 'trailer_park', 'trade_school', 'university',
                 'vending_machine', 'venue', 'veterinary', 'waste_basket', 'water', 'water_fountain','yacht_club']

all_amenities = ox.pois_from_place(place, amenities=amenity_names)

# all_amenities = ox.pois_from_point([city.geometry.centroid.y,city.geometry.centroid.x],distance = 1000)


all_amenities['geometry'] = all_amenities.apply(lambda row: row['geometry'].centroid 
                                                if (type(row['geometry']) == Polygon) or (type(row['geometry']) == MultiPolygon)
                                                else row['geometry'], axis=1)



In [9]:
# Generate features dataframe by finding the count of each unique amenity in each region

# create new cleaned df that will store features and target data
cleaned_df = city_grid.copy()
cleaned_df = cleaned_df.reset_index()
cleaned_df['bike_rental_density'] = 0
cleaned_df = cleaned_df.reindex(cleaned_df.columns.tolist() + amenity_names, axis=1) 


# loop through grid points and populate features.
for index, row in cleaned_df.iterrows():
    grid_pt = cleaned_df.geometry.iloc[index]
    amenities_in_grid = amenity_in_polygon(all_amenities,grid_pt)
    
    # fill amenity rows with counts inside each polygon
    if len(amenities_in_grid) > 0:
        amenity_counts = amenities_in_grid['amenity'].value_counts()
        for val, cnt in amenity_counts.iteritems():
            # test if value is in list of features that are selected for ML model
            if val in amenity_names:
                cleaned_df[val].iloc[index] = cnt / cleaned_df.poly_area_km.iloc[index]

    # add target column for bike rentals
    bike_rentals_in_grid = amenity_in_polygon(bike_rentals,grid_pt)
    if len(bike_rentals_in_grid) > 0:
        cleaned_df['bike_rental_density'].iloc[index] = len(bike_rentals_in_grid) / cleaned_df.poly_area_km.iloc[index]
    else:
        cleaned_df['bike_rental_density'].iloc[index] = 0
    
# remove nan values
cleaned_df[amenity_names] = cleaned_df[amenity_names].fillna(0)
# remove unecessary columns
cleaned_df = cleaned_df.drop(columns = ['level_0','index'])
# relable as density 
new_names = [name + '_density' for name in amenity_names]
cleaned_df.rename(columns = dict(zip(amenity_names, new_names)), inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [15]:
bike_rentals.dtypes

osmid                         object
geometry                    geometry
amenity                       object
name                          object
element_type                  object
source                        object
capacity                      object
network                       object
operator                      object
website                       object
payment:credit_cards          object
wheelchair                    object
fee                           object
phone                         object
ref                           object
opening_hours                 object
fixme                         object
addr:street                   object
branch                        object
brand                         object
addr:city                     object
addr:housenumber              object
addr:postcode                 object
addr:state                    object
email                         object
hudson_bike_share:number      object
hudson_bike_share             object
p

In [16]:
# Export geopandas dataframes

cleaned_df.to_file(filename_cleaned_df, driver='GeoJSON')
city_grid.to_file(filename_grid, driver='GeoJSON')
all_amenities[['osmid','geometry','element_type','amenity']].to_file(filename_amenities, driver='GeoJSON')
bike_rentals[['osmid','geometry','amenity','capacity','name','network']].to_file(filename_bike_rentals, driver='GeoJSON')


In [None]:
## TODO: add street type count to cleaned_df as a feature
# this celll doesn't work with arbitrary city

# def get_street_type_counts(interrogation_poly)
# interrogation_poly = city_grid.geometry.iloc[40]
# iterrogation_graph = ox.graph_from_polygon(interrogation_poly)
# interrogation_nodes, interrogation_streets = ox.graph_to_gdfs(iterrogation_graph)

# interrogation_streets['highway'].value_counts()

In [18]:
m = folium.Map([city['geometry'].centroid.y, city['geometry'].centroid.x],
               zoom_start=11,
               tiles="CartoDb dark_matter")

style_city = {'color':'#ebc923 ', 'fillColor': '#ebc923 ', 'weight':'1', 'fillOpacity' : 0.1}
folium.GeoJson(city, style_function=lambda x: style_city).add_to(m)

# grid points
# locs = zip(city_grid['geometry'].centroid.y, city_grid['geometry'].centroid.x)
# for location in locs:
#     folium.CircleMarker(location=location, 
#         color = "white",   radius=1).add_to(m)

# style_region = {'color':'#1FFD09 ', 'fillColor': '#1FFD09 ', 'weight':'1', 'fillOpacity' : 0.1}
# folium.GeoJson(city_grid.geometry.iloc[39], style_function=lambda x: style_region).add_to(m)

# plot bikable streets
# style_street = {'color':'#ebc923 ', 'fillColor': '#ebc923 ', 'weight':'1', 'fillOpacity' : 1}
# m = folium.Map([city['geometry'].centroid.y, city['geometry'].centroid.x],
#                zoom_start=15,
#                tiles="CartoDb dark_matter")
# folium.GeoJson(streets, style_function=lambda x: style_street).add_to(m)

# add amenities
# locs = zip(all_amenities.geometry.y, all_amenities.geometry.x)
# for location in locs:
#     folium.CircleMarker(location=location, 
#         color = "red",   radius=1).add_to(m)

# add bike rentals
locs = zip(bike_rentals.geometry.centroid.y, bike_rentals.geometry.centroid.x)
for location in locs:
    folium.CircleMarker(location=location, 
        color = "yellow",   radius=2).add_to(m)

# m.save("city_view.html")
m

In [19]:
# # find next search area
search_place = 'Montreal, Quebec, Canada'
ox.pois_from_place(search_place, amenities=['bicycle_rental'])

Unnamed: 0,osmid,geometry,amenity,network,element_type,addr:city,addr:postcode,addr:street,capacity,operator,...,designation,payment:credit_cards,source,bicycle_parking,opening_hours,addr:housenumber,addr:province,website,fee,operator:type
282147657,282147657,POINT (-73.60192 45.55383),bicycle_rental,bixi,node,,,,,,...,,,,,,,,,,
286084222,286084222,POINT (-73.60254 45.55289),bicycle_rental,bixi,node,,,,,,...,,,,,,,,,,
286300184,286300184,POINT (-73.60365 45.53531),bicycle_rental,bixi,node,,,,,,...,,,,,,,,,,
427618247,427618247,POINT (-73.56334 45.49534),bicycle_rental,,node,,,,,,...,,,,,,,,,,
458689528,458689528,POINT (-73.55877 45.50085),bicycle_rental,Bixi,node,Montreal,H2Y 2H2,McGill Street,11,Bixi,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6847925587,6847925587,POINT (-73.56736 45.46108),bicycle_rental,,node,,,,,Bixi No 7058,...,,,,,,,,,,
6847925588,6847925588,POINT (-73.57141 45.46301),bicycle_rental,,node,,,,,Bixi,...,,,,,,,,,,
6847929285,6847929285,POINT (-73.57192 45.45942),bicycle_rental,,node,,,,,Bixi,...,,,,,,,,,,
6848697354,6848697354,POINT (-73.58616 45.53877),bicycle_rental,BIXI,node,,,,20,City of Montreal,...,,,,,,,,,yes,public
