In [1]:
import pandas as pd
import geopandas as gpd
import osmnx as ox
import folium
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.ops import nearest_points
from itertools import chain


In [2]:
## INPUTS
# Define region and radius of interest
place = 'Glasgow City, Scotland, UK' # Glasgow: [55.8642,-4.2518]
# place = 'San Francisco, California, USA' 

filename_cleaned_df = 'glasgow_feature_target_table.csv'

In [3]:
# Workaround to fix chrome issue where folium won't plot maps with a large number of layers
# See comment by dstein64 at: https://github.com/python-visualization/folium/issues/812

import base64
def _repr_html_(self, **kwargs):
    html = base64.b64encode(self.render(**kwargs).encode('utf8')).decode('utf8')
    onload = (
        'this.contentDocument.open();'
        'this.contentDocument.write(atob(this.getAttribute(\'data-html\')));'
        'this.contentDocument.close();'
    )
    if self.height is None:
        iframe = (
            '<div style="width:{width};">'
            '<div style="position:relative;width:100%;height:0;padding-bottom:{ratio};">'
            '<iframe src="about:blank" style="position:absolute;width:100%;height:100%;left:0;top:0;'
            'border:none !important;" '
            'data-html={html} onload="{onload}" '
            'allowfullscreen webkitallowfullscreen mozallowfullscreen>'
            '</iframe>'
            '</div></div>').format
        iframe = iframe(html=html, onload=onload, width=self.width, ratio=self.ratio)
    else:
        iframe = ('<iframe src="about:blank" width="{width}" height="{height}"'
                  'style="border:none !important;" '
                  'data-html={html} onload="{onload}" '
                  '"allowfullscreen" "webkitallowfullscreen" "mozallowfullscreen">'
                  '</iframe>').format
        iframe = iframe(html=html, onload=onload, width=self.width, height=self.height)
    return iframe

folium.branca.element.Figure._repr_html_ = _repr_html_


In [4]:
def gridify_polygon(poly,grid_spacing):
    # creates a cartesian grid inside polygon with the input grid_spacing
    # poly: polygon which we want a grid inside
    # grid_spacing: spaceing in lattitude/longitude degrees
    poly_xmin,poly_ymin,poly_xmax,poly_ymax = poly.geometry.total_bounds

    cols = list(np.arange(poly_xmin,poly_xmax+grid_spacing,grid_spacing))
    rows = list(np.arange(poly_ymin,poly_ymax+grid_spacing,grid_spacing))
    rows.reverse()

    polygons = []
    for x in cols:
        for y in rows:
            polygons.append( Polygon([(x,y), (x+grid_spacing, y), (x+grid_spacing, y-grid_spacing), (x, y-grid_spacing)]) )

    grid = gpd.GeoDataFrame({'geometry':polygons})
    grid['center_x'] = grid['geometry'].centroid.x
    grid['center_y'] = grid['geometry'].centroid.y
    grid['center'] = grid['geometry'].centroid

    grid['isin_poly'] = grid.apply(lambda row: row['center'].within(poly.geometry[0]), axis=1)
    poly_grid = grid[grid.isin_poly == True]
    poly_grid.crs = {'init': 'epsg:4326', 'no_defs': True}
    poly_grid = poly_grid.drop(['isin_poly'], axis = 1)
    
    # Calculate the polygon areas in km
    poly_grid_cart = poly_grid.copy()
    poly_grid_cart = poly_grid_cart.to_crs({'init': 'epsg:3857'})
    poly_grid_cart['poly_area_km'] = poly_grid_cart['geometry'].area/ 10**6
    # Store polygon area
    poly_grid['poly_area_km'] = poly_grid_cart['poly_area_km']
    
    # 
    poly_grid = poly_grid.reset_index()
    return poly_grid

def amenity_in_polygon(amenity_points,poly):
    # returns the amenities that are inside the given polygon
    # When there are zero amenities within the interrogation region, the function returns an empty dataframe as
    # as expected, but also prints out a lot of errors. not a huge issue but annoying.
    # Maybe implement a test for if empty, return 0
    # Example use:
    #         amenity_in_polygon(food_amenities,city_grid.geometry.iloc[38])
    
    # Generate boolean list of whether amenity is in polygon
    indices = amenity_points.apply(lambda row: row['geometry'].within(poly), axis=1)
    if not any(indices): # If all indices are false
#         print('No amenities in polygon')
        return pd.DataFrame(columns=['A']) # return empty dataframe (not sure what is best to output here )
    else:
        return amenity_points[amenity_points.apply(lambda row: row['geometry'].within(poly), axis=1)]

def avg_dist_to_amenities(interrogation_point,amenity_df,n):
    # calculates the mean distance of the n nearest amenities to the interrogation point
    # If there are less than n amenities in the search it'll just return the average of the known amenities.
    # Example: avg_dist_to_amenities(city_grid.geometry.iloc[39],food_amenities,5)
    dist_to_amenity = amenity_df['geometry'].apply(lambda x: x.distance(interrogation_point))
    dist_to_amenity.sort_values(inplace=True)
    dist_to_amenity[:5]
    if len(dist_to_amenity) >= n:
        return dist_to_amenity[:n].mean()
    elif len(dist_to_amenity) == 0:
        return np.nan
    else:
        return dist_to_amenity.mean()

In [5]:
# Get map network for bike only
# graph = ox.graph_from_point(latlon, distance=dist, network_type='bike')
graph = ox.graph_from_place(place, network_type='bike')

# Get intersection and streets from graph
nodes, streets = ox.graph_to_gdfs(graph)

In [6]:
# Generate city grid for interrogation
city = ox.gdf_from_place(place)
city['center_x'] = city['geometry'].centroid.x
city['center_y'] = city['geometry'].centroid.y
city_grid = gridify_polygon(city,0.01)
city_grid.head()

Unnamed: 0,index,geometry,center_x,center_y,center,poly_area_km
0,17,"POLYGON ((-4.38320 55.92128, -4.37320 55.92128...",-4.378201,55.916277,POINT (-4.37820 55.91628),2.211268
1,18,"POLYGON ((-4.38320 55.91128, -4.37320 55.91128...",-4.378201,55.906277,POINT (-4.37820 55.90628),2.210698
2,19,"POLYGON ((-4.38320 55.90128, -4.37320 55.90128...",-4.378201,55.896277,POINT (-4.37820 55.89628),2.210128
3,20,"POLYGON ((-4.38320 55.89128, -4.37320 55.89128...",-4.378201,55.886277,POINT (-4.37820 55.88628),2.209559
4,23,"POLYGON ((-4.38320 55.86128, -4.37320 55.86128...",-4.378201,55.856277,POINT (-4.37820 55.85628),2.207852


In [7]:
street_types = streets['highway'].value_counts()
street_types.columns = ["type", "count"]
street_categories = list(street_types.index)

In [8]:

city_grid['poly_area_km'].isna().sum() 

0

In [9]:
# get target locations for training
bike_rentals = ox.pois_from_place(place, amenities=['bicycle_rental'])

# # get amentities for place
# bike_features = ["bicycle_parking",'bicycle_repair_station']
# bike_amenities = ox.pois_from_place(place, amenities=bike_features)

# food_features = ['cafe']
# food_amenities = ox.pois_from_place(place, amenities=food_features)


# # Convert all amenities to points if they are polygons
# bike_rentals['geometry'] = bike_rentals.apply(lambda row: row['geometry'].centroid if type(row['geometry']) == Polygon 
#                      else row['geometry'], axis=1)
# bike_amenities['geometry'] = bike_amenities.apply(lambda row: row['geometry'].centroid if type(row['geometry']) == Polygon 
#                      else row['geometry'], axis=1)
# food_amenities['geometry'] = food_amenities.apply(lambda row: row['geometry'].centroid if type(row['geometry']) == Polygon 
#                      else row['geometry'], axis=1)


In [10]:
# get all amenities in place
amenity_names = ['pharmacy', 'recycling', 'place_of_worship', 'post_box', 'library', 'post_office', 'parking', 'fuel',
                 'bank', 'pub', 'telephone', 'toilets', 'taxi', 'bicycle_parking', 'motorcycle_parking', 'fast_food',
                 'bar', 'life_boats', 'restaurant', 'arts_centre', 'music_venue', 'nightclub', 'cafe', 'atm',
                 'community_centre', 'jobcentre', 'doctors', 'cinema', 'grave_yard', 'police', 'bus_station',
                 'theatre', 'bureau_de_change', 'hospital', 'bench', 'school', 'courthouse', 'ice_cream', 'fountain',
                 'left_luggage', 'drinking_water', 'casino', 'car_rental', 'car_wash', 'ferry_terminal', 'dentist',
                 'townhall', 'shelter', 'parking_entrance', 'conference_centre', 'marketplace', 'vending_machine',
                 'waste_basket', 'clock', 'studio', 'veterinary', 'gallery', 'gambling', 'kindergarten',
                 'social_facility', 'charging_station', 'car_sharing', 'clinic', 'water', 'compressed_air',
                 'public_building', 'social_centre', 'childcare', 'grit_bin', 'bicycle_repair_station',
                 'events_venue', 'embassy', 'college', 'circus_school', 'parcel_lockers', 'money_transfer',
                 'photo_booth', 'luggage_locker', 'university', 'venue', 'swimming_pool', 'fire_station',
                 'post_depot', 'crematorium', 'sport', 'nursing_home', 'biergarten', 'garden', 'prison', 'club',
                 'parking_space', 'trailer_park', 'archive', 'monastery']

all_amenities = amenities = ox.pois_from_place(place, amenities=amenity_names)
all_amenities['geometry'] = all_amenities.apply(lambda row: row['geometry'].centroid 
                                                if (type(row['geometry']) == Polygon) or (type(row['geometry']) == MultiPolygon)
                                                else row['geometry'], axis=1)



In [11]:
# Generate features dataframe by finding the count of each unique amenity in each region

# get list of unique amenity names
# amenity_names = list(all_amenities['amenity'].unique())
# amenity_names = [x for x in amenity_names if str(x) != 'nan']

# create new cleaned df that will store features and target data
cleaned_df = city_grid.copy()
cleaned_df = cleaned_df.reset_index()
cleaned_df['bike_rental_density'] = 0
cleaned_df = cleaned_df.reindex(cleaned_df.columns.tolist() + amenity_names, axis=1) 

# loop through grid points and populate bike rental density



# loop through grid points and populate features.
for index, row in cleaned_df.iterrows():
    grid_pt = cleaned_df.geometry.iloc[index]
    amenities_in_grid = amenity_in_polygon(all_amenities,grid_pt)
    
    # fill amenity rows with counts inside each polygon
    if len(amenities_in_grid) > 0:
        amenity_counts = amenities_in_grid['amenity'].value_counts()
        for val, cnt in amenity_counts.iteritems():
            cleaned_df[val].iloc[index] = cnt / cleaned_df.poly_area_km.iloc[index]

    # add target column for bike rentals
    bike_rentals_in_grid = amenity_in_polygon(bike_rentals,grid_pt)
    if len(bike_rentals_in_grid) > 0:
        cleaned_df['bike_rental_density'].iloc[index] = len(bike_rentals_in_grid) / cleaned_df.poly_area_km.iloc[index]
    else:
        cleaned_df['bike_rental_density'].iloc[index] = 0
    
# remove nan values
cleaned_df[amenity_names] = cleaned_df[amenity_names].fillna(0)
# remove unecessary columns
cleaned_df = cleaned_df.drop(columns = ['level_0','index'])
# relable as density 
new_names = [name + '_density' for name in amenity_names]
cleaned_df.rename(columns = dict(zip(amenity_names, new_names)), inplace=True)

cleaned_df.head() 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


Unnamed: 0,geometry,center_x,center_y,center,poly_area_km,bike_rental_density,pharmacy_density,recycling_density,place_of_worship_density,post_box_density,...,sport_density,nursing_home_density,biergarten_density,garden_density,prison_density,club_density,parking_space_density,trailer_park_density,archive_density,monastery_density
0,"POLYGON ((-4.38320 55.92128, -4.37320 55.92128...",-4.378201,55.916277,POINT (-4.37820 55.91628),2.211268,0.0,0.452229,0.0,1.356688,0.452229,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"POLYGON ((-4.38320 55.91128, -4.37320 55.91128...",-4.378201,55.906277,POINT (-4.37820 55.90628),2.210698,0.0,0.0,0.0,0.452346,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"POLYGON ((-4.38320 55.90128, -4.37320 55.90128...",-4.378201,55.896277,POINT (-4.37820 55.89628),2.210128,0.0,0.0,0.0,0.452462,0.452462,...,0.0,0.452462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"POLYGON ((-4.38320 55.89128, -4.37320 55.89128...",-4.378201,55.886277,POINT (-4.37820 55.88628),2.209559,0.0,0.452579,0.0,0.905158,1.357737,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"POLYGON ((-4.38320 55.86128, -4.37320 55.86128...",-4.378201,55.856277,POINT (-4.37820 55.85628),2.207852,0.0,0.0,0.0,0.0,0.452929,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# cleaned_df.rename(columns = dict(zip(amenity_names, amenity_names + '_density')), inplace=True)


In [13]:
cleaned_df.to_csv(filename_cleaned_df,index=False)
city_g

In [14]:
## TODO: add street type count to cleaned_df as a feature
# this celll doesn't work with arbitrary city

# def get_street_type_counts(interrogation_poly)
# interrogation_poly = city_grid.geometry.iloc[40]
# iterrogation_graph = ox.graph_from_polygon(interrogation_poly)
# interrogation_nodes, interrogation_streets = ox.graph_to_gdfs(iterrogation_graph)

# interrogation_streets['highway'].value_counts()

In [15]:
m = folium.Map([city.center_y, city.center_x],
               zoom_start=11,
               tiles="CartoDb dark_matter")

style_city = {'color':'#ebc923 ', 'fillColor': '#ebc923 ', 'weight':'1', 'fillOpacity' : 0.1}
folium.GeoJson(city, style_function=lambda x: style_city).add_to(m)

# grid points
locs = zip(city_grid.center_y, city_grid.center_x)
for location in locs:
    folium.CircleMarker(location=location, 
        color = "white",   radius=1).add_to(m)

style_region = {'color':'#1FFD09 ', 'fillColor': '#1FFD09 ', 'weight':'1', 'fillOpacity' : 0.1}
folium.GeoJson(city_grid.geometry.iloc[39], style_function=lambda x: style_region).add_to(m)
# plot bikable streets
# m = folium.Map(latlon,
#                zoom_start=15,
#                tiles="CartoDb dark_matter")
# folium.GeoJson(streets, style_function=lambda x: style).add_to(m)

# add cafes
locs = zip(all_amenities.geometry.y, all_amenities.geometry.x)
for location in locs:
    folium.CircleMarker(location=location, 
        color = "red",   radius=1).add_to(m)

# add bike rentals
locs = zip(bike_rentals.geometry.y, bike_rentals.geometry.x)
for location in locs:
    folium.CircleMarker(location=location, 
        color = "yellow",   radius=2).add_to(m)

m.save("city_view.html")
m