In [1]:
## Output: the bike-share density that each area could support
## Output: bikeability (based on Strava data), to decide whether an area is a good candidate
## Assumes that existing bike shares are optimally placed and near optimal capacity
## TODO: use regularization when training the ML model

import pandas as pd
import geopandas as gpd
import osmnx as ox
import folium
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.ops import nearest_points

from sklearn.model_selection import train_test_split 
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Workaround to fix chrome issue where folium won't plot maps with a large number of layers
# See comment by dstein64 at: https://github.com/python-visualization/folium/issues/812

import base64
def _repr_html_(self, **kwargs):
    html = base64.b64encode(self.render(**kwargs).encode('utf8')).decode('utf8')
    onload = (
        'this.contentDocument.open();'
        'this.contentDocument.write(atob(this.getAttribute(\'data-html\')));'
        'this.contentDocument.close();'
    )
    if self.height is None:
        iframe = (
            '<div style="width:{width};">'
            '<div style="position:relative;width:100%;height:0;padding-bottom:{ratio};">'
            '<iframe src="about:blank" style="position:absolute;width:100%;height:100%;left:0;top:0;'
            'border:none !important;" '
            'data-html={html} onload="{onload}" '
            'allowfullscreen webkitallowfullscreen mozallowfullscreen>'
            '</iframe>'
            '</div></div>').format
        iframe = iframe(html=html, onload=onload, width=self.width, ratio=self.ratio)
    else:
        iframe = ('<iframe src="about:blank" width="{width}" height="{height}"'
                  'style="border:none !important;" '
                  'data-html={html} onload="{onload}" '
                  '"allowfullscreen" "webkitallowfullscreen" "mozallowfullscreen">'
                  '</iframe>').format
        iframe = iframe(html=html, onload=onload, width=self.width, height=self.height)
    return iframe

# Monkey-patch: install the function defined above as the notebook HTML repr
# of Figure, so maps displayed from here on use the data-html/onload iframe.
folium.branca.element.Figure._repr_html_ = _repr_html_


In [3]:
# Input locations: every layer is read from `folder_name` and its file name is
# prefixed with `root_name`. Switch city by changing `root_name` and `place`.
folder_name = 'OSM_geo_data/'
root_name = 'chicago'
# place = 'Glasgow City, Scotland, UK'
place = 'Chicago, Illinois, USA'

filename_cleaned_df = folder_name + root_name + '_feature_target_table.geojson'
filename_grid = folder_name + root_name + '_grid.geojson'
filename_amenities = folder_name + root_name + '_amenities.geojson'
filename_bike_rentals = folder_name + root_name + '_bike_rentals.geojson'

# Load the GeoJSON layers (presumably produced by an earlier pre-processing step).
df = gpd.read_file(filename_cleaned_df)          # feature/target table
city_grid = gpd.read_file(filename_grid)         # grid geometries
all_amenities = gpd.read_file(filename_amenities)
bike_rentals = gpd.read_file(filename_bike_rentals)

# Geocode the place name to a GeoDataFrame (used below to centre and outline
# the map). `gdf_from_place` was renamed to `geocode_to_gdf` and later removed
# from OSMnx, so prefer the new name and fall back to the old one.
if hasattr(ox, 'geocode_to_gdf'):
    city = ox.geocode_to_gdf(place)
else:
    city = ox.gdf_from_place(place)



In [4]:
# Feature columns are named '<amenity>_density'; list the amenity stems once and
# derive the column names. The order matches the column order used for training.
features = [
    amenity + '_density'
    for amenity in (
        'pharmacy', 'recycling', 'place_of_worship', 'post_box',
        'library', 'post_office', 'parking', 'fuel', 'bank',
        'pub', 'telephone', 'toilets', 'taxi', 'bicycle_parking',
        'motorcycle_parking', 'fast_food', 'bar', 'life_boats',
        'restaurant', 'arts_centre', 'music_venue', 'nightclub',
        'cafe', 'atm', 'community_centre', 'jobcentre', 'doctors',
        'cinema', 'grave_yard', 'police', 'bus_station', 'theatre',
        'bureau_de_change', 'hospital', 'bench', 'school', 'courthouse',
        'ice_cream', 'fountain', 'left_luggage', 'drinking_water',
        'casino', 'car_rental', 'car_wash', 'ferry_terminal', 'dentist',
        'townhall', 'shelter', 'parking_entrance', 'conference_centre',
        'marketplace', 'vending_machine', 'waste_basket', 'clock',
        'studio', 'veterinary', 'gallery', 'gambling', 'kindergarten',
        'social_facility', 'charging_station', 'car_sharing', 'clinic',
        'water', 'compressed_air', 'public_building', 'social_centre',
        'childcare', 'grit_bin', 'bicycle_repair_station', 'events_venue',
        'embassy', 'college', 'circus_school', 'parcel_lockers',
        'money_transfer', 'photo_booth', 'luggage_locker', 'university',
        'venue', 'swimming_pool', 'fire_station', 'post_depot',
        'crematorium', 'sport', 'nursing_home', 'biergarten', 'garden',
        'prison', 'club', 'parking_space', 'trailer_park', 'archive',
        'monastery',
    )
]

# Regression target, kept as a one-element list so that selecting it from a
# DataFrame yields a single-column frame rather than a Series.
target = ['bike_rental_density']

In [5]:
# Separate the feature matrix from the target, then hold out 20% of the rows
# for evaluation. The fixed random_state makes the split repeatable.
X, y = df[features], df[target]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=2,
    test_size=0.2,
)

In [6]:
# Fit a plain least-squares linear model on the training split
# (`fit` returns the estimator itself, so construction and fitting chain).
bike_share_LM = LinearRegression().fit(X_train, y_train)

# Predict on the held-out rows; a density cannot be negative, so any negative
# prediction is replaced by zero.
_raw_predictions = bike_share_LM.predict(X_test)
y_pred = np.where(_raw_predictions < 0, 0, _raw_predictions)


In [7]:
# Build a per-row comparison of the held-out target against the prediction.
# `y_pred` is flattened first: the model was fitted on a one-column target
# frame, so the prediction is expected to be a single-column 2-D array.
_predicted = np.ravel(y_pred)

comparison = y_test.copy()
comparison['y_pred'] = _predicted

# Signed error: positive where the model predicts more density than observed.
diff = comparison.y_pred - comparison.bike_rental_density
comparison['difference'] = diff

# Score in [0, 1]: prediction relative to the largest prediction in the test
# set. If there are no rows, or every prediction was clipped to zero, the
# score is 0 everywhere instead of NaN from a division by zero.
_peak = _predicted.max() if _predicted.size else 0
comparison['bike_share_score'] = _predicted / _peak if _peak > 0 else 0.0

# Grid indices to draw on the map. NOTE(review): despite the name, this is
# every row of the test split, not only rows with a positive prediction.
bike_share_pos_ind = list(comparison.index)


In [8]:
# Centre the map on the centroid of the geocoded place. `centroid` is a
# GeoSeries, so take the first (presumably only) point explicitly rather than
# passing single-element Series and relying on implicit float() conversion.
_city_centre = city.geometry.centroid.iloc[0]
m = folium.Map([_city_centre.y, _city_centre.x],
               zoom_start=11,
               tiles="CartoDb dark_matter")

# Outline of the city boundary.
style_city = {'color':'#ebc923 ', 'fillColor': '#ebc923 ', 'weight':'1', 'fillOpacity' : 0.1}
folium.GeoJson(city, style_function=lambda x: style_city).add_to(m)

# plot grid points
# locs = zip(city_grid['geometry'].centroid.y, city_grid['geometry'].centroid.x)
# for location in locs:
#     folium.CircleMarker(location=location, 
#         color = "white",   radius=1).add_to(m)

# plot predictions of bike demand: one layer per grid cell, with the fill
# opacity set to that cell's normalised score.
# NOTE(review): style_region is not used by the loop below; kept in case a
# later cell refers to it.
style_region = {'color':'#1FFD09 ', 'fillColor': '#1FFD09 ', 'weight':'1', 'fillOpacity' : 0.1}
_cell_geometry = city_grid.geometry
_cell_index = city_grid.index
_cell_score = comparison.bike_share_score
for ind in bike_share_pos_ind:
    folium.Choropleth(_cell_geometry[_cell_index == ind], fill_color = '#1FFD09',
                      fill_opacity = _cell_score.loc[ind] ).add_to(m)

# plot bikable streets
# m = folium.Map(latlon,
#                zoom_start=15,
#                tiles="CartoDb dark_matter")
# folium.GeoJson(streets, style_function=lambda x: style).add_to(m)

# add cafes
# locs = zip(all_amenities.geometry.y, all_amenities.geometry.x)
# for location in locs:
#     folium.CircleMarker(location=location, 
#         color = "red",   radius=1).add_to(m)

# add bike rentals
# locs = zip(bike_rentals.geometry.y, bike_rentals.geometry.x)
# for location in locs:
#     folium.CircleMarker(location=location, 
#         color = "yellow",   radius=2, fill=True).add_to(m)

# m.save("chicago_bike_demand.html")
m

In [9]:
# Preview the first rows of the amenities layer loaded earlier.
all_amenities.head()

Unnamed: 0,osmid,element_type,amenity,geometry
0,20217109,node,ferry_terminal,POINT (-87.62252 41.88914)
1,20217237,node,ferry_terminal,POINT (-87.63177 41.88782)
2,20217428,node,,POINT (-87.63790 41.88221)
3,20217429,node,,POINT (-87.63789 41.88202)
4,20217442,node,ferry_terminal,POINT (-87.63778 41.87916)
