In [31]:
## Output the bike-share density that each area could support
## Output bikeability based on Strava data to decide whether an area is a good candidate
## Assumes that bike shares are optimally placed and near optimal capacity
## Use regularization when training the ML model

import pandas as pd
import geopandas as gpd
import osmnx as ox
import folium
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.ops import nearest_points
import branca.colormap as cm
from pprint import pprint
from sklearn.model_selection import RandomizedSearchCV
import joblib
from folium import plugins
import osm_functions as osf


from sklearn.model_selection import train_test_split 
from sklearn import metrics
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from folium import GeoJsonTooltip

In [8]:
## Start validation on an unseen city
place = 'Portland, Oregon, USA'

# Geocode the city boundary (used below for the map centre and the city-limit outline).
# OSMnx renamed `gdf_from_place` to `geocode_to_gdf` and later removed the old name,
# so resolve whichever one the installed version provides.
_geocode_place = getattr(ox, 'geocode_to_gdf', None) or ox.gdf_from_place
city = _geocode_place(place)

# Load the pre-computed GeoJSON tables for the validation city:
# the analysis grid, the per-cell feature/target table, bike rentals and amenities.
folder_name = 'OSM_geo_data/'
city_grid = gpd.read_file(folder_name + 'portland_grid.geojson')
city_ft = gpd.read_file(folder_name + 'portland_feature_target_table.geojson')
city_bike_rentals = gpd.read_file(folder_name + 'portland_bike_rentals.geojson')
city_amenities = gpd.read_file(folder_name + 'portland_amenities.geojson')

In [9]:
# Load the previously trained model from disk (the file name suggests the best random-forest fit).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
loaded_model = joblib.load('best_RF_model.sav')

In [10]:
# Display the hyperparameters of the loaded estimator as a sanity check.
loaded_model.get_params()

{'bootstrap': False,
 'criterion': 'mse',
 'max_depth': 90,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 5,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 800,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [11]:
# Feature columns fed to the model. The order is significant: it is the column
# order passed to `predict` and is zipped against `feature_importances_` later on.
# Every column is an amenity name followed by the `_density` suffix.
top_features = [
    amenity + '_density'
    for amenity in (
        'cafe', 'bar', 'bicycle_parking', 'fast_food',
        'bank', 'pharmacy', 'pub', 'atm', 'car_sharing',
        'theatre', 'post_office', 'drinking_water', 'school',
        'cinema', 'bench', 'motorcycle_parking', 'ice_cream',
        'recycling', 'college', 'toilets', 'arts_centre',
        'nightclub', 'library', 'taxi', 'marketplace',
        'community_centre', 'place_of_worship', 'waste_basket',
        'clinic', 'social_facility', 'fountain', 'bureau_de_change',
        'kindergarten', 'police', 'veterinary',
    )
]

In [12]:
# Predict the bike-rental density of every grid cell with the loaded model.
city_predict = loaded_model.predict(city_ft[top_features])

# Put actual and predicted densities side by side on a copy of the grid.
city_comparison = city_grid.copy()
city_comparison['bike_rental_density'] = city_ft['bike_rental_density']
city_comparison['RF_prediction'] = city_predict
city_comparison['bike_rental_diff'] = (
    city_comparison['RF_prediction'] - city_comparison['bike_rental_density']
)

# Shared normalisers: the largest density (actual or predicted) and the largest
# absolute prediction error.
scale_factor = max(city_predict.max(), city_comparison['bike_rental_density'].max())
diff_factor = city_comparison['bike_rental_diff'].abs().max()

# Both scaled columns use the same factor so they stay comparable.
city_comparison['scaled_actual_density'] = city_ft['bike_rental_density'] / scale_factor
city_comparison['scaled_pred_density'] = city_comparison['RF_prediction'] / scale_factor

# Error metrics of the prediction against the actual density.
density_actual = city_comparison.bike_rental_density
density_predicted = city_comparison.RF_prediction
rf_mae = metrics.mean_absolute_error(density_actual, density_predicted)
rf_mse = metrics.mean_squared_error(density_actual, density_predicted)

print('Random forest mean absolute error (MAE): %4.3f' % rf_mae)
print('Random forest mean squared error (MSE): %4.3f' % rf_mse)
print('Random forest root mean squared error (RMSE): %4.3f' % np.sqrt(rf_mse))


Random forest mean absolute error (MAE): 0.207
Random forest mean squared error (MSE): 0.210
Random forest root mean squared error (RMSE): 0.458


In [13]:
# Build the per-cell lookups used by the map style functions.
# Keys are the stringified row index because the style functions look cells up
# via feature['id'].

pred_dict = city_comparison['scaled_pred_density']
actual_dict = city_comparison['scaled_actual_density']
diff_dict = city_comparison['bike_rental_diff']

# Density layers: the scaled density times 0.5.
pred_opacity = {str(cell): share * 0.5 for cell, share in pred_dict.items()}
actual_opacity = {str(cell): share * 0.5 for cell, share in actual_dict.items()}
# Difference layer: absolute error relative to twice the largest absolute error.
diff_opacity = {str(cell): abs(delta) / (diff_factor * 2) for cell, delta in diff_dict.items()}

# Diverging colour scale centred on zero difference.
colormap = cm.linear.RdBu_09.scale(-diff_factor, diff_factor)

diff_color = {str(cell): colormap(delta) for cell, delta in diff_dict.items()}




In [45]:
# Preview the first rows of the comparison table (actual vs. predicted density per grid cell).
city_comparison.head()


Unnamed: 0,index,poly_area_km,geometry,bike_rental_density,RF_prediction,bike_rental_diff,scaled_actual_density,scaled_pred_density
0,6,1.771064,"POLYGON ((-122.83675 45.60254, -122.82675 45.6...",0.0,0.001791,0.001791,0.0,0.000291
1,30,1.771064,"POLYGON ((-122.82675 45.60254, -122.81675 45.6...",0.0,0.001791,0.001791,0.0,0.000291
2,53,1.77138,"POLYGON ((-122.81675 45.61254, -122.80675 45.6...",0.0,0.001791,0.001791,0.0,0.000291
3,54,1.771064,"POLYGON ((-122.81675 45.60254, -122.80675 45.6...",0.0,0.001791,0.001791,0.0,0.000291
4,77,1.77138,"POLYGON ((-122.80675 45.61254, -122.79675 45.6...",0.0,0.001791,0.001791,0.0,0.000291


In [47]:
# Sum density * cell area over all grid cells to compare the actual and predicted
# totals (presumably station counts, if poly_area_km is the cell area; TODO confirm units).
cell_area = city_comparison.poly_area_km
total_actual = (cell_area * city_comparison.bike_rental_density).sum()
total_predicted = (cell_area * city_comparison.RF_prediction).sum()

print('Total actual bike shares %04.2f' % total_actual)
print('Total predicted bike shares %04.2f' % total_predicted)

Total actual bike shares 76.00
Total predicted bike shares 203.27


In [15]:
# Interactive map comparing actual and predicted bike-share density per grid cell.

# Centre the map on the city boundary's centroid. Use the first row as a scalar point
# so folium receives plain floats rather than one-element Series
# (assumes the geocoded boundary has a single row; TODO confirm).
city_centroid = city.geometry.centroid.iloc[0]
m = folium.Map([city_centroid.y, city_centroid.x],
               zoom_start=11,
               tiles="CartoDb positron")

# City limit outline (no fill).
style_city = {'color':'#ebc923 ', 'fillColor': '#ebc923 ', 'weight':'2', 'fillOpacity' : 0}
folium.GeoJson(city,
               style_function=lambda x: style_city,
               name='City Limit').add_to(m)

# Plot actual bike share density.
# actual_opacity is already scaled by 0.5 when it is built, exactly like pred_opacity,
# so it is used as-is here: halving it again would draw the actual layer at half the
# opacity of the prediction layer for the same density.
folium.GeoJson(
    city_comparison['geometry'],
    name='Actual bike share density',
    show = False,
    style_function=lambda feature: {
        'fillColor': '#04d45b',
        'color': 'black',
        'weight': 0,
        'fillOpacity': actual_opacity[feature['id']],
    }
).add_to(m)

# Plot predictions of bike share density
folium.GeoJson(
    city_comparison['geometry'],
    name='Prediction: bike share density',
    show = False,
    style_function=lambda feature: {
        'fillColor': '#04d45b',
        'color': 'black',
        'weight': 0,
        'fillOpacity': pred_opacity[feature['id']],
    }
).add_to(m)

# Plot difference between predicted and actual, with a hover tooltip showing both values
tooltip=GeoJsonTooltip(
    fields=["bike_rental_density", "RF_prediction"],
    aliases=["Bike share density:", "ML prediction:"],
    localize=True,
    sticky=False,
    labels=True,
)

# The full table (not just the geometry) is passed here so the tooltip fields exist
# as feature properties.
folium.GeoJson(
    city_comparison,
    name='Difference: bike share density',
    tooltip=tooltip,
    style_function=lambda feature: {
        'fillColor': diff_color[feature['id']],
        'color': 'black',
        'weight': 0,
        'fillOpacity': diff_opacity[feature['id']],
    }
).add_to(m)

colormap.caption = 'Difference in actual vs predicted bike share density'
colormap.add_to(m)


folium.LayerControl().add_to(m)

# m.save("portland_validation.html")
m

In [16]:
# Rank the features by the model's importance score (rounded to 4 decimals), highest first.
importance_ranking = sorted(
    (
        (round(importance, 4), feature)
        for importance, feature in zip(loaded_model.feature_importances_, top_features)
    ),
    reverse=True,
)
print(importance_ranking)

[(0.1516, 'cafe_density'), (0.1479, 'bar_density'), (0.0826, 'fast_food_density'), (0.0818, 'bicycle_parking_density'), (0.0563, 'bank_density'), (0.0489, 'pharmacy_density'), (0.0451, 'pub_density'), (0.0389, 'theatre_density'), (0.0313, 'atm_density'), (0.0293, 'drinking_water_density'), (0.023, 'fountain_density'), (0.0215, 'school_density'), (0.0194, 'place_of_worship_density'), (0.0188, 'car_sharing_density'), (0.0181, 'nightclub_density'), (0.0169, 'post_office_density'), (0.0149, 'bench_density'), (0.0145, 'toilets_density'), (0.0132, 'college_density'), (0.0131, 'motorcycle_parking_density'), (0.0127, 'waste_basket_density'), (0.0114, 'library_density'), (0.0113, 'cinema_density'), (0.0089, 'arts_centre_density'), (0.0076, 'ice_cream_density'), (0.0076, 'clinic_density'), (0.0073, 'taxi_density'), (0.0066, 'kindergarten_density'), (0.0063, 'community_centre_density'), (0.0062, 'recycling_density'), (0.006, 'veterinary_density'), (0.0058, 'marketplace_density'), (0.0057, 'police

In [19]:
def _peak_normalized(density):
    """Divide a density column by its maximum value.

    When the column has no positive maximum (e.g. the amenity does not occur in
    this city) the plain division would be 0/0 and yield NaN for every cell,
    which is not a usable opacity. In that case an all-zero Series with the same
    index is returned instead.
    """
    peak = density.max()
    if not peak > 0:
        return pd.Series(0.0, index=density.index)
    return density / peak


# Opacity lookups (capped at 0.75) keyed by the stringified row index, matching
# the feature['id'] lookups done by the map style functions.
ft_dict = _peak_normalized(city_ft['car_rental_density'])
ft_opacity = {str(key): ft_dict[key]*0.75 for key in ft_dict.keys()}

bp_dict = _peak_normalized(city_ft['bicycle_parking_density'])
bp_opacity = {str(key): bp_dict[key]*0.75 for key in bp_dict.keys()}

In [20]:

# Side-by-side maps: actual/predicted density on the left (m.m1),
# prediction error and a feature density on the right (m.m2).

# Centre on the city boundary's centroid. Use the first row as a scalar point so
# folium receives plain floats rather than one-element Series
# (assumes the geocoded boundary has a single row; TODO confirm).
city_centroid = city.geometry.centroid.iloc[0]
m = plugins.DualMap([city_centroid.y, city_centroid.x],
               zoom_start=11,
               tiles="CartoDb positron")

# City limit outline (no fill) on both panes.
style_city = {'color':'#ebc923 ', 'fillColor': '#ebc923 ', 'weight':'2', 'fillOpacity' : 0}
folium.GeoJson(city,
               style_function=lambda x: style_city,
               name='City Limit').add_to(m.m1)
folium.GeoJson(city,
               style_function=lambda x: style_city,
               name='City Limit').add_to(m.m2)

# Plot actual bike share density.
# actual_opacity is already scaled by 0.5 when it is built, exactly like pred_opacity,
# so it is used as-is here: halving it again would draw the actual layer at half the
# opacity of the prediction layer for the same density.
folium.GeoJson(
    city_comparison['geometry'],
    name='Actual bike share density',
    show = True,
    style_function=lambda feature: {
        'fillColor': '#00c40a',
        'color': 'black',
        'weight': 0,
        'fillOpacity': actual_opacity[feature['id']],
    }
).add_to(m.m1)

# Plot predictions of bike share density
folium.GeoJson(
    city_comparison['geometry'],
    name='Prediction: bike share density',
    show = False,
    style_function=lambda feature: {
        'fillColor': '#00c40a',
        'color': 'black',
        'weight': 0,
        'fillOpacity': pred_opacity[feature['id']],
    }
).add_to(m.m1)

# Plot difference between predicted and actual
folium.GeoJson(
    city_comparison['geometry'],
    name='Difference: bike share density',
    style_function=lambda feature: {
        'fillColor': diff_color[feature['id']],
        'color': 'black',
        'weight': 0,
        'fillOpacity': diff_opacity[feature['id']],
    }
).add_to(m.m2)

# Plot the feature density held in ft_opacity (built from car_rental_density)
folium.GeoJson(
    city_ft['geometry'],
    name='feature density',
    show = False,
    style_function=lambda feature: {
        'fillColor': '#5b03ff',
        'color': 'black',
        'weight': 0,
        'fillOpacity': ft_opacity[feature['id']],
    }
).add_to(m.m2)


folium.LayerControl(collapsed=True).add_to(m)
# m.save(os.path.join('results', 'Plugins_11.html'))
m