In [1]:
## Output: the bike-share density that an area could support
## Output: bikeability, based on Strava data, to decide whether an area is a good candidate
## Assumes that existing bike shares are optimally placed and near optimal capacity
## Use regularization when training the ML model

import pandas as pd
import pandas_profiling
import geopandas as gpd
import osmnx as ox
import folium
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.ops import nearest_points
import branca.colormap as cm
from pprint import pprint
from sklearn.model_selection import RandomizedSearchCV
import joblib
from folium import plugins

from sklearn.model_selection import train_test_split 
from sklearn import metrics
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
## Validate the trained model on a city it has not seen
place = 'Portland, Oregon, USA'

# Geocode the city boundary (drawn as the "City Limit" outline on the maps).
# OSMnx 1.0 removed `gdf_from_place` in favour of `geocode_to_gdf`; prefer the
# new name and fall back to the old one on legacy installs.
_geocode_place = getattr(ox, 'geocode_to_gdf', None) or ox.gdf_from_place
city = _geocode_place(place)

# Pre-computed Portland layers, all read from GeoJSON files in `folder_name`:
# the analysis grid, the feature/target table, and the raw bike-rental and
# amenity layers.
folder_name = 'OSM_geo_data/'
portland_grid = gpd.read_file(folder_name + 'portland_grid.geojson')
portland_ft = gpd.read_file(folder_name + 'portland_feature_target_table.geojson')
portland_bike_rentals = gpd.read_file(folder_name + 'portland_bike_rentals.geojson')
portland_amenities = gpd.read_file(folder_name + 'portland_amenities.geojson')

In [3]:
# Load the previously trained model from disk (presumably the tuned random
# forest, judging by the file name -- confirm against the training notebook).
# NOTE(security): joblib.load is pickle-based and can execute arbitrary code;
# only load model files that come from a trusted source.
loaded_model = joblib.load('best_RF_model.sav')

In [4]:
# Amenity types whose per-cell density columns are fed to the model. The order
# is kept exactly as listed, since the columns are selected in this order when
# predicting and when pairing names with feature importances.
_amenity_names = (
    'cafe', 'bar', 'bicycle_parking', 'fast_food',
    'bank', 'pharmacy', 'pub', 'atm', 'car_sharing',
    'theatre', 'post_office', 'drinking_water', 'school',
    'cinema', 'bench', 'motorcycle_parking', 'ice_cream',
    'recycling', 'college', 'toilets', 'arts_centre',
    'nightclub', 'library', 'taxi', 'marketplace',
    'community_centre', 'place_of_worship', 'waste_basket',
    'clinic', 'social_facility', 'fountain', 'bureau_de_change',
    'kindergarten', 'police', 'veterinary', 'parking',
    'university', 'parking_entrance', 'childcare', 'hospital',
    'car_rental', 'vending_machine', 'dentist', 'bus_station',
)
top_features = [amenity + '_density' for amenity in _amenity_names]



In [5]:
# Predict bike-rental density for every Portland grid cell with the loaded
# model, then put prediction and observed density side by side on the grid.
portland_predict = loaded_model.predict(portland_ft[top_features])

portland_comparison = portland_grid.copy()
portland_comparison['bike_rental_density'] = portland_ft['bike_rental_density']
portland_comparison['RF_prediction'] = portland_predict
# Positive values: the model predicts more density than is currently observed.
portland_comparison['bike_rental_diff'] = (
    portland_comparison['RF_prediction'] - portland_comparison['bike_rental_density']
)

# One shared scale (the largest predicted or observed density) so that the
# scaled prediction and scaled actual columns are directly comparable.
scale_factor = max(float(np.max(portland_predict)),
                   float(portland_comparison['bike_rental_density'].max()))

portland_comparison['scaled_actual_density'] = portland_ft['bike_rental_density'] / scale_factor
portland_comparison['scaled_pred_density'] = portland_comparison['RF_prediction'] / scale_factor

# Error metrics of the prediction against the observed density. The MSE is
# computed once and reused for the RMSE.
rf_mae = metrics.mean_absolute_error(portland_comparison.bike_rental_density,
                                     portland_comparison.RF_prediction)
rf_mse = metrics.mean_squared_error(portland_comparison.bike_rental_density,
                                    portland_comparison.RF_prediction)

print('Random forest mean absolute error (MAE): %4.3f' % rf_mae)
print('Random forest mean squared error (MSE): %4.3f' % rf_mse)
print('Random forest root mean squared error (RMSE): %4.3f' % np.sqrt(rf_mse))


Random forest mean absolute error (MAE): 0.185
Random forest mean squared error (MSE): 0.176
Random forest root mean squared error (RMSE): 0.419


In [42]:
# Per-grid-cell style lookups for the folium layers. Keys are str(index label)
# because the map style functions look values up by feature['id'].
pred_dict = portland_comparison['scaled_pred_density']
actual_dict = portland_comparison['scaled_actual_density']
diff_dict = portland_comparison['bike_rental_diff']


def _opacity_lookup(values, factor=1.0):
    """Build an opacity lookup from a Series.

    Args:
        values: pandas Series of per-cell values.
        factor: multiplier applied to every value before clamping.

    Returns:
        dict mapping str(index label) to ``value * factor`` clamped to the
        range [0, 1], so every entry is a valid fill opacity.
    """
    return {str(key): min(max(value * factor, 0.0), 1.0)
            for key, value in values.items()}


pred_opacity = _opacity_lookup(pred_dict, 0.75)
actual_opacity = _opacity_lookup(actual_dict, 0.75)
# The difference is divided by half the scale so that moderate gaps are already
# clearly visible; the raw ratio can therefore exceed 1 and relies on the clamp.
diff_opacity = _opacity_lookup(diff_dict.abs() / (scale_factor / 2))

# Diverging red/blue colour scale, symmetric around a difference of zero.
colormap = cm.linear.RdBu_09.scale(-scale_factor, scale_factor)
diff_color = {str(key): colormap(value) for key, value in diff_dict.items()}

# Amenity layers: each density is normalised by its own maximum.
cafe_dict = portland_ft['cafe_density'] / max(portland_ft['cafe_density'])
cafe_opacity = _opacity_lookup(cafe_dict, 0.75)

bp_dict = portland_ft['bicycle_parking_density'] / max(portland_ft['bicycle_parking_density'])
bp_opacity = _opacity_lookup(bp_dict, 0.75)


In [43]:
# Centre the map on the centroid of the city boundary. Take the scalar point
# explicitly instead of passing one-element Series to folium (assumes `city`
# holds a single row, i.e. one geocoded place).
city_center = city.geometry.centroid.iloc[0]
m = folium.Map([city_center.y, city_center.x],
               zoom_start=11,
               tiles="CartoDb positron")

# City boundary: outline only (fill is fully transparent).
style_city = {'color':'#ebc923 ', 'fillColor': '#ebc923 ', 'weight':'2', 'fillOpacity' : 0}
folium.GeoJson(city,
               style_function=lambda x: style_city,
               name='City Limit').add_to(m)

# Actual bike share density (hidden by default). Opacity is halved relative to
# the lookup value.
folium.GeoJson(
    portland_comparison['geometry'],
    name='Actual bike share density',
    show=False,
    style_function=lambda feature: {
        'fillColor': '#00c40a',
        'color': 'black',
        'weight': 0,
        'fillOpacity': actual_opacity[feature['id']]*0.5,
    }
).add_to(m)

# Predicted bike share density (hidden by default).
folium.GeoJson(
    portland_comparison['geometry'],
    name='Prediction: bike share density',
    show=False,
    style_function=lambda feature: {
        'fillColor': '#00c40a',
        'color': 'black',
        'weight': 0,
        'fillOpacity': pred_opacity[feature['id']],
    }
).add_to(m)

# Difference between predicted and actual density: colour comes from the
# diverging colormap, opacity from the size of the difference. This is the only
# data layer shown by default.
folium.GeoJson(
    portland_comparison['geometry'],
    name='Difference: bike share density',
    style_function=lambda feature: {
        'fillColor': diff_color[feature['id']],
        'color': 'black',
        'weight': 0,
        'fillOpacity': diff_opacity[feature['id']],
    }
).add_to(m)

# Bicycle parking density (the layer is labelled 'amenity density'; hidden by
# default).
folium.GeoJson(
    portland_ft['geometry'],
    name='amenity density',
    show=False,
    style_function=lambda feature: {
        'fillColor': '#0088ff',
        'color': 'black',
        'weight': 0,
        'fillOpacity': bp_opacity[feature['id']],
    }
).add_to(m)

# Legend for the difference layer.
colormap.caption = 'Difference in bike density prediction'
colormap.add_to(m)

folium.LayerControl().add_to(m)

# m.save("portland_validation.html")
m

In [8]:
# Pair each feature name with its importance, rounded to 4 decimals, and list
# the pairs from the largest importance to the smallest.
rounded_importances = [round(importance, 4)
                       for importance in loaded_model.feature_importances_]
print(sorted(zip(rounded_importances, top_features), reverse=True))

[(0.1399, 'cafe_density'), (0.1315, 'bar_density'), (0.0715, 'fast_food_density'), (0.0689, 'bicycle_parking_density'), (0.0561, 'bank_density'), (0.051, 'pharmacy_density'), (0.0457, 'pub_density'), (0.0348, 'parking_density'), (0.0314, 'theatre_density'), (0.0286, 'drinking_water_density'), (0.0265, 'atm_density'), (0.0199, 'fountain_density'), (0.0195, 'school_density'), (0.0193, 'car_sharing_density'), (0.0177, 'place_of_worship_density'), (0.0166, 'nightclub_density'), (0.016, 'post_office_density'), (0.0137, 'bench_density'), (0.0131, 'college_density'), (0.0127, 'toilets_density'), (0.0116, 'motorcycle_parking_density'), (0.0115, 'waste_basket_density'), (0.0103, 'cinema_density'), (0.0097, 'library_density'), (0.008, 'bus_station_density'), (0.0078, 'parking_entrance_density'), (0.0075, 'dentist_density'), (0.0075, 'arts_centre_density'), (0.0071, 'taxi_density'), (0.0071, 'clinic_density'), (0.0068, 'recycling_density'), (0.0066, 'university_density'), (0.0065, 'marketplace_de

In [27]:

# Side-by-side maps centred on the centroid of the city boundary. Take the
# scalar point explicitly instead of passing one-element Series to folium
# (assumes `city` holds a single row, i.e. one geocoded place).
city_center = city.geometry.centroid.iloc[0]
m = plugins.DualMap([city_center.y, city_center.x],
               zoom_start=11,
               tiles="CartoDb positron")

# City boundary on both panes: outline only (fill is fully transparent).
style_city = {'color':'#ebc923 ', 'fillColor': '#ebc923 ', 'weight':'2', 'fillOpacity' : 0}
for pane in (m.m1, m.m2):
    folium.GeoJson(city,
                   style_function=lambda x: style_city,
                   name='City Limit').add_to(pane)

# Left pane: actual bike share density (shown by default). Opacity is halved
# relative to the lookup value.
folium.GeoJson(
    portland_comparison['geometry'],
    name='Actual bike share density',
    show=True,
    style_function=lambda feature: {
        'fillColor': '#00c40a',
        'color': 'black',
        'weight': 0,
        'fillOpacity': actual_opacity[feature['id']]*0.5,
    }
).add_to(m.m1)

# Right pane: predicted bike share density (hidden by default).
folium.GeoJson(
    portland_comparison['geometry'],
    name='Prediction: bike share density',
    show=False,
    style_function=lambda feature: {
        'fillColor': '#00c40a',
        'color': 'black',
        'weight': 0,
        'fillOpacity': pred_opacity[feature['id']],
    }
).add_to(m.m2)

# Right pane: difference between predicted and actual density. Colour comes
# from the diverging colormap, opacity from the size of the difference.
folium.GeoJson(
    portland_comparison['geometry'],
    name='Difference: bike share density',
    style_function=lambda feature: {
        'fillColor': diff_color[feature['id']],
        'color': 'black',
        'weight': 0,
        'fillOpacity': diff_opacity[feature['id']],
    }
).add_to(m.m2)

# Left pane: cafe density (hidden by default).
folium.GeoJson(
    portland_ft['geometry'],
    name='cafe density',
    show=False,
    style_function=lambda feature: {
        'fillColor': '#0088ff',
        'color': 'black',
        'weight': 0,
        'fillOpacity': cafe_opacity[feature['id']],
    }
).add_to(m.m1)

folium.LayerControl(collapsed=False).add_to(m)
# m.save(os.path.join('results', 'Plugins_11.html'))
m