In [1]:
## Output: the bike-share density that an area could support
## Output: a bikeability score based on Strava data, to decide whether the area is a good candidate
## Assumption: existing bike shares are optimally placed and near optimal capacity
## TODO: use regularization when training the ML model

import pandas as pd
import geopandas as gpd
import osmnx as ox
import folium
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.model_selection import train_test_split 
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [32]:
# City prefix shared by every input file of this run.
root_name = 'glasgow'

# Input CSV names follow the pattern '<root_name>_<suffix>.csv'.
filename_cleaned_df = '{}_feature_target_table.csv'.format(root_name)
filename_grid = '{}_grid.csv'.format(root_name)
filename_amenities = '{}_amenities.csv'.format(root_name)

# Read the feature/target table, the grid table and the amenities table, in that order.
# NOTE(review): the displayed head contains an 'Unnamed: 0' column, presumably an index
# written out when the CSV was saved -- consider index_col=0 after confirming nothing
# downstream relies on that column.
df, city_grid, all_amenities = (
    pd.read_csv(csv_path)
    for csv_path in (filename_cleaned_df, filename_grid, filename_amenities)
)

df.head()

Unnamed: 0,geometry,center_x,center_y,center,poly_area_km,bike_rental_density,pharmacy_density,recycling_density,place_of_worship_density,post_box_density,...,sport_density,nursing_home_density,biergarten_density,garden_density,prison_density,club_density,parking_space_density,trailer_park_density,archive_density,monastery_density
0,"POLYGON ((-4.3832006 55.92127669999997, -4.373...",-4.378201,55.916277,POINT (-4.3782006 55.91627669999996),2.211268,0.0,0.452229,0.0,1.356688,0.452229,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"POLYGON ((-4.3832006 55.91127669999997, -4.373...",-4.378201,55.906277,POINT (-4.3782006 55.90627669999997),2.210698,0.0,0.0,0.0,0.452346,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"POLYGON ((-4.3832006 55.90127669999998, -4.373...",-4.378201,55.896277,POINT (-4.3782006 55.89627669999997),2.210128,0.0,0.0,0.0,0.452462,0.452462,...,0.0,0.452462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"POLYGON ((-4.3832006 55.89127669999998, -4.373...",-4.378201,55.886277,POINT (-4.3782006 55.88627669999997),2.209559,0.0,0.452579,0.0,0.905158,1.357737,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"POLYGON ((-4.3832006 55.86127669999998, -4.373...",-4.378201,55.856277,POINT (-4.3782006 55.85627669999998),2.207852,0.0,0.0,0.0,0.0,0.452929,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Feature and target column labels.
# Every feature column is named '<label>_density', so the labels are listed once
# and the shared suffix is appended below. Order matches the original column order.
amenity_labels = [
    'pharmacy', 'recycling', 'place_of_worship', 'post_box',
    'library', 'post_office', 'parking', 'fuel', 'bank',
    'pub', 'telephone', 'toilets', 'taxi', 'bicycle_parking',
    'motorcycle_parking', 'fast_food', 'bar', 'life_boats',
    'restaurant', 'arts_centre', 'music_venue', 'nightclub',
    'cafe', 'atm', 'community_centre', 'jobcentre', 'doctors',
    'cinema', 'grave_yard', 'police', 'bus_station', 'theatre',
    'bureau_de_change', 'hospital', 'bench', 'school', 'courthouse',
    'ice_cream', 'fountain', 'left_luggage', 'drinking_water',
    'casino', 'car_rental', 'car_wash', 'ferry_terminal', 'dentist',
    'townhall', 'shelter', 'parking_entrance', 'conference_centre',
    'marketplace', 'vending_machine', 'waste_basket', 'clock',
    'studio', 'veterinary', 'gallery', 'gambling', 'kindergarten',
    'social_facility', 'charging_station', 'car_sharing', 'clinic',
    'water', 'compressed_air', 'public_building', 'social_centre',
    'childcare', 'grit_bin', 'bicycle_repair_station', 'events_venue',
    'embassy', 'college', 'circus_school', 'parcel_lockers',
    'money_transfer', 'photo_booth', 'luggage_locker', 'university',
    'venue', 'swimming_pool', 'fire_station', 'post_depot',
    'crematorium', 'sport', 'nursing_home', 'biergarten', 'garden',
    'prison', 'club', 'parking_space', 'trailer_park', 'archive',
    'monastery',
]
features = [label + '_density' for label in amenity_labels]

# Kept as a one-element list, so df[target] selects a one-column DataFrame.
target = ['bike_rental_density']

In [8]:
# Select the feature matrix and the (one-column) target frame, then hold out
# 20% of the rows for testing. The fixed random_state keeps the split repeatable.
X, y = df[features], df[target]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [25]:
# Fit an ordinary least-squares model on the training split and predict the test split.
bike_share_LM = LinearRegression().fit(X_train, y_train)

# A density cannot be negative, so negative predictions are floored at zero.
y_pred = np.clip(bike_share_LM.predict(X_test), 0, None)

# NOTE(review): expm1 is applied to both the true and predicted values before scoring,
# which presumes the target column is log1p-scaled. Nothing in this notebook shows that
# transform being applied -- confirm against the code that builds the feature/target table.
test_mae = metrics.mean_absolute_error(np.expm1(y_test), np.expm1(y_pred))
print('Multivariate linear regression mean absolute error (MAE): %4.3f' % test_mae)

Multivariate linear regression mean absolute error (MAE): 2.333


In [24]:
# Build an interactive folium map of the city: city outline, grid-cell centres,
# one highlighted grid cell, amenity points and bike-rental points. The map is
# written to "city_view.html" and displayed as the cell output.
#
# NOTE(review): `city` and `bike_rentals` are not defined in any cell above; this cell
# presumably ran against leftover kernel state. Load/define both before this cell so
# the notebook survives Restart & Run All.
# NOTE(review): `city_grid` and `all_amenities` are loaded with pd.read_csv above, so
# their `geometry` columns are presumably WKT text rather than geometry objects. If so,
# `.geometry.x/.y` and folium.GeoJson(...) below need the column parsed first
# (e.g. gpd.GeoSeries.from_wkt) -- confirm.
m = folium.Map([city.center_y, city.center_x],
               zoom_start=11,
               tiles="CartoDb dark_matter")

# City outline, drawn in yellow with a faint fill.
style_city = {'color':'#ebc923 ', 'fillColor': '#ebc923 ', 'weight':'1', 'fillOpacity' : 0.1}
folium.GeoJson(city, style_function=lambda x: style_city).add_to(m)

# grid points: one small white marker per grid-cell centre (folium expects lat, lon order)
locs = zip(city_grid.center_y, city_grid.center_x)
for location in locs:
    folium.CircleMarker(location=location, 
        color = "white",   radius=1).add_to(m)

# Highlight a single grid cell in green.
# NOTE(review): index 39 is a hard-coded cell; why this cell was chosen is not evident here.
style_region = {'color':'#1FFD09 ', 'fillColor': '#1FFD09 ', 'weight':'1', 'fillOpacity' : 0.1}
folium.GeoJson(city_grid.geometry.iloc[39], style_function=lambda x: style_region).add_to(m)
# plot bikable streets (disabled: `latlon`, `streets` and `style` are not defined in this notebook)
# m = folium.Map(latlon,
#                zoom_start=15,
#                tiles="CartoDb dark_matter")
# folium.GeoJson(streets, style_function=lambda x: style).add_to(m)

# add every row of all_amenities as a small red marker (not only cafes)
locs = zip(all_amenities.geometry.y, all_amenities.geometry.x)
for location in locs:
    folium.CircleMarker(location=location, 
        color = "red",   radius=1).add_to(m)

# add bike rentals as slightly larger yellow markers
locs = zip(bike_rentals.geometry.y, bike_rentals.geometry.x)
for location in locs:
    folium.CircleMarker(location=location, 
        color = "yellow",   radius=2).add_to(m)

# Persist the map as a standalone HTML file, then render it inline.
m.save("city_view.html")
m

numpy.ndarray

In [26]:
# Side-by-side table of the held-out targets, the model predictions and the
# residual (actual minus predicted), indexed like y_test.
comparison = y_test.assign(
    y_pred=y_pred,
    difference=y_test - y_pred,
)
comparison

Unnamed: 0,bike_rental_density,y_pred,difference
53,0.0,0.0,0.0
13,0.0,0.0,0.0
144,0.0,0.0,0.0
94,0.0,0.002599,-0.002599
158,0.0,0.0,0.0
71,0.0,0.14429,-0.14429
85,0.0,0.0,0.0
35,0.0,0.0,0.0
3,0.0,0.0,0.0
118,0.0,0.752071,-0.752071
