In [1]:
## output bikeshare density for what area could support
## output bikeability based on strava data to decide if area is a good idea
## Assumes that bike shares are optimally placed and near optimal capacity
## use regularization in the training of the ML model

import pandas as pd
import geopandas as gpd
import osmnx as ox
import folium
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.ops import nearest_points

from sklearn.model_selection import train_test_split 
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
def gridify_polygon(poly,grid_spacing):
    # creates a cartesian grid inside polygon with the input grid_spacing
    # poly: polygon which we want a grid inside
    # grid_spacing: spaceing in lattitude/longitude degrees
    poly_xmin,poly_ymin,poly_xmax,poly_ymax = poly.geometry.total_bounds

    cols = list(np.arange(poly_xmin,poly_xmax+grid_spacing,grid_spacing))
    rows = list(np.arange(poly_ymin,poly_ymax+grid_spacing,grid_spacing))
    rows.reverse()

    polygons = []
    for x in cols:
        for y in rows:
            polygons.append( Polygon([(x,y), (x+grid_spacing, y), (x+grid_spacing, y-grid_spacing), (x, y-grid_spacing)]) )

    grid = gpd.GeoDataFrame({'geometry':polygons})
    grid['center_x'] = grid['geometry'].centroid.x
    grid['center_y'] = grid['geometry'].centroid.y
    grid['center'] = grid['geometry'].centroid

    grid['isin_poly'] = grid.apply(lambda row: row['center'].within(poly.geometry[0]), axis=1)
    poly_grid = grid[grid.isin_poly == True]
    poly_grid.crs = {'init': 'epsg:4326', 'no_defs': True}
    poly_grid = poly_grid.drop(['isin_poly'], axis = 1)
    
    # Calculate the polygon areas in km
    poly_grid_cart = poly_grid.copy()
    poly_grid_cart = poly_grid_cart.to_crs({'init': 'epsg:3857'})
    poly_grid_cart['poly_area_km'] = poly_grid_cart['geometry'].area/ 10**6
    # Store polygon area
    poly_grid['poly_area_km'] = poly_grid_cart['poly_area_km']
    
    # 
    poly_grid = poly_grid.reset_index()
    return poly_grid

In [3]:
folder_name = 'OSM_geo_data/'
root_name = 'glasgow'
place = 'Glasgow City, Scotland, UK'

filename_cleaned_df = folder_name + root_name + '_feature_target_table.csv'
filename_grid = folder_name + root_name + '_grid.csv'
filename_amenities = folder_name + root_name + '_amenities.csv'
filename_bike_rentals = folder_name + root_name + '_bike_rentals.csv'


df = pd.read_csv(filename_cleaned_df)
# city_grid = pd.read_csv(filename_grid)
all_amenities = pd.read_csv(filename_amenities)

# Generate city grid for interrogation
city = ox.gdf_from_place(place)
city['center_x'] = city['geometry'].centroid.x
city['center_y'] = city['geometry'].centroid.y
city_grid = gridify_polygon(city,0.01)



  interactivity=interactivity, compiler=compiler, result=result)


In [4]:

# my_geo_df = gpd.GeoDataFrame(my_df, geometry='geometry')

In [5]:
# Define feature and target labels
features = ['pharmacy_density', 'recycling_density', 'place_of_worship_density', 'post_box_density',
            'library_density', 'post_office_density', 'parking_density', 'fuel_density', 'bank_density',
            'pub_density', 'telephone_density', 'toilets_density', 'taxi_density', 'bicycle_parking_density',
            'motorcycle_parking_density', 'fast_food_density', 'bar_density', 'life_boats_density',
            'restaurant_density', 'arts_centre_density', 'music_venue_density', 'nightclub_density',
            'cafe_density', 'atm_density', 'community_centre_density', 'jobcentre_density', 'doctors_density',
            'cinema_density', 'grave_yard_density', 'police_density', 'bus_station_density', 'theatre_density',
            'bureau_de_change_density', 'hospital_density', 'bench_density', 'school_density', 'courthouse_density',
            'ice_cream_density', 'fountain_density', 'left_luggage_density', 'drinking_water_density',
            'casino_density', 'car_rental_density', 'car_wash_density', 'ferry_terminal_density', 'dentist_density',
            'townhall_density', 'shelter_density', 'parking_entrance_density', 'conference_centre_density',
            'marketplace_density', 'vending_machine_density', 'waste_basket_density', 'clock_density',
            'studio_density', 'veterinary_density', 'gallery_density', 'gambling_density', 'kindergarten_density',
            'social_facility_density', 'charging_station_density', 'car_sharing_density', 'clinic_density',
            'water_density', 'compressed_air_density', 'public_building_density', 'social_centre_density',
            'childcare_density', 'grit_bin_density', 'bicycle_repair_station_density', 'events_venue_density',
            'embassy_density', 'college_density', 'circus_school_density', 'parcel_lockers_density',
            'money_transfer_density', 'photo_booth_density', 'luggage_locker_density', 'university_density',
            'venue_density', 'swimming_pool_density', 'fire_station_density', 'post_depot_density',
            'crematorium_density', 'sport_density', 'nursing_home_density', 'biergarten_density', 'garden_density',
            'prison_density', 'club_density', 'parking_space_density', 'trailer_park_density', 'archive_density',
            'monastery_density']
target = ['bike_rental_density']

In [6]:
# Split data into train and test
X = df[features]
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [7]:
# train linear model
bike_share_LM = LinearRegression()
bike_share_LM.fit(X_train,y_train)
y_pred = bike_share_LM.predict(X_test)

y_pred[y_pred <0] = 0
print('Multivariate linear regression mean absolute error (MAE): %4.3f' % metrics.mean_absolute_error(np.expm1(y_test),np.expm1(y_pred)))

Multivariate linear regression mean absolute error (MAE): 1.145


In [8]:
m = folium.Map([city.center_y, city.center_x],
               zoom_start=11,
               tiles="CartoDb dark_matter")

style_city = {'color':'#ebc923 ', 'fillColor': '#ebc923 ', 'weight':'1', 'fillOpacity' : 0.1}
folium.GeoJson(city, style_function=lambda x: style_city).add_to(m)

# grid points
locs = zip(city_grid.center_y, city_grid.center_x)
for location in locs:
    folium.CircleMarker(location=location, 
        color = "white",   radius=1).add_to(m)

style_region = {'color':'#1FFD09 ', 'fillColor': '#1FFD09 ', 'weight':'1', 'fillOpacity' : 0.1}
folium.GeoJson(city_grid.geometry.iloc[39], style_function=lambda x: style_region).add_to(m)
# plot bikable streets
# m = folium.Map(latlon,
#                zoom_start=15,
#                tiles="CartoDb dark_matter")
# folium.GeoJson(streets, style_function=lambda x: style).add_to(m)

# add cafes
locs = zip(all_amenities.geometry.y, all_amenities.geometry.x)
for location in locs:
    folium.CircleMarker(location=location, 
        color = "red",   radius=1).add_to(m)

# add bike rentals
locs = zip(bike_rentals.geometry.y, bike_rentals.geometry.x)
for location in locs:
    folium.CircleMarker(location=location, 
        color = "yellow",   radius=2).add_to(m)

m.save("city_view.html")
m

AttributeError: 'Series' object has no attribute 'y'

In [None]:
# data={'y_test': y_test, 'y_pred': y_pred, 'difference': y_test-y_pred}

# # pd.DataFrame(data)
comparison = y_test.copy()
comparison['y_pred'] = y_pred
comparison['difference'] = y_test - y_pred
comparison

In [None]:
city_grid.geometry.iloc[39]


In [9]:
all_amenities.head()

Unnamed: 0,osmid,geometry,created_by,element_type,crossing,highway,amenity,postal_code,ref,dispensing,...,ref:seed,source:geometry,fee:amount,not:addr:postcode,construction:amenity,compressed_air,construction,noname,self_service,ways
0,2914008,POINT (-4.320671 55.896043),JOSM,node,,,,,,,...,,,,,,,,,,
1,25702791,POINT (-4.2401217 55.8630826),,node,traffic_signals,traffic_signals,,,,,...,,,,,,,,,,
2,25894913,POINT (-4.335318 55.9243337),,node,,,post_box,,,,...,,,,,,,,,,
3,25997235,POINT (-4.3265141 55.9263684),JOSM,node,,,post_box,,,,...,,,,,,,,,,
4,25997236,POINT (-4.3243375 55.9228994),JOSM,node,,,post_box,G61 3N,G61 1014,,...,,,,,,,,,,
