In [1]:
import pandas as pd
import numpy as np
import os
import requests
%matplotlib inline

### Get data for all quarters of 2017 into a single dataframe.

In [2]:
PATH = '2017-capitalbikeshare-tripdata'

# Read every quarterly CSV once and concatenate them in a single call; growing
# a DataFrame with pd.concat inside the loop re-copies the accumulated rows on
# each iteration.
# - Only *.csv entries are read, so stray files in the folder (e.g. .DS_Store)
#   are not handed to pd.read_csv.
# - os.listdir returns names in arbitrary order; sorting them makes the row
#   order of data_2017 reproducible.
csv_names = sorted(
    name for name in os.listdir(path=PATH) if name.lower().endswith('.csv')
)
quarterly_frames = [pd.read_csv(os.path.join(PATH, name)) for name in csv_names]

# pd.concat raises on an empty list, so fall back to an empty frame (the result
# the original loop produced) when the folder holds no CSV files.
# sort=True is kept from the original concat call.
data_2017 = pd.concat(quarterly_frames, sort=True) if quarterly_frames else pd.DataFrame()

### Do our normal datetime feature engineering to get columns for month, day, hour, weekday, etc.

In [3]:
# Parse the two timestamp columns, write them back, and store the trip
# duration as the difference between the parsed end and start times.
trip_start = pd.to_datetime(data_2017['Start date'])
trip_end = pd.to_datetime(data_2017['End date'])
data_2017['Start date'] = trip_start
data_2017['End date'] = trip_end
data_2017['Duration'] = trip_end - trip_start

# Calendar features, all derived from the trip start time.
start_parts = trip_start.dt
data_2017['Month'] = start_parts.month
data_2017['Day'] = start_parts.day
data_2017['Hour'] = start_parts.hour
data_2017['Weekday'] = start_parts.weekday

### Let's also grab some extra data that contains latitude / longitude for each station, and merge it onto our main dataframe.

In [4]:
# Download the station information feed.
# - timeout: without it, requests waits indefinitely on a stalled connection
#   and the cell never finishes.
# - raise_for_status(): report an HTTP error here instead of letting it show up
#   as a JSON/KeyError failure on the next line.
response = requests.get(
    'https://gbfs.capitalbikeshare.com/gbfs/en/station_information.json',
    timeout=30,
)
response.raise_for_status()
stations = pd.DataFrame(response.json()['data']['stations'])

In [5]:
# Keep only the station attributes of interest. The trailing .copy() gives
# stations_short its own data, so later edits to it do not touch `stations`.
STATION_COLUMNS = ['lat', 'lon', 'name', 'capacity', 'short_name', 'station_id']
stations_short = stations[STATION_COLUMNS].copy()

In [6]:
# 'short_name' comes in as a string; cast it to int64 before it is used as the
# join key against 'Start station number'.
stations_short['short_name'] = stations_short['short_name'].astype(np.int64)

# Inner join: a trip is kept only when its start station number has a matching
# short_name in stations_short.
df_merged = pd.merge(
    data_2017,
    stations_short,
    left_on='Start station number',
    right_on='short_name',
    how='inner',
)

In [11]:
import json

# Read the region boundaries as raw text (parsed in the next cell).
# GeoJSON is UTF-8 by specification (RFC 7946), so the encoding is stated
# explicitly instead of relying on the platform default, which can mis-decode
# non-ASCII characters in region names.
with open('washington.geojson', mode='r', encoding='utf-8') as f:
    geojson = f.read()

In [12]:
# Parse the GeoJSON text into a dictionary. The guard makes the cell safe to
# re-run: once `geojson` already holds the parsed dictionary, passing it to
# json.loads again would raise a TypeError.
if isinstance(geojson, (str, bytes, bytearray)):
    geojson = json.loads(geojson)

In [18]:
def get_multipolygon_coordinates(gj):
    """Map each region name in a loaded GeoJSON dictionary to MultiPolygon coordinates.

    Parameters
    ----------
    gj : dict
        GeoJSON already loaded into Python. Each entry of ``gj['features']``
        must provide ``['properties']['name']`` and a ``'geometry'`` entry.

    Returns
    -------
    dict
        Keys are the region names formatted as strings. Values are coordinates
        in the GeoJSON MultiPolygon layout (a list of polygons, each polygon a
        list of rings). A plain ``Polygon`` geometry is promoted to a
        one-polygon MultiPolygon so single-part regions are kept. Any other
        geometry type, or a null geometry, is reported with ``print`` and
        stored as the string ``'ERROR'``.
    """
    multipolygon_dict = {}

    for feature in gj['features']:
        geo_name = f"{feature['properties']['name']}"

        # GeoJSON permits "geometry": null; treat it like an unsupported type
        # instead of failing on the subscript below.
        geometry = feature['geometry'] or {}
        geo_type = geometry.get('type')

        if geo_type == 'MultiPolygon':
            multipolygon_dict[geo_name] = geometry['coordinates']
        elif geo_type == 'Polygon':
            # Polygon coordinates are a list of rings; wrapping them in a list
            # yields the list-of-polygons layout used for MultiPolygon.
            multipolygon_dict[geo_name] = [geometry['coordinates']]
        else:
            multipolygon_dict[geo_name] = 'ERROR'
            print(f'{geo_name} not a valid GeoJSON MultiPolygon!')

    return multipolygon_dict

In [20]:
# Build the {region name: coordinates} lookup from the parsed GeoJSON.
multipolygon_dict = get_multipolygon_coordinates(geojson)

In [21]:
from shapely.geometry import MultiPolygon, Point

In [22]:
def convert_to_multipolygon_objects(multipolygon_dict):
    """Replace MultiPolygon coordinate lists with Shapely MultiPolygon objects.

    This builds on get_multipolygon_coordinates(): its output dictionary is the
    expected input here. Having Shapely objects is what later allows testing
    whether a given point falls inside a region.

    Parameters
    ----------
    multipolygon_dict : dict
        Region name -> coordinates in the GeoJSON MultiPolygon layout: a list
        of polygons, where each polygon is a list of rings (first ring is the
        outer shell, any further rings are holes) and each ring is a list of
        coordinate pairs.

    Returns
    -------
    dict
        The same dictionary object, with its values overwritten in place:
        - coordinate lists become ``MultiPolygon`` objects, holes included;
        - string values (the ``'ERROR'`` sentinel written by
          get_multipolygon_coordinates) become an empty ``MultiPolygon``;
        - any other value (e.g. an already converted geometry) is left
          untouched, so calling the function twice on one dictionary is safe.
    """
    # Only existing keys are reassigned, so the dictionary size never changes
    # while it is being iterated.
    for name, coordinate_list in multipolygon_dict.items():

        if isinstance(coordinate_list, str):
            # Placeholder for a region without usable polygon coordinates.
            multipolygon_dict[name] = MultiPolygon([])
            continue

        if not isinstance(coordinate_list, (list, tuple)):
            # Not raw coordinates (presumably converted already): keep as is.
            continue

        polygons = []
        for rings in coordinate_list:
            shell = [tuple(pair) for pair in rings[0]]
            # Every ring after the first is a hole and must stay its own list
            # of coordinate tuples.
            holes = [[tuple(pair) for pair in ring] for ring in rings[1:]]
            polygons.append((shell, holes))

        # MultiPolygon takes a sequence of (shell, holes) tuples.
        multipolygon_dict[name] = MultiPolygon(polygons)

    return multipolygon_dict

In [24]:
# Swap the raw coordinate lists for Shapely MultiPolygon objects. The function
# overwrites the values of the dictionary it receives and returns that same
# dictionary.
multipolygon_dict = convert_to_multipolygon_objects(multipolygon_dict)

In [39]:
def get_neighborhood(coordinate_string, mp_dict=None):
    """Return the name of the region (neighborhood) that contains a coordinate.

    Parameters
    ----------
    coordinate_string : str
        Longitude and latitude separated by whitespace, longitude first.
        Example format: '-77.0024 38.87501'
    mp_dict : dict, optional
        Region name -> geometry object offering ``contains(point)``. When
        omitted, the mapping is built from the module-level ``geojson``
        dictionary via get_multipolygon_coordinates() and
        convert_to_multipolygon_objects(), then cached on this function. The
        geometries are therefore built once instead of on every call, which
        matters when the function is applied to a whole column. The cache is
        rebuilt when ``geojson`` is rebound to a different object; in-place
        edits of the same dictionary are not detected.

    Returns
    -------
    str
        Name of the first region whose geometry contains the point, or
        'Unknown' when no region does.

    Raises
    ------
    IndexError
        If the string holds fewer than two whitespace-separated values.
    ValueError
        If a value cannot be converted to float.
    """
    parts = coordinate_string.split()
    lon = float(parts[0])
    lat = float(parts[1])
    point = Point(lon, lat)

    if mp_dict is None:
        cached = getattr(get_neighborhood, '_region_cache', None)
        # The cache remembers which geojson object it was built from, so
        # loading a different file invalidates it.
        if cached is None or cached[0] is not geojson:
            regions = convert_to_multipolygon_objects(
                get_multipolygon_coordinates(geojson)
            )
            cached = (geojson, regions)
            get_neighborhood._region_cache = cached
        mp_dict = cached[1]

    for name, multipolygon in mp_dict.items():
        if multipolygon.contains(point):
            # The first match wins; there is no need to test the rest.
            return name

    return 'Unknown'

In [34]:
# Build a 'lon lat' text key per row (longitude first, one space between the
# values) - the format that get_neighborhood() parses.
df_merged['lon_lat_coordinates'] = df_merged['lon'].astype(str) + ' ' + df_merged['lat'].astype(str)

In [40]:
# Spot-check the lookup on a single 'lon lat' string.
get_neighborhood('-77.0024 38.87501')

'Capitol Hill'

In [43]:
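# Disabled: this would call get_neighborhood once per row of df_merged
# (presumably too slow); the cells below look up each distinct coordinate
# string once instead.
# NOTE: the assignment target is `df`, not `df_merged` - confirm which frame
# was intended before re-enabling.
# df['neighborhood'] = df_merged['lon_lat_coordinates'].apply(get_neighborhood)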

In [50]:
# One row per distinct coordinate string. 'Count' is the number of non-null
# 'Bike number' entries recorded for that coordinate.
bike_counts = (
    df_merged
    .groupby(['lon_lat_coordinates'])[['Bike number']]
    .count()
    .rename(columns={'Bike number': 'Count'})
    .reset_index()
)

In [52]:
# Resolve the neighborhood once per distinct coordinate string (one row of
# bike_counts each) rather than once per row of df_merged.
bike_counts['neighborhood'] = bike_counts['lon_lat_coordinates'].apply(get_neighborhood)

In [56]:
# Roll the per-coordinate counts up to one total per neighborhood.
bike_counts_final = (
    bike_counts
    .groupby('neighborhood')[['Count']]
    .sum()
    .reset_index()
)

In [62]:
# Preview the first rows of the per-neighborhood totals.
bike_counts_final.head()

Unnamed: 0,neighborhood,Count
0,Adams Morgan,125515
1,Anacostia,11615
2,Au-Tenleytown,7032
3,Brentwood,49630
4,Brightwood,6168


---

In [74]:
import folium 
# folium is a third-party package; if it is missing, install it with: pip install folium


# Base map that the choropleth layer is added to in the next cell.
# The centre point is given as [latitude, longitude].
DC_CENTER = [38.8867099, -77.0075084]
bike_map = folium.Map(location=DC_CENTER, zoom_start=12, tiles='CartoDB positron')

In [75]:
# Choropleth (colour map) layer: shade each GeoJSON region by its 'Count'
# value from bike_counts_final, then attach the layer to bike_map.
folium.Choropleth(
    geo_data=geojson,
    data=bike_counts_final,
    # first column is the key, second column is the value
    columns=('neighborhood', 'Count'),
    # tells folium which value inside each GeoJSON feature is matched to the
    # key column above
    key_on='properties.name',
    name='Count of Bikes',
    legend_name='Bike Counts 2017',
    fill_color='YlOrRd',
    fill_opacity=0.5,
    nan_fill_color='#ededed',
    line_opacity=0.3,
    highlight=True,
).add_to(bike_map)

<folium.features.Choropleth at 0x1340edfd0>

In [72]:
# Inspect the name of the first feature; this is the 'properties.name' value
# that the choropleth's key_on setting refers to.
geojson['features'][0]['properties']['name']

'Takoma Park'

In [76]:
# Show the map as this cell's output.
bike_map

In [77]:
# Save the map to an HTML file (path is relative to the working directory).
bike_map.save('bike_map_dc.html')