In [56]:
import folium
from folium import plugins
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import branca
import glob
import datetime


%matplotlib inline

In [57]:
# Collect the twelve monthly 2018 Jersey City trip files.
# glob.glob returns paths in arbitrary (filesystem-dependent) order; sort so the
# load order, and therefore the combined row index, is the same on every run.
files = sorted(glob.glob('data/JC-2018*.csv'))
files

['data/JC-201801-citibike-tripdata.csv',
 'data/JC-201802-citibike-tripdata.csv',
 'data/JC-201803-citibike-tripdata.csv',
 'data/JC-201804-citibike-tripdata.csv',
 'data/JC-201805-citibike-tripdata.csv',
 'data/JC-201806-citibike-tripdata.csv',
 'data/JC-201807-citibike-tripdata.csv',
 'data/JC-201808-citibike-tripdata.csv',
 'data/JC-201809-citibike-tripdata.csv',
 'data/JC-201810-citibike-tripdata.csv',
 'data/JC-201811-citibike-tripdata.csv',
 'data/JC-201812-citibike-tripdata.csv']

In [58]:
# Read every monthly CSV, normalise its column names, and stack the results
# into one DataFrame.
frames = []

for file in files:
    temp = pd.read_csv(file)  # load each monthly data csv
    temp = temp.rename(str.lower, axis='columns')  # standardize column names
    temp.columns = temp.columns.str.replace(" ", "")
    frames.append(temp)

# Concatenate once: DataFrame.append was removed in pandas 2.0 and re-copied
# the whole accumulated frame on every iteration.
df = pd.concat(frames) if frames else pd.DataFrame()

# Create a new overall index; the per-file row numbers are kept in an
# 'index' column, exactly as before.
df.reset_index(inplace=True)

In [59]:
# Strip spaces from every column label of the combined frame.
df.columns = [label.replace(" ", "") for label in df.columns]


In [60]:
# Parse the trip start/stop timestamps into datetime64 columns.
df['starttime'] = pd.to_datetime(df['starttime'])
df['stoptime'] = pd.to_datetime(df['stoptime'])

# Calendar month (1-12) in which each trip started.
df['month'] = df['starttime'].dt.month

In [61]:
# Preview the first rows of the combined trip table.
df.head()

Unnamed: 0,index,tripduration,starttime,stoptime,startstationid,startstationname,startstationlatitude,startstationlongitude,endstationid,endstationname,endstationlatitude,endstationlongitude,bikeid,usertype,birthyear,gender,month
0,0,932,2018-01-01 02:06:17.541,2018-01-01 02:21:50.027,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,31929,Subscriber,1992,1,1
1,1,550,2018-01-01 12:06:18.039,2018-01-01 12:15:28.443,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,31845,Subscriber,1969,2,1
2,2,510,2018-01-01 12:06:56.978,2018-01-01 12:15:27.810,3183,Exchange Place,40.716247,-74.033459,3199,Newport Pkwy,40.728745,-74.032108,31708,Subscriber,1946,1,1
3,3,354,2018-01-01 14:53:10.186,2018-01-01 14:59:05.096,3183,Exchange Place,40.716247,-74.033459,3267,Morris Canal,40.712419,-74.038526,31697,Subscriber,1994,1,1
4,4,250,2018-01-01 17:34:30.192,2018-01-01 17:38:40.984,3183,Exchange Place,40.716247,-74.033459,3639,Harborside,40.719252,-74.034234,31861,Subscriber,1991,1,1


In [62]:
# One row per start station: keep the first name/longitude/latitude
# combination seen for each station name and renumber the rows from 0.
station = (
    df.loc[:, ['startstationname', 'startstationlongitude', 'startstationlatitude']]
    .drop_duplicates(subset='startstationname')
    .reset_index(drop=True)
)

In [63]:
# Trips departing from each station.
start = (
    df.groupby('startstationname')
    .size()
    .reset_index(name='startcount')
    .rename(columns={'startstationname': 'station'})
)

# Trips arriving at each station.
end = (
    df.groupby('endstationname')
    .size()
    .reset_index(name='endcount')
    .rename(columns={'endstationname': 'station'})
)

# Keep only end stations that also appear as a start station.
end = end.loc[end['station'].isin(df['startstationname'].unique())].reset_index(drop=True)

# Join on the shared 'station' column and add departures + arrivals.
use = start.merge(end).assign(total=lambda counts: counts['startcount'] + counts['endcount'])


In [64]:
# Shorten the station column names so they line up with the `use` table.
station = station.rename(columns={
    'startstationname': 'station',
    'startstationlatitude': 'latitude',
    'startstationlongitude': 'longitude',
})

In [65]:
# Attach the start/end/total counts to each station (inner join on the shared columns).
station = station.merge(use)

In [66]:
# Remove the row whose index label is 50.
# NOTE(review): presumably this is the 'JCBS Depot' row that is also dropped
# from `months` later — confirm; the label depends on the load order of the data.
station = station.drop(index=50)

In [67]:
# Order the stations alphabetically by name.
station = station.sort_values(by='station')

In [68]:
# Renumber the sorted rows from 0 and display the table.
station = station.reset_index(drop=True)
station

Unnamed: 0,station,longitude,latitude,startcount,endcount,total
0,5 Corners Library,-74.059503,40.734961,2026,1751,3777
1,Astor Place,-74.071262,40.719282,2898,2946,5844
2,Baldwin at Montgomery,-74.064194,40.723659,4069,3464,7533
3,Bayside Park,-74.08208,40.698651,34,31,65
4,Bergen Ave,-74.071455,40.722104,2416,2105,4521
5,Bethune Center,-74.085931,40.704958,24,24,48
6,Brunswick & 6th,-74.050389,40.726012,8251,7297,15548
7,Brunswick St,-74.050656,40.724176,7383,6817,14200
8,Christ Hospital,-74.050444,40.734786,2420,1635,4055
9,City Hall,-74.043845,40.717733,8829,9469,18298


In [69]:
def my_color_function(feature, totals=None):
    """Pick a marker fill colour from a station's total trip count.

    The value is bucketed by the 20/40/60/80 % quantiles of ``totals`` and
    mapped onto a five-step light-to-dark ramp: ``'#FFF5EE'`` for the lowest
    bucket up to ``'#996515'`` for the highest.

    Parameters
    ----------
    feature : number
        The total trip count to classify.
    totals : pandas.Series, optional
        Distribution used to derive the quantile cut-offs. Defaults to the
        module-level ``station.total`` column.

    Returns
    -------
    str
        Hex colour string for the bucket ``feature`` falls into. A value that
        is not below any cut-off (including NaN) gets the darkest colour.
    """
    if totals is None:
        totals = station.total

    # Compute all four cut-offs in a single quantile call instead of
    # re-scanning the column once per branch.
    cutoffs = totals.quantile([.2, .4, .6, .8]).tolist()
    ramp = ['#FFF5EE', '#FAD6A5', '#FFB347', '#FF8C00']

    for cutoff, color in zip(cutoffs, ramp):
        if feature < cutoff:
            return color
    return '#996515'

In [71]:
# Map of total 2018 usage per station, centred on the mean station coordinates.
# NOTE(review): recent folium releases reportedly no longer bundle the Stamen
# tile sets — confirm that tiles='Stamen Toner' still resolves in this environment.
m = folium.Map([station['latitude'].mean(), station['longitude'].mean()],
               tiles='Stamen Toner',
               zoom_start=14)

# Mark each station as a polygon coloured by its quantile bucket of total trips.
for index, row in station.iterrows():

    # Popup text: station name followed by its total trip count.
    html = """
    {name}
    {total}
    """.format(name=row['station'], total=row['total'])

    folium.RegularPolygonMarker([row['latitude'], row['longitude']],
                        radius=12,
                        color='black',
                        fill_color=my_color_function(row['total']),
                        popup=html,
                        ).add_to(m)

# [latitude, longitude] pair for every station, in table order.
stationArr = [[row['latitude'], row['longitude']] for _, row in station.iterrows()]

# Write the interactive map to disk, then display it inline.
m.save('map2018.html')

m


In [50]:
# Departures per station per month.
start = (
    df.groupby(['startstationname', 'month'])
    .size()
    .reset_index()
    .rename(columns={'startstationname': 'station', 0: 'startcount'})
)

# Arrivals per station per month.
end = (
    df.groupby(['endstationname', 'month'])
    .size()
    .reset_index()
    .rename(columns={'endstationname': 'station', 0: 'endcount'})
)

# Keep only end stations that also appear as a start station.
end = end[end['station'].isin(df.startstationname.unique())].reset_index(drop=True)

# Inner join: only station/month pairs with both departures and arrivals survive.
monthly_use = pd.merge(start, end, on=['station', 'month'])

# Total must come from the monthly frame itself; summing the per-station `use`
# columns here would align on row index and yield unrelated or NaN values.
monthly_use['total'] = monthly_use['startcount'] + monthly_use['endcount']


In [51]:
# Number of station/month rows per station, i.e. in how many months each
# station appears in `monthly_use`.
# pandas >= 2.0 names the value_counts result 'count' and names its index after
# the source column; pin the older layout (Series named 'station', unnamed
# index) that the following cells rely on when they reset and rename.
months = (
    monthly_use['station']
    .value_counts()
    .rename('station')
    .rename_axis(None)
)

In [19]:
# Manual correction: count one extra month for 'Bethune Center'.
# Not idempotent — re-running this cell increments the value again.
months.loc['Bethune Center'] += 1

In [20]:
# Manual correction: count one extra month for 'Bayside Park'.
# Not idempotent — re-running this cell increments the value again.
months.loc['Bayside Park'] += 1

In [21]:
# Promote the counts Series to a single-column DataFrame.
months = months.to_frame()

In [22]:
# Display the per-station month counts.
months

Unnamed: 0,station
Harborside,12
Liberty Light Rail,12
Exchange Place,12
McGinley Square,12
Hamilton Park,12
Brunswick & 6th,12
Morris Canal,12
Jersey & 3rd,12
Jersey & 6th St,12
Dey St,12


In [23]:
# Exclude the 'JCBS Depot' entry from the month counts.
months = months.drop(index='JCBS Depot')

In [24]:
# Move the station names out of the index into a regular column.
months = months.reset_index()

In [25]:
# Former index -> 'station'; the counts column -> 'months'.
months = months.rename(columns={'index': 'station', 'station': 'months'})

In [26]:
# Order alphabetically by station name.
months = months.sort_values(by='station')

In [27]:
# Renumber the sorted rows from 0.
months = months.reset_index(drop=True)

In [28]:
# Display the sorted station / months table.
months

Unnamed: 0,station,months
0,5 Corners Library,12
1,Astor Place,12
2,Baldwin at Montgomery,12
3,Bayside Park,5
4,Bergen Ave,9
5,Bethune Center,5
6,Brunswick & 6th,12
7,Brunswick St,12
8,Christ Hospital,12
9,City Hall,12


In [29]:
# Look up each station's month count by station name rather than by row
# position, so a missing, extra, or differently ordered row in either table
# cannot silently assign one station's count to another. Stations with no
# entry in `months` get NaN.
station['months'] = station['station'].map(months.set_index('station')['months'])

In [30]:
# Display the station table with the new 'months' column.
station

Unnamed: 0,station,longitude,latitude,startcount,endcount,total,months
0,5 Corners Library,-74.059503,40.734961,2026,1751,3777,12
1,Astor Place,-74.071262,40.719282,2898,2946,5844,12
2,Baldwin at Montgomery,-74.064194,40.723659,4069,3464,7533,12
3,Bayside Park,-74.08208,40.698651,34,31,65,5
4,Bergen Ave,-74.071455,40.722104,2416,2105,4521,9
5,Bethune Center,-74.085931,40.704958,24,24,48,5
6,Brunswick & 6th,-74.050389,40.726012,8251,7297,15548,12
7,Brunswick St,-74.050656,40.724176,7383,6817,14200,12
8,Christ Hospital,-74.050444,40.734786,2420,1635,4055,12
9,City Hall,-74.043845,40.717733,8829,9469,18298,12


In [31]:
# Per-month averages (rounded to one decimal): each yearly count divided by
# the station's value in the 'months' column.
station = station.assign(
    avgtotal=(station['total'] / station['months']).round(1),
    avgstart=(station['startcount'] / station['months']).round(1),
    avgend=(station['endcount'] / station['months']).round(1),
)

In [32]:
# Display the station table including the monthly averages.
station

Unnamed: 0,station,longitude,latitude,startcount,endcount,total,months,avgtotal,avgstart,avgend
0,5 Corners Library,-74.059503,40.734961,2026,1751,3777,12,314.8,168.8,145.9
1,Astor Place,-74.071262,40.719282,2898,2946,5844,12,487.0,241.5,245.5
2,Baldwin at Montgomery,-74.064194,40.723659,4069,3464,7533,12,627.8,339.1,288.7
3,Bayside Park,-74.08208,40.698651,34,31,65,5,13.0,6.8,6.2
4,Bergen Ave,-74.071455,40.722104,2416,2105,4521,9,502.3,268.4,233.9
5,Bethune Center,-74.085931,40.704958,24,24,48,5,9.6,4.8,4.8
6,Brunswick & 6th,-74.050389,40.726012,8251,7297,15548,12,1295.7,687.6,608.1
7,Brunswick St,-74.050656,40.724176,7383,6817,14200,12,1183.3,615.2,568.1
8,Christ Hospital,-74.050444,40.734786,2420,1635,4055,12,337.9,201.7,136.2
9,City Hall,-74.043845,40.717733,8829,9469,18298,12,1524.8,735.8,789.1


In [33]:
def my_color_function_avg(feature, averages=None):
    """Pick a marker fill colour from a station's average monthly total.

    The value is bucketed by the 20/40/60/80 % quantiles of ``averages`` and
    mapped onto a five-step light-to-dark ramp: ``'#FFF5EE'`` for the lowest
    bucket up to ``'#996515'`` for the highest.

    Parameters
    ----------
    feature : number
        The average monthly total to classify.
    averages : pandas.Series, optional
        Distribution used to derive the quantile cut-offs. Defaults to the
        module-level ``station.avgtotal`` column.

    Returns
    -------
    str
        Hex colour string for the bucket ``feature`` falls into. A value that
        is not below any cut-off (including NaN) gets the darkest colour.
    """
    if averages is None:
        averages = station.avgtotal

    # Compute all four cut-offs in a single quantile call instead of
    # re-scanning the column once per branch.
    cutoffs = averages.quantile([.2, .4, .6, .8]).tolist()
    ramp = ['#FFF5EE', '#FAD6A5', '#FFB347', '#FF8C00']

    for cutoff, color in zip(cutoffs, ramp):
        if feature < cutoff:
            return color
    return '#996515'

In [34]:
# Imported here (not as `import html`) because this notebook also uses `html`
# as a plain variable name.
from html import escape

# Map of average monthly usage per station, centred on the mean station coordinates.
# NOTE(review): recent folium releases reportedly no longer bundle the Stamen
# tile sets — confirm that tiles='Stamen Terrain' still resolves in this environment.
m = folium.Map([station['latitude'].mean(), station['longitude'].mean()],
               tiles='Stamen Terrain',
               zoom_start=13)

# Mark each station as a polygon coloured by its quantile bucket of average monthly total.
for index, row in station.iterrows():

    # Station names come straight from the CSV data and may contain HTML
    # metacharacters (e.g. "Brunswick & 6th"), so escape before interpolating.
    html = """
    <div style="display:flex; flex-direction: column; justify-content:center;">
    <h3 style="text-align:center; font-family:sans-serif; margin-bottom:5px">{name}</h3>
    <div style="font-family:sans-serif; font-weight: bold;">Average Monthly Use</div>
    <div style="font-family:sans-serif; margin:0">Avg Departures: {start} </div>
    <div style="font-family:sans-serif;">Avg Arrivals: {end}</div>
    <div style="font-family:sans-serif;">Avg Monthly Total: {total}</div>
    </div>
    """.format(name=escape(str(row['station'])),
               start=row['avgstart'],
               end=row['avgend'],
               total=row['avgtotal'])

    # Render the popup inside a fixed-size iframe so the inline styles apply.
    iframe = branca.element.IFrame(html=html, width=200, height=150)

    folium.RegularPolygonMarker([row['latitude'], row['longitude']],
                        radius=8,
                        color='black',
                        fill_color=my_color_function_avg(row['avgtotal']),
                        popup=folium.Popup(iframe, max_width=2000),
                        ).add_to(m)

# Reset to an empty list (nothing is collected in this cell).
stationArr = list()

# Write the interactive map to disk (same filename as the totals map, which
# this overwrites), then display it inline.
m.save('map2018.html')

m
