In [1]:
import numpy as np
from scipy import stats
import pandas as pd
from matplotlib import pyplot as plt

import time
#from datetime import *
from calendar import timegm

import pickle

pd.set_option('display.max_columns',20)

# NOTE(review): absolute, machine-specific path -- the notebook only runs where
# this file exists; consider a configurable data directory.
file = '/Users/JiIn/Desktop/Data/201809-citibike-tripdata.csv'

# For a quick smoke test, pass nrows=10 to read_csv.
df_citibike = pd.read_csv(file)

# Remove trip-duration outliers: keep only rows whose |z-score| is below the
# cutoff. (The previous comment said 3 standard deviations, but the threshold
# actually applied has always been 2.5.)
ZSCORE_CUTOFF = 2.5
duration_zscore = np.abs(stats.zscore(df_citibike['tripduration']))
df_citibike = df_citibike[duration_zscore < ZSCORE_CUTOFF]

def StripDate(rawDateString):
    """Parse a ``YYYY-MM-DD HH:MM:SS.ffffff`` string into a ``time.struct_time``.

    Raises ``ValueError`` (from ``time.strptime``) when the string does not
    match that layout.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S.%f"
    return time.strptime(rawDateString, timestamp_format)

def EpochDate(strip):
    """Convert a parsed time tuple to whole seconds since the Unix epoch.

    Delegates to ``calendar.timegm``, which reads the tuple's fields as UTC.
    NOTE(review): if the source timestamps are local wall-clock times, the
    result is shifted by the UTC offset -- confirm this is acceptable.
    """
    return timegm(strip)
def Hour(striptime):
    """Return the hour-of-day field (``tm_hour``) of a parsed time struct."""
    return striptime.tm_hour

# Parse each timestamp once, then derive epoch seconds and hour-of-day from the
# parsed value. Each transform reads a single column, so Series.map is used
# rather than DataFrame.apply(axis=1): the resulting columns are the same, but
# no per-row Series has to be built for every trip.
df_citibike['startstrip'] = df_citibike['starttime'].map(StripDate)
df_citibike['startepoch'] = df_citibike['startstrip'].map(EpochDate)
df_citibike['starthour'] = df_citibike['startstrip'].map(Hour)

df_citibike['stopstrip'] = df_citibike['stoptime'].map(StripDate)
df_citibike['stopepoch'] = df_citibike['stopstrip'].map(EpochDate)
df_citibike['stophour'] = df_citibike['stopstrip'].map(Hour)



In [2]:
# Count trips per (start hour, start station id) pair.
start_keys = ['starthour', 'start station id']
df_start = (
    df_citibike[start_keys + ['tripduration']]
    .set_index(start_keys)
    .groupby(level=start_keys)
    .count()
    # index=str converts both index levels to strings, so lookups into
    # df_start must use string keys.
    .rename(index=str, columns={'tripduration': 'startfreq'})
)

In [3]:
# Count trips per (stop hour, end station id) pair.
stop_keys = ['stophour', 'end station id']
df_stop = (
    df_citibike[stop_keys + ['tripduration']]
    .set_index(stop_keys)
    .groupby(level=stop_keys)
    .count()
    # index=str converts both index levels to strings, so lookups into
    # df_stop must use string keys.
    .rename(index=str, columns={'tripduration': 'stopfreq'})
)

In [4]:
def getAllUniqueStations(df):
    """Return the distinct station ids that occur as a trip start or trip end.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'start station id' and 'end station id'.

    Returns
    -------
    numpy.ndarray
        Unique ids in order of first appearance (all start ids are scanned
        before the end ids).
    """
    # Series.append was removed in pandas 2.0; pd.concat is the supported
    # way to stack the two columns and works on older versions as well.
    all_ids = pd.concat([df['start station id'], df['end station id']])
    return all_ids.unique()

In [5]:
# Two views of the trip table: one indexed by the end station id and one by
# the start station id (ids, not names, become the index).
df_end_station = df_citibike.set_index(keys='end station id')
df_start_station = df_citibike.set_index(keys='start station id')

In [6]:
# Build station_dict: station id -> Series of (longitude, latitude, name).
# The fields are taken from the first trip that starts at the station, or,
# for stations that never appear as a start, from the first trip ending there.
station_list = getAllUniqueStations(df_citibike)

start_fields = ['start station longitude', 'start station latitude', 'start station name']
end_fields = ['end station longitude', 'end station latitude', 'end station name']

# Keep only the first row per station id up front. Each lookup below then hits
# a unique index and returns a single row, instead of selecting every trip for
# that station out of the full table and discarding all but the first.
first_as_start = df_start_station.loc[
    ~df_start_station.index.duplicated(keep='first'), start_fields
]
first_as_end = df_end_station.loc[
    ~df_end_station.index.duplicated(keep='first'), end_fields
]

station_dict = {}
for station_id in station_list:
    if station_id in first_as_start.index:
        station_dict[station_id] = first_as_start.loc[station_id]
    else:
        station_dict[station_id] = first_as_end.loc[station_id]

In [9]:
import folium as fm
from matplotlib import pyplot as plt

#plot a map of Manhattan


#put circle markers for every station that has trips in the given hour
def createBubbleMap(data, hour, color, marker_scale=200.0):
    """Build a folium map with one circle marker per station listed for ``hour``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame indexed by (hour, station id), both stored as strings, whose
        first column is used as the trip count (the layout of ``df_start`` /
        ``df_stop``).
    hour : int or str
        Hour to plot; converted with ``str`` to match the string index.
    color : str
        Outline colour handed to ``folium.CircleMarker``.
    marker_scale : float, optional
        Divisor turning a trip count into a marker radius. Defaults to 200.0,
        the value that used to be hard-coded.

    Returns
    -------
    folium.Map
        Map centred on [40.738, -73.98] with the markers added.

    Raises
    ------
    KeyError
        If ``data`` has no rows for ``hour`` or a station id is missing from
        ``station_dict``.
    """
    bubbleMap = fm.Map(location = [40.738, -73.98], zoom_start =13, tiles = 'CartoDB dark_matter')

    # Slice the requested hour once; what remains is indexed by station id
    # only, and column 0 carries the count.
    hour_counts = data.loc[(str(hour),)].iloc[:, 0]

    for station_key, trip_count in hour_counts.items():
        # Station ids are strings in the index (e.g. '72.0' when the ids were
        # read as floats), so go through float() to get the numeric dict key.
        station = station_dict[int(float(station_key))]
        # station_dict values are ordered (longitude, latitude, name). Use
        # .iloc: plain [n] on a label-indexed Series is deprecated positional
        # access in recent pandas.
        lon, lat, name = station.iloc[0], station.iloc[1], station.iloc[2]
        marker = fm.CircleMarker(location = [lat, lon], popup = name, color = color, radius = trip_count/marker_scale)
        marker.add_to(bubbleMap)
    return bubbleMap

def _addStationMarkers(target_map, data, hour_str, color, marker_scale):
    """Add one filled circle per station that ``data`` lists for ``hour_str``.

    ``data`` must be laid out like ``df_start`` / ``df_stop``: indexed by
    (hour, station id) as strings, with the trip count in the first column.
    """
    # Slice the hour once; the remaining index is the station id.
    hour_counts = data.loc[(hour_str,)].iloc[:, 0]
    for station_key, trip_count in hour_counts.items():
        # Station ids are strings in the index; convert back to the numeric
        # key used by station_dict.
        station = station_dict[int(float(station_key))]
        # station_dict values are ordered (longitude, latitude, name). Use
        # .iloc: plain [n] on a label-indexed Series is deprecated positional
        # access in recent pandas.
        lon, lat, name = station.iloc[0], station.iloc[1], station.iloc[2]
        marker = fm.CircleMarker(location = [lat, lon], popup = name, color = color, radius = trip_count/marker_scale, fill_color = color)
        marker.add_to(target_map)


def OverlapBubbleMap(hour, startcolor, endcolor, marker_scale=200.0):
    """Build one folium map showing both trip starts and trip ends for ``hour``.

    Parameters
    ----------
    hour : int or str
        Hour to plot; converted with ``str`` to match the string index of
        ``df_start`` / ``df_stop``.
    startcolor : str
        Outline and fill colour for markers drawn from ``df_start``.
    endcolor : str
        Outline and fill colour for markers drawn from ``df_stop``.
    marker_scale : float, optional
        Divisor turning a trip count into a marker radius. Defaults to 200.0,
        the value that used to be hard-coded.

    Returns
    -------
    folium.Map
        Map centred on [40.738, -73.98]; start markers are added first, end
        markers on top of them.

    Raises
    ------
    KeyError
        If either frame has no rows for ``hour`` or a station id is missing
        from ``station_dict``.
    """
    m3 = fm.Map(location = [40.738, -73.98], zoom_start =13, tiles = 'CartoDB dark_matter')

    hour_str = str(hour)

    _addStationMarkers(m3, df_start, hour_str, startcolor, marker_scale)
    _addStationMarkers(m3, df_stop, hour_str, endcolor, marker_scale)

    return m3
    

#ZeroStart = StartBubbleMap(0,'crimson')
#ZeroStart = createBubbleMap(df_start, 0,'crimson')


    

# One overlap map per hour 0-23: 'yellow' is passed as the start colour and
# 'aqua' as the end colour. hourly_maps[h] is the map for hour h.
hourly_maps = [OverlapBubbleMap(hour, 'yellow', 'aqua') for hour in range(24)]

# Keep the original per-hour names bound so existing references still work.
(Zero, One, Two, Three, Four, Five,
 Six, Seven, Eight, Nine, Ten, Eleven,
 Twelve, Thirteen, Fourteen, Fifteen, Sixteen, Seventeen,
 Eighteen, Nineteen, Twenty, Twentyone, Twentytwo, Twentythree) = hourly_maps



#NineStart = StartBubbleMap(9,'crimson')
#NineEnd = EndBubbleMap(9,'aqua')
#NineOver = OverlapBubbleMap(9,'crimson','aqua')

