In [1]:
import numpy as np
import pandas as pd
from multiprocessing import Pool, cpu_count
import gc
import time
gc.enable()
# import seaborn as sns
# import matplotlib.pyplot as plt
import warnings
import re
import requests
# import folium
# import branca.colormap as cm
# import geopy
from tqdm import tqdm_notebook as tqdm
import json
import os
# import geojson
import datetime

warnings.filterwarnings('ignore')

## Load trip data

In [2]:
%%time
def _convert_to_dateobject(x):
    return datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")

# Parsing the trip timestamps is the slow part of loading, so the parsed frame
# is cached as a pickle next to the raw CSV and reused on later runs.
# NOTE: unpickling can execute arbitrary code; only load a cache file that was
# produced by this notebook.
_TRIP_CSV_PATH = '../data/Divvy_Trips_2018_Q3.csv'
_TRIP_CACHE_PATH = '../data/raw_trip_datetime_2018_Q3.pk'
_TRIP_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

if not os.path.exists(_TRIP_CACHE_PATH):
    trip_df = pd.read_csv(_TRIP_CSV_PATH)
    # Vectorised parsing with an explicit format: same datetime64[ns] result as
    # calling strptime on every row, without a Python-level call per row.
    trip_df['start_time_dtoj'] = pd.to_datetime(trip_df['start_time'], format=_TRIP_TIME_FORMAT)
    trip_df['end_time_dtoj'] = pd.to_datetime(trip_df['end_time'], format=_TRIP_TIME_FORMAT)

    trip_df.to_pickle(_TRIP_CACHE_PATH)
else:
    trip_df = pd.read_pickle(_TRIP_CACHE_PATH)

CPU times: user 692 ms, sys: 311 ms, total: 1 s
Wall time: 1.01 s


In [3]:
trip_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1513570 entries, 0 to 1513569
Data columns (total 14 columns):
trip_id              1513570 non-null int64
start_time           1513570 non-null object
end_time             1513570 non-null object
bikeid               1513570 non-null int64
tripduration         1513570 non-null object
from_station_id      1513570 non-null int64
from_station_name    1513570 non-null object
to_station_id        1513570 non-null int64
to_station_name      1513570 non-null object
usertype             1513570 non-null object
gender               1218574 non-null object
birthyear            1221990 non-null float64
start_time_dtoj      1513570 non-null datetime64[ns]
end_time_dtoj        1513570 non-null datetime64[ns]
dtypes: datetime64[ns](2), float64(1), int64(4), object(7)
memory usage: 161.7+ MB


## Load station info

In [4]:
%%time
# Load the station metadata table (id, name, city, coordinates, dock capacity)
# straight from the CSV file.
# NOTE(review): this is the 2017 Q3/Q4 station file while the trips loaded
# above are 2018 Q3 - stations that came online later may be missing here;
# confirm this pairing is intended.
sd = pd.read_csv('../data/Divvy_Stations_2017_Q3Q4.csv')

CPU times: user 6.72 ms, sys: 6.44 ms, total: 13.2 ms
Wall time: 13 ms


In [5]:
sd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 585 entries, 0 to 584
Data columns (total 8 columns):
id             585 non-null int64
name           585 non-null object
city           585 non-null object
latitude       585 non-null float64
longitude      585 non-null float64
dpcapacity     585 non-null int64
online_date    585 non-null object
Unnamed: 7     0 non-null float64
dtypes: float64(3), int64(2), object(3)
memory usage: 36.6+ KB


## Select station and date

In [6]:
# Pick the day (dd) and month (mm) to analyse: either the fixed default or a
# random date drawn from July-September.
DAY_RANDOM_FLAG = False

if not DAY_RANDOM_FLAG:
    dd, mm = 2, 7
else:
    dd = np.random.choice(range(1, 32), 1)[0]
    mm = np.random.choice([7, 8, 9], 1)[0]
    # September only has 30 days: redraw the day when the 31st comes up.
    if mm == 9 and dd == 31:
        dd = np.random.choice(range(1, 31), 1)[0]
print('Month: {}, day: {}'.format(mm, dd))

Month: 7, day: 2


In [7]:
# Get the heavy demand station list for this day
SHOW_hot_station_list = True

def _get_hd_stn_lst(df):
    top_station_df = df[
        (df.start_time_dtoj.dt.day == dd) &
        (df.start_time_dtoj.dt.month == mm) 
    ].groupby(['from_station_id'])[['trip_id']]\
     .count().sort_values(by='trip_id', ascending=False)\
     .reset_index().head(100)

    if SHOW_hot_station_list:
        display(top_station_df)
    
    return top_station_df

In [8]:
# Pick the station to analyse: a random one among the busiest departure
# stations of the selected day, or a fixed default id.
STN_RANDOM_FLAG = True

if not STN_RANDOM_FLAG:
    st_id = 100
else:
    top_station_df = _get_hd_stn_lst(trip_df)
    candidate_ids = top_station_df.from_station_id.unique()
    st_id = np.random.choice(candidate_ids, 1)[0]
print('Station id: {}'.format(st_id))

Unnamed: 0,from_station_id,trip_id
0,35,518
1,192,364
2,77,312
3,177,268
4,268,239
5,85,228
6,91,220
7,287,215
8,43,213
9,283,191


Station id: 111


## Trip collection for a single day and a single location

In [9]:
## Helper functions

# Get net change for each trip
def _get_net(row):
    if row.incoming:
        return 1
    elif row.outgoing:
        return -1
    else:
        return 0

# Get exact time for bike rental/return for this station and then sort
def _get_time(row):
    if row.incoming:
        return row.end_time_dtoj
    elif row.outgoing:
        return row.start_time_dtoj
    else:
        return

In [10]:
# Collect the trips of one day that start or end at one station
def get_trip_trips(trip_df, dd, mm, st_id, station_df=None):
    """Build the time-ordered rental/return log of one station for one day.

    Parameters
    ----------
    trip_df : DataFrame
        Trip table containing the columns selected below, including the
        datetime columns ``start_time_dtoj`` and ``end_time_dtoj``.
    dd, mm : int
        Day of month and month; trips are selected by their *start* time.
    st_id : int
        Station id; trips departing from or arriving at it are kept.
    station_df : DataFrame, optional
        Station metadata with an ``id`` column. Defaults to the notebook-level
        ``sd`` table.

    Returns
    -------
    (DataFrame, DataFrame)
        The selected trips sorted by ``time`` with the extra columns
        ``outgoing``, ``incoming``, ``net``, ``time``, ``in_cum``, ``out_cum``
        and ``net_cum``, and the rows of ``station_df`` matching ``st_id``.
        Both frames are empty when nothing matches.
    """
    kept_columns = ['trip_id', 'tripduration', 'from_station_id', 'to_station_id',
                    'usertype', 'gender', 'birthyear', 'start_time_dtoj', 'end_time_dtoj']

    started = trip_df.start_time_dtoj.dt
    touches_station = (trip_df.from_station_id == st_id) | (trip_df.to_station_id == st_id)
    on_day = (started.day == dd) & (started.month == mm)

    # .copy() gives an independent frame, so the column assignments below never
    # write into a view of trip_df.
    daily_trip_details = trip_df.loc[on_day & touches_station, kept_columns].copy()

    # Check if incoming or outgoing
    daily_trip_details['outgoing'] = daily_trip_details.from_station_id == st_id
    daily_trip_details['incoming'] = daily_trip_details.to_station_id == st_id

    # +1 for a return, -1 for a rental; a round trip (both flags set) nets to 0
    # so that net_cum stays equal to out_cum + in_cum. Computed column-wise so
    # an empty selection works as well.
    daily_trip_details['net'] = (
        daily_trip_details['incoming'].astype(int)
        - daily_trip_details['outgoing'].astype(int)
    )

    # Event time at this station: arrival time for incoming trips, departure
    # time otherwise (every selected row is incoming or outgoing).
    daily_trip_details['time'] = daily_trip_details['end_time_dtoj'].where(
        daily_trip_details['incoming'], daily_trip_details['start_time_dtoj']
    )
    daily_trip_details = daily_trip_details.sort_values(by='time')

    # Running totals; returns and net are negated so rentals count as positive.
    daily_trip_details['in_cum'] = -daily_trip_details['incoming'].cumsum()
    daily_trip_details['out_cum'] = daily_trip_details['outgoing'].cumsum()
    daily_trip_details['net_cum'] = -daily_trip_details['net'].cumsum()

    if station_df is None:
        station_df = sd
    station_info = station_df[station_df.id == st_id]

    return daily_trip_details, station_info

## Plot as a function of time

[Great example of making multi-type subplots](https://plot.ly/~empet/15130/mixed-2d-and-3d-subplots-forum/#/)

In [11]:
# Plotly (offline mode) is used for every figure below; `go` provides the trace
# and layout classes, `iplot` / `plot` render inline or to an HTML file.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from ipywidgets import widgets
import plotly.graph_objs as go
from plotly import tools

# Must run once per kernel before the first iplot() call.
# NOTE(review): connected=True presumably loads plotly.js from the network
# instead of embedding it in the notebook - confirm against the plotly docs.
init_notebook_mode(connected=True)

In [12]:
# Show the installed plotly version: the layout code below uses plotly 3.x
# attribute names (e.g. 'titlefont'), so the version matters when re-running.
import plotly
plotly.__version__

'3.10.0'

In [13]:
## Global setup for plotting
# Mapbox access token: read from the environment so the credential is not
# stored in the notebook. Set MAPBOX_ACCESS_TOKEN before starting the kernel.
mapbox_access_token = os.environ.get('MAPBOX_ACCESS_TOKEN', '')
if not mapbox_access_token:
    # Keep the notebook runnable, but say why the map tiles will be missing.
    print('MAPBOX_ACCESS_TOKEN is not set: the Mapbox base map will not render.')

# Style lookup shared by all traces, keyed by series: 'in' (returns),
# 'out' (rentals) and 'net' (net demand).
style_dict = {
    'color': {'in': 'red', 'out': 'orange', 'net': 'blue'},
    'name': {'in': 'Return', 'out': 'Rental', 'net': 'Net Demand'},
}

In [14]:
## Build the line trace for one cumulative series
def _get_trip_trend_date(df, conf, vis=False):
    """Return a ``go.Scatter`` of the ``<conf>_cum`` column against ``time``.

    ``conf`` is one of the ``style_dict`` keys ('in', 'out', 'net'); it picks
    the column to plot as well as the trace name and line colour. ``vis`` is
    passed through as the trace's initial visibility.
    """
    x_values = df.time
    y_values = df[conf + '_cum']
    trace_name = style_dict['name'][conf]
    line_style = {'color': style_dict['color'][conf]}

    return go.Scatter(
        x=x_values,
        y=y_values,
        name=trace_name,
        mode='lines+markers',
        line=line_style,
        visible=vis,
    )

## Function to get map data
def _get_map_data(df, vis=False):
    """Return a ``go.Scattermapbox`` marker for a single station.

    Parameters
    ----------
    df : DataFrame
        Exactly one row of station metadata with the columns ``id``,
        ``latitude``, ``longitude``, ``dpcapacity`` and ``name``.
    vis : bool, default False
        Initial visibility of the marker trace.

    Raises
    ------
    ValueError
        If ``df`` does not contain exactly one row (for example when the
        station id is absent from the station table).
    """
    if len(df) != 1:
        raise ValueError(
            'Expected exactly one station row, got {}'.format(len(df))
        )

    # Work on the single row as scalars instead of converting one-element
    # Series with float()/int().
    station = df.iloc[0]
    station_id = int(station['id'])
    latitude = float(station['latitude'])
    longitude = float(station['longitude'])
    # Use item access: `station.name` would be the row's index label, not the
    # 'name' column, and Series.to_string() would prepend the index as well.
    station_name = str(station['name'])

    return go.Scattermapbox(
        lat=[latitude],
        lon=[longitude],
        text=["<b>Station id</b>: {}\
               <br> <b>Latitude</b>: {:.4f} \
               <br> <b>Longitude</b>: {:.4f} \
               <br> <b>Station capacity</b>: {}\
               <br> <b>Station name</b>: <br> {}\
              ".format(
            station_id,
            latitude,
            longitude,
            int(station['dpcapacity']),
            station_name,
        )],
        mode='markers',
        hoverinfo = 'text',
        marker=go.scattermapbox.Marker(
            size=10,
            color='yellow',
        ),
        subplot='mapbox',
        visible=vis,
        name='Station '+str(station_id),
        showlegend=False,
    )

In [15]:
## Style of mixed plot
# Figure-level layout shared by every station: a full-width time-series axis
# pair with a Mapbox inset in the upper-left part of the figure.
layout = dict(
    title={
        'text': 'Trend of bike rental and net demand',
        'font': {'family': 'Droid Serif, serif', 'size': 30, 'color': 'Black'},
    },
    yaxis=dict(
        zeroline=False,
        title="Number of bikes",
        titlefont={'family': 'Arial, sans-serif', 'size': 25, 'color': 'grey'},
        domain=[0, 0.95],
        tickangle=-45,
        tickfont={'family': 'Old Standard TT, serif', 'size': 14, 'color': 'black'},
    ),
    xaxis=dict(
        zeroline=True,
        domain=[0., 0.99],
        tickangle=0,
        tickfont={'family': 'Old Standard TT, serif', 'size': 14, 'color': 'black'},
    ),
    # Map inset; `domain` is given in figure (paper) fractions.
    mapbox=go.layout.Mapbox(
        accesstoken=mapbox_access_token,
        bearing=0,
        domain={'x': [0.05, 0.4], 'y': [0.55, 1]},
        center=go.layout.mapbox.Center(lat=41.89, lon=-87.625),
        pitch=60,
        zoom=11.5,
        style='dark',
    ),
    showlegend=True,
    autosize=True,
    legend={
        'orientation': "v",
        'x': 0.5,
        'y': 1,
        'font': {'size': 16},
    },
    margin=go.layout.Margin(l=60, r=10, b=10, t=50, pad=6),
    shapes=[
        # Light grey band across the full plot width for y between -35 and 35.
        # NOTE(review): presumably a tolerance band around zero net demand -
        # confirm what the +/-35 limits stand for.
        go.layout.Shape(
            type="rect",
            xref="paper", yref="y",
            x0="0", y0=-35,
            x1="1", y1=35,
            fillcolor="lightgrey",
            opacity=0.5,
            layer="below",
            line_width=0,
        ),
        # Dotted horizontal reference line at y = 0.
        go.layout.Shape(
            type="line",
            xref="paper", yref="y",
            x0=0, y0=0,
            x1=1, y1=0,
            line={'color': "black", 'width': 0.5, 'dash': 'dot'},
        ),
    ],
)

In [16]:
# Set date and station list
pm = 7
pd = 2

station_list = [192, 100, 35, 91, 56]
station_list = [192, 177, 100, 143]

In [17]:
## Get data
data = []

vis_flag = True
for st_id in station_list:
    daily_trip_details, station_info = get_trip_trips(trip_df, pd, pm, st_id)

    data.append(_get_map_data(station_info, vis=vis_flag))
    data.append(_get_trip_trend_date(daily_trip_details, 'out', vis=vis_flag))
    data.append(_get_trip_trend_date(daily_trip_details, 'net', vis=vis_flag))
    vis_flag = False

In [18]:
# Create button list
# One dropdown entry per station; selecting it shows only that station's
# three consecutive traces and hides all the others.
n_st = len(station_list)
blank = [False] * n_st *3

button_list = []
for slot, station in enumerate(station_list):
    first_trace = slot * 3
    shown = list(blank)
    shown[first_trace:first_trace + 3] = [True] * 3

    button_list.append({
        'args': [{'visible': shown}],
        'label': 'Station ' + str(station),
        'method': 'update',
    })

In [19]:
# Single station-selector dropdown, anchored by its bottom-left corner and
# opening upwards.
station_dropdown = {
    'buttons': button_list,
    'direction': 'up',
    'x': 0.82,
    'xanchor': 'left',
    'y': -0.18,
    'yanchor': 'bottom',
    'bgcolor': 'lightgrey',
    'bordercolor': 'black',
    'font': {'size': 11, 'color': 'black'},
    'showactive': False,
}
updatemenus = [station_dropdown]

layout['updatemenus'] = updatemenus

In [20]:
# Assemble the figure from the traces and layout built above.
figure = {'data': data, 'layout': layout}


# SHOW = True renders inline in the notebook; False writes a standalone HTML file.
SHOW = True

if not SHOW:
    plot(figure, config={'displayModeBar': False}, filename="bike_trip_trend.html")
else:
    iplot(figure, config={'displayModeBar': False})