In [2]:
# Version scheme:
#   major version -> the feature setup
#   minor version -> model hyperparameter tuning
VERSION = 'v3.0'
# Keep only the major component and pin the minor part to ".0"
# (e.g. 'v3.2' -> 'v3.0').
major_VERSION = VERSION.partition('.')[0] + '.0'

In [52]:
import pandas as pd
from fbprophet import Prophet
import os
import plotly
from tqdm import tqdm_notebook as tqdm

In [46]:
import plotly.graph_objects as go
from plotly.offline import plot, iplot

In [16]:
# Display the installed plotly version so the environment behind the figures is on record.
plotly.__version__

'4.0.0'

# Load preprocessed data

In [4]:
%%time
## Load the preprocessed data, going through a pickle cache keyed by the major version.
cache_file = '../data/divvy_data_model_' + major_VERSION + '.pk'

if not os.path.exists(cache_file):
    print("Create data set for this model... ")

    # One feather file per year; read them all before stitching them together.
    yearly_frames = [
        pd.read_feather('../data/Final_Divvy_data_' + str(year) + '.feather')
        for year in range(2013, 2019)
    ]

    # !!! Careful with future pandas versions: `sort` is passed explicitly on purpose.
    data = pd.concat(yearly_frames, ignore_index=True, sort=True)

    # Persist the combined frame so later runs can skip the rebuild.
    data.to_pickle(cache_file)
    print("Data saved to pickle file")
else:
    print("Loading from previous pickle file.")
    data = pd.read_pickle(cache_file)

Loading from previous pickle file.
CPU times: user 227 ms, sys: 667 ms, total: 895 ms
Wall time: 1.22 s


# Perform Time Series Analysis using `prophet`

In [203]:
def _get_ts(row):
    return str(int(row.year)) + '-' + str(int(row.month)) + '-' + str(int(row.day))

def time_series_analysis(data, station_id):
    """Fit a Prophet model to one station's history and forecast 365 periods ahead.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns read here: ``station_id``, ``total_out`` and
        the ``year``/``month``/``day`` fields consumed by ``_get_ts``.
    station_id
        Value compared against ``data.station_id`` to select the rows to model.

    Returns
    -------
    tuple
        ``(df, forecast)`` where ``df`` is the frame handed to Prophet
        (``ds`` = date string from ``_get_ts``, ``y`` = ``total_out``) and
        ``forecast`` is the result of ``Prophet.predict`` over the history plus
        365 future periods, with an added ``yhat_cutoff`` column holding
        ``yhat`` floored at 0.

    Raises
    ------
    ValueError
        If no row of ``data`` matches ``station_id``.
    """
    # Prepare dataframes
    st_data = data[data.station_id == station_id]
    if st_data.empty:
        # Fail early with a clear message instead of an obscure error raised
        # later while building the frame or fitting the model.
        raise ValueError(
            'No rows found for station_id={!r}'.format(station_id)
        )

    df = pd.DataFrame()
    df['ds'] = st_data.apply(_get_ts, axis=1)
    df['y'] = st_data['total_out']

    # Prophet forecasting
    # NOTE(review): `ds` carries dates only (no time of day), so
    # daily_seasonality may have no intra-day signal to fit -- confirm intent.
    m = Prophet(daily_seasonality=True)
    m.fit(df)
    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)

    # Negative predictions are floored at zero in a separate column; the raw
    # `yhat` column is left untouched.
    forecast['yhat_cutoff'] = forecast['yhat'].clip(lower=0)

    return df, forecast

# Prepare Plotly data

In [204]:
def get_plotly_data(df, forecast, vis):
    """Return the two plotly traces for one station: observed values and forecast.

    Parameters
    ----------
    df
        Object exposing ``ds`` and ``y``; drawn as markers named "Actual".
    forecast
        Object exposing ``ds`` and ``yhat_cutoff``; drawn as the "Prediction" trace.
    vis
        Passed unchanged as the ``visible`` property of both traces.

    Returns
    -------
    list
        ``[actual_trace, prediction_trace]`` in that order.
    """
    actual_trace = go.Scatter(
        name="Actual",
        x=df.ds,
        y=df.y,
        mode='markers',
        line_color='deepskyblue',
        opacity=0.8,
        visible=vis,
    )
    prediction_trace = go.Scatter(
        name="Prediction",
        x=forecast.ds,
        y=forecast.yhat_cutoff,
        line_color='red',
        opacity=0.7,
        visible=vis,
    )
    return [actual_trace, prediction_trace]

In [205]:
%%time
station_list = [192, 100, 35, 18, 268, 253]

# Fit one model per station and collect its traces.
# Only the first station's traces start out visible.
data_traces = []
for position, station in enumerate(tqdm(station_list)):
    history, prediction = time_series_analysis(data, station)
    data_traces += get_plotly_data(history, prediction, position == 0)

HBox(children=(IntProgress(value=0, max=6), HTML(value='')))


CPU times: user 54.5 s, sys: 3.26 s, total: 57.8 s
Wall time: 39.4 s


In [206]:
## Style of mixed plot
FONT_FAMILY = 'Open Sans, sans-serif'


def _white_font(size, family=FONT_FAMILY):
    """Return a white plotly font spec of ``size``; ``family=None`` omits the family key."""
    font = {'size': size, 'color': 'white'}
    if family is not None:
        font['family'] = family
    return font


# Axis title fonts live under `title.font`: the flat `titlefont` axis property
# is deprecated in plotly in favour of it. Both axes use the same
# {'text': ..., 'font': ...} title shape.
layout = {
    'title': {
        'text': 'Trend of Daily Demand',
        'font': _white_font(30),
        'xref': 'paper',
        'x': 0.5,
        'xanchor': 'center',
        'y': 0.95,
        'yanchor': 'top',
    },
    'yaxis': {
        'zeroline': False,
        'showgrid': False,
        'title': {
            'text': "Number of bikes",
            'font': _white_font(25),
        },
        'domain': [0, 0.95],
        'tickangle': -45,
        'tickfont': _white_font(14),
    },
    'xaxis': {
        'zeroline': True,
        'showgrid': False,
        'title': {
            'text': "Year",
            'font': _white_font(25),
        },
        'domain': [0., 0.99],
        'tickangle': -45,
        'tickfont': _white_font(14),
    },
    # White-on-black theme.
    'paper_bgcolor': 'black',
    'plot_bgcolor': 'black',
    'showlegend': True,
    'autosize': True,
    'legend': {
        'orientation': "v",
        'x': 0.85,
        'xanchor': 'left',
        'y': 1,
        # The legend font sets no family.
        'font': _white_font(16, family=None),
    },
    # Plain dict like every other entry, so this cell needs no plotly import.
    'margin': {'l': 60, 'r': 30, 'b': 10, 't': 50, 'pad': 6},
}

In [207]:
# Build one dropdown button per station.
n_st = len(station_list)

# Selecting a button shows only the two consecutive traces at positions
# 2*pos and 2*pos + 1 and hides every other trace.
button_list = [
    dict(
        args=[{'visible': [slot // 2 == pos for slot in range(n_st * 2)]}],
        label='Station ' + str(station),
        method='update',
    )
    for pos, station in enumerate(station_list)
]

# A single dropdown menu holding all station buttons, styled white on black.
station_menu = dict(
    buttons=button_list,
    pad={"t": 500},
    direction='down',
    showactive=False,
    x=0.15,
    xanchor='center',
    y=0.9,
    yanchor='bottom',
    bgcolor='black',
    bordercolor='white',
    borderwidth=2,
    font=dict(size=14, color='white'),
)
updatemenus = [station_menu]

layout['updatemenus'] = updatemenus

In [209]:
# Assemble the figure from the collected traces, then apply the shared styling.
fig = go.Figure(data=data_traces)
fig.update_layout(layout)

# Export to an HTML file with the mode bar turned off; the call stays the last
# expression so its return value is shown as the cell output.
export_options = dict(
    filename='time_series_ana.html',
    config={'displayModeBar': False},
)
plot(fig, **export_options)

'time_series_ana.html'