# Forecasting Divvy Bike Share Demand During Covid-19
**Objective**   
Forecast daily Divvy rideshare demand across all Chicago stations from September 1-December 31, 2020 using data from January 1, 2017-August 31, 2020  

**Data**   
This data was pulled from the [City of Chicago](https://divvy-tripdata.s3.amazonaws.com/index.html) and covers January 1, 2017-August 31, 2020.

In [1]:
import pandas as pd
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation
import pickle
import datetime
from sklearn import metrics
import math
from tqdm import tqdm
from multiprocessing import Pool, cpu_count
import numpy as np
from fbprophet.diagnostics import performance_metrics
from fbprophet.diagnostics import cross_validation
from sklearn.preprocessing import MinMaxScaler
from fbprophet.plot import plot_cross_validation_metric

Importing plotly failed. Interactive plots will not work.


In [2]:
# Read the ride-count CSV (2017 through 2020) into the working dataframe
df = pd.read_csv(filepath_or_buffer='daily_df_2017_to_2020.csv')

In [3]:
def is_covid(ds):
    '''
    A helper function that reports whether a date falls on or after the start
    of Covid-19 (indicated by the initial date of Chicago's shutdown,
    March 17, 2020).

    Parameters
    ----------
    ds : The date of a Divvy bike ride. Any single value that
        ``pd.to_datetime`` can parse (string, ``datetime``, ``Timestamp``).

    Returns
    -------
    False if the date took place before Covid-19 and True if the date took
    place on or after March 17, 2020. A value that parses to ``NaT`` yields
    False.
    '''
    date = pd.to_datetime(ds)
    # Compare Timestamp against Timestamp: ordering a Timestamp against a plain
    # datetime.date is deprecated in pandas and raises TypeError in pandas >= 2.0.
    return date >= pd.Timestamp('2020-03-17')


def is_phase_one(ds):
    '''
    A helper function that reports whether a date is during Chicago's
    Phase 1 Covid response (March 17-April 30, 2020, both days inclusive).

    Parameters
    ----------
    ds : The date of a Divvy bike ride. Any single value that
        ``pd.to_datetime`` can parse (string, ``datetime``, ``Timestamp``).

    Returns
    -------
    False if the date did not take place during Phase 1 and True if the date
    took place during Phase 1. Both bounds are midnight timestamps, so a value
    carrying a time of day later than 00:00 on April 30 falls outside the range.
    '''
    date = pd.to_datetime(ds)
    # Compare Timestamp against Timestamp: ordering a Timestamp against a plain
    # datetime.date is deprecated in pandas and raises TypeError in pandas >= 2.0.
    phase_start = pd.Timestamp('2020-03-17')
    phase_end = pd.Timestamp('2020-04-30')
    return phase_start <= date <= phase_end


def is_phase_two(ds):
    '''
    A helper function that reports whether a date is during Chicago's
    Phase 2 Covid response (May 1-June 2, 2020, both days inclusive).

    Parameters
    ----------
    ds : The date of a Divvy bike ride. Any single value that
        ``pd.to_datetime`` can parse (string, ``datetime``, ``Timestamp``).

    Returns
    -------
    False if the date did not take place during Phase 2 and True if the date
    took place during Phase 2. Both bounds are midnight timestamps, so a value
    carrying a time of day later than 00:00 on June 2 falls outside the range.
    '''
    date = pd.to_datetime(ds)
    # Compare Timestamp against Timestamp: ordering a Timestamp against a plain
    # datetime.date is deprecated in pandas and raises TypeError in pandas >= 2.0.
    phase_start = pd.Timestamp('2020-05-01')
    phase_end = pd.Timestamp('2020-06-02')
    return phase_start <= date <= phase_end


def is_phase_three(ds):
    '''
    A helper function that reports whether a date is during Chicago's
    Phase 3 Covid response (June 3-June 25, 2020, both days inclusive).

    Parameters
    ----------
    ds : The date of a Divvy bike ride. Any single value that
        ``pd.to_datetime`` can parse (string, ``datetime``, ``Timestamp``).

    Returns
    -------
    False if the date did not take place during Phase 3 and True if the date
    took place during Phase 3. Both bounds are midnight timestamps, so a value
    carrying a time of day later than 00:00 on June 25 falls outside the range.
    '''
    date = pd.to_datetime(ds)
    # Compare Timestamp against Timestamp: ordering a Timestamp against a plain
    # datetime.date is deprecated in pandas and raises TypeError in pandas >= 2.0.
    phase_start = pd.Timestamp('2020-06-03')
    phase_end = pd.Timestamp('2020-06-25')
    return phase_start <= date <= phase_end

In [4]:
# Prophet expects the target in a column named 'y' and the date in 'ds'
df = df.rename(
    {'number_daily_rides': 'y', 'start_day_of_year': 'ds'},
    axis='columns',
)

In [5]:
# Remove the station identifier column (the only column dropped here)
df = df.drop(columns=['from_station_id'])

In [6]:
# Add one boolean flag column plus its logical complement for the Covid period
# and for each of Phases 1-3. These columns are the condition columns that the
# conditional seasonalities refer to by name.
for flag_column, complement_column, flag_func in (
    ('covid', 'precovid', is_covid),
    ('phase_one', 'not_phase_one', is_phase_one),
    ('phase_two', 'not_phase_two', is_phase_two),
    ('phase_three', 'not_phase_three', is_phase_three),
):
    in_period = df['ds'].apply(flag_func)
    df[flag_column] = in_period
    df[complement_column] = ~in_period

In [None]:
# Final model: Prophet's built-in daily/weekly/yearly seasonalities are turned
# off and replaced by conditional ones (pre/during Covid, and in/out of each of
# reopening Phases 1-3).
prophet = Prophet(
    changepoint_prior_scale=0.2,
    seasonality_prior_scale=20,
    yearly_seasonality=False,
    weekly_seasonality=False,
    daily_seasonality=False,
)

# Each entry is (seasonality name, period in days, Fourier order). The name is
# also the boolean condition column that switches the seasonality on.
for seasonality_name, period_days, n_fourier in (
    ('covid', 7, 10),
    ('precovid', 7, 3),
    ('phase_one', 365.25, 20),
    ('not_phase_one', 365.25, 3),
    ('phase_two', 365.25, 3),
    ('not_phase_two', 365.25, 3),
    ('phase_three', 365.25, 10),
    ('not_phase_three', 365.25, 3),
):
    prophet.add_seasonality(
        name=seasonality_name,
        period=period_days,
        fourier_order=n_fourier,
        condition_name=seasonality_name,
    )
prophet.add_country_holidays(country_name='US')

prophet.fit(df)

# Extend the frame by 122 days (the length of September 1-December 31) and
# rebuild every condition column, since predict() needs them on future rows too.
future = prophet.make_future_dataframe(periods=122)
for future_flag, future_complement, future_flag_func in (
    ('covid', 'precovid', is_covid),
    ('phase_one', 'not_phase_one', is_phase_one),
    ('phase_two', 'not_phase_two', is_phase_two),
    ('phase_three', 'not_phase_three', is_phase_three),
):
    future_in_period = future['ds'].apply(future_flag_func)
    future[future_flag] = future_in_period
    future[future_complement] = ~future_in_period

forecast = prophet.predict(future)

In [None]:
# Cross-validate the fitted model: 730 days of initial training data, a new
# cutoff every 180 days, and a 122-day forecast horizon from each cutoff.
cv_results = cross_validation(
    prophet,
    horizon='122 days',
    period='180 days',
    initial='730 days',
)

In [None]:
# Summarise the cross-validation forecasts with MAE and RMSE only
performance_results = performance_metrics(df=cv_results, metrics=['mae', 'rmse'])

In [None]:
# Average each error metric over all rows of the performance table
print("Mean MAE:", np.mean(performance_results['mae']))
print("Mean RMSE:", np.mean(performance_results['rmse']))

In [None]:
# Plot the fitted history together with the forecast
fig = prophet.plot(fcst=forecast)