In [2]:
!pip install prophet



In [2]:
import os
import pickle

import boto3
import matplotlib.pyplot as plt
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly

%matplotlib inline

Matplotlib is building the font cache; this may take a moment.


In [2]:
# Download the pickled Waterloo data file from S3 to a local path.
bucket_name = 'tfl-cycle-data'
s3_file_path = 'Docking_station_data/Waterloo-Jan2016-Apr2024.pkl'
local_file_path = 'temp/Waterloo-Jan2016-Apr2024.pkl'

# The local target directory has to exist before the download can write
# into it, so create it up front (no-op when it is already there).
os.makedirs(os.path.dirname(local_file_path), exist_ok=True)

s3 = boto3.client('s3')
s3.download_file(bucket_name, s3_file_path, local_file_path)

In [3]:
# Load the pickled data from the local file written by the S3 download cell.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# unpickle objects that come from a bucket you trust.
with open(local_file_path, 'rb') as f:
    waterloo_df = pickle.load(f)

In [4]:
# Preview the first rows to check which columns were loaded.
waterloo_df.head()

Unnamed: 0,Rental ID,Start Date,Start Station Name,End Date,End Station Name
0,50755276,2016-10-01 08:41:00,"William IV Street, Strand",2016-10-01 08:47:00,"Waterloo Station 3, Waterloo"
1,50755420,2016-10-01 09:05:00,"Bermondsey Street, Bermondsey",2016-10-01 09:15:00,"Waterloo Station 3, Waterloo"
2,50755598,2016-10-01 09:28:00,"Lollard Street, Vauxhall",2016-10-01 09:36:00,"Waterloo Station 3, Waterloo"
3,50755603,2016-10-01 09:28:00,"Lollard Street, Vauxhall",2016-10-01 09:36:00,"Waterloo Station 3, Waterloo"
4,50756101,2016-10-01 10:12:00,"Waterloo Station 3, Waterloo",2016-10-01 10:27:00,"Braham Street, Aldgate"


In [5]:
# Keep only journeys that start at Waterloo Station 3, indexed by start time.
starts_at_waterloo = waterloo_df['Start Station Name'] == 'Waterloo Station 3, Waterloo'
waterloo_df_out = waterloo_df.loc[starts_at_waterloo].set_index('Start Date')

# Count rentals in each 5-minute bin; the bin timestamp comes back as the
# 'Start Date' column and the count as 'Rentals'.
rentals_5min = (
    waterloo_df_out
    .resample('5min')
    .size()
    .reset_index(name='Rentals')
)

In [6]:
# Rename the timestamp/count columns to 'ds'/'y', the names passed on to the
# Prophet model fitted later in the notebook.
prophet_column_names = {'Start Date': 'ds', 'Rentals': 'y'}
rentals_5min_reformat = (
    rentals_5min
    .loc[:, ['Start Date', 'Rentals']]
    .rename(columns=prophet_column_names)
)
rentals_5min_reformat.head()

Unnamed: 0,ds,y
0,2015-01-02 01:45:00,1
1,2015-01-02 01:50:00,0
2,2015-01-02 01:55:00,0
3,2015-01-02 02:00:00,0
4,2015-01-02 02:05:00,0


In [7]:
# Disabled: down-sample the 5-minute counts to hourly totals.
# Original note: needed for the fit to run on the current instance size.
# rentals_hourly = rentals_5min_reformat.resample('H', on='ds').sum().reset_index()
# rentals_hourly.head()

In [8]:
# Model for outflow: a Prophet instance created with all-default settings.
outflow_model = Prophet()


In [9]:
# Fit on the 5-minute rental counts (columns 'ds' and 'y').
# The cmdstanpy log captured below shows this run took roughly nine minutes.
outflow_model.fit(rentals_5min_reformat)

17:11:12 - cmdstanpy - INFO - Chain [1] start processing
17:20:37 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x7f11e25abe50>

In [3]:
# Pickle the fitted model to a local file, then upload that file to S3.
# Requires `outflow_model` to exist in this kernel (fitted or re-loaded).
name = 'waterloo_outflow_Prophet_model.pkl'
remote_loc = f'Docking-Station-Model/{name}'
local_loc = f'temp/{name}'

# open(..., 'wb') fails when the target directory is missing, so create it
# first (no-op when it already exists).
os.makedirs(os.path.dirname(local_loc), exist_ok=True)

with open(local_loc, 'wb') as f:
    pickle.dump(outflow_model, f)

bucket_name = 'tfl-cycle-data'
s3 = boto3.client('s3')
s3.upload_file(local_loc, bucket_name, remote_loc)

NameError: name 'outflow_model' is not defined

In [4]:
# Download the pickled model from S3 and unpickle it into `outflow_model`.

name = 'waterloo_outflow_Prophet_model.pkl'
remote_loc = f'Docking-Station-Model/{name}'
local_loc = f'temp/{name}'

# The local target directory has to exist before the download can write
# into it, so create it up front (no-op when it is already there).
os.makedirs(os.path.dirname(local_loc), exist_ok=True)

bucket_name = 'tfl-cycle-data'
s3 = boto3.client('s3')
s3.download_file(bucket_name, remote_loc, local_loc)

# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# unpickle objects that come from a bucket you trust.
with open(local_loc, 'rb') as f:
    outflow_model = pickle.load(f)

In [5]:
# Build the frame of timestamps the model will be asked to predict.
# NOTE(review): periods=1 with freq='h' presumably appends a single timestamp
# one hour past the training data (plus the history rows, if include_history
# keeps its default) — confirm this is the intended forecast horizon.
future_outflow = outflow_model.make_future_dataframe(periods=1, freq='h')

In [None]:
# Run the model over every timestamp in future_outflow.
outflow_forecast = outflow_model.predict(future_outflow)

In [None]:
# Plot outflow forecast
fig_outflow = outflow_model.plot(outflow_forecast)
# Figure.show() expects a GUI backend and only emits a warning under
# %matplotlib inline; plt.show() renders the open figure in the notebook.
plt.show()

In [None]:
# Filter the forecast to include only the future dates.
# The cutoff is the last timestamp the model was fitted on, read from the
# model itself: `rentals_hourly` is only defined in commented-out code, and
# this also works when the model was loaded from S3 in a fresh kernel.
last_observed = outflow_model.history_dates.max()
future_forecast = outflow_forecast[outflow_forecast['ds'] > last_observed]

# Plot the forecasted data
fig, ax = plt.subplots(figsize=(10, 6))
# marker='o' keeps a one-point horizon visible (a lone point draws no line).
ax.plot(future_forecast['ds'], future_forecast['yhat'], marker='o', label='Forecasted Outflow')
# Shade the band between the lower and upper forecast bounds.
ax.fill_between(
    future_forecast['ds'],
    future_forecast['yhat_lower'],
    future_forecast['yhat_upper'],
    color='lightblue',
    alpha=0.5,
)
ax.set_title('Forecasted Outflow')
ax.set_xlabel('Date')
ax.set_ylabel('Outflow')
ax.legend()
plt.show()