## Deployment Mockup

### Set up

In [2]:
import pandas as pd
from datetime import datetime
from tqdm import tqdm
import requests
import sagemaker
from sagemaker.serverless import ServerlessInferenceConfig

from deepar_model_utils import DeepARPredictor
from deepar_model_utils import get_station_data
from deepar_model_utils import prep_station_data

In [3]:
# S3 locations for the DeepAR artifacts. s3_bucket is blank in this copy of the
# notebook and has to be filled in before any S3 path below resolves.
s3_bucket, s3_prefix = "", "deepar_model"
_s3_location = (s3_bucket, s3_prefix)
s3_output_path_start = "s3://{}/{}/output_start_poc".format(*_s3_location)
s3_output_path_stop = "s3://{}/{}/output_stop_poc".format(*_s3_location)

In [4]:
# Session, region and execution role come from the SageMaker environment; the
# region is then used to look up the "forecasting-deepar" container image URI.
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()
image_name = sagemaker.image_uris.retrieve(framework = "forecasting-deepar", region = region)

### 1. Get station name from user and convert it to station id for model.

In [5]:
# Lookup table with "station name" and "station id" columns, read from the bucket.
#trips_lookup = pd.read_csv("trips_lookup.csv")  # local-file alternative
trips_lookup_uri = "s3://{}/trips_lookup.csv".format(s3_bucket)
trips_lookup = pd.read_csv(trips_lookup_uri)
print(trips_lookup.shape)
trips_lookup.head()

(460, 2)


Unnamed: 0,station name,station id
0,One Kendall Square at Hampshire St / Portland St,91
1,Dartmouth St at Newbury St,370
2,Christian Science Plaza - Massachusetts Ave at...,46
3,MIT Pacific St at Purrington St,178
4,Sennott Park Broadway at Norfolk Street,386


In [6]:
# Station requested by the user; it is matched by exact equality against the
# "station name" column of trips_lookup in the next cell.
station_name_user: str = "One Kendall Square at Hampshire St / Portland St"

In [7]:
# Translate the user-supplied station name into the station id used by the models.
matching_station_ids = trips_lookup.loc[
    trips_lookup["station name"] == station_name_user, "station id"
].tolist()
if not matching_station_ids:
    # Fail with a readable message instead of an opaque IndexError on an empty list.
    raise ValueError("Unknown station name: {!r}".format(station_name_user))
# If the lookup ever holds duplicate names, the first match is used (as before).
station_id = matching_station_ids[0]
station_id

91

### 2. Get date and time from user and convert it to timestamp for model.

In [8]:
# Date and time requested by the user, written as "MM/DD/YYYY HH:MM"
# (the format the next cell parses with "%m/%d/%Y %H:%M").
datetime_user: str = "08/29/2022 00:15"

In [9]:
# Parse the user's "MM/DD/YYYY HH:MM" input, then re-render it as
# "YYYY-MM-DD HH:MM:SS", the form later used to slice the prediction index.
requested_dt = datetime.strptime(datetime_user, "%m/%d/%Y %H:%M")
timestamp = requested_dt.strftime("%Y-%m-%d %H:%M:%S")
timestamp

'2022-08-29 00:15:00'

### 3. Get predictions from trip start model.

- Model requires the complete history of data for the user-requested station.
- Model automatically generates 3 days of predictions because that was the prediction length set when training the model.
    - Cut off predictions based on the user-requested time.

In [10]:
# Time-series settings passed to get_station_data for both models.
# NOTE(review): max_date is presumably the last history timestamp fed to the
# model — confirm in deepar_model_utils.
freq, max_date = "15min", "2022-08-28 23:45:00"

In [11]:
# Load the trip-start history and pass it to prep_station_data together with
# the station-id and time column names.
# NOTE(review): parse_dates=True without index_col only asks pandas to parse
# the index, so "starttime" is likely still text at this point — confirm that
# prep_station_data does the datetime conversion.
trips_start_uri = "s3://{}/model_trips_start_station_20208029_20220831.csv".format(s3_bucket)
trips_start = pd.read_csv(trips_start_uri, parse_dates = True)
trips_start_all_group = prep_station_data(
    trips_start,
    "start station id",
    "starttime",
)

In [12]:
# Wrap the trained trip-start artifact in a SageMaker Model and deploy it to a
# serverless endpoint; predictor_cls makes deploy() hand back a DeepARPredictor.
start_model_artifact = "{}/deepar-poc-start-2022-10-19-16-11-42-997/output/model.tar.gz".format(s3_output_path_start)

trip_start_model = sagemaker.model.Model(
    name = "deepar-poc-startmodel",
    role = role,
    image_uri = image_name,
    model_data = start_model_artifact,
    predictor_cls = DeepARPredictor,
)

# Serverless settings are left at the SDK defaults (no overrides passed).
serverless_config = ServerlessInferenceConfig()

start_predictor = trip_start_model.deploy(
    serverless_inference_config = serverless_config,
    endpoint_name = "deepar-poc-startendpoint",
)

--------!

In [13]:
# Build the requested station's trip-start history, then ask the endpoint for
# the 0.9-quantile forecast of its "size" series.
start_history = get_station_data(
    trips_start_all_group, "start station id", "starttime", station_id, freq, max_date
)
predicted_start = start_predictor.predict(ts = start_history["size"], quantiles = [0.9])

In [14]:
# Keep the forecast rows up to the requested timestamp, drop the last of them,
# and total the 0.9-quantile column into a whole number of departing trips.
start_up_to_request = predicted_start.loc[:timestamp]
start_window = start_up_to_request[:-1]
trips_start_model = round(sum(start_window["0.9"]))
trips_start_model

3

### 4. Get predictions from trip stop model.

- Same considerations from the trip start model apply here.

In [15]:
# Load the trip-stop history and pass it to prep_station_data together with
# the station-id and time column names.
# NOTE(review): parse_dates=True without index_col only asks pandas to parse
# the index, so "stoptime" is likely still text at this point — confirm that
# prep_station_data does the datetime conversion.
trips_stop_uri = "s3://{}/model_trips_stop_station_20208029_20220831.csv".format(s3_bucket)
trips_stop = pd.read_csv(trips_stop_uri, parse_dates = True)
trips_stop_all_group = prep_station_data(
    trips_stop,
    "end station id",
    "stoptime",
)

In [16]:
# Wrap the trained trip-stop artifact in a SageMaker Model and deploy it to a
# serverless endpoint; predictor_cls makes deploy() hand back a DeepARPredictor.
stop_model_artifact = "{}/deepar-poc-stop-2022-10-21-20-01-24-335/output/model.tar.gz".format(s3_output_path_stop)

trip_stop_model = sagemaker.model.Model(
    name = "deepar-poc-stopmodel",
    role = role,
    image_uri = image_name,
    model_data = stop_model_artifact,
    predictor_cls = DeepARPredictor,
)

# Serverless settings are left at the SDK defaults (no overrides passed).
serverless_config = ServerlessInferenceConfig()

stop_predictor = trip_stop_model.deploy(
    serverless_inference_config = serverless_config,
    endpoint_name = "deepar-poc-stopendpoint",
)

--------!

In [17]:
# Build the requested station's trip-stop history, then ask the endpoint for
# the 0.9-quantile forecast of its "size" series.
stop_history = get_station_data(
    trips_stop_all_group, "end station id", "stoptime", station_id, freq, max_date
)
predicted_stop = stop_predictor.predict(ts = stop_history["size"], quantiles = [0.9])

In [18]:
# Keep the forecast rows up to the requested timestamp, drop the last of them,
# and total the 0.9-quantile column into a whole number of arriving trips.
stop_up_to_request = predicted_stop.loc[:timestamp]
stop_window = stop_up_to_request[:-1]
trips_stop_model = round(sum(stop_window["0.9"]))
trips_stop_model

4

### 5. Get real-time station status data.

- The model's forecasts cover the last 3 days of August 2022, whereas the station status below is fetched live at run time, so the two sources are not aligned in time.
    - MVP illustrates the concept and idea we have, but we have model, data, and time limitations
        - model: DeepAR doesn't like to predict much more than 3 days into the future.
        - data: we only get monthly files of data from Bluebikes
        - time: user would have to wait a long time to get forecasts if we predicted 1-3 months out (our users want predictions 10 mins to 1 day out, so that's what our model serves). Time series forecasts also get less and less certain and reliable (i.e. the confidence intervals widen) the farther out predictions are made.

In [19]:
# Fetch the live GBFS station-status feed and keep only the list of
# per-station records found under data -> stations.
station_status_response = requests.get(
    "https://gbfs.bluebikes.com/gbfs/en/station_status.json",
    timeout = 10,  # seconds; keeps a stalled connection from hanging the notebook
)
# Surface HTTP errors here rather than failing later on an unexpected payload.
station_status_response.raise_for_status()
all_station_status = station_status_response.json()["data"]["stations"]

In [20]:
# Pick the live status record of the requested station. The feed's station_id
# values are compared as strings, hence the str() conversion.
target_station_id = str(station_id)
station_status = next(
    (status for status in all_station_status if status["station_id"] == target_station_id),
    None,
)
if station_status is None:
    # Without this check a missing station would leave station_status undefined
    # (or stale from an earlier run) and fail confusingly in a later cell.
    raise LookupError("Station id {} not found in the real-time status feed.".format(station_id))

In [21]:
# Number of bikes the live feed currently reports as available at the station.
realtime_bikes_available = station_status["num_bikes_available"]
realtime_bikes_available

1

In [22]:
# Number of docks the live feed currently reports as available at the station.
realtime_docks_available = station_status["num_docks_available"]
realtime_docks_available

16

### 6. Method 1: Combine model predictions with real-time data.

- **Go-forward method (ignore method 2)**
- This method shows what the station is forecasted to look like at the **end** of the time interval and hides all the details about the potential movement and traffic at that station.

In [23]:
# Forecast arrivals add bikes and take docks; forecast departures do the opposite.
# Bikes plus docks currently available is treated as the station total here
# (anything the feed reports as disabled is not counted).
station_capacity = realtime_bikes_available + realtime_docks_available
net_bike_change = trips_stop_model - trips_start_model

# Clamp to physical bounds: a station cannot hold fewer than zero bikes or more
# than its total. Within those bounds the result equals the plain arithmetic.
bike_available = min(max(realtime_bikes_available + net_bike_change, 0), station_capacity)
dock_available = station_capacity - bike_available

print("Bike Availability Forecast: " + str(bike_available))
print("Dock Availability Forecast: " + str(dock_available))

Bike Availability Forecast: 2
Dock Availability Forecast: 15


### 6. Method 2: Display results as-is to the user.

- **Ignore (not the go-forward method)**
- This method shows all the potential movement and traffic details at the station. It allows users to better know their chances of securing a bike or dock because it doesn't summarize the numbers or hide the details.

In [24]:
# Each sentence fragment picks singular or plural wording from its own count.
realtime_bike_print = (
    "There is currently {} bike available "
    if realtime_bikes_available == 1
    else "There are currently {} bikes available "
).format(realtime_bikes_available)

realtime_dock_print = (
    "There is currently {} dock available "
    if realtime_docks_available == 1
    else "There are currently {} docks available "
).format(realtime_docks_available)

model_stop_print = (
    "with {} bike forecasted to arrive."
    if trips_stop_model == 1
    else "with {} bikes forecasted to arrive."
).format(trips_stop_model)

model_start_print = (
    "with {} bike forecasted to leave."
    if trips_start_model == 1
    else "with {} bikes forecasted to leave."
).format(trips_start_model)

# Bikes are paired with forecast arrivals, docks with forecast departures.
for heading, detail in (
    ("Bike Availability Forecast:", realtime_bike_print + model_stop_print),
    ("Dock Availability Forecast:", realtime_dock_print + model_start_print),
):
    print(heading)
    print("  " + detail)

Bike Availability Forecast:
  There is currently 1 bike available with 4 bikes forecasted to arrive.
Dock Availability Forecast:
  There are currently 16 docks available with 3 bikes forecasted to leave.


### 7. Edge Cases

- Model works well and provides value for the busy, high volume stations.
- Model does not work well for smaller or newer stations because it predicts constant zero movement. Users are encouraged to use the real-time station data available on the Bluebikes app.

### Predictions for Flask web app

**Trips Start**

In [None]:
# Forecast trip starts for every station in the lookup and stack the results
# into one long table with columns: num_bikes, station id, timestamp.
# NOTE(review): station 438 was skipped by the original code without a stated
# reason — confirm why it is excluded.
excluded_station_id = 438
start_frames = []
for station in tqdm(trips_lookup["station id"].unique()):
    if station == excluded_station_id:
        continue
    start = start_predictor.predict(ts = get_station_data(trips_start_all_group, "start station id", "starttime", station, freq, max_date)["size"], quantiles = [0.9])
    start.columns = ["num_bikes"]
    start["station id"] = station
    start["num_bikes"] = round(start["num_bikes"]).astype(int)
    # Keep the forecast time as a regular column before the index is reset.
    start["timestamp"] = start.index
    start = start.reset_index(drop = True)
    start_frames.append(start)

# Concatenate once: growing a DataFrame inside the loop re-copies every earlier
# row on each iteration. An empty result stays an empty DataFrame, as before.
start_pred = pd.concat(start_frames, ignore_index = True) if start_frames else pd.DataFrame()

In [None]:
# Uncomment to save the trip-start predictions to a local CSV.
#start_pred.to_csv("trip_start_pred.csv", index = False)

**Trips Stop**

In [None]:
# Forecast trip stops for every station in the lookup and stack the results
# into one long table with columns: num_bikes, station id, timestamp.
# NOTE(review): station 438 was skipped by the original code without a stated
# reason — confirm why it is excluded.
excluded_station_id = 438
stop_frames = []
for station in tqdm(trips_lookup["station id"].unique()):
    if station == excluded_station_id:
        continue
    stop = stop_predictor.predict(ts = get_station_data(trips_stop_all_group, "end station id", "stoptime", station, freq, max_date)["size"], quantiles = [0.9])
    stop.columns = ["num_bikes"]
    stop["station id"] = station
    stop["num_bikes"] = round(stop["num_bikes"]).astype(int)
    # Keep the forecast time as a regular column before the index is reset.
    stop["timestamp"] = stop.index
    stop = stop.reset_index(drop = True)
    stop_frames.append(stop)

# Concatenate once: growing a DataFrame inside the loop re-copies every earlier
# row on each iteration. An empty result stays an empty DataFrame, as before.
stop_pred = pd.concat(stop_frames, ignore_index = True) if stop_frames else pd.DataFrame()

In [None]:
# Uncomment to save the trip-stop predictions to a local CSV.
#stop_pred.to_csv("trip_stop_pred.csv", index = False)

### Clean up

In [25]:
# Tear down the trip-start deployment: remove the model, then the endpoint.
# NOTE(review): the order presumably matters — delete_model appears to look up
# the backing model through the live endpoint, so it has to run before
# delete_endpoint. Confirm against the SageMaker SDK before reordering.
start_predictor.delete_model()
start_predictor.delete_endpoint()

In [26]:
# Tear down the trip-stop deployment: remove the model, then the endpoint.
# NOTE(review): the order presumably matters — delete_model appears to look up
# the backing model through the live endpoint, so it has to run before
# delete_endpoint. Confirm against the SageMaker SDK before reordering.
stop_predictor.delete_model()
stop_predictor.delete_endpoint()