# Pre-forecast demand for faster training

In [30]:
# imports
import pandas as pd
import numpy as np
import os
import sys

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', '..')))

from demand_forecasting.IrConv_LSTM_demand_forecaster import IrConvLstmDemandForecaster

In [31]:
# constants
# Root of the rebalancing_framework checkout. The default is the location the
# notebook was originally written against; set REBALANCING_FRAMEWORK_ROOT in the
# environment to run it from a different checkout without editing this cell.
PROJECT_ROOT = os.environ.get(
    "REBALANCING_FRAMEWORK_ROOT", "/home/ruroit00/rebalancing_framework"
)
MODEL_DIR = f"{PROJECT_ROOT}/rl_framework/demand_forecasting/models"
PROCESSED_DATA_DIR = f"{PROJECT_ROOT}/processed_data"

# Model files handed to the forecasters as `model_path`.
DROPOFF_MODEL_PATH = f"{MODEL_DIR}/irregular_convolution_LSTM_dropoff.pkl"
PICKUP_MODEL_PATH = f"{MODEL_DIR}/irregular_convolution_LSTM_pickup.pkl"

# Pickled table read into ZONE_COMMUNITY_MAP.
ZONE_COMMUNITY_MAP_PATH = f"{PROCESSED_DATA_DIR}/grid_community_map.pickle"

# Demand data files handed to the forecasters as `demand_data_path`.
DROP_OFF_DEMAND_DATA_PATH = f"{PROCESSED_DATA_DIR}/voi_dropoff_demand_h3_hourly.pickle"
PICK_UP_DEMAND_DATA_PATH = f"{PROCESSED_DATA_DIR}/voi_pickup_demand_h3_hourly.pickle"

# Destinations for the pre-computed forecasts (relative to the notebook's
# working directory).
DROPOFF_OUTPUT_PATH = '../data/IrConv_LSTM_dropoff_forecasts.pkl'
PICKUP_OUTPUT_PATH = '../data/IrConv_LSTM_pickup_forecasts.pkl'

In [32]:
# Unpickle the zone/community mapping table.
ZONE_COMMUNITY_MAP: pd.DataFrame = pd.read_pickle(ZONE_COMMUNITY_MAP_PATH)

# N_ZONES is the number of rows in the map; N_COMMUNITIES is the number of
# distinct values in its "community_index" column.
N_ZONES = len(ZONE_COMMUNITY_MAP)
N_COMMUNITIES = ZONE_COMMUNITY_MAP["community_index"].nunique()

In [33]:
# load models
# Both forecasters share the same community/zone configuration; only the model
# file and the demand data file differ between dropoff and pickup.
shared_forecaster_kwargs = dict(
    num_communities=N_COMMUNITIES,
    num_zones=N_ZONES,
    zone_community_map=ZONE_COMMUNITY_MAP,
)

dropoff_demand_forecaster = IrConvLstmDemandForecaster(
    model_path=DROPOFF_MODEL_PATH,
    demand_data_path=DROP_OFF_DEMAND_DATA_PATH,
    **shared_forecaster_kwargs,
)

pickup_demand_forecaster = IrConvLstmDemandForecaster(
    model_path=PICKUP_MODEL_PATH,
    demand_data_path=PICK_UP_DEMAND_DATA_PATH,
    **shared_forecaster_kwargs,
)

In [34]:
# Sanity check: display the dropoff forecaster's `scaled_demand_data` attribute
# as the cell output (the recorded output is a nested array of floats).
dropoff_demand_forecaster.scaled_demand_data

array([[[0.       ],
        [0.       ],
        [0.       ],
        ...,
        [0.       ],
        [0.0106383],
        [0.       ]],

       [[0.       ],
        [0.       ],
        [0.       ],
        ...,
        [0.       ],
        [0.0106383],
        [0.       ]],

       [[0.       ],
        [0.       ],
        [0.       ],
        ...,
        [0.       ],
        [0.       ],
        [0.       ]],

       ...,

       [[0.       ],
        [0.       ],
        [0.       ],
        ...,
        [0.       ],
        [0.0212766],
        [0.       ]],

       [[0.       ],
        [0.       ],
        [0.       ],
        ...,
        [0.       ],
        [0.0106383],
        [0.       ]],

       [[0.       ],
        [0.       ],
        [0.       ],
        ...,
        [0.       ],
        [0.       ],
        [0.       ]]])

In [35]:
def _zeros_like_frame(template: pd.DataFrame) -> pd.DataFrame:
    """Return a zero-filled frame with the same shape, index and columns as ``template``."""
    return pd.DataFrame(
        np.zeros(template.shape),
        index=template.index,
        columns=template.columns,
    )


# Result containers: one zero-initialised frame per demand type, laid out
# exactly like the corresponding forecaster's demand data.
dropoff_forecast_data = _zeros_like_frame(dropoff_demand_forecaster.demand_data)
pickup_forecast_data = _zeros_like_frame(pickup_demand_forecaster.demand_data)

In [36]:
# generate dropoff forecasts
# For every index entry of the dropoff demand data, ask the forecaster for a
# per-zone prediction and store it as that row of `dropoff_forecast_data`.
# Index entries must expose .hour/.day/.month (presumably hourly timestamps --
# confirm against the demand data).
# Only the index is needed, so iterate over it directly instead of iterrows(),
# which would build an unused Series for every row.
for timestamp in dropoff_demand_forecaster.demand_data.index:
    try:
        forecast = dropoff_demand_forecaster.predict_demand_per_zone(
            timestamp.hour, timestamp.day, timestamp.month
        )
    except Exception as e:
        # Best effort: report the failure and fall back to an all-zero row so
        # one bad time step does not abort the whole run.
        print(f"Error predicting dropoff demand for index {timestamp}: {e}")
        # The fallback already holds one value per zone, so it must not go
        # through the [:, 0] slice below (a 1-D array cannot be indexed that
        # way and would raise IndexError).
        zone_forecast = np.zeros(N_ZONES, dtype=float)
    else:
        # The forecaster returns a 2-D array; its first column is stored.
        zone_forecast = forecast[:, 0]
    dropoff_forecast_data.loc[timestamp] = zone_forecast

In [37]:
# Summary statistics of the generated dropoff forecasts, one column per zone.
# NOTE(review): the recorded output shows negative minima for many zones;
# presumably demand should be non-negative -- confirm whether the forecasts
# need clipping before they are used downstream.
dropoff_forecast_data.describe()

grid_index,881faa4485fffff,881faa4487fffff,881faa4493fffff,881faa4497fffff,881faa4499fffff,881faa449bfffff,881faa44a3fffff,881faa44a7fffff,881faa44abfffff,881faa44b1fffff,...,881faa7ad9fffff,881faa7adbfffff,881faa7addfffff,881faa7ae1fffff,881faa7ae3fffff,881faa7ae5fffff,881faa7ae7fffff,881faa7ae9fffff,881faa7aebfffff,881faa7aedfffff
count,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,...,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0
mean,0.002517,0.000268,0.010899,0.003281,-0.010478,0.04289,0.089205,0.00735,-0.006515,-0.014907,...,0.83229,0.579978,0.747116,0.953544,3.604405,-0.089381,0.015435,0.26181,1.462632,0.460296
std,0.022443,0.07878,0.048005,0.057587,0.020273,0.041399,0.055712,0.038149,0.009296,0.171146,...,0.324603,0.202876,0.401822,0.23098,0.919169,0.117057,0.06117,0.192791,0.420974,0.161806
min,-0.209665,-0.699352,-0.385751,-0.452033,-0.086345,-0.156566,-0.316518,-0.181242,-0.029087,-0.910063,...,-0.573249,0.240188,0.065274,0.599329,1.954054,-0.564199,-0.279652,-0.03499,0.614213,0.170371
25%,-0.011795,-0.049105,-0.021939,-0.035537,-0.026708,0.011878,0.054405,-0.01841,-0.013907,-0.129661,...,0.57246,0.413753,0.427124,0.768624,2.85171,-0.188074,-0.022265,0.101707,1.11031,0.326406
50%,0.003986,0.003483,0.012548,0.004102,-0.010909,0.041158,0.091223,0.007524,-0.007534,-0.029075,...,0.803106,0.560313,0.706349,0.929427,3.522799,-0.096809,0.01,0.246171,1.417731,0.443707
75%,0.019903,0.059225,0.045929,0.042264,0.003986,0.073234,0.122723,0.031895,-0.000508,0.091129,...,1.030565,0.721424,1.041074,1.117517,4.250134,-0.009842,0.050132,0.391065,1.721733,0.573642
max,0.095963,0.417361,0.21974,0.27807,0.069474,0.301036,0.342582,0.21849,0.04817,1.541196,...,2.349487,1.347107,2.192145,2.176766,7.101725,0.970448,0.675833,1.519288,3.749177,1.16317


In [38]:
# generate pickup forecasts
# For every index entry of the pickup demand data, ask the forecaster for a
# per-zone prediction and store it as that row of `pickup_forecast_data`.
# Index entries must expose .hour/.day/.month (presumably hourly timestamps --
# confirm against the demand data).
# Only the index is needed, so iterate over it directly instead of iterrows(),
# which would build an unused Series for every row.
for timestamp in pickup_demand_forecaster.demand_data.index:
    try:
        forecast = pickup_demand_forecaster.predict_demand_per_zone(
            timestamp.hour, timestamp.day, timestamp.month
        )
    except Exception as e:
        # Best effort: report the failure and fall back to an all-zero row so
        # one bad time step does not abort the whole run.
        print(f"Error predicting pickup demand for index {timestamp}: {e}")
        # The fallback already holds one value per zone, so it must not go
        # through the [:, 0] slice below (a 1-D array cannot be indexed that
        # way and would raise IndexError).
        zone_forecast = np.zeros(N_ZONES, dtype=float)
    else:
        # The forecaster returns a 2-D array; its first column is stored.
        zone_forecast = forecast[:, 0]
    pickup_forecast_data.loc[timestamp] = zone_forecast

In [39]:
# Summary statistics of the generated pickup forecasts, one column per zone.
# NOTE(review): the recorded output shows negative minima for many zones;
# presumably demand should be non-negative -- confirm whether the forecasts
# need clipping before they are used downstream.
pickup_forecast_data.describe()

grid_index,881faa4485fffff,881faa4487fffff,881faa4493fffff,881faa4497fffff,881faa4499fffff,881faa449bfffff,881faa44a3fffff,881faa44a7fffff,881faa44abfffff,881faa44b1fffff,...,881faa7ad9fffff,881faa7adbfffff,881faa7addfffff,881faa7ae1fffff,881faa7ae3fffff,881faa7ae5fffff,881faa7ae7fffff,881faa7ae9fffff,881faa7aebfffff,881faa7aedfffff
count,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,...,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0,3050.0
mean,-0.007846,0.045697,0.01307,-0.000675,0.002139,-0.064474,0.017077,0.051951,0.138697,0.030452,...,0.824135,0.542927,1.127628,0.901483,3.080214,-0.056192,0.064046,0.267073,1.131617,0.436142
std,0.020095,0.025639,0.043963,0.006778,0.018594,0.043523,0.032142,0.038087,0.089626,0.033477,...,0.402567,0.256883,0.34937,0.282231,1.396297,0.02737,0.033777,0.250203,0.514201,0.165598
min,-0.153709,-0.088092,-0.165131,-0.045436,-0.057513,-0.286202,-0.142921,-0.108017,-0.32218,-0.256679,...,0.170735,0.144616,0.560663,0.399361,0.888005,-0.20777,-0.02472,-0.178829,0.318391,0.175674
25%,-0.020268,0.031543,-0.011013,-0.005298,-0.011852,-0.09,-3.5e-05,0.028791,0.080626,0.010381,...,0.484534,0.33016,0.834136,0.67478,1.890105,-0.073297,0.033957,0.056719,0.702807,0.298407
50%,-0.009028,0.048469,0.017774,-0.000408,0.000841,-0.066093,0.01804,0.052773,0.143327,0.031561,...,0.784848,0.513598,1.08319,0.874267,2.94406,-0.058486,0.065555,0.248512,1.079156,0.40885
75%,0.004138,0.063799,0.0406,0.004029,0.014848,-0.03825,0.035902,0.078921,0.197262,0.051465,...,1.121596,0.719976,1.384282,1.105325,4.129266,-0.04114,0.092025,0.434901,1.507097,0.546551
max,0.104856,0.234986,0.220462,0.034711,0.061091,0.159301,0.246894,0.225367,0.609205,0.170815,...,2.300133,1.919164,2.631814,1.983194,7.980289,0.075674,0.170492,1.2648,3.101238,1.716655


In [40]:
# save forecasts
# Persist both forecast frames as pickles. The target directory is created
# first so that a missing output folder cannot discard the results of the
# forecast loops above at the very last step.
for forecast_frame, output_path in (
    (dropoff_forecast_data, DROPOFF_OUTPUT_PATH),
    (pickup_forecast_data, PICKUP_OUTPUT_PATH),
):
    # `or "."` covers a bare file name, whose dirname is the empty string.
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    forecast_frame.to_pickle(output_path)