In [16]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
import sys
import os

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
import src.config as config

In [18]:
from src.inference import get_feature_store
from datetime import datetime, timedelta
import pandas as pd

# Get the current datetime64[us, US/EST]
current_date = pd.Timestamp.now(tz='US/Eastern')
feature_store = get_feature_store()

# read time-series data from the feature store
fetch_data_to = current_date - timedelta(hours=1)
fetch_data_from = current_date - timedelta(days=1*29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")
feature_view = feature_store.get_feature_view(
    name=config.FEATURE_VIEW_NAME, version=config.FEATURE_VIEW_VERSION
)

ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.start_hour.between(fetch_data_from, fetch_data_to)]
ts_data.sort_values(["start_station_id", "start_hour"]).reset_index(drop=True)
ts_data["start_hour"] = ts_data["start_hour"].dt.tz_convert("US/Eastern")

from src.data_utils import transform_ts_data_into_features_and_target_loop
features, _ = transform_ts_data_into_features_and_target_loop(ts_data, window_size=24*28, step_size=23)

2025-05-04 13:18:31,734 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-04 13:18:31,740 INFO: Initializing external client
2025-05-04 13:18:31,740 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-04 13:18:32,537 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1212635
Fetching data from 2025-04-05 13:18:31.734976-04:00 to 2025-05-04 12:18:31.734976-04:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.50s) 


In [27]:
features, targets = transform_ts_data_into_features_and_target_loop(ts_data, window_size=24*28, step_size=23)

In [26]:
targets

0    23
1    21
2     0
Name: target, dtype: int32

In [28]:
features

Unnamed: 0,rides_t-672,rides_t-671,rides_t-670,rides_t-669,rides_t-668,rides_t-667,rides_t-666,rides_t-665,rides_t-664,rides_t-663,rides_t-662,rides_t-661,rides_t-660,rides_t-659,rides_t-658,rides_t-657,rides_t-656,rides_t-655,rides_t-654,rides_t-653,rides_t-652,rides_t-651,rides_t-650,rides_t-649,rides_t-648,rides_t-647,rides_t-646,rides_t-645,rides_t-644,rides_t-643,rides_t-642,rides_t-641,rides_t-640,rides_t-639,rides_t-638,rides_t-637,rides_t-636,rides_t-635,rides_t-634,rides_t-633,...,rides_t-38,rides_t-37,rides_t-36,rides_t-35,rides_t-34,rides_t-33,rides_t-32,rides_t-31,rides_t-30,rides_t-29,rides_t-28,rides_t-27,rides_t-26,rides_t-25,rides_t-24,rides_t-23,rides_t-22,rides_t-21,rides_t-20,rides_t-19,rides_t-18,rides_t-17,rides_t-16,rides_t-15,rides_t-14,rides_t-13,rides_t-12,rides_t-11,rides_t-10,rides_t-9,rides_t-8,rides_t-7,rides_t-6,rides_t-5,rides_t-4,rides_t-3,rides_t-2,rides_t-1,start_hour,start_station_id
0,17,4,26,4,2,82,16,2,16,0,19,96,12,2,27,4,0,22,1,11,0,8,22,2,3,6,5,19,4,63,13,32,21,10,13,14,46,0,19,27,...,0,25,17,32,27,48,0,21,34,13,4,0,0,2,5,11,0,5,2,12,17,25,16,5,2,22,14,10,9,1,1,4,15,4,22,3,3,10,2025-04-23 13:00:00,6822.089844
1,21,5,47,1,8,34,1,1,3,10,25,31,2,0,4,25,21,42,1,3,2,26,31,7,1,8,55,15,17,3,17,36,7,14,20,18,35,2,46,1,...,26,15,23,33,26,19,33,0,1,0,16,16,7,15,1,19,26,29,5,17,23,23,31,28,17,21,33,39,27,68,61,29,8,26,69,13,15,19,2025-04-18 23:00:00,5905.140137
2,1,40,67,28,17,42,0,0,64,4,40,27,24,8,3,79,34,1,2,19,14,7,46,11,0,38,1,0,35,32,26,7,41,3,74,59,21,21,46,15,...,2,23,43,44,5,7,36,41,1,59,5,4,28,37,24,2,50,31,1,1,1,1,0,0,0,2,25,4,35,25,0,67,2,0,39,31,22,22,2025-04-19 09:00:00,6140.049805


In [19]:
features.sort_values(by="start_station_id", ascending=True)

Unnamed: 0,rides_t-672,rides_t-671,rides_t-670,rides_t-669,rides_t-668,rides_t-667,rides_t-666,rides_t-665,rides_t-664,rides_t-663,rides_t-662,rides_t-661,rides_t-660,rides_t-659,rides_t-658,rides_t-657,rides_t-656,rides_t-655,rides_t-654,rides_t-653,rides_t-652,rides_t-651,rides_t-650,rides_t-649,rides_t-648,rides_t-647,rides_t-646,rides_t-645,rides_t-644,rides_t-643,rides_t-642,rides_t-641,rides_t-640,rides_t-639,rides_t-638,rides_t-637,rides_t-636,rides_t-635,rides_t-634,rides_t-633,...,rides_t-38,rides_t-37,rides_t-36,rides_t-35,rides_t-34,rides_t-33,rides_t-32,rides_t-31,rides_t-30,rides_t-29,rides_t-28,rides_t-27,rides_t-26,rides_t-25,rides_t-24,rides_t-23,rides_t-22,rides_t-21,rides_t-20,rides_t-19,rides_t-18,rides_t-17,rides_t-16,rides_t-15,rides_t-14,rides_t-13,rides_t-12,rides_t-11,rides_t-10,rides_t-9,rides_t-8,rides_t-7,rides_t-6,rides_t-5,rides_t-4,rides_t-3,rides_t-2,rides_t-1,start_hour,start_station_id
1,21,5,47,1,8,34,1,1,3,10,25,31,2,0,4,25,21,42,1,3,2,26,31,7,1,8,55,15,17,3,17,36,7,14,20,18,35,2,46,1,...,26,15,23,33,26,19,33,0,1,0,16,16,7,15,1,19,26,29,5,17,23,23,31,28,17,21,33,39,27,68,61,29,8,26,69,13,15,19,2025-04-18 23:00:00,5905.140137
2,1,40,67,28,17,42,0,0,64,4,40,27,24,8,3,79,34,1,2,19,14,7,46,11,0,38,1,0,35,32,26,7,41,3,74,59,21,21,46,15,...,2,23,43,44,5,7,36,41,1,59,5,4,28,37,24,2,50,31,1,1,1,1,0,0,0,2,25,4,35,25,0,67,2,0,39,31,22,22,2025-04-19 09:00:00,6140.049805
0,17,4,26,4,2,82,16,2,16,0,19,96,12,2,27,4,0,22,1,11,0,8,22,2,3,6,5,19,4,63,13,32,21,10,13,14,46,0,19,27,...,0,25,17,32,27,48,0,21,34,13,4,0,0,2,5,11,0,5,2,12,17,25,16,5,2,22,14,10,9,1,1,4,15,4,22,3,3,10,2025-04-23 13:00:00,6822.089844


In [20]:
from src.inference import load_model_from_registry

model_5905 = load_model_from_registry(station_id=5905)
model_6140 = load_model_from_registry(station_id=6140)
model_6822 = load_model_from_registry(station_id=6822)

2025-05-04 13:18:36,494 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-04 13:18:36,500 INFO: Initializing external client
2025-05-04 13:18:36,500 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-04 13:18:37,296 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1212635


Downloading: 0.000%|          | 0/318399 elapsed<00:00 remaining<?

2025-05-04 13:18:38,863 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-04 13:18:38,872 INFO: Initializing external client
2025-05-04 13:18:38,873 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-04 13:18:39,626 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1212635


Downloading: 0.000%|          | 0/318361 elapsed<00:00 remaining<?

2025-05-04 13:18:40,994 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-04 13:18:41,000 INFO: Initializing external client
2025-05-04 13:18:41,001 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-04 13:18:41,825 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1212635


Downloading: 0.000%|          | 0/319572 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... DONE

In [21]:
from src.inference import get_model_predictions
predictions = get_model_predictions(model_5905, model_6140, model_6822, features.sort_values(by="start_station_id", ascending=True))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [22]:
predictions["start_hour"] = current_date.ceil('h')
predictions

Unnamed: 0,start_station_id,predicted_demand,start_hour
0,5905.140137,23.0,2025-05-04 14:00:00-04:00
1,6140.049805,14.0,2025-05-04 14:00:00-04:00
2,6822.089844,9.0,2025-05-04 14:00:00-04:00


In [23]:
from src.inference import get_feature_store

feature_group = get_feature_store().get_or_create_feature_group(
    name=config.FEATURE_GROUP_MODEL_PREDICTION,
    version=1,
    description="Predictions from LGBM Model",
    primary_key=["start_station_id", "start_hour"],
    event_time="start_hour",
)

2025-05-04 13:18:43,773 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-04 13:18:43,777 INFO: Initializing external client
2025-05-04 13:18:43,778 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-04 13:18:44,530 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1212635


In [24]:
feature_group.insert(predictions, write_options={"wait_for_job": False})

Uploading Dataframe: 100.00% |██████████| Rows 3/3 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: bike_hourly_model_prediction_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1212635/jobs/named/bike_hourly_model_prediction_1_offline_fg_materialization/executions


(Job('bike_hourly_model_prediction_1_offline_fg_materialization', 'SPARK'),
 None)