In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits under `src/` are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os

# Add the parent of the current working directory to the Python path so the
# `src` package can be imported from this notebook.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guarded so that re-running this cell does not keep appending duplicates.
if project_root not in sys.path:
    sys.path.append(project_root)
import src.config as config

In [3]:
from src.inference import get_feature_store

In [5]:
from datetime import datetime, timedelta
import pandas as pd

# Current time as a tz-aware US/Eastern timestamp. The feature store returns
# `start_hour` in UTC; tz-aware values compare on the absolute instant, so
# mixing the two zones in the filter below is safe.
current_date = pd.Timestamp.now(tz="US/Eastern")
feature_store = get_feature_store()

# Read time-series data from the feature store.
# Window of interest: from 29 days ago up to one hour ago.
fetch_data_to = current_date - timedelta(hours=1)
fetch_data_from = current_date - timedelta(days=29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")
feature_view = feature_store.get_feature_view(
    name=config.FEATURE_VIEW_NAME, version=config.FEATURE_VIEW_VERSION
)

# Request one extra day on each side of the window, then trim to the exact
# bounds locally.
ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
# Take an explicit copy: a later cell assigns to a column of `ts_data`, and
# that should act on an independent frame rather than on a filtered slice.
ts_data = ts_data[ts_data.start_hour.between(fetch_data_from, fetch_data_to)].copy()

2025-05-04 10:34:07,257 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-04 10:34:07,267 INFO: Initializing external client
2025-05-04 10:34:07,268 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-04 10:34:08,123 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1212635
Fetching data from 2025-04-05 10:34:07.257254-04:00 to 2025-05-04 09:34:07.257254-04:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.25s) 


In [6]:
# Show the rows ordered by station and hour, renumbered from 0.
ts_data.sort_values(by=["start_station_id", "start_hour"], ignore_index=True)

Unnamed: 0,start_hour,start_station_id,rides
0,2025-04-05 15:00:00+00:00,5905.140137,28
1,2025-04-05 16:00:00+00:00,5905.140137,37
2,2025-04-05 17:00:00+00:00,5905.140137,44
3,2025-04-05 18:00:00+00:00,5905.140137,42
4,2025-04-05 19:00:00+00:00,5905.140137,72
...,...,...,...
2026,2025-05-03 15:00:00+00:00,6822.089844,20
2027,2025-05-03 16:00:00+00:00,6822.089844,16
2028,2025-05-03 17:00:00+00:00,6822.089844,25
2029,2025-05-03 18:00:00+00:00,6822.089844,13


In [8]:
# Print the index range, per-column non-null counts, dtypes and memory usage.
ts_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2031 entries, 0 to 2102
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype                  
---  ------            --------------  -----                  
 0   start_hour        2031 non-null   datetime64[us, Etc/UTC]
 1   start_station_id  2031 non-null   float32                
 2   rides             2031 non-null   int32                  
dtypes: datetime64[us, Etc/UTC](1), float32(1), int32(1)
memory usage: 47.6 KB


In [9]:
# Re-express `start_hour` in US/Eastern. The instants are unchanged; only the
# displayed zone differs.
ts_data = ts_data.assign(
    start_hour=lambda frame: frame["start_hour"].dt.tz_convert("US/Eastern")
)

In [10]:
# Show the rows ordered by station and hour, renumbered from 0.
ts_data.sort_values(by=["start_station_id", "start_hour"], ignore_index=True)

Unnamed: 0,start_hour,start_station_id,rides
0,2025-04-05 11:00:00-04:00,5905.140137,28
1,2025-04-05 12:00:00-04:00,5905.140137,37
2,2025-04-05 13:00:00-04:00,5905.140137,44
3,2025-04-05 14:00:00-04:00,5905.140137,42
4,2025-04-05 15:00:00-04:00,5905.140137,72
...,...,...,...
2026,2025-05-03 11:00:00-04:00,6822.089844,20
2027,2025-05-03 12:00:00-04:00,6822.089844,16
2028,2025-05-03 13:00:00-04:00,6822.089844,25
2029,2025-05-03 14:00:00-04:00,6822.089844,13


In [11]:
# Re-check the schema; the `start_hour` dtype should now carry the US/Eastern zone.
ts_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2031 entries, 0 to 2102
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype                     
---  ------            --------------  -----                     
 0   start_hour        2031 non-null   datetime64[us, US/Eastern]
 1   start_station_id  2031 non-null   float32                   
 2   rides             2031 non-null   int32                     
dtypes: datetime64[us, US/Eastern](1), float32(1), int32(1)
memory usage: 47.6 KB


In [12]:
from src.data_utils import transform_ts_data_into_features_and_target_loop

# Turn the time series into a tabular (features, targets) pair.
# window_size = 28 * 24 = 672 lag columns (rides_t-672 ... rides_t-1).
# NOTE(review): the meaning of step_size=23 is defined in src.data_utils — confirm there.
features, targets = transform_ts_data_into_features_and_target_loop(
    ts_data,
    window_size=24 * 28,
    step_size=23,
)

In [13]:
# Display the feature frame produced by the transform call.
features

Unnamed: 0,rides_t-672,rides_t-671,rides_t-670,rides_t-669,rides_t-668,rides_t-667,rides_t-666,rides_t-665,rides_t-664,rides_t-663,...,rides_t-8,rides_t-7,rides_t-6,rides_t-5,rides_t-4,rides_t-3,rides_t-2,rides_t-1,start_hour,start_station_id
0,17,4,26,4,2,82,16,2,16,0,...,10,9,1,1,4,15,4,22,2025-04-27 03:00:00,6822.089844
1,21,5,47,1,8,34,1,1,3,10,...,39,27,68,61,29,8,26,69,2025-04-11 01:00:00,5905.140137
2,1,40,67,28,17,42,0,0,64,4,...,4,35,25,0,67,2,0,39,2025-04-28 16:00:00,6140.049805


In [14]:
# Summarise the feature frame: row count, column count and dtypes.
features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Columns: 674 entries, rides_t-672 to start_station_id
dtypes: datetime64[ns](1), float32(1), int64(672)
memory usage: 15.9 KB


In [15]:
from src.inference import load_batch_of_features_from_store

# Take a fresh tz-aware "now" in US/Eastern and let the project helper build
# the inference features for it.
current_date = pd.Timestamp.now(tz="US/Eastern")
# NOTE: this rebinds `features`, replacing the frame built by the transform
# cell earlier in the notebook.
features = load_batch_of_features_from_store(current_date)

2025-05-04 10:44:36,050 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-04 10:44:36,058 INFO: Initializing external client
2025-05-04 10:44:36,058 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-04 10:44:36,949 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1212635
Fetching data from 2025-04-05 10:44:36.050852-04:00 to 2025-05-04 09:44:36.050852-04:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.39s) 


In [16]:
# Show the reference timestamp that was passed to the feature loader.
current_date

Timestamp('2025-05-04 10:44:36.050852-0400', tz='US/Eastern')

In [17]:
# Display the batch of features loaded from the feature store.
features

Unnamed: 0,rides_t-672,rides_t-671,rides_t-670,rides_t-669,rides_t-668,rides_t-667,rides_t-666,rides_t-665,rides_t-664,rides_t-663,...,rides_t-8,rides_t-7,rides_t-6,rides_t-5,rides_t-4,rides_t-3,rides_t-2,rides_t-1,start_hour,start_station_id
0,28,37,44,42,72,38,54,43,23,19,...,3,3,3,3,6,17,32,35,2025-05-03 15:00:00,5905.140137
1,29,40,31,39,23,28,37,29,13,13,...,4,2,2,4,4,14,18,33,2025-05-03 15:00:00,6140.049805
2,17,11,12,20,10,5,15,9,10,10,...,0,3,0,4,12,14,8,23,2025-05-03 15:00:00,6822.089844


In [18]:
from src.inference import load_model_from_registry

# Fetch one model per station from the registry, in the same order as before.
# The integer ids are passed straight through to the project helper.
model_5905, model_6140, model_6822 = [
    load_model_from_registry(station_id=station_id)
    for station_id in (5905, 6140, 6822)
]

2025-05-04 10:45:50,637 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-04 10:45:50,643 INFO: Initializing external client
2025-05-04 10:45:50,643 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-04 10:45:51,412 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1212635


Downloading: 0.000%|          | 0/318399 elapsed<00:00 remaining<?

2025-05-04 10:45:53,693 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-04 10:45:53,703 INFO: Initializing external client
2025-05-04 10:45:53,704 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-04 10:45:54,488 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1212635


Downloading: 0.000%|          | 0/318361 elapsed<00:00 remaining<?

2025-05-04 10:45:55,943 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-04 10:45:55,949 INFO: Initializing external client
2025-05-04 10:45:55,950 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-04 10:45:56,755 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1212635


Downloading: 0.000%|          | 0/319572 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... DONE

In [19]:
from src.inference import get_model_predictions

# Each model is given only the row(s) of its own station.
# `.copy()` hands the helper an independent frame. Passing the bare
# boolean-mask slice of `features` made pandas emit SettingWithCopyWarning
# once per call when the helper assigned into it.
# NOTE(review): the float literals rely on exact equality with the float32
# station ids stored in `features` — keep them in sync with the data.
predictions_5905 = get_model_predictions(
    model_5905, features[features["start_station_id"] == 5905.140137].copy()
)
predictions_6140 = get_model_predictions(
    model_6140, features[features["start_station_id"] == 6140.049805].copy()
)
predictions_6822 = get_model_predictions(
    model_6822, features[features["start_station_id"] == 6822.089844].copy()
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [20]:
# Display the prediction frame returned for station 5905.
predictions_5905

Unnamed: 0,start_station_id,predicted_demand
0,5905.140137,42.0


In [21]:
# Display the prediction frame returned for station 6140.
predictions_6140

Unnamed: 0,start_station_id,predicted_demand
0,6140.049805,28.0


In [22]:
# Display the prediction frame returned for station 6822.
predictions_6822

Unnamed: 0,start_station_id,predicted_demand
0,6822.089844,13.0


In [23]:
# Stack the per-station prediction frames into one table with a fresh 0..n-1 index.
predictions = pd.concat(
    [predictions_5905, predictions_6140, predictions_6822],
    ignore_index=True,
)
# Station ids of the (up to) ten highest predicted demands, highest first.
predictions.sort_values(by="predicted_demand", ascending=False)["start_station_id"].iloc[:10].values

array([5905.14, 6140.05, 6822.09], dtype=float32)