In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
import src.config as config

In [3]:
from src.inference import get_feature_store

In [4]:
from datetime import datetime, timedelta
import pandas as pd  

# Get the current datetime64[us, Etc/UTC]  
current_date = pd.Timestamp.now(tz='Etc/UTC')
feature_store = get_feature_store()

# read time-series data from the feature store
fetch_data_to = current_date - timedelta(hours=1)
fetch_data_from = current_date - timedelta(days=1*29)

print(fetch_data_to)
print(fetch_data_from)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")
feature_view = feature_store.get_feature_view(
    name=config.FEATURE_VIEW_NAME, version=config.FEATURE_VIEW_VERSION
)



2025-05-11 07:06:01,953 INFO: Initializing external client
2025-05-11 07:06:01,953 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-11 07:06:02,587 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214695
2025-05-11 10:06:01.952001+00:00
2025-04-12 11:06:01.952001+00:00
Fetching data from 2025-04-12 11:06:01.952001+00:00 to 2025-05-11 10:06:01.952001+00:00


In [5]:
ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)]

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.46s) 


In [6]:
ts_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 865 entries, 0 to 869
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype                  
---  ------              --------------  -----                  
 0   pickup_hour         865 non-null    datetime64[us, Etc/UTC]
 1   pickup_location_id  865 non-null    int32                  
 2   rides               865 non-null    int32                  
dtypes: datetime64[us, Etc/UTC](1), int32(2)
memory usage: 20.3 KB


In [7]:
ts_data.sort_values(["pickup_location_id", "pickup_hour"]).reset_index(drop=True)

Unnamed: 0,pickup_hour,pickup_location_id,rides
0,2025-04-12 12:00:00+00:00,5626,104
1,2025-04-12 16:00:00+00:00,5626,86
2,2025-04-12 20:00:00+00:00,5626,31
3,2025-04-13 00:00:00+00:00,5626,19
4,2025-04-13 04:00:00+00:00,5626,11
...,...,...,...
860,2025-05-10 12:00:00+00:00,6072,85
861,2025-05-10 16:00:00+00:00,6072,59
862,2025-05-10 20:00:00+00:00,6072,54
863,2025-05-11 00:00:00+00:00,6072,23


In [8]:
ts_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 865 entries, 0 to 869
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype                  
---  ------              --------------  -----                  
 0   pickup_hour         865 non-null    datetime64[us, Etc/UTC]
 1   pickup_location_id  865 non-null    int32                  
 2   rides               865 non-null    int32                  
dtypes: datetime64[us, Etc/UTC](1), int32(2)
memory usage: 20.3 KB


In [9]:
ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)

In [10]:
ts_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 865 entries, 0 to 869
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   pickup_hour         865 non-null    datetime64[us]
 1   pickup_location_id  865 non-null    int32         
 2   rides               865 non-null    int32         
dtypes: datetime64[us](1), int32(2)
memory usage: 20.3 KB


In [11]:
from src.data_utils import transform_ts_data_info_features
features = transform_ts_data_info_features(ts_data, window_size=6*28, step_size=1)

In [12]:
features

Unnamed: 0,rides_t-168,rides_t-167,rides_t-166,rides_t-165,rides_t-164,rides_t-163,rides_t-162,rides_t-161,rides_t-160,rides_t-159,...,rides_t-8,rides_t-7,rides_t-6,rides_t-5,rides_t-4,rides_t-3,rides_t-2,rides_t-1,pickup_location_id,pickup_hour
0,23,20,49,22,34,98,9,59,5,73,...,109,4,7,80,11,78,5,86,6072,2025-04-15 20:00:00
1,20,49,22,34,98,9,59,5,73,124,...,4,7,80,11,78,5,86,12,6072,2025-05-08 16:00:00
2,49,22,34,98,9,59,5,73,124,79,...,7,80,11,78,5,86,12,179,6072,2025-04-17 16:00:00
3,22,34,98,9,59,5,73,124,79,85,...,80,11,78,5,86,12,179,47,6072,2025-05-01 04:00:00
4,34,98,9,59,5,73,124,79,85,59,...,11,78,5,86,12,179,47,21,6072,2025-05-05 16:00:00
5,73,5,92,82,40,117,157,83,103,4,...,3,154,29,62,35,85,22,11,5626,2025-04-26 20:00:00
6,5,92,82,40,117,157,83,103,4,143,...,154,29,62,35,85,22,11,57,5626,2025-04-25 00:00:00
7,92,82,40,117,157,83,103,4,143,14,...,29,62,35,85,22,11,57,12,5626,2025-04-16 04:00:00
8,82,40,117,157,83,103,4,143,14,114,...,62,35,85,22,11,57,12,3,5626,2025-04-15 08:00:00
9,40,117,157,83,103,4,143,14,114,110,...,35,85,22,11,57,12,3,42,5626,2025-04-18 00:00:00


In [13]:
from src.inference import load_batch_of_features_from_store
current_date = pd.Timestamp.now(tz='Etc/UTC')
features = load_batch_of_features_from_store(current_date)

2025-05-11 07:06:51,396 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-11 07:06:51,399 INFO: Initializing external client
2025-05-11 07:06:51,399 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-11 07:06:52,076 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214695
Fetching data from 2025-04-12 11:06:51.395981+00:00 to 2025-05-11 10:06:51.395981+00:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.43s) 


In [14]:
current_date

Timestamp('2025-05-11 11:06:51.395981+0000', tz='Etc/UTC')

In [15]:
features

Unnamed: 0,rides_t-168,rides_t-167,rides_t-166,rides_t-165,rides_t-164,rides_t-163,rides_t-162,rides_t-161,rides_t-160,rides_t-159,...,rides_t-8,rides_t-7,rides_t-6,rides_t-5,rides_t-4,rides_t-3,rides_t-2,rides_t-1,pickup_location_id,pickup_hour
0,104,86,31,19,11,48,97,71,31,3,...,39,124,91,154,91,51,13,121,5626,2025-05-10 12:00:00
1,86,31,19,11,48,97,71,31,3,14,...,124,91,154,91,51,13,121,160,5626,2025-05-10 16:00:00
2,31,19,11,48,97,71,31,3,14,74,...,91,154,91,51,13,121,160,102,5626,2025-05-10 20:00:00
3,19,11,48,97,71,31,3,14,74,86,...,154,91,51,13,121,160,102,69,5626,2025-05-11 00:00:00
4,11,48,97,71,31,3,14,74,86,95,...,91,51,13,121,160,102,69,46,5626,2025-05-11 04:00:00
5,109,84,48,20,8,69,85,74,40,4,...,39,126,131,203,80,47,13,145,5779,2025-05-10 12:00:00
6,84,48,20,8,69,85,74,40,4,22,...,126,131,203,80,47,13,145,135,5779,2025-05-10 16:00:00
7,48,20,8,69,85,74,40,4,22,53,...,131,203,80,47,13,145,135,136,5779,2025-05-10 20:00:00
8,20,8,69,85,74,40,4,22,53,86,...,203,80,47,13,145,135,136,89,5779,2025-05-11 00:00:00
9,8,69,85,74,40,4,22,53,86,116,...,80,47,13,145,135,136,89,65,5779,2025-05-11 04:00:00


In [16]:
from src.inference import load_model_from_registry

model = load_model_from_registry()

2025-05-11 07:07:07,975 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-11 07:07:07,977 INFO: Initializing external client
2025-05-11 07:07:07,977 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-11 07:07:08,565 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1214695
Downloading model artifact (0 dirs, 1 files)... DONE

In [17]:
from src.inference import get_model_predictions
predictions = get_model_predictions(model, features)

In [18]:
predictions

Unnamed: 0,pickup_location_id,predicted_demand
0,5626,142.0
1,5626,137.0
2,5626,68.0
3,5626,37.0
4,5626,20.0
5,5779,156.0
6,5779,147.0
7,5779,79.0
8,5779,37.0
9,5779,19.0


In [19]:
predictions.sort_values("predicted_demand", ascending=False).head(10)["pickup_location_id"].values

array([5788, 5905, 5779, 5779, 5626, 5626, 5788, 5905, 6072, 5779],
      dtype=int32)