In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported project
# modules (e.g. src.config) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os

# Add the parent directory of the current working directory to the Python path
# so that the `src` package can be imported from this notebook.
# The membership check keeps the cell idempotent: re-running it in the same
# kernel does not pile up duplicate sys.path entries.
PARENT_DIR = os.path.abspath(os.path.join(os.getcwd(), ".."))
if PARENT_DIR not in sys.path:
    sys.path.append(PARENT_DIR)
import src.config as config

In [27]:
from datetime import datetime, timedelta

# Three naive timestamps, newest first, spaced exactly one hour apart:
# time1 is "now", time2 is one hour earlier, time3 is two hours earlier.
time1 = datetime.now()
time2, time3 = (time1 - timedelta(hours=hours_back) for hours_back in (1, 2))

In [56]:
import pandas as pd

# Ride counts keyed by pickup timestamp; insertion order (newest first) is kept.
rides_by_hour = {time1: 100, time2: 150, time3: 175}

# One [pickup_hour, rides] record per row of the test frame.
data = [[pickup, count] for pickup, count in rides_by_hour.items()]
df = pd.DataFrame(data, columns=["pickup_hour", "rides"])

In [46]:
# Display the frame as built (recorded run: pickup_hour carries sub-second precision).
df

Unnamed: 0,pickup_hour,rides
0,2025-03-02 13:54:56.814712,100
1,2025-03-02 12:54:56.814712,150
2,2025-03-02 11:54:56.814712,175


In [47]:
# Truncate every pickup timestamp down to the start of its hour.
df["pickup_hour"] = df["pickup_hour"].dt.floor(freq="h")

In [48]:
# Display the frame after flooring (recorded run: pickup_hour is on whole hours).
df

Unnamed: 0,pickup_hour,rides
0,2025-03-02 13:00:00,100
1,2025-03-02 12:00:00,150
2,2025-03-02 11:00:00,175


In [49]:
# Check column dtypes before upload (recorded run: pickup_hour is tz-naive datetime64[ns]).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   pickup_hour  3 non-null      datetime64[ns]
 1   rides        3 non-null      int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 180.0 bytes


In [50]:
import hopsworks

FEATURE_GROUP_NAME = "time_test"
FEATURE_GROUP_VERSION = 1

# Log in to the Hopsworks project using the name and API key held in `config`.
project = hopsworks.login(
    project=config.HOPSWORKS_PROJECT_NAME,
    api_key_value=config.HOPSWORKS_API_KEY,
)
feature_store = project.get_feature_store()

# pickup_hour serves as both the primary key and the event-time column.
feature_group = feature_store.get_or_create_feature_group(
    name=FEATURE_GROUP_NAME,
    version=FEATURE_GROUP_VERSION,
    description="Time test",
    primary_key=["pickup_hour"],
    event_time="pickup_hour",
)

# Upload the frame without blocking on the materialization job.
# NOTE(review): with wait_for_job=False, later reads may presumably run before
# the offline data is materialized — confirm this is acceptable for a full re-run.
feature_group.insert(df, write_options={"wait_for_job": False})

2025-03-02 14:04:07,931 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-02 14:04:08,007 INFO: Initializing external client
2025-03-02 14:04:08,007 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-02 14:04:09,112 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1212635
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1212635/fs/1200268/fg/1403309


Uploading Dataframe: 100.00% |██████████| Rows 3/3 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: time_test_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1212635/jobs/named/time_test_1_offline_fg_materialization/executions


(Job('time_test_1_offline_fg_materialization', 'SPARK'), None)

In [52]:
FEATURE_VIEW_NAME = "test"
FEATURE_VIEW_VERSION = 1

# Try to create the feature view over every column of the feature group.
# Creation is best-effort: a failure is reported but not fatal, because the
# retrieval below is what decides whether a usable view exists.
# NOTE(review): presumably the expected failure is "view already exists" — confirm.
try:
    feature_store.create_feature_view(
        name=FEATURE_VIEW_NAME,
        version=FEATURE_VIEW_VERSION,
        query=feature_group.select_all(),
    )
    print(
        f"Feature view {FEATURE_VIEW_NAME} (version {FEATURE_VIEW_VERSION}) created successfully."
    )
except Exception as e:
    print(f"Error creating feature view: {e}")

# Retrieve the feature view. A failure here is re-raised: swallowing it would
# leave `feature_view` undefined (or stale from an earlier run of this cell),
# and the following cells would then fail or silently use the wrong object.
try:
    feature_view = feature_store.get_feature_view(
        name=FEATURE_VIEW_NAME,
        version=FEATURE_VIEW_VERSION,
    )
except Exception as e:
    print(f"Error retrieving feature view: {e}")
    raise
print(
    f"Feature view {FEATURE_VIEW_NAME} (version {FEATURE_VIEW_VERSION}) retrieved successfully."
)

Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1212635/fs/1200268/fv/test/version/1
Feature view test (version 1) created successfully.
Feature view test (version 1) retrieved successfully.


In [53]:
# Read the feature view back as a DataFrame. training_data returns a pair; only
# the first element is kept and the second is discarded into `_`.
ts_data, _ = feature_view.training_data(description="Test")

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.89s) 




In [54]:
# Display the rows read back (recorded run: values carry a +00:00 offset and
# the row order differs from the uploaded frame).
ts_data

Unnamed: 0,pickup_hour,rides
0,2025-03-02 12:00:00+00:00,150
1,2025-03-02 11:00:00+00:00,175
2,2025-03-02 13:00:00+00:00,100


In [55]:
# Check dtypes after the round trip (recorded run: pickup_hour comes back as object).
ts_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   pickup_hour  3 non-null      object
 1   rides        3 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 180.0+ bytes


In [57]:
# Parse the object-dtype pickup_hour column into timezone-aware UTC timestamps.
# utc=True pins the result to datetime64[ns, UTC] even if values carry differing
# offsets. Leaving `errors` at its default ("raise") makes an unparseable value
# fail loudly here instead of silently becoming NaT in the event-time column.
ts_data["pickup_hour"] = pd.to_datetime(ts_data["pickup_hour"], utc=True)

In [59]:
# Verify the parse (recorded run: pickup_hour is datetime64[ns, UTC]).
ts_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype              
---  ------       --------------  -----              
 0   pickup_hour  3 non-null      datetime64[ns, UTC]
 1   rides        3 non-null      int64              
dtypes: datetime64[ns, UTC](1), int64(1)
memory usage: 180.0 bytes


In [60]:
# Strip the timezone so pickup_hour is tz-naive again, like the frame that was uploaded.
ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(tz=None)

In [61]:
# Verify the timezone was dropped (recorded run: pickup_hour is tz-naive datetime64[ns]).
ts_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   pickup_hour  3 non-null      datetime64[ns]
 1   rides        3 non-null      int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 180.0 bytes


In [64]:
# Show the rows in chronological order; returns a sorted copy, ts_data itself is unchanged.
ts_data.sort_values(by="pickup_hour")

Unnamed: 0,pickup_hour,rides
1,2025-03-02 11:00:00,175
0,2025-03-02 12:00:00,150
2,2025-03-02 13:00:00,100


In [66]:
# Round the current time up to the next whole hour.
pd.to_datetime(datetime.now()).ceil(freq="h")

Timestamp('2025-03-02 16:00:00')