In [12]:
import pandas as pd
# Load the driver stats parquet file kept under the Feast feature repo's data directory.
driver_stats_path = "../feast_store/feature_repo/data/driver_stats.parquet"
data = pd.read_parquet(driver_stats_path)

In [13]:
# Preview the loaded driver stats frame (rich display of the cell's last expression).
data

Unnamed: 0,event_timestamp,driver_id,conv_rate,acc_rate,avg_daily_trips,created
0,2024-10-30 21:00:00+00:00,1005,0.787068,0.551687,93,2024-11-14 21:00:20.784
1,2024-10-30 22:00:00+00:00,1005,0.295684,0.598839,112,2024-11-14 21:00:20.784
2,2024-10-30 23:00:00+00:00,1005,0.478915,0.675342,721,2024-11-14 21:00:20.784
3,2024-10-31 00:00:00+00:00,1005,0.139866,0.654633,419,2024-11-14 21:00:20.784
4,2024-10-31 01:00:00+00:00,1005,0.448477,0.339201,676,2024-11-14 21:00:20.784
...,...,...,...,...,...,...
1802,2024-11-14 19:00:00+00:00,1001,0.236733,0.277831,59,2024-11-14 21:00:20.784
1803,2024-11-14 20:00:00+00:00,1001,0.552931,0.122270,224,2024-11-14 21:00:20.784
1804,2021-04-12 07:00:00+00:00,1001,0.370526,0.337860,885,2024-11-14 21:00:20.784
1805,2024-11-07 09:00:00+00:00,1003,0.083284,0.594110,517,2024-11-14 21:00:20.784


# Build the entity dataframe and retrieve historical features

In [14]:
from datetime import datetime
import pandas as pd

from feast import FeatureStore

# Note: see https://docs.feast.dev/getting-started/concepts/feature-retrieval for
# more details on how to retrieve for all entities in the offline store instead

# Entity join key values: one row per driver we want features for.
driver_ids = [1001, 1002, 1003]

# "event_timestamp" is a reserved key: the timestamp attached to each entity row.
event_times = [
    datetime(2021, 4, 12, 10, 59, 42),
    datetime(2021, 4, 12, 8, 12, 10),
    datetime(2021, 4, 12, 16, 40, 26),
]

# (optional) label values; Feast does not process these.
satisfaction_labels = [1, 5, 3]

# Extra request-time values consumed by the on-demand transformation.
addends_1 = [1, 2, 3]
addends_2 = [10, 20, 30]

entity_df = pd.DataFrame(
    {
        "driver_id": driver_ids,
        "event_timestamp": event_times,
        "label_driver_reported_satisfaction": satisfaction_labels,
        "val_to_add": addends_1,
        "val_to_add_2": addends_2,
    }
)
entity_df

Unnamed: 0,driver_id,event_timestamp,label_driver_reported_satisfaction,val_to_add,val_to_add_2
0,1001,2021-04-12 10:59:42,1,1,10
1,1002,2021-04-12 08:12:10,5,2,20
2,1003,2021-04-12 16:40:26,3,3,30


In [15]:
# Open the Feast feature repository by its path relative to this notebook.
store = FeatureStore(repo_path="../feast_store/feature_repo/")

# Features to join onto the entity rows, written as "<view name>:<feature name>".
historical_feature_refs = [
    "driver_hourly_stats:conv_rate",
    "driver_hourly_stats:acc_rate",
    "driver_hourly_stats:avg_daily_trips",
    "transformed_conv_rate:conv_rate_plus_val1",
    "transformed_conv_rate:conv_rate_plus_val2",
]

# Request the historical features for every row of entity_df, then
# materialise the retrieval job as a pandas DataFrame.
retrieval_job = store.get_historical_features(
    entity_df=entity_df,
    features=historical_feature_refs,
)
training_df = retrieval_job.to_df()

training_df

Unnamed: 0,driver_id,event_timestamp,label_driver_reported_satisfaction,val_to_add,val_to_add_2,conv_rate,acc_rate,avg_daily_trips,conv_rate_plus_val1,conv_rate_plus_val2
0,1001,2021-04-12 10:59:42+00:00,1,1,10,0.370526,0.33786,885,1.370526,10.370526
1,1002,2021-04-12 08:12:10+00:00,5,2,20,0.782976,0.785573,682,2.782976,20.782976
2,1003,2021-04-12 16:40:26+00:00,3,3,30,0.710477,0.349192,211,3.710477,30.710477


# Retrieve historical features as of the current time

In [16]:
# Re-run the historical retrieval with every entity row stamped "now" (UTC).
#
# The timestamp is replaced on a derived copy instead of being written into
# entity_df itself: overwriting the column in place would make the entity frame
# displayed earlier stale and would change the result of the previous retrieval
# cell if it were re-run after this one.
entity_df_now = entity_df.assign(event_timestamp=pd.to_datetime("now", utc=True))

training_df = store.get_historical_features(
    entity_df=entity_df_now,
    features=[
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate",
        "driver_hourly_stats:avg_daily_trips",
        "transformed_conv_rate:conv_rate_plus_val1",
        "transformed_conv_rate:conv_rate_plus_val2",
    ],
).to_df()
training_df

Unnamed: 0,driver_id,event_timestamp,label_driver_reported_satisfaction,val_to_add,val_to_add_2,conv_rate,acc_rate,avg_daily_trips,conv_rate_plus_val1,conv_rate_plus_val2
0,1001,2024-11-15 13:43:20.849251+00:00,1,1,10,0.552931,0.12227,224,1.552931,10.552931
1,1002,2024-11-15 13:43:20.849251+00:00,5,2,20,0.134929,0.564734,470,2.134929,20.134929
2,1003,2024-11-15 13:43:20.849251+00:00,3,3,30,0.992752,0.75137,355,3.992752,30.992752


# Get online features

In [17]:
from pprint import pprint


# Fetch online features for drivers 1004 and 1005.
online_feature_refs = [
    "driver_hourly_stats:conv_rate",
    "driver_hourly_stats:acc_rate",
    "driver_hourly_stats:avg_daily_trips",
]

# Each entity row maps the join key to one entity value: {join_key: entity_value}
driver_rows = [{"driver_id": driver_id} for driver_id in (1004, 1005)]

online_response = store.get_online_features(
    features=online_feature_refs,
    entity_rows=driver_rows,
)
# Convert the response to a plain dict so it can be printed and tabulated.
feature_vector = online_response.to_dict()

pprint(feature_vector)



{'acc_rate': [0.15183794498443604, 0.9671500325202942],
 'avg_daily_trips': [411, 338],
 'conv_rate': [0.3750897943973541, 0.6953374743461609],
 'driver_id': [1004, 1005]}


  rows = cur.fetchall()


In [18]:
# Show the online feature dict as a table: each dict key becomes a column.
pd.DataFrame.from_dict(feature_vector)

Unnamed: 0,driver_id,avg_daily_trips,conv_rate,acc_rate
0,1004,411,0.37509,0.151838
1,1005,338,0.695337,0.96715


In [19]:
# Order the raw rows by event time, then driver, and show the last ten rows
# so they can be compared with the online values retrieved above.
sort_keys = ["event_timestamp", "driver_id"]
data.sort_values(by=sort_keys).tail(10)

Unnamed: 0,event_timestamp,driver_id,conv_rate,acc_rate,avg_daily_trips,created
1802,2024-11-14 19:00:00+00:00,1001,0.236733,0.277831,59,2024-11-14 21:00:20.784
1441,2024-11-14 19:00:00+00:00,1002,0.093892,0.521932,514,2024-11-14 21:00:20.784
1080,2024-11-14 19:00:00+00:00,1003,0.181633,0.788953,719,2024-11-14 21:00:20.784
719,2024-11-14 19:00:00+00:00,1004,0.349781,0.369616,371,2024-11-14 21:00:20.784
358,2024-11-14 19:00:00+00:00,1005,0.817146,0.316519,779,2024-11-14 21:00:20.784
1803,2024-11-14 20:00:00+00:00,1001,0.552931,0.12227,224,2024-11-14 21:00:20.784
1442,2024-11-14 20:00:00+00:00,1002,0.134929,0.564734,470,2024-11-14 21:00:20.784
1081,2024-11-14 20:00:00+00:00,1003,0.992752,0.75137,355,2024-11-14 21:00:20.784
720,2024-11-14 20:00:00+00:00,1004,0.37509,0.151838,411,2024-11-14 21:00:20.784
359,2024-11-14 20:00:00+00:00,1005,0.695337,0.96715,338,2024-11-14 21:00:20.784


In [20]:
# Display the raw driver stats frame again (same frame as loaded at the top;
# no cell shown in between reassigns or mutates it).
data

Unnamed: 0,event_timestamp,driver_id,conv_rate,acc_rate,avg_daily_trips,created
0,2024-10-30 21:00:00+00:00,1005,0.787068,0.551687,93,2024-11-14 21:00:20.784
1,2024-10-30 22:00:00+00:00,1005,0.295684,0.598839,112,2024-11-14 21:00:20.784
2,2024-10-30 23:00:00+00:00,1005,0.478915,0.675342,721,2024-11-14 21:00:20.784
3,2024-10-31 00:00:00+00:00,1005,0.139866,0.654633,419,2024-11-14 21:00:20.784
4,2024-10-31 01:00:00+00:00,1005,0.448477,0.339201,676,2024-11-14 21:00:20.784
...,...,...,...,...,...,...
1802,2024-11-14 19:00:00+00:00,1001,0.236733,0.277831,59,2024-11-14 21:00:20.784
1803,2024-11-14 20:00:00+00:00,1001,0.552931,0.122270,224,2024-11-14 21:00:20.784
1804,2021-04-12 07:00:00+00:00,1001,0.370526,0.337860,885,2024-11-14 21:00:20.784
1805,2024-11-07 09:00:00+00:00,1003,0.083284,0.594110,517,2024-11-14 21:00:20.784
