In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_parquet("data/driver_stats.parquet")

In [3]:
df.head()

Unnamed: 0,event_timestamp,driver_id,conv_rate,acc_rate,avg_daily_trips,created
0,2023-11-21 08:00:00+00:00,1005,0.335094,0.162989,351,2023-12-06 08:57:13.838
1,2023-11-21 09:00:00+00:00,1005,0.100321,0.105751,961,2023-12-06 08:57:13.838
2,2023-11-21 10:00:00+00:00,1005,0.66401,0.070998,780,2023-12-06 08:57:13.838
3,2023-11-21 11:00:00+00:00,1005,0.259892,0.082264,705,2023-12-06 08:57:13.838
4,2023-11-21 12:00:00+00:00,1005,0.505575,0.898033,830,2023-12-06 08:57:13.838


In [4]:
df.shape

(1807, 6)

### Generating training data

In [5]:
from datetime import datetime

from feast import FeatureStore



In [6]:
# Note: see https://docs.feast.dev/getting-started/concepts/feature-retrieval for 
# more details on how to retrieve for all entities in the offline store instead
entity_df = pd.DataFrame.from_dict(
    {
        # entity's join key -> entity values
        "driver_id": [1001, 1002, 1003],
        # "event_timestamp" (reserved key) -> timestamps
        "event_timestamp": [
            datetime(2021, 4, 12, 10, 59, 42),
            datetime(2021, 4, 12, 8, 12, 10),
            datetime(2021, 4, 12, 16, 40, 26),
        ],
        # (optional) label name -> label values. Feast does not process these
        "label_driver_reported_satisfaction": [1, 5, 3],
        # values we're using for an on-demand transformation
        "val_to_add": [1, 2, 3],
        "val_to_add_2": [10, 20, 30],
    }
)

In [7]:
entity_df

Unnamed: 0,driver_id,event_timestamp,label_driver_reported_satisfaction,val_to_add,val_to_add_2
0,1001,2021-04-12 10:59:42,1,1,10
1,1002,2021-04-12 08:12:10,5,2,20
2,1003,2021-04-12 16:40:26,3,3,30


In [8]:
store = FeatureStore(repo_path=".")

In [9]:
store

<feast.feature_store.FeatureStore at 0x17723ccd0>

In [12]:
training_df = store.get_historical_features(
    entity_df=entity_df,
    features=[
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate",
        "driver_hourly_stats:avg_daily_trips",
        "transformed_conv_rate:conv_rate_plus_val1",
        "transformed_conv_rate:conv_rate_plus_val2",
    ],
).to_df()

In [13]:
training_df

Unnamed: 0,driver_id,event_timestamp,label_driver_reported_satisfaction,val_to_add,val_to_add_2,conv_rate,acc_rate,avg_daily_trips,conv_rate_plus_val1,conv_rate_plus_val2
0,1001,2021-04-12 10:59:42+00:00,1,1,10,0.214921,0.114345,603,1.214921,10.214921
1,1002,2021-04-12 08:12:10+00:00,5,2,20,0.689639,0.121157,471,2.689639,20.689639
2,1003,2021-04-12 16:40:26+00:00,3,3,30,0.259663,0.206803,458,3.259663,30.259663



### 추론

In [14]:
from pprint import pprint
from feast import FeatureStore

In [15]:

store = FeatureStore(repo_path=".")

In [16]:
feature_vector = store.get_online_features(
    features=[
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate",
        "driver_hourly_stats:avg_daily_trips",
    ],
    entity_rows=[
        # {join_key: entity_value}
        {"driver_id": 1004},
        {"driver_id": 1005},
    ],
).to_dict()

In [17]:
feature_vector

{'driver_id': [1004, 1005],
 'conv_rate': [0.6024051308631897, 0.7258734107017517],
 'avg_daily_trips': [867, 150],
 'acc_rate': [0.9690748453140259, 0.18905644118785858]}

In [18]:
pprint(feature_vector)

{'acc_rate': [0.9690748453140259, 0.18905644118785858],
 'avg_daily_trips': [867, 150],
 'conv_rate': [0.6024051308631897, 0.7258734107017517],
 'driver_id': [1004, 1005]}


### Step 3e: Using a feature service to fetch online features instead.

The driver_activity_v1 feature service pulls all features from the driver_hourly_stats feature view:

In [20]:
from pprint import pprint
from feast import FeatureStore

In [21]:
feature_store = FeatureStore('.')  # Initialize the feature store

feature_service = feature_store.get_feature_service("driver_activity_v1")

In [23]:
from pprint import pprint
from feast import FeatureStore
feature_store = FeatureStore('.')  # Initialize the feature store

feature_service = feature_store.get_feature_service("driver_activity_v1")
feature_vector = feature_store.get_online_features(
    features=feature_service,
    entity_rows=[
        # {join_key: entity_value}
        {"driver_id": 1004},
        {"driver_id": 1005},
    ],
).to_dict()
pprint(feature_vector)

RequestDataNotFoundInEntityRowsException: Required request data source features ['val_to_add', 'val_to_add_2'] not found in the entity rows, but required by feature views

- 다시