## Criar dados samples

In [8]:
import os
import random

import pandas as pd
from datetime import datetime, timedelta

user = pd.DataFrame({
    "user_id": [random.randint(1,3) for x in range(100)],
    "user_category": [x for x in range(100)],
    "event_timestamp": [datetime(2021, 1, 1) + timedelta(hours=x) for x in range(100)]
})
user.to_parquet('user_categories.parquet')

driver = pd.DataFrame({
    "driver_id": [random.randint(1,3) for x in range(100)],
    "driver_category": [x for x in range(100)],
    "event_timestamp": [datetime(2021, 1, 1) + timedelta(hours=x) for x in range(100)]
})
driver.to_parquet('driver_categories.parquet')

display(user.tail(15), driver.tail(15))

Unnamed: 0,user_id,user_category,event_timestamp
85,2,85,2021-01-04 13:00:00
86,3,86,2021-01-04 14:00:00
87,1,87,2021-01-04 15:00:00
88,1,88,2021-01-04 16:00:00
89,3,89,2021-01-04 17:00:00
90,2,90,2021-01-04 18:00:00
91,1,91,2021-01-04 19:00:00
92,3,92,2021-01-04 20:00:00
93,3,93,2021-01-04 21:00:00
94,1,94,2021-01-04 22:00:00


Unnamed: 0,driver_id,driver_category,event_timestamp
85,3,85,2021-01-04 13:00:00
86,3,86,2021-01-04 14:00:00
87,2,87,2021-01-04 15:00:00
88,3,88,2021-01-04 16:00:00
89,3,89,2021-01-04 17:00:00
90,2,90,2021-01-04 18:00:00
91,2,91,2021-01-04 19:00:00
92,1,92,2021-01-04 20:00:00
93,1,93,2021-01-04 21:00:00
94,2,94,2021-01-04 22:00:00


## Configurar Repositorio - rodar em um .py

In [None]:
from google.protobuf.duration_pb2 import Duration

from feast import Entity, Feature, FeatureView, FileSource, ValueType

user_categories = FileSource(
    path=r"E:\repositorio\development\feast\repo\data\user_categories.parquet",
    event_timestamp_column="event_timestamp",
)

user = Entity(name="user_id", value_type=ValueType.INT64, description="user id",)

user_caterogicals_view = FeatureView(
    name="user_categories",
    entities=["user_id"],
    ttl=Duration(seconds=86400 * 1),
    features=[
        Feature(name="user_category", dtype=ValueType.INT64),
    ],
    online=True,
    batch_source=user_categories,
    tags={},
)

driver_categories = FileSource(
    path=r"E:\repositorio\development\feast\repo\data\driver_categories.parquet",
    event_timestamp_column="event_timestamp",
)

driver = Entity(name="driver_id", value_type=ValueType.INT64, description="user id",)

driver_caterogicals_view = FeatureView(
    name="driver_categories",
    entities=["driver_id"],
    ttl=Duration(seconds=86400 * 1),
    features=[
        Feature(name="driver_category", dtype=ValueType.INT64),
    ],
    online=True,
    batch_source=driver_categories,
    tags={},
)

## Verificar os efeitos

In [34]:
from datetime import datetime, timedelta
import pandas as pd

from feast import FeatureStore

store = FeatureStore(repo_path="./repo")

entity_df = pd.DataFrame(
    {
        "user_id": [2],
        "driver_id": [2],
        "event_timestamp": [datetime(2021,1,4,21,30)],
    }
)

store.get_historical_features(
    entity_df=entity_df,
    features=[
        "driver_categories:driver_category",
        "user_categories:user_category"
    ]
).to_df()

Unnamed: 0,event_timestamp,user_id,driver_id,driver_category,user_category
0,2021-01-04 21:30:00+00:00,2,2,91,90
