In [1]:
%%sh
# Create the directories that hold the feature-store state and the parquet data.
mkdir -p stores/feature data
# `feast init` reports a conflict when the repository already exists, so only
# scaffold it on the first run; this keeps the cell safe to re-execute.
if [ ! -f features/feature_store.yaml ]; then
    feast init --minimal --template local features
fi
# Ensure the feature-definition module exists. Touching it by path (instead of
# after an unchecked `cd`) cannot create a stray file in the wrong directory.
touch features/features.py

The directory [1m[32mfeatures[0m contains an existing feature store repository that may cause a conflict



In [2]:
%%bash
# Regenerate the Feast configuration from scratch on every run.
yaml_path=features/feature_store.yaml
[ -f "$yaml_path" ] && rm "$yaml_path"
# Quoted heredoc: the YAML below is written verbatim, with no shell expansion.
cat > "$yaml_path" <<'EOF'
project: features
registry: ../stores/feature/registry.db
provider: local
online_store:
    path: ../stores/feature/online_store.db
EOF
# Print the result so the rendered notebook shows the final configuration.
cat "$yaml_path"

project: features
registry: ../stores/feature/registry.db
provider: local
online_store:
    path: ../stores/feature/online_store.db


In [6]:
import pandas as pd
# Load the raw feature table from the notebook's working directory.
raw_features_path = 'features.parquet'
df = pd.read_parquet(raw_features_path)

In [7]:
# Stamp every row with a synthetic event time: one consecutive day per row,
# starting 2022-01-01. The range is sized from len(df) and assigned as an index
# (positional, no label alignment), so frames that are not exactly 100 rows on
# a default RangeIndex no longer end up with NaT timestamps.
df['created_on'] = pd.date_range(start='2022-01-01', periods=len(df), freq='D')

In [10]:
# Persist the frame (including the `created_on` column) as parquet under ./data
from pathlib import Path
import os
DATA_DIR = Path(os.getcwd()) / "data"
features_parquet = DATA_DIR / "features.parquet"
df.to_parquet(features_parquet, compression=None, allow_truncated_timestamps=True)

In [11]:
import os
import json
import pandas as pd
from datetime import datetime
from pathlib import Path
from feast import Entity, Feature, FeatureView, ValueType
from feast.data_source import FileSource
from google.protobuf.duration_pb2 import Duration

In [12]:
%%bash
# Generate the Feast feature-definition module from scratch on every run.
FEATURES_PY=features/features.py
# Quoted heredoc: the Python source is written verbatim (no shell expansion and
# no backslash-escape interpretation, unlike `echo -e`); `>` truncates any
# previous version of the file.
cat > "$FEATURES_PY" <<'EOF'
from datetime import datetime
from pathlib import Path

from feast import Entity, Feature, FeatureView, ValueType
from feast.data_source import FileSource
from google.protobuf.duration_pb2 import Duration


# Read data
START_TIME = "2022-01-01"
# Locate the parquet file relative to this module (<repo>/features/features.py
# -> <repo>/data/features.parquet) instead of an absolute path that only exists
# on one machine.
DATA_PATH = Path(__file__).resolve().parent.parent / "data" / "features.parquet"
project_details = FileSource(
    path=str(DATA_PATH),
    event_timestamp_column="created_on",
)

# Define an entity for the project
project = Entity(
    name="store_sk_id",
    value_type=ValueType.INT64,
    description="project id",
)

# Define a Feature View for each project
# Can be used for fetching historical data and online serving
project_details_view = FeatureView(
    name="store_details",
    entities=["store_sk_id"],
    # TTL spans the whole days elapsed between START_TIME and today.
    ttl=Duration(
        seconds=(datetime.today() - datetime.strptime(START_TIME, "%Y-%m-%d")).days * 24 * 60 * 60
    ),
    features=[
        Feature(name="current_ind", dtype=ValueType.STRING),
        Feature(name="store_nbr", dtype=ValueType.INT64),
        Feature(name="store_name", dtype=ValueType.STRING),
        Feature(name="street_addr_line1", dtype=ValueType.STRING),
        Feature(name="city_name", dtype=ValueType.STRING),
        Feature(name="state_prov_code", dtype=ValueType.STRING),
        Feature(name="postal_code", dtype=ValueType.STRING),
        Feature(name="subdiv_nbr", dtype=ValueType.STRING),
        Feature(name="subdiv_name", dtype=ValueType.STRING),
        Feature(name="region_name", dtype=ValueType.STRING),
        Feature(name="region_nbr", dtype=ValueType.INT64),
        Feature(name="market_area_nbr", dtype=ValueType.INT64),
        Feature(name="market_area_name", dtype=ValueType.STRING),
        Feature(name="primary_trait_name", dtype=ValueType.STRING),
        Feature(name="county_name", dtype=ValueType.STRING),
        Feature(name="phone_nbr", dtype=ValueType.STRING),
        Feature(name="store_size_qty", dtype=ValueType.INT64),
        Feature(name="store_size_uom_code", dtype=ValueType.STRING),
        Feature(name="store_size_uom_desc", dtype=ValueType.STRING),
        Feature(name="open_sunday_ind", dtype=ValueType.STRING),
        Feature(name="open_status_code", dtype=ValueType.STRING),
        Feature(name="open_status_desc", dtype=ValueType.STRING),
        Feature(name="expansion_size_qty", dtype=ValueType.INT64),
        Feature(name="real_store_nbr", dtype=ValueType.INT64),
        Feature(name="temp_store_nbr", dtype=ValueType.INT64),
        Feature(name="temp_store_ind", dtype=ValueType.STRING),
        Feature(name="store_type_code", dtype=ValueType.STRING),
        Feature(name="store_type_desc", dtype=ValueType.STRING),
        Feature(name="size_class_code", dtype=ValueType.INT64),
        Feature(name="sales_class_code", dtype=ValueType.INT64),
        Feature(name="store_comp_code", dtype=ValueType.STRING),
        Feature(name="store_comp_desc", dtype=ValueType.STRING),
        Feature(name="store_comp_ind", dtype=ValueType.STRING),
        Feature(name="financial_rpt_code", dtype=ValueType.STRING),
        Feature(name="financial_rpt_desc", dtype=ValueType.STRING),
        Feature(name="delivery_type_code", dtype=ValueType.STRING),
        Feature(name="delivery_type_desc", dtype=ValueType.STRING),
        Feature(name="prototype_nbr", dtype=ValueType.STRING),
        Feature(name="time_zone_code", dtype=ValueType.STRING),
        Feature(name="latitude_dgr", dtype=ValueType.FLOAT),
        Feature(name="longitude_dgr", dtype=ValueType.FLOAT),
        Feature(name="apparel_zone_nbr", dtype=ValueType.INT64),
        Feature(name="mdse_major_zone_nbr", dtype=ValueType.INT64),
        Feature(name="mdse_sub_zone_nbr", dtype=ValueType.INT64),
        Feature(name="banner_code", dtype=ValueType.STRING),
        Feature(name="banner_desc", dtype=ValueType.STRING),
        Feature(name="store_mgr_name", dtype=ValueType.STRING),
        Feature(name="buo_area_nbr", dtype=ValueType.INT64),
        Feature(name="base_div_nbr", dtype=ValueType.INT64),
        Feature(name="buo_area_name", dtype=ValueType.STRING),
        Feature(name="subregion_nbr", dtype=ValueType.STRING),
        Feature(name="subregion_name", dtype=ValueType.STRING),
        Feature(name="dec_store_nbr", dtype=ValueType.STRING),
        Feature(name="buo_area_mgr_name", dtype=ValueType.STRING),
        Feature(name="subdiv_mgr_name", dtype=ValueType.STRING),
        Feature(name="region_mgr_name", dtype=ValueType.STRING),
        Feature(name="subregion_mgr_name", dtype=ValueType.STRING),
        Feature(name="market_area_mgr_name", dtype=ValueType.STRING),
        Feature(name="geo_point_nbr", dtype=ValueType.STRING)
    ],
    online=True,
    input=project_details,
    tags={},
)
EOF

In [13]:
%%bash
# Register the entity and feature view defined in features/features.py.
# Chained with `&&` so `feast apply` never runs from the wrong directory
# if the `cd` fails.
cd features && feast apply

Registered entity store_sk_id
Registered feature view store_details
Deploying infrastructure for store_details


In [14]:
import pandas as pd
from feast import FeatureStore

In [15]:
# Build the entity dataframe: the first three store ids, each paired with
# today's date at midnight as the event timestamp.
project_ids = df["store_sk_id"].iloc[:3].tolist()
now = datetime.now()
midnight_today = datetime.combine(now.date(), datetime.min.time())
timestamps = [midnight_today for _ in project_ids]
entity_df = pd.DataFrame(
    {"store_sk_id": project_ids, "event_timestamp": timestamps}
)
entity_df.head()

Unnamed: 0,store_sk_id,event_timestamp
0,752,2022-07-27
1,127,2022-07-27
2,801,2022-07-27


In [16]:
# Fetch historical values of two `store_details` features for the rows in
# `entity_df`, using the repository under ./features.
store = FeatureStore(repo_path="features")
requested_features = [
    "store_details:subregion_mgr_name",
    "store_details:latitude_dgr",
]
retrieval_job = store.get_historical_features(
    entity_df=entity_df,
    feature_refs=requested_features,
)
training_df = retrieval_job.to_df()
training_df.head()

Unnamed: 0,event_timestamp,store_sk_id,store_details__subregion_mgr_name,store_details__latitude_dgr
0,2022-07-27 00:00:00+00:00,752,lnyirfnhrw,0.274
1,2022-07-27 00:00:00+00:00,127,oubcruropm,0.123
2,2022-07-27 00:00:00+00:00,801,buxoxljghc,0.612
