# Amazon SageMaker Feature Storeの基本的な使い方

## 使用するデータ
[SageMaker Feature Storeのサンプル](https://github.com/aws/amazon-sagemaker-examples/tree/main/sagemaker-featurestore)を利用。
以下の三種類のデータが存在する。

- 顧客データ (feature_store_introduction_customer.csv)
- 更新された顧客データ (feature_store_introduction_customer_updated.csv)
- 注文データ (feature_store_introduction_orders.csv)

In [1]:
import pandas as pd

customer_df = pd.read_csv('s3://machine-learning-workshop/feature-store/feature_store_introduction_customer.csv')
customer_df

Unnamed: 0,customer_id,city_code,state_code,country_code
0,573291,1,49,2
1,109382,2,40,2
2,828400,3,31,2
3,124013,4,5,2


In [2]:
order_df = pd.read_csv('s3://machine-learning-workshop/feature-store/feature_store_introduction_orders.csv')
order_df

Unnamed: 0,customer_id,order_id,order_status,store_id
0,573291,4132,1,303
1,109382,5724,0,201
2,828400,1942,0,431
3,124013,6782,1,213


## Feature Groupを作成
２種類のFeature Groupを作成する。
- 顧客用Feature Group
- 注文用Feature Group

In [5]:
import sys

import boto3
import pandas as pd
import numpy as np
import sagemaker
from sagemaker.session import Session
from sagemaker import get_execution_role

role = get_execution_role()

sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name

In [6]:
import time
from time import strftime, gmtime
customers_feature_group_name = 'customers-feature-group-' + strftime('%d-%H-%M-%S', gmtime())
orders_feature_group_name = 'orders-feature-group-' + strftime('%d-%H-%M-%S', gmtime())

In [7]:
from sagemaker.feature_store.feature_group import FeatureGroup

customers_feature_group = FeatureGroup(
    name=customers_feature_group_name, sagemaker_session=sagemaker_session
)
orders_feature_group = FeatureGroup(
    name=orders_feature_group_name, sagemaker_session=sagemaker_session
)


### データにevent timeを付与
Feature Groupに保存するデータは

- record識別子: データの各レコードを一意に識別するfeature (今回はcustomer_idを利用)
- event time: レコードの作成または更新に対応するfeature (今回はEventTimeというfeatureを作成)

の２つの定義が必要。

#### DataFrameにEvenTimeを付与

In [9]:
import time
current_time_sec = int(round(time.time()))

event_time_col = "EventTime"

customer_df[event_time_col] = pd.Series([current_time_sec]*len(customer_df), dtype="float64")
order_df[event_time_col] = pd.Series([current_time_sec]*len(order_df), dtype="float64")

In [19]:
customers_feature_group.load_feature_definitions(data_frame=customer_df)
orders_feature_group.load_feature_definitions(data_frame=order_df)

[FeatureDefinition(feature_name='customer_id', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='order_id', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='order_status', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='store_id', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='EventTime', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>)]

#### レコード識別子を指定して、Feature Groupを作成
Feature Groupを作成するとAmazon Glue Data Catalogも同時に作成される。

In [21]:
prefix = "feature-group-demo"
s3_bucket_name = "machine-learning-workshop"
record_identifier_feature_name = "customer_id"


customers_feature_group.create(
    s3_uri=f"s3://{s3_bucket_name}/{prefix}",
    record_identifier_name=record_identifier_feature_name,
    event_time_feature_name=event_time_col,
    role_arn=role,
    enable_online_store=True,
)

orders_feature_group.create(
    s3_uri=f"s3://{s3_bucket_name}/{prefix}",
    record_identifier_name=record_identifier_feature_name,
    event_time_feature_name=event_time_col,
    role_arn=role,
    enable_online_store=True,
)

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:980831117329:feature-group/orders-feature-group-28-23-32-42',
 'ResponseMetadata': {'RequestId': '2f3fbd72-4a24-49df-88a8-503677602c4d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '2f3fbd72-4a24-49df-88a8-503677602c4d',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '109',
   'date': 'Fri, 28 Oct 2022 23:49:28 GMT'},
  'RetryAttempts': 2}}

### Feature Groupの中身を確認

In [23]:
customers_feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:980831117329:feature-group/customers-feature-group-28-23-32-42',
 'FeatureGroupName': 'customers-feature-group-28-23-32-42',
 'RecordIdentifierFeatureName': 'customer_id',
 'EventTimeFeatureName': 'EventTime',
 'FeatureDefinitions': [{'FeatureName': 'customer_id',
   'FeatureType': 'Integral'},
  {'FeatureName': 'city_code', 'FeatureType': 'Integral'},
  {'FeatureName': 'state_code', 'FeatureType': 'Integral'},
  {'FeatureName': 'country_code', 'FeatureType': 'Integral'},
  {'FeatureName': 'EventTime', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2022, 10, 28, 23, 49, 26, 869000, tzinfo=tzlocal()),
 'OnlineStoreConfig': {'EnableOnlineStore': True},
 'OfflineStoreConfig': {'S3StorageConfig': {'S3Uri': 's3://machine-learning-workshop/feature-group-demo',
   'ResolvedOutputS3Uri': 's3://machine-learning-workshop/feature-group-demo/980831117329/sagemaker/us-east-1/offline-store/customers-feature-group-28-23-32-42-1667000

In [24]:
orders_feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:980831117329:feature-group/orders-feature-group-28-23-32-42',
 'FeatureGroupName': 'orders-feature-group-28-23-32-42',
 'RecordIdentifierFeatureName': 'customer_id',
 'EventTimeFeatureName': 'EventTime',
 'FeatureDefinitions': [{'FeatureName': 'customer_id',
   'FeatureType': 'Integral'},
  {'FeatureName': 'order_id', 'FeatureType': 'Integral'},
  {'FeatureName': 'order_status', 'FeatureType': 'Integral'},
  {'FeatureName': 'store_id', 'FeatureType': 'Integral'},
  {'FeatureName': 'EventTime', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2022, 10, 28, 23, 49, 28, 756000, tzinfo=tzlocal()),
 'OnlineStoreConfig': {'EnableOnlineStore': True},
 'OfflineStoreConfig': {'S3StorageConfig': {'S3Uri': 's3://machine-learning-workshop/feature-group-demo',
   'ResolvedOutputS3Uri': 's3://machine-learning-workshop/feature-group-demo/980831117329/sagemaker/us-east-1/offline-store/orders-feature-group-28-23-32-42-1667000968/data'},


### Feature Groupのリストを表示

In [25]:
sagemaker_session.boto_session.client('sagemaker', region_name=region).list_feature_groups()

{'FeatureGroupSummaries': [{'FeatureGroupName': 'orders-feature-group-28-23-32-42',
   'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:980831117329:feature-group/orders-feature-group-28-23-32-42',
   'CreationTime': datetime.datetime(2022, 10, 28, 23, 49, 28, 756000, tzinfo=tzlocal()),
   'FeatureGroupStatus': 'Created'},
  {'FeatureGroupName': 'customers-feature-group-28-23-32-42',
   'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:980831117329:feature-group/customers-feature-group-28-23-32-42',
   'CreationTime': datetime.datetime(2022, 10, 28, 23, 49, 26, 869000, tzinfo=tzlocal()),
   'FeatureGroupStatus': 'Created'}],
 'ResponseMetadata': {'RequestId': '2868bf5c-d2a5-45da-b4cf-1630cd15eee2',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '2868bf5c-d2a5-45da-b4cf-1630cd15eee2',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '487',
   'date': 'Fri, 28 Oct 2022 23:50:26 GMT'},
  'RetryAttempts': 0}}

In [26]:
!aws sagemaker list-feature-groups

{
    "FeatureGroupSummaries": [
        {
            "FeatureGroupName": "orders-feature-group-28-23-32-42",
            "FeatureGroupArn": "arn:aws:sagemaker:us-east-1:980831117329:feature-group/orders-feature-group-28-23-32-42",
            "CreationTime": 1667000968.756,
            "FeatureGroupStatus": "Created"
        },
        {
            "FeatureGroupName": "customers-feature-group-28-23-32-42",
            "FeatureGroupArn": "arn:aws:sagemaker:us-east-1:980831117329:feature-group/customers-feature-group-28-23-32-42",
            "CreationTime": 1667000966.869,
            "FeatureGroupStatus": "Created"
        }
    ]
}


### 既存のFeature Groupを呼び出す
Feature Group名を使えば既存のFeature Groupは簡単に呼び出せる

In [34]:
existing_customers_feature_group = FeatureGroup(
    name=customers_feature_group_name, sagemaker_session=sagemaker_session
)

In [35]:
existing_customers_feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:980831117329:feature-group/customers-feature-group-28-23-32-42',
 'FeatureGroupName': 'customers-feature-group-28-23-32-42',
 'RecordIdentifierFeatureName': 'customer_id',
 'EventTimeFeatureName': 'EventTime',
 'FeatureDefinitions': [{'FeatureName': 'customer_id',
   'FeatureType': 'Integral'},
  {'FeatureName': 'city_code', 'FeatureType': 'Integral'},
  {'FeatureName': 'state_code', 'FeatureType': 'Integral'},
  {'FeatureName': 'country_code', 'FeatureType': 'Integral'},
  {'FeatureName': 'EventTime', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2022, 10, 28, 23, 49, 26, 869000, tzinfo=tzlocal()),
 'OnlineStoreConfig': {'EnableOnlineStore': True},
 'OfflineStoreConfig': {'S3StorageConfig': {'S3Uri': 's3://machine-learning-workshop/feature-group-demo',
   'ResolvedOutputS3Uri': 's3://machine-learning-workshop/feature-group-demo/980831117329/sagemaker/us-east-1/offline-store/customers-feature-group-28-23-32-42-1667000

## データを保存

In [57]:
customers_feature_group.ingest(
    data_frame=customer_df, max_workers=3, wait=True
)

IngestionManagerPandas(feature_group_name='customers-feature-group-28-23-32-42', sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f72d00d1210>, max_workers=3, max_processes=1, profile_name=None, _async_result=<multiprocess.pool.MapResult object at 0x7f72ca3b0c10>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])

In [38]:
orders_feature_group.ingest(
    data_frame=order_df, max_workers=3, wait=True
)

IngestionManagerPandas(feature_group_name='orders-feature-group-28-23-32-42', sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f72d00d1210>, max_workers=3, max_processes=1, profile_name=None, _async_result=<multiprocess.pool.MapResult object at 0x7f72caee5ad0>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])

### Online Storeからの個別のデータの取得

In [54]:
customer_id = 573291
sagemaker_session.boto_session.client(
    'sagemaker-featurestore-runtime',
    region_name=region).get_record(FeatureGroupName=customers_feature_group_name,
                                   RecordIdentifierValueAsString=str(customer_id))

{'ResponseMetadata': {'RequestId': 'bf0e431d-8a66-4037-a83d-19b743d8e4c4',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'bf0e431d-8a66-4037-a83d-19b743d8e4c4',
   'content-type': 'application/json',
   'content-length': '15',
   'date': 'Sat, 29 Oct 2022 04:27:09 GMT'},
  'RetryAttempts': 0}}

### Online Storeからの複数データの取得

In [56]:
records = sagemaker_session.boto_session.client(
    "sagemaker-featurestore-runtime", region_name=region
).batch_get_record(
    Identifiers=[
        {
            "FeatureGroupName": customers_feature_group_name,
            "RecordIdentifiersValueAsString": ["573291", "109382", "828400", "124013"],
        },
        {
            "FeatureGroupName": orders_feature_group_name,
            "RecordIdentifiersValueAsString": ["573291", "109382", "828400", "124013"],
        },
    ]
)
records

{'ResponseMetadata': {'RequestId': 'b995d726-d9da-41ba-a259-dcb953115eb0',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'b995d726-d9da-41ba-a259-dcb953115eb0',
   'content-type': 'application/json',
   'content-length': '2662',
   'date': 'Sat, 29 Oct 2022 04:29:02 GMT'},
  'RetryAttempts': 0},
 'Records': [{'FeatureGroupName': 'customers-feature-group-28-23-32-42',
   'RecordIdentifierValueAsString': '109382',
   'Record': [{'FeatureName': 'customer_id', 'ValueAsString': '109382'},
    {'FeatureName': 'city_code', 'ValueAsString': '2'},
    {'FeatureName': 'state_code', 'ValueAsString': '40'},
    {'FeatureName': 'country_code', 'ValueAsString': '2'},
    {'FeatureName': 'EventTime', 'ValueAsString': '1667000392.0'}]},
  {'FeatureGroupName': 'customers-feature-group-28-23-32-42',
   'RecordIdentifierValueAsString': '124013',
   'Record': [{'FeatureName': 'customer_id', 'ValueAsString': '124013'},
    {'FeatureName': 'city_code', 'ValueAsString': '4'},
    {'FeatureNa

### データの削除

In [63]:
customer_id = 573291
sample_record = sagemaker_session.boto_session.client(
    'sagemaker-featurestore-runtime',
    region_name=region).delete_record(FeatureGroupName=customers_feature_group_name,
                                      RecordIdentifierValueAsString=str(customer_id),
                                      EventTime=str(current_time_sec))

In [64]:
customer_id = 573291
sagemaker_session.boto_session.client(
    'sagemaker-featurestore-runtime',
    region_name=region).get_record(FeatureGroupName=customers_feature_group_name,
                                   RecordIdentifierValueAsString=str(customer_id))

{'ResponseMetadata': {'RequestId': '1b74e23f-f7b6-47f5-8e3a-42bf1388e761',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '1b74e23f-f7b6-47f5-8e3a-42bf1388e761',
   'content-type': 'application/json',
   'content-length': '15',
   'date': 'Sat, 29 Oct 2022 04:36:31 GMT'},
  'RetryAttempts': 0}}

### クエリによるデータの取得

In [65]:
customer_query = customers_feature_group.athena_query()
customer_table = customer_query.table_name
customer_table

'customers-feature-group-28-23-32-42-1667000966'

In [66]:
query_string = f'SELECT * FROM "{customer_table}"'
customer_query.run(query_string=query_string, output_location=f"s3://{s3_bucket_name}/query_results/")
customer_query.wait()

In [67]:
customer_query.as_dataframe()

Unnamed: 0,customer_id,city_code,state_code,country_code,eventtime,write_time,api_invocation_time,is_deleted
0,124013,4.0,5.0,2.0,1667000000.0,2022-10-29 04:36:22.578,2022-10-29 04:31:27.000,False
1,573291,,,,1667000000.0,2022-10-29 04:31:32.047,2022-10-29 04:26:33.000,True
2,573291,,,,1667000000.0,2022-10-29 04:31:32.047,2022-10-29 04:26:39.000,True
3,573291,1.0,49.0,2.0,1667000000.0,2022-10-29 04:31:32.047,2022-10-29 04:31:26.000,False
4,109382,2.0,40.0,2.0,1667000000.0,2022-10-29 04:36:24.686,2022-10-29 04:31:27.000,False
5,828400,3.0,31.0,2.0,1667000000.0,2022-10-29 04:36:26.783,2022-10-29 04:31:26.000,False
6,573291,1.0,49.0,2.0,1667000000.0,2022-10-29 00:29:15.832,2022-10-29 00:24:21.000,False
7,124013,4.0,5.0,2.0,1667000000.0,2022-10-29 00:29:15.817,2022-10-29 00:24:22.000,False
8,828400,3.0,31.0,2.0,1667000000.0,2022-10-29 00:29:15.835,2022-10-29 00:24:21.000,False
9,109382,2.0,40.0,2.0,1667000000.0,2022-10-29 00:29:15.793,2022-10-29 00:24:22.000,False


In [68]:
current_time_sec = int(round(time.time()))
customer_df[event_time_col] = pd.Series([current_time_sec]*len(customer_df), dtype="float64")
customers_feature_group.ingest(
    data_frame=customer_df, max_workers=3, wait=True
)

IngestionManagerPandas(feature_group_name='customers-feature-group-28-23-32-42', sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f72d00d1210>, max_workers=3, max_processes=1, profile_name=None, _async_result=<multiprocess.pool.MapResult object at 0x7f72caa49190>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])

In [69]:
customer_query = customers_feature_group.athena_query()
customer_query.run(query_string=query_string, output_location=f"s3://{s3_bucket_name}/query_results/")
customer_query.wait()
customer_query.as_dataframe()

Unnamed: 0,customer_id,city_code,state_code,country_code,eventtime,write_time,api_invocation_time,is_deleted
0,124013,4.0,5.0,2.0,1667000000.0,2022-10-29 00:29:15.817,2022-10-29 00:24:22.000,False
1,124013,4.0,5.0,2.0,1667000000.0,2022-10-29 04:36:22.578,2022-10-29 04:31:27.000,False
2,573291,,,,1667000000.0,2022-10-29 04:31:32.047,2022-10-29 04:26:33.000,True
3,573291,,,,1667000000.0,2022-10-29 04:31:32.047,2022-10-29 04:26:39.000,True
4,573291,1.0,49.0,2.0,1667000000.0,2022-10-29 04:31:32.047,2022-10-29 04:31:26.000,False
5,828400,3.0,31.0,2.0,1667000000.0,2022-10-29 04:36:26.783,2022-10-29 04:31:26.000,False
6,573291,1.0,49.0,2.0,1667000000.0,2022-10-29 00:29:15.832,2022-10-29 00:24:21.000,False
7,109382,2.0,40.0,2.0,1667000000.0,2022-10-29 04:36:24.686,2022-10-29 04:31:27.000,False
8,109382,2.0,40.0,2.0,1667000000.0,2022-10-29 00:29:15.793,2022-10-29 00:24:22.000,False
9,828400,3.0,31.0,2.0,1667000000.0,2022-10-29 00:29:15.835,2022-10-29 00:24:21.000,False


In [70]:
query_string = f"""
SELECT *
FROM
    (SELECT *,
         row_number()
        OVER (PARTITION BY {record_identifier_feature_name}
    ORDER BY {event_time_col} desc, Api_Invocation_Time DESC, write_time DESC) AS row_num
    FROM "{customer_table}")
WHERE row_num = 1;
"""
customer_query.run(query_string=query_string, output_location=f"s3://{s3_bucket_name}/query_results/")
customer_query.wait()
customer_query.as_dataframe()

Unnamed: 0,customer_id,city_code,state_code,country_code,eventtime,write_time,api_invocation_time,is_deleted,row_num
0,109382,2,40,2,1667000000.0,2022-10-29 04:36:24.686,2022-10-29 04:31:27.000,False,1
1,573291,1,49,2,1667000000.0,2022-10-29 04:31:32.047,2022-10-29 04:31:26.000,False,1
2,828400,3,31,2,1667000000.0,2022-10-29 04:36:26.783,2022-10-29 04:31:26.000,False,1
3,124013,4,5,2,1667000000.0,2022-10-29 04:36:22.578,2022-10-29 04:31:27.000,False,1


In [71]:
customers_feature_group.update_feature_metadata(
    feature_name="customer_id",
    description="カスタマーの識別番号",
)

{'ResponseMetadata': {'RequestId': '817b3f5e-c8c0-449c-888e-3945f3638f78',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '817b3f5e-c8c0-449c-888e-3945f3638f78',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Sat, 29 Oct 2022 04:37:08 GMT'},
  'RetryAttempts': 0}}

In [72]:
customers_feature_group.describe_feature_metadata(feature_name="customer_id")

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:980831117329:feature-group/customers-feature-group-28-23-32-42',
 'FeatureGroupName': 'customers-feature-group-28-23-32-42',
 'FeatureName': 'customer_id',
 'FeatureType': 'Integral',
 'CreationTime': datetime.datetime(2022, 10, 28, 23, 49, 26, 869000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2022, 10, 29, 4, 37, 9, 135000, tzinfo=tzlocal()),
 'Description': 'カスタマーの識別番号',
 'Parameters': [],
 'ResponseMetadata': {'RequestId': '7ab71afa-60e6-4ca5-a011-a8e409dc8778',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '7ab71afa-60e6-4ca5-a011-a8e409dc8778',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '353',
   'date': 'Sat, 29 Oct 2022 04:37:17 GMT'},
  'RetryAttempts': 0}}

In [73]:
from sagemaker.feature_store.inputs import FeatureParameter

customers_feature_group.update_feature_metadata(
    feature_name="customer_id",
    parameter_additions=[FeatureParameter("idType", "primaryKey")],
)

{'ResponseMetadata': {'RequestId': '5d08b5f5-2102-435c-8a4e-5a1bbca5d250',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '5d08b5f5-2102-435c-8a4e-5a1bbca5d250',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Sat, 29 Oct 2022 04:37:17 GMT'},
  'RetryAttempts': 0}}

In [74]:
customers_feature_group.describe_feature_metadata(feature_name="customer_id")

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:980831117329:feature-group/customers-feature-group-28-23-32-42',
 'FeatureGroupName': 'customers-feature-group-28-23-32-42',
 'FeatureName': 'customer_id',
 'FeatureType': 'Integral',
 'CreationTime': datetime.datetime(2022, 10, 28, 23, 49, 26, 869000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2022, 10, 29, 4, 37, 18, 2000, tzinfo=tzlocal()),
 'Description': 'カスタマーの識別番号',
 'Parameters': [{'Key': 'idType', 'Value': 'primaryKey'}],
 'ResponseMetadata': {'RequestId': '4bce4467-9009-4f39-b3fd-69a0ee5b23fe',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '4bce4467-9009-4f39-b3fd-69a0ee5b23fe',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '390',
   'date': 'Sat, 29 Oct 2022 04:37:18 GMT'},
  'RetryAttempts': 0}}

In [75]:
customer_df.loc[0, 'city_code'] = 5
customer_df

Unnamed: 0,customer_id,city_code,state_code,country_code,EventTime
0,573291,5,49,2,1667018000.0
1,109382,2,40,2,1667018000.0
2,828400,3,31,2,1667018000.0
3,124013,4,5,2,1667018000.0


In [76]:
customers_feature_group.ingest(
    data_frame=customer_df, max_workers=3, wait=True
)

IngestionManagerPandas(feature_group_name='customers-feature-group-28-23-32-42', sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f72d00d1210>, max_workers=3, max_processes=1, profile_name=None, _async_result=<multiprocess.pool.MapResult object at 0x7f72cab2c7d0>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])

In [77]:
sagemaker_session.boto_session.client('sagemaker-featurestore-runtime', region_name=region).get_record(FeatureGroupName=customers_feature_group_name, RecordIdentifierValueAsString=str(customer_id))

{'ResponseMetadata': {'RequestId': '03f27d3c-dcd3-41d0-ae01-ee1b6ce7fd07',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '03f27d3c-dcd3-41d0-ae01-ee1b6ce7fd07',
   'content-type': 'application/json',
   'content-length': '275',
   'date': 'Sat, 29 Oct 2022 04:37:21 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'customer_id', 'ValueAsString': '573291'},
  {'FeatureName': 'city_code', 'ValueAsString': '5'},
  {'FeatureName': 'state_code', 'ValueAsString': '49'},
  {'FeatureName': 'country_code', 'ValueAsString': '2'},
  {'FeatureName': 'EventTime', 'ValueAsString': '1667018210.0'}]}

In [78]:
customer_updated_df = pd.read_csv('s3://machine-learning-workshop/feature-store/feature_store_introduction_customer_updated.csv')
current_time_sec = int(round(time.time()))
customer_updated_df[event_time_col] = pd.Series([]*len(customer_updated_df), dtype="float64")
customer_updated_df

Unnamed: 0,customer_id,city_code,state_code,country_code,email,name,EventTime
0,573291,1,49,2,john.lee@gmail.com,John Lee,
1,109382,2,40,2,olivequil@gmail.com,Olive Quil,
2,828400,3,31,2,liz.knee@gmail.com,Liz Knee,
3,124013,4,5,2,eileenbook@gmail.com,Eileen Book,


In [79]:
customer_df

Unnamed: 0,customer_id,city_code,state_code,country_code,EventTime
0,573291,5,49,2,1667018000.0
1,109382,2,40,2,1667018000.0
2,828400,3,31,2,1667018000.0
3,124013,4,5,2,1667018000.0


In [80]:
customers_feature_group.ingest(data_frame=customer_updated_df, max_workers=3, wait=True)

Failed to ingest row 2: An error occurred (ValidationError) when calling the PutRecord operation: Validation Error: Provided values for Feature(s) [email, name] which do not exist in FeatureGroup [customers-feature-group-28-23-32-42].
Failed to ingest row 0: An error occurred (ValidationError) when calling the PutRecord operation: Validation Error: Provided values for Feature(s) [email, name] which do not exist in FeatureGroup [customers-feature-group-28-23-32-42].
Failed to ingest row 3: An error occurred (ValidationError) when calling the PutRecord operation: Validation Error: Provided values for Feature(s) [email, name] which do not exist in FeatureGroup [customers-feature-group-28-23-32-42].
Failed to ingest row 2 to 4
Failed to ingest row 1: An error occurred (ValidationError) when calling the PutRecord operation: Validation Error: Provided values for Feature(s) [email, name] which do not exist in FeatureGroup [customers-feature-group-28-23-32-42].
Failed to ingest row 0 to 2


IngestionError: [2, 3, 0, 1] -> Failed to ingest some data into FeatureGroup customers-feature-group-28-23-32-42

In [95]:
customers_feature_group.delete()

In [96]:
orders_feature_group.delete()