In [1]:
import pandas as pd

# Load the sample customer rows from a CSV in the working directory; this frame
# is what later cells ingest into the feature group.
customer_df = pd.read_csv('feature_store_sample.csv')

In [2]:
import sys

import boto3
import pandas as pd
import numpy as np
import sagemaker
from sagemaker.session import Session
from sagemaker import get_execution_role

# Session used by every later Feature Store call; the region is read from it.
sagemaker_session = Session()
region = sagemaker_session.boto_region_name

# Execution role, passed as role_arn when the feature group is created.
role = get_execution_role()

In [3]:
import time
from time import gmtime, strftime

# Day-hour-minute-second stamp (UTC) taken when the cell runs, so the name
# varies with the time of execution.
_name_suffix = strftime('%d-%H-%M-%S', gmtime())
customers_feature_group_name = 'customers-feature-group-' + _name_suffix

In [4]:
from sagemaker.feature_store.feature_group import FeatureGroup

# Handle that the later cells use to define, create, ingest into, query and
# delete the feature group.
customers_feature_group = FeatureGroup(
    name=customers_feature_group_name,
    sagemaker_session=sagemaker_session,
)

In [5]:
import time

# One timestamp (whole seconds since the epoch) shared by every row; the same
# value is reused later as the EventTime of the delete_record call.
current_time_sec = int(round(time.time()))

event_time_col = "EventTime"

# Broadcast the scalar rather than assigning a freshly built Series: Series
# assignment aligns on index labels and would leave NaN in any row whose label
# is not in 0..len-1. Casting to float keeps the column float64, so the
# feature is still inferred as Fractional.
customer_df[event_time_col] = float(current_time_sec)
customer_df

Unnamed: 0,customer_id,city_code,EventTime
0,100,1,1669423000.0
1,101,2,1669423000.0
2,102,3,1669423000.0
3,103,4,1669423000.0


In [6]:
# Derive one feature definition (name + type) per DataFrame column; the
# resulting list is shown as the cell output.
customers_feature_group.load_feature_definitions(data_frame=customer_df)

[FeatureDefinition(feature_name='customer_id', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='city_code', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>),
 FeatureDefinition(feature_name='EventTime', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>)]

In [7]:
import time

prefix = "feature-group-demo"
s3_bucket_name = "machine-learning-workshop"
record_identifier_feature_name = "customer_id"

# Load the feature definitions from the DataFrame, then request creation of
# the feature group with both an S3 location and the online store enabled.
customers_feature_group.load_feature_definitions(data_frame=customer_df)
create_response = customers_feature_group.create(
    s3_uri=f"s3://{s3_bucket_name}/{prefix}",
    record_identifier_name=record_identifier_feature_name,
    event_time_feature_name=event_time_col,
    role_arn=role,
    enable_online_store=True,
)

# create() only submits the request. Poll until the group leaves the
# "Creating" state so the following ingest cell does not run against a
# feature group that is not ready yet.
feature_group_status = customers_feature_group.describe().get("FeatureGroupStatus")
while feature_group_status == "Creating":
    time.sleep(5)
    feature_group_status = customers_feature_group.describe().get("FeatureGroupStatus")

# Any terminal state other than "Created" means the group is unusable.
if feature_group_status != "Created":
    raise RuntimeError(
        f"Feature group {customers_feature_group_name} did not reach 'Created' "
        f"(status: {feature_group_status})"
    )

# Keep the create response as the cell output.
create_response

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:980831117329:feature-group/customers-feature-group-26-00-39-31',
 'ResponseMetadata': {'RequestId': 'c66cdbec-0be7-4402-8ad5-0e471870bf22',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'c66cdbec-0be7-4402-8ad5-0e471870bf22',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '112',
   'date': 'Sat, 26 Nov 2022 00:39:35 GMT'},
  'RetryAttempts': 0}}

In [9]:
# Write every row of customer_df to the feature group using 3 workers
# (wait=True); the returned ingestion manager is the cell output.
ingestion_manager = customers_feature_group.ingest(
    data_frame=customer_df,
    max_workers=3,
    wait=True,
)
ingestion_manager

IngestionManagerPandas(feature_group_name='customers-feature-group-26-00-39-31', sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7fe1c0e25d90>, max_workers=3, max_processes=1, profile_name=None, _async_result=<multiprocess.pool.MapResult object at 0x7fe1c0c06650>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])

In [10]:
customer_id = 100

# Feature Store runtime client built from the notebook's boto session.
featurestore_runtime = sagemaker_session.boto_session.client(
    'sagemaker-featurestore-runtime', region_name=region
)

# Look up the record whose identifier is customer_id; the raw response is the
# cell output.
featurestore_runtime.get_record(
    FeatureGroupName=customers_feature_group_name,
    RecordIdentifierValueAsString=str(customer_id),
)

{'ResponseMetadata': {'RequestId': 'f0281eff-af69-442b-b41d-4e32604c0e7d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f0281eff-af69-442b-b41d-4e32604c0e7d',
   'content-type': 'application/json',
   'content-length': '171',
   'date': 'Sat, 26 Nov 2022 00:40:41 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'customer_id', 'ValueAsString': '100'},
  {'FeatureName': 'city_code', 'ValueAsString': '1'},
  {'FeatureName': 'EventTime', 'ValueAsString': '1669423173.0'}]}

In [11]:
# A single AthenaQuery handle is enough: it supplies the table name and also
# runs the query (the original built a second, identical handle).
customer_query = customers_feature_group.athena_query()
customer_table = customer_query.table_name

# Select the rows stored for the customer looked up above.
query_string = f'SELECT * FROM "{customer_table}" WHERE customer_id = {customer_id}'
customer_query.run(query_string=query_string, output_location=f"s3://{s3_bucket_name}/query_results/")
customer_query.wait()

# NOTE(review): the recorded result is empty - presumably the offline store had
# not been populated yet when the query ran; confirm before relying on it.
customer_query.as_dataframe()

Unnamed: 0,customer_id,city_code,eventtime,write_time,api_invocation_time,is_deleted


In [12]:
# Change city_code for the row labelled 0 (customer_id 100 in the displayed
# data) in place; EventTime is left unchanged.
customer_df.loc[0, "city_code"] = 5
customer_df

Unnamed: 0,customer_id,city_code,EventTime
0,100,5,1669423000.0
1,101,2,1669423000.0
2,102,3,1669423000.0
3,103,4,1669423000.0


In [13]:
# Ingest the whole DataFrame again so the modified city_code of row 0 is
# written to the feature group; the ingestion manager is the cell output.
reingestion_manager = customers_feature_group.ingest(
    data_frame=customer_df,
    max_workers=3,
    wait=True,
)
reingestion_manager

IngestionManagerPandas(feature_group_name='customers-feature-group-26-00-39-31', sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7fe1c0e25d90>, max_workers=3, max_processes=1, profile_name=None, _async_result=<multiprocess.pool.MapResult object at 0x7fe1c0171b90>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])

In [14]:
customer_id = 100

# Feature Store runtime client built from the notebook's boto session.
featurestore_runtime = sagemaker_session.boto_session.client(
    'sagemaker-featurestore-runtime', region_name=region
)

# Read the record back after the re-ingestion; the response is the cell output.
featurestore_runtime.get_record(
    FeatureGroupName=customers_feature_group_name,
    RecordIdentifierValueAsString=str(customer_id),
)

{'ResponseMetadata': {'RequestId': '1cdba659-5a5b-4e07-9c3d-533e41a068ce',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '1cdba659-5a5b-4e07-9c3d-533e41a068ce',
   'content-type': 'application/json',
   'content-length': '171',
   'date': 'Sat, 26 Nov 2022 00:41:08 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'customer_id', 'ValueAsString': '100'},
  {'FeatureName': 'city_code', 'ValueAsString': '5'},
  {'FeatureName': 'EventTime', 'ValueAsString': '1669423173.0'}]}

In [21]:
# Reuse one AthenaQuery handle for the table name and for running the query
# (the original created a second, identical handle before run()).
customer_query = customers_feature_group.athena_query()
customer_table = customer_query.table_name

# Same per-customer lookup as before, issued after the re-ingestion.
query_string = f'SELECT * FROM "{customer_table}" WHERE customer_id = {customer_id}'
customer_query.run(query_string=query_string, output_location=f"s3://{s3_bucket_name}/query_results/")
customer_query.wait()

# NOTE(review): the recorded result is empty - presumably the offline store had
# not been populated yet when the query ran; confirm before relying on it.
customer_query.as_dataframe()

Unnamed: 0,customer_id,city_code,eventtime,write_time,api_invocation_time,is_deleted


In [22]:
# Feature Store runtime client built from the notebook's boto session.
featurestore_runtime = sagemaker_session.boto_session.client(
    'sagemaker-featurestore-runtime', region_name=region
)

# Delete the record for customer_id. EventTime reuses the timestamp assigned to
# the rows before ingestion; sample_record holds the delete_record response.
sample_record = featurestore_runtime.delete_record(
    FeatureGroupName=customers_feature_group_name,
    RecordIdentifierValueAsString=str(customer_id),
    EventTime=str(current_time_sec),
)

In [23]:
# Feature Store runtime client built from the notebook's boto session.
featurestore_runtime = sagemaker_session.boto_session.client(
    'sagemaker-featurestore-runtime', region_name=region
)

# Look the customer up again after the delete; the recorded response carries
# no 'Record' entry.
featurestore_runtime.get_record(
    FeatureGroupName=customers_feature_group_name,
    RecordIdentifierValueAsString=str(customer_id),
)

{'ResponseMetadata': {'RequestId': 'c378e2f1-1b8a-4ec5-9a86-b56efa848008',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'c378e2f1-1b8a-4ec5-9a86-b56efa848008',
   'content-type': 'application/json',
   'content-length': '15',
   'date': 'Sat, 26 Nov 2022 00:43:30 GMT'},
  'RetryAttempts': 0}}

In [17]:
# Reuse one AthenaQuery handle for the table name and for running the query
# (the original created a second, identical handle before run()).
customer_query = customers_feature_group.athena_query()
customer_table = customer_query.table_name

# Same per-customer lookup, issued after the record was deleted.
query_string = f'SELECT * FROM "{customer_table}" WHERE customer_id = {customer_id}'
customer_query.run(query_string=query_string, output_location=f"s3://{s3_bucket_name}/query_results/")
customer_query.wait()

# NOTE(review): the recorded result is empty here as in the earlier runs, so it
# does not by itself show the effect of the delete; confirm against the
# offline store once it has been populated.
customer_query.as_dataframe()

Unnamed: 0,customer_id,city_code,eventtime,write_time,api_invocation_time,is_deleted


In [57]:
# Clean up: delete the feature group that was created earlier in this notebook.
customers_feature_group.delete()