In [1]:
import io
import sys
from time import gmtime, sleep, strftime

import boto3
import numpy as np
import pandas as pd
import sagemaker
from sagemaker import get_execution_role
from sagemaker.session import Session



In [2]:
# Session-derived settings: the region and the default S3 bucket both come
# from the same SageMaker session object.
sagemaker_session = Session()
s3_bucket_name = sagemaker_session.default_bucket()
region = sagemaker_session.boto_region_name

# Execution role handed to the feature group create calls, and the key prefix
# appended to the bucket when building the offline-store S3 URI.
role = get_execution_role()
prefix = "aws-mlops-sagemaker"


In [3]:
# Show the AWS region taken from the SageMaker session.
region

'us-west-2'

In [4]:
# Show the name of the session's default S3 bucket.
s3_bucket_name

'sagemaker-us-west-2-975050337104'

In [5]:
# Show the execution role returned by get_execution_role().
# NOTE(review): the displayed ARN contains the AWS account ID — consider
# clearing this output before sharing the notebook.
role

'arn:aws:iam::975050337104:role/service-role/AmazonSageMaker-ExecutionRole-20250401T145569'

In [21]:
# Read the two input tables from CSV files under the local "transformed" folder.
orders_data = pd.read_csv("transformed/orders.csv")
customer_data = pd.read_csv("transformed/customers.csv")

In [22]:
# Preview the first rows of the customers table.
customer_data.head()

Unnamed: 0,customer_id,sex,is_married,event_time,age_18-29,age_30-39,age_40-49,age_50-59,age_60-69,age_70-plus,n_days_active
0,C1,0,1,2024-05-02T05:39:10.965Z,0,0,0,0,0,1,0.203425
1,C2,1,1,2024-05-02T05:39:10.966Z,0,0,0,0,0,1,0.859589
2,C3,1,1,2024-05-02T05:39:10.967Z,0,1,0,0,0,0,0.527397
3,C4,1,0,2024-05-02T05:39:10.967Z,0,0,0,0,1,0,0.780822
4,C5,1,0,2024-05-02T05:39:10.968Z,0,0,0,0,1,0,0.691096


In [23]:
# Preview the first rows of the orders table.
orders_data.head()

Unnamed: 0,order_id,customer_id,product_id,purchase_amount,is_reordered,event_time,n_days_since_last_purchase
0,O1,C9765,P11660,0.572673,0,2024-05-02T05:39:17.172Z,0.273256
1,O2,C3674,P6868,0.693861,0,2024-05-02T05:39:17.172Z,0.846899
2,O3,C2139,P4749,0.556139,1,2024-05-02T05:39:17.172Z,0.408915
3,O4,C7794,P542,0.043069,1,2024-05-02T05:39:17.172Z,0.843023
4,O5,C2229,P7605,0.463861,1,2024-05-02T05:39:17.172Z,0.265504


In [24]:
# Build the feature group names (the groups themselves are created in later cells).
from time import gmtime, strftime, sleep

# Format the UTC timestamp exactly once: calling strftime(gmtime()) separately
# for each name could straddle a second boundary and give the two groups
# different suffixes.
feature_group_suffix = strftime("%d-%H-%M-%S", gmtime())

customers_feature_group_name = "customers-fg-" + feature_group_suffix
orders_feature_group_name = "orders-fg-" + feature_group_suffix

In [25]:
# Show the generated name of the customers feature group.
customers_feature_group_name

'customers-fg-02-04-57-46'

In [26]:
# Show the generated name of the orders feature group.
orders_feature_group_name

'orders-fg-02-04-57-46'

In [27]:
from sagemaker.feature_store.feature_group import FeatureGroup

# Instantiate one FeatureGroup object per generated name; both share the same
# SageMaker session.
orders_feature_group = FeatureGroup(
    name=orders_feature_group_name,
    sagemaker_session=sagemaker_session,
)
customers_feature_group = FeatureGroup(
    name=customers_feature_group_name,
    sagemaker_session=sagemaker_session,
)

In [28]:
# Name of the feature passed as record_identifier_name when the feature groups
# are created.
# NOTE(review): the same identifier is used for the orders feature group.
# Presumably the online store keeps only the latest record per identifier
# value, so orders that share a customer_id would overwrite one another there —
# confirm this is intended (order_id looks like the unique key of that table).
record_identifier_feature_name = "customer_id"

In [29]:
# Summarize the orders table: column names, non-null counts and dtypes.
orders_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 7 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   order_id                    100000 non-null  object 
 1   customer_id                 100000 non-null  object 
 2   product_id                  100000 non-null  object 
 3   purchase_amount             100000 non-null  float64
 4   is_reordered                100000 non-null  int64  
 5   event_time                  100000 non-null  object 
 6   n_days_since_last_purchase  100000 non-null  float64
dtypes: float64(2), int64(1), object(4)
memory usage: 5.3+ MB


In [30]:
# Derive the customers feature definitions (feature names and types) from the
# customers DataFrame.
customers_feature_group.load_feature_definitions(data_frame = customer_data)

[FeatureDefinition(feature_name='customer_id', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='sex', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>, collection_type=None),
 FeatureDefinition(feature_name='is_married', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>, collection_type=None),
 FeatureDefinition(feature_name='event_time', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='age_18-29', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>, collection_type=None),
 FeatureDefinition(feature_name='age_30-39', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>, collection_type=None),
 FeatureDefinition(feature_name='age_40-49', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>, collection_type=None),
 FeatureDefinition(feature_name='age_50-59', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>, collection_type=None),
 FeatureDefinition(feature_name='age_60-69

In [31]:
# Derive the orders feature definitions (feature names and types) from the
# orders DataFrame.
orders_feature_group.load_feature_definitions(data_frame = orders_data)

[FeatureDefinition(feature_name='order_id', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='customer_id', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='product_id', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='purchase_amount', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>, collection_type=None),
 FeatureDefinition(feature_name='is_reordered', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>, collection_type=None),
 FeatureDefinition(feature_name='event_time', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='n_days_since_last_purchase', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>, collection_type=None)]

In [32]:
# Create the customers feature group, with its offline store under the
# session's default bucket and the online store enabled.
customers_create_response = customers_feature_group.create(
    s3_uri=f"s3://{s3_bucket_name}/{prefix}",
    record_identifier_name=record_identifier_feature_name,
    event_time_feature_name="event_time",
    role_arn=role,
    enable_online_store=True,
)

# create() returns once the request is accepted, while the group is still being
# provisioned. Poll until it leaves the "Creating" status so that the cells
# below (metadata update, ingest, get_record) also work under "Run All".
customers_fg_status = customers_feature_group.describe().get("FeatureGroupStatus")
while customers_fg_status == "Creating":
    sleep(5)
    customers_fg_status = customers_feature_group.describe().get("FeatureGroupStatus")

if customers_fg_status != "Created":
    raise RuntimeError(
        f"Feature group {customers_feature_group_name} was not created "
        f"(status: {customers_fg_status})"
    )

# Keep the create response as the cell output.
customers_create_response

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:975050337104:feature-group/customers-fg-02-04-57-46',
 'ResponseMetadata': {'RequestId': '153c5640-6b24-4c96-837b-0161c40776bd',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '153c5640-6b24-4c96-837b-0161c40776bd',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '101',
   'date': 'Wed, 02 Apr 2025 04:57:59 GMT'},
  'RetryAttempts': 0}}

In [33]:
# Create the orders feature group, with its offline store under the session's
# default bucket and the online store enabled.
orders_create_response = orders_feature_group.create(
    s3_uri=f"s3://{s3_bucket_name}/{prefix}",
    record_identifier_name=record_identifier_feature_name,
    event_time_feature_name="event_time",
    role_arn=role,
    enable_online_store=True,
)

# create() returns once the request is accepted, while the group is still being
# provisioned. Poll until it leaves the "Creating" status so that the cells
# below (ingest, batch_get_record) also work under "Run All".
orders_fg_status = orders_feature_group.describe().get("FeatureGroupStatus")
while orders_fg_status == "Creating":
    sleep(5)
    orders_fg_status = orders_feature_group.describe().get("FeatureGroupStatus")

if orders_fg_status != "Created":
    raise RuntimeError(
        f"Feature group {orders_feature_group_name} was not created "
        f"(status: {orders_fg_status})"
    )

# Keep the create response as the cell output.
orders_create_response

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:975050337104:feature-group/orders-fg-02-04-57-46',
 'ResponseMetadata': {'RequestId': '26eefb03-25ec-4921-a5ec-9cfa7f6448c6',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '26eefb03-25ec-4921-a5ec-9cfa7f6448c6',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '98',
   'date': 'Wed, 02 Apr 2025 04:58:29 GMT'},
  'RetryAttempts': 0}}

In [34]:
# Describe the customers feature group (ARN, identifier and event-time feature
# names, feature definitions, creation time, ...).
customers_feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:975050337104:feature-group/customers-fg-02-04-57-46',
 'FeatureGroupName': 'customers-fg-02-04-57-46',
 'RecordIdentifierFeatureName': 'customer_id',
 'EventTimeFeatureName': 'event_time',
 'FeatureDefinitions': [{'FeatureName': 'customer_id',
   'FeatureType': 'String'},
  {'FeatureName': 'sex', 'FeatureType': 'Integral'},
  {'FeatureName': 'is_married', 'FeatureType': 'Integral'},
  {'FeatureName': 'event_time', 'FeatureType': 'String'},
  {'FeatureName': 'age_18-29', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_30-39', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_40-49', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_50-59', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_60-69', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_70-plus', 'FeatureType': 'Integral'},
  {'FeatureName': 'n_days_active', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2025, 4, 2, 4, 57, 59, 208000, tzinfo=tzlocal()),


In [35]:
# Describe the orders feature group (ARN, feature definitions, online and
# offline store configuration, ...).
orders_feature_group.describe() 

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:975050337104:feature-group/orders-fg-02-04-57-46',
 'FeatureGroupName': 'orders-fg-02-04-57-46',
 'RecordIdentifierFeatureName': 'customer_id',
 'EventTimeFeatureName': 'event_time',
 'FeatureDefinitions': [{'FeatureName': 'order_id', 'FeatureType': 'String'},
  {'FeatureName': 'customer_id', 'FeatureType': 'String'},
  {'FeatureName': 'product_id', 'FeatureType': 'String'},
  {'FeatureName': 'purchase_amount', 'FeatureType': 'Fractional'},
  {'FeatureName': 'is_reordered', 'FeatureType': 'Integral'},
  {'FeatureName': 'event_time', 'FeatureType': 'String'},
  {'FeatureName': 'n_days_since_last_purchase', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2025, 4, 2, 4, 58, 29, 417000, tzinfo=tzlocal()),
 'OnlineStoreConfig': {'EnableOnlineStore': True},
 'OfflineStoreConfig': {'S3StorageConfig': {'S3Uri': 's3://sagemaker-us-west-2-975050337104/aws-mlops-sagemaker',
   'ResolvedOutputS3Uri': 's3://sagemaker-us-west-2-97505

In [36]:
# Attach metadata to a single feature: a free-text description plus a
# key/value parameter (used as a search filter later in the notebook).
from sagemaker.feature_store.inputs import FeatureParameter

customer_id_parameters = [FeatureParameter("idType", "primarykey")]

customers_feature_group.update_feature_metadata(
    feature_name="customer_id",
    description="The ID of the customer, it is also part of Order feature group",
    parameter_additions=customer_id_parameters,
)

{'ResponseMetadata': {'RequestId': 'e6a66869-203f-4c1e-8b30-4b17bce9c22b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'e6a66869-203f-4c1e-8b30-4b17bce9c22b',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Wed, 02 Apr 2025 05:03:53 GMT',
   'content-length': '0'},
  'RetryAttempts': 0}}

In [38]:
# Read back the metadata (description and parameters) of the customer_id feature.
customers_feature_group.describe_feature_metadata("customer_id")

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:975050337104:feature-group/customers-fg-02-04-57-46',
 'FeatureGroupName': 'customers-fg-02-04-57-46',
 'FeatureName': 'customer_id',
 'FeatureType': 'String',
 'CreationTime': datetime.datetime(2025, 4, 2, 4, 57, 59, 208000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2025, 4, 2, 5, 3, 53, 330000, tzinfo=tzlocal()),
 'Description': 'The ID of the customer, it is also part of Order feature group',
 'Parameters': [{'Key': 'idType', 'Value': 'primarykey'}],
 'ResponseMetadata': {'RequestId': '1695d05e-60f1-4ff8-a4f6-c597fc8d38d5',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '1695d05e-60f1-4ff8-a4f6-c597fc8d38d5',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '397',
   'date': 'Wed, 02 Apr 2025 05:07:25 GMT'},
  'RetryAttempts': 0}}

In [39]:
# Search feature metadata for features whose group name contains
# "customers-fg" and whose idType parameter equals "primarykey".
sagemaker_client = sagemaker_session.boto_session.client("sagemaker", region_name=region)

feature_metadata_filters = [
    {"Name": "FeatureGroupName", "Operator": "Contains", "Value": "customers-fg"},
    {"Name": "Parameters.idType", "Operator": "Equals", "Value": "primarykey"},
]

sagemaker_client.search(
    Resource="FeatureMetadata",
    SearchExpression={"Filters": feature_metadata_filters},
)

{'Results': [{'FeatureMetadata': {'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:975050337104:feature-group/customers-fg-02-04-57-46',
    'FeatureGroupName': 'customers-fg-02-04-57-46',
    'FeatureName': 'customer_id',
    'FeatureType': 'String',
    'CreationTime': datetime.datetime(2025, 4, 2, 4, 57, 59, tzinfo=tzlocal()),
    'LastModifiedTime': datetime.datetime(2025, 4, 2, 5, 3, 53, tzinfo=tzlocal()),
    'Description': 'The ID of the customer, it is also part of Order feature group',
    'Parameters': [{'Key': 'idType', 'Value': 'primarykey'}]}}],
 'ResponseMetadata': {'RequestId': 'd758b614-7247-424e-8716-9b938b936668',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd758b614-7247-424e-8716-9b938b936668',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '507',
   'date': 'Wed, 02 Apr 2025 05:07:28 GMT'},
  'RetryAttempts': 0}}

In [40]:
# Ingest the customers DataFrame into its feature group (max_workers=3, wait=True).
customers_feature_group.ingest(
    data_frame=customer_data,
    max_workers=3,
    wait=True,
)

[2;36m[04/02/25 05:09:38][0m[2;36m [0m[1;94mINFO    [0m Started ingesting index [1;36m0[0m to   ]8;id=221168;file:///opt/conda/lib/python3.11/site-packages/sagemaker/feature_store/feature_group.py\[2mfeature_group.py[0m]8;;\[2m:[0m]8;id=936074;file:///opt/conda/lib/python3.11/site-packages/sagemaker/feature_store/feature_group.py#251\[2m251[0m]8;;\
[2;36m                    [0m         [1;36m3334[0m                           [2m                    [0m
[2;36m                   [0m[2;36m [0m[1;94mINFO    [0m Started ingesting index [1;36m6668[0m   ]8;id=683837;file:///opt/conda/lib/python3.11/site-packages/sagemaker/feature_store/feature_group.py\[2mfeature_group.py[0m]8;;\[2m:[0m]8;id=141116;file:///opt/conda/lib/python3.11/site-packages/sagemaker/feature_store/feature_group.py#251\[2m251[0m]8;;\
[2;36m                    [0m         to [1;36m10000[0m                       [2m                    [0m
[2;36m                   [0m[

IngestionManagerPandas(feature_group_name='customers-fg-02-04-57-46', feature_definitions={'customer_id': {'FeatureName': 'customer_id', 'FeatureType': 'String'}, 'sex': {'FeatureName': 'sex', 'FeatureType': 'Integral'}, 'is_married': {'FeatureName': 'is_married', 'FeatureType': 'Integral'}, 'event_time': {'FeatureName': 'event_time', 'FeatureType': 'String'}, 'age_18-29': {'FeatureName': 'age_18-29', 'FeatureType': 'Integral'}, 'age_30-39': {'FeatureName': 'age_30-39', 'FeatureType': 'Integral'}, 'age_40-49': {'FeatureName': 'age_40-49', 'FeatureType': 'Integral'}, 'age_50-59': {'FeatureName': 'age_50-59', 'FeatureType': 'Integral'}, 'age_60-69': {'FeatureName': 'age_60-69', 'FeatureType': 'Integral'}, 'age_70-plus': {'FeatureName': 'age_70-plus', 'FeatureType': 'Integral'}, 'n_days_active': {'FeatureName': 'n_days_active', 'FeatureType': 'Fractional'}}, sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f434df27a50>, sagemaker_session=<sagemaker.session.Session o

In [41]:
# Ingest the orders DataFrame into its feature group (max_workers=3, wait=True).
orders_feature_group.ingest(
    data_frame=orders_data,
    max_workers=3,
    wait=True,
)

[2;36m[04/02/25 05:10:35][0m[2;36m [0m[1;94mINFO    [0m Started ingesting index [1;36m66668[0m  ]8;id=940576;file:///opt/conda/lib/python3.11/site-packages/sagemaker/feature_store/feature_group.py\[2mfeature_group.py[0m]8;;\[2m:[0m]8;id=888813;file:///opt/conda/lib/python3.11/site-packages/sagemaker/feature_store/feature_group.py#251\[2m251[0m]8;;\
[2;36m                    [0m         to [1;36m100000[0m                      [2m                    [0m
[2;36m                   [0m[2;36m [0m[1;94mINFO    [0m Started ingesting index [1;36m0[0m to   ]8;id=845117;file:///opt/conda/lib/python3.11/site-packages/sagemaker/feature_store/feature_group.py\[2mfeature_group.py[0m]8;;\[2m:[0m]8;id=663051;file:///opt/conda/lib/python3.11/site-packages/sagemaker/feature_store/feature_group.py#251\[2m251[0m]8;;\
[2;36m                    [0m         [1;36m33334[0m                          [2m                    [0m
[2;36m                   [0m[

IngestionManagerPandas(feature_group_name='orders-fg-02-04-57-46', feature_definitions={'order_id': {'FeatureName': 'order_id', 'FeatureType': 'String'}, 'customer_id': {'FeatureName': 'customer_id', 'FeatureType': 'String'}, 'product_id': {'FeatureName': 'product_id', 'FeatureType': 'String'}, 'purchase_amount': {'FeatureName': 'purchase_amount', 'FeatureType': 'Fractional'}, 'is_reordered': {'FeatureName': 'is_reordered', 'FeatureType': 'Integral'}, 'event_time': {'FeatureName': 'event_time', 'FeatureType': 'String'}, 'n_days_since_last_purchase': {'FeatureName': 'n_days_since_last_purchase', 'FeatureType': 'Fractional'}}, sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f434df27a50>, sagemaker_session=<sagemaker.session.Session object at 0x7f43506e6390>, max_workers=3, max_processes=1, profile_name=None, _async_result=<multiprocess.pool.MapResult object at 0x7f434cb9d350>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])

In [42]:
# Fetch one customer's record from the customers feature group by its
# record identifier, using the Feature Store runtime client.
customer_id = "C1"

featurestore_runtime = sagemaker_session.boto_session.client(
    "sagemaker-featurestore-runtime", region_name=region
)
sample_record = featurestore_runtime.get_record(
    FeatureGroupName=customers_feature_group_name,
    RecordIdentifierValueAsString=str(customer_id),
)

In [43]:
# Show the get_record response for the sample customer.
sample_record

{'ResponseMetadata': {'RequestId': 'f6502a37-122c-4ca1-a5a7-9e4479e57ccb',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f6502a37-122c-4ca1-a5a7-9e4479e57ccb',
   'content-type': 'application/json',
   'content-length': '877',
   'date': 'Wed, 02 Apr 2025 05:19:31 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'customer_id', 'ValueAsString': 'C1'},
  {'FeatureName': 'sex', 'ValueAsString': '0'},
  {'FeatureName': 'is_married', 'ValueAsString': '1'},
  {'FeatureName': 'event_time', 'ValueAsString': '2024-05-02T05:39:10.965Z'},
  {'FeatureName': 'age_18-29', 'ValueAsString': '0'},
  {'FeatureName': 'age_30-39', 'ValueAsString': '0'},
  {'FeatureName': 'age_40-49', 'ValueAsString': '0'},
  {'FeatureName': 'age_50-59', 'ValueAsString': '0'},
  {'FeatureName': 'age_60-69', 'ValueAsString': '0'},
  {'FeatureName': 'age_70-plus', 'ValueAsString': '1'},
  {'FeatureName': 'n_days_active', 'ValueAsString': '0.2034246575342464'}]}

In [44]:
# Look up the same two identifiers in both feature groups with a single
# batch_get_record call.
lookup_ids = ["C1", "C2"]
batch_identifiers = [
    {"FeatureGroupName": group_name, "RecordIdentifiersValueAsString": lookup_ids}
    for group_name in (customers_feature_group_name, orders_feature_group_name)
]

all_records = sagemaker_session.boto_session.client(
    "sagemaker-featurestore-runtime", region_name=region
).batch_get_record(Identifiers=batch_identifiers)

In [45]:
# Show the batch_get_record response covering both feature groups.
all_records

{'ResponseMetadata': {'RequestId': 'e06ce4de-ef79-4519-a938-226fffe5e3a3',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'e06ce4de-ef79-4519-a938-226fffe5e3a3',
   'content-type': 'application/json',
   'content-length': '3408',
   'date': 'Wed, 02 Apr 2025 05:20:22 GMT'},
  'RetryAttempts': 0},
 'Records': [{'FeatureGroupName': 'customers-fg-02-04-57-46',
   'RecordIdentifierValueAsString': 'C1',
   'Record': [{'FeatureName': 'customer_id', 'ValueAsString': 'C1'},
    {'FeatureName': 'sex', 'ValueAsString': '0'},
    {'FeatureName': 'is_married', 'ValueAsString': '1'},
    {'FeatureName': 'event_time', 'ValueAsString': '2024-05-02T05:39:10.965Z'},
    {'FeatureName': 'age_18-29', 'ValueAsString': '0'},
    {'FeatureName': 'age_30-39', 'ValueAsString': '0'},
    {'FeatureName': 'age_40-49', 'ValueAsString': '0'},
    {'FeatureName': 'age_50-59', 'ValueAsString': '0'},
    {'FeatureName': 'age_60-69', 'ValueAsString': '0'},
    {'FeatureName': 'age_70-plus', 'ValueAsStr

In [46]:
# Extend the customers feature group with a new string feature named "email".
from sagemaker.feature_store.feature_definition import StringFeatureDefinition

email_feature = StringFeatureDefinition("email")
customers_feature_group.update(feature_additions=[email_feature])

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:975050337104:feature-group/customers-fg-02-04-57-46',
 'ResponseMetadata': {'RequestId': '599b0296-c59d-442a-a639-21bb9a303452',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '599b0296-c59d-442a-a639-21bb9a303452',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '101',
   'date': 'Wed, 02 Apr 2025 05:22:12 GMT'},
  'RetryAttempts': 0}}

In [54]:
# Describe the customers feature group again to inspect its feature
# definitions after the update call.
customers_feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:975050337104:feature-group/customers-fg-02-04-57-46',
 'FeatureGroupName': 'customers-fg-02-04-57-46',
 'RecordIdentifierFeatureName': 'customer_id',
 'EventTimeFeatureName': 'event_time',
 'FeatureDefinitions': [{'FeatureName': 'customer_id',
   'FeatureType': 'String'},
  {'FeatureName': 'sex', 'FeatureType': 'Integral'},
  {'FeatureName': 'is_married', 'FeatureType': 'Integral'},
  {'FeatureName': 'event_time', 'FeatureType': 'String'},
  {'FeatureName': 'age_18-29', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_30-39', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_40-49', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_50-59', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_60-69', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_70-plus', 'FeatureType': 'Integral'},
  {'FeatureName': 'n_days_active', 'FeatureType': 'Fractional'},
  {'FeatureName': 'email', 'FeatureType': 'String'}],
 'CreationTime': datetime.dateti

In [55]:
# Clean up: delete both feature groups, customers first and then orders.
for feature_group in (customers_feature_group, orders_feature_group):
    feature_group.delete()