# Amazon SageMaker Feature Store Demo
## Complete Guide: Data Transformation, Feature Store Creation, and Data Ingestion

This comprehensive demo shows how to:
1. Transform raw customer and order data for ML use cases
2. Create feature groups in Amazon SageMaker Feature Store
3. Add metadata and descriptions to features
4. Ingest data into the feature store (both online and offline stores)
5. Validate and retrieve data from the feature store

Prerequisites:
- AWS account with SageMaker permissions
- SageMaker notebook instance or SageMaker Studio
- Raw CSV files: customers.csv and orders.csv

## Step 3: Feature Store Setup and Configuration

### Step 3.1 Prepare Environment

In [190]:
import sagemaker
import boto3
import sys
import pandas as pd
import numpy as np 
import io 
from sagemaker.session import Session
from sagemaker import get_execution_role

In [None]:
prefix = "mlops-data-preprocessing-pipeline"
role = get_execution_role()
sagemaker_session = Session() 
region = sagemaker_session.boto_region_name

In [None]:
# Create feature group
from time import gmtime, strftime, sleep

employee_feature_group_name = f"{prefix}-employee-features" 
print(f"Customer Feature Group Name: {employee_feature_group_name}")

Customer Feature Group Name: customers-feature-group
Orders Feature Group Name: orders-feature-group


In [None]:
from sagemaker.feature_store.feature_group import FeatureGroup

employee_feature_group_name = FeatureGroup(name=employee_feature_group_name , 
                                       sagemaker_session = sagemaker_session)

In [None]:
employee_feature_group_name.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:590183953604:feature-group/customers-feature-group',
 'FeatureGroupName': 'customers-feature-group',
 'RecordIdentifierFeatureName': 'customer_id',
 'EventTimeFeatureName': 'event_time',
 'FeatureDefinitions': [{'FeatureName': 'customer_id',
   'FeatureType': 'String'},
  {'FeatureName': 'sex', 'FeatureType': 'Integral'},
  {'FeatureName': 'is_married', 'FeatureType': 'Integral'},
  {'FeatureName': 'event_time', 'FeatureType': 'String'},
  {'FeatureName': 'age_18-29', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_30-39', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_40-49', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_50-59', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_60-69', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_70-plus', 'FeatureType': 'Integral'},
  {'FeatureName': 'n_days_active', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2025, 8, 9, 20, 1, 55, tzinfo=tzlocal()),
 'OnlineSt

### Step 3.4: Add Metadata to Features

In [215]:
# description and parameters

from sagemaker.feature_store.inputs import FeatureParameter

customers_feature_group.update_feature_metadata(feature_name = "customer_id", description= "The ID of the customer, it is also part of Order feature group",
                                                parameter_additions=[FeatureParameter("idType","primarykey")])

{'ResponseMetadata': {'RequestId': 'e63bf0bf-9e37-4579-b2e0-2205a26e792b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'e63bf0bf-9e37-4579-b2e0-2205a26e792b',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Sat, 09 Aug 2025 20:03:59 GMT',
   'content-length': '0'},
  'RetryAttempts': 0}}

In [216]:
customers_feature_group.describe_feature_metadata("customer_id")

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:590183953604:feature-group/customers-feature-group',
 'FeatureGroupName': 'customers-feature-group',
 'FeatureName': 'customer_id',
 'FeatureType': 'String',
 'CreationTime': datetime.datetime(2025, 8, 9, 20, 1, 55, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2025, 8, 9, 20, 3, 59, 743000, tzinfo=tzlocal()),
 'Description': 'The ID of the customer, it is also part of Order feature group',
 'Parameters': [{'Key': 'idType', 'Value': 'primarykey'}],
 'ResponseMetadata': {'RequestId': '089f7892-a3cc-47a3-89da-e8628c01bace',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '089f7892-a3cc-47a3-89da-e8628c01bace',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '393',
   'date': 'Sat, 09 Aug 2025 20:03:59 GMT'},
  'RetryAttempts': 0}}

In [217]:
sagemaker_session.boto_session.client("sagemaker", region_name=region).search(
    Resource="FeatureMetadata",
    SearchExpression={
        "Filters":[
            {
                "Name": "FeatureGroupName",
                "Operator": "Contains",
                "Value": "customers-fg"
            },
            {"Name": "Parameters.idType", "Operator": "Equals", "Value": "primarykey"}
        ]
    })

{'Results': [{'FeatureMetadata': {'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:590183953604:feature-group/customers-fg-09-19-26-55',
    'FeatureGroupName': 'customers-fg-09-19-26-55',
    'FeatureName': 'customer_id',
    'FeatureType': 'String',
    'CreationTime': datetime.datetime(2025, 8, 9, 19, 26, 55, tzinfo=tzlocal()),
    'LastModifiedTime': datetime.datetime(2025, 8, 9, 19, 58, 2, tzinfo=tzlocal()),
    'Description': 'The ID of the customer, it is also part of Order feature group',
    'Parameters': [{'Key': 'idType', 'Value': 'primarykey'}]}}],
 'ResponseMetadata': {'RequestId': '84d6739c-8933-4023-a8f5-1b3b03c340fc',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '84d6739c-8933-4023-a8f5-1b3b03c340fc',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '507',
   'date': 'Sat, 09 Aug 2025 20:03:59 GMT'},
  'RetryAttempts': 0}}

### Step 3.5: Ingest data into Feature Store

In [218]:
customers_feature_group.ingest(data_frame = customer_data, max_workers=3,wait=True)

IngestionManagerPandas(feature_group_name='customers-feature-group', feature_definitions={'customer_id': {'FeatureName': 'customer_id', 'FeatureType': 'String'}, 'sex': {'FeatureName': 'sex', 'FeatureType': 'Integral'}, 'is_married': {'FeatureName': 'is_married', 'FeatureType': 'Integral'}, 'event_time': {'FeatureName': 'event_time', 'FeatureType': 'String'}, 'age_18-29': {'FeatureName': 'age_18-29', 'FeatureType': 'Integral'}, 'age_30-39': {'FeatureName': 'age_30-39', 'FeatureType': 'Integral'}, 'age_40-49': {'FeatureName': 'age_40-49', 'FeatureType': 'Integral'}, 'age_50-59': {'FeatureName': 'age_50-59', 'FeatureType': 'Integral'}, 'age_60-69': {'FeatureName': 'age_60-69', 'FeatureType': 'Integral'}, 'age_70-plus': {'FeatureName': 'age_70-plus', 'FeatureType': 'Integral'}, 'n_days_active': {'FeatureName': 'n_days_active', 'FeatureType': 'Fractional'}}, sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f833788dd90>, sagemaker_session=<sagemaker.session.Session ob

In [None]:
orders_feature_group.ingest(data_frame = orders_data, max_workers=3,wait=True)

### Step 3.6: Validate Feature Store Data

In [None]:
customer_id = "C1"
sample_record = sagemaker_session.boto_session.client(
    "sagemaker-featurestore-runtime", region_name=region
).get_record(
    FeatureGroupName=customers_feature_group_name, RecordIdentifierValueAsString=str(customer_id)
)

In [None]:
sample_record

{'ResponseMetadata': {'RequestId': '4061fcf9-0612-42cd-a75f-aea0481208ba',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '4061fcf9-0612-42cd-a75f-aea0481208ba',
   'content-type': 'application/json',
   'content-length': '877',
   'date': 'Sat, 09 Aug 2025 20:12:27 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'customer_id', 'ValueAsString': 'C1'},
  {'FeatureName': 'sex', 'ValueAsString': '0'},
  {'FeatureName': 'is_married', 'ValueAsString': '1'},
  {'FeatureName': 'event_time', 'ValueAsString': '2024-05-02T05:39:10.965Z'},
  {'FeatureName': 'age_18-29', 'ValueAsString': '0'},
  {'FeatureName': 'age_30-39', 'ValueAsString': '0'},
  {'FeatureName': 'age_40-49', 'ValueAsString': '0'},
  {'FeatureName': 'age_50-59', 'ValueAsString': '0'},
  {'FeatureName': 'age_60-69', 'ValueAsString': '0'},
  {'FeatureName': 'age_70-plus', 'ValueAsString': '1'},
  {'FeatureName': 'n_days_active', 'ValueAsString': '0.2034246575342466'}]}

In [None]:
all_records = sagemaker_session.boto_session.client(
    "sagemaker-featurestore-runtime", region_name=region
).batch_get_record(
    Identifiers=[
        {
            "FeatureGroupName": customers_feature_group_name,
            "RecordIdentifiersValueAsString": ["C1","C2"],
        },
        {
            "FeatureGroupName": orders_feature_group_name,
            "RecordIdentifiersValueAsString": ["C1","C2"],
        },
    ]
)

In [None]:
all_records

{'ResponseMetadata': {'RequestId': 'ad0ec71b-d5eb-4f0b-a2bf-1d2448298799',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'ad0ec71b-d5eb-4f0b-a2bf-1d2448298799',
   'content-type': 'application/json',
   'content-length': '3404',
   'date': 'Sat, 09 Aug 2025 20:12:27 GMT'},
  'RetryAttempts': 0},
 'Records': [{'FeatureGroupName': 'orders-feature-group',
   'RecordIdentifierValueAsString': 'C1',
   'Record': [{'FeatureName': 'order_id', 'ValueAsString': 'O99056'},
    {'FeatureName': 'customer_id', 'ValueAsString': 'C1'},
    {'FeatureName': 'product_id', 'ValueAsString': 'P5567'},
    {'FeatureName': 'purchase_amount', 'ValueAsString': '0.6516831683168316'},
    {'FeatureName': 'is_reordered', 'ValueAsString': '1'},
    {'FeatureName': 'event_time', 'ValueAsString': '2024-05-02T05:39:18.632Z'},
    {'FeatureName': 'n_days_since_last_purchase',
     'ValueAsString': '0.5484496124031009'}]},
  {'FeatureGroupName': 'orders-feature-group',
   'RecordIdentifierValueAsString'