# SageMaker Feature Store Example

>__NOTE:__ This Notebook uses the _Python 3 (Data Science)_ Kernel

## Setup

In [None]:
import warnings
import time
import sagemaker
import boto3
import numpy as np
import pandas as pd
from time import gmtime, strftime
from sklearn import preprocessing
from sagemaker.feature_store.feature_group import FeatureGroup

# Suppress all warnings for the remainder of the session.
warnings.filterwarnings("ignore")

# Execution role and session used by every SageMaker call below.
role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session()
region_name = sagemaker_session.boto_region_name

# The data bucket name is read from the SSM parameter named "DataBucket".
data_bucket = str(
    boto3.client("ssm", region_name=region_name)
    .get_parameter(Name="DataBucket")["Parameter"]["Value"]
)
prefix = "sagemaker-featurestore"

def check_feature_group_status(feature_group):
    """Block until the Feature Group is no longer in the ``Creating`` state.

    Polls ``feature_group.describe()`` every 5 seconds while the reported
    ``FeatureGroupStatus`` is ``"Creating"``.

    Args:
        feature_group: Object exposing ``describe()`` (returning a mapping
            that contains ``FeatureGroupStatus``) and a ``name`` attribute.

    Raises:
        RuntimeError: If the status reached after polling is anything other
            than ``"Created"`` (for example a failed creation), so that a
            failure is not reported as a success.
    """
    description = feature_group.describe()
    while description.get("FeatureGroupStatus") == "Creating":
        print("Waiting for Feature Group to be Created")
        time.sleep(5)
        description = feature_group.describe()

    status = description.get("FeatureGroupStatus")
    if status != "Created":
        # Surface the service-provided reason when the response carries one.
        reason = description.get("FailureReason")
        detail = f": {reason}" if reason else ""
        raise RuntimeError(
            f"FeatureGroup {feature_group.name} was not created "
            f"(status: {status}){detail}"
        )
    print(f"FeatureGroup {feature_group.name} successfully created.")

def check_data_availability(feature_group, bucket):
    """Wait until the offline store prefix of the Feature Group holds data.

    Resolves the offline store S3 URI from ``feature_group.describe()``,
    strips the ``s3://{bucket}/`` part to obtain a key prefix, and lists that
    prefix once a minute until more than one object is returned. There is no
    timeout: the loop runs until the condition is met.

    Args:
        feature_group: Object whose ``describe()`` result contains
            ``OfflineStoreConfig -> S3StorageConfig -> ResolvedOutputS3Uri``.
        bucket: Name of the S3 bucket to list.
    """
    s3 = sagemaker_session.boto_session.client('s3', region_name=region_name)
    storage_config = feature_group.describe().get("OfflineStoreConfig").get("S3StorageConfig")
    resolved_uri = storage_config.get("ResolvedOutputS3Uri")
    key_prefix = resolved_uri.replace(f"s3://{bucket}/", "")

    while True:
        listing = s3.list_objects(Bucket=bucket, Prefix=key_prefix)
        # A single listed object is not treated as data being available.
        if len(listing.get('Contents', [])) > 1:
            break
        print('Waiting for data into the offline store...\n')
        time.sleep(60)
    print('Data available.')

---

## Data Preparation

### Import Python Libraries and Helper Functions

### Download the Data

In [None]:
# The file is read with these names supplied explicitly as the column labels.
column_names = [
    "sex",
    "length",
    "diameter",
    "height",
    "whole_weight",
    "shucked_weight",
    "viscera_weight",
    "shell_weight",
    "rings",
]
abalone_data = pd.read_csv(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data",
    names=column_names,
)
abalone_data.head()

### Data Processing and Feature Engineering

In [None]:
# Reorder the columns so that "rings" comes first.
data = abalone_data[
    [
        "rings",
        "sex",
        "length",
        "diameter",
        "height",
        "whole_weight",
        "shucked_weight",
        "viscera_weight",
        "shell_weight",
    ]
]
# One-hot encode the categorical columns (presumably only "sex" - confirm
# against the data). The dummy dtype is pinned to uint8, the default before
# pandas 2.0; newer pandas defaults to bool.
# NOTE(review): bool columns are believed not to map to a Feature Store
# feature type in load_feature_definitions - confirm against the SDK in use.
processed_data = pd.get_dummies(data, dtype="uint8")
processed_data.head()

---

## SageMaker Feature Store

### Define the Feature Group

In [None]:
# Suffix the name with the current UTC day-hour-minute-second.
fg_name = "abalone-fg-{}".format(strftime("%d-%H-%M-%S", gmtime()))
fg = FeatureGroup(sagemaker_session=sagemaker_session, name=fg_name)

### Create Ingestion Timestamp Identifier (Event Time Feature)

In [None]:
# Current epoch time, rounded to whole seconds.
time_stamp = round(time.time())
# Every row receives the same value, stored as float64.
processed_data["TimeStamp"] = pd.Series(
    time_stamp,
    index=range(len(processed_data)),
    dtype="float64",
)

### Create Feature Definition Schema

In [None]:
# Load the feature definitions for the group from the processed DataFrame
# (presumably one feature per column, typed from the column dtype - confirm
# against the SageMaker SDK documentation).
fg.load_feature_definitions(data_frame=processed_data)

### Create the Feature Group

In [None]:
# Create the group with the online store disabled, storing data under the
# notebook's bucket and prefix. "rings" is the record identifier and
# "TimeStamp" the event-time feature.
fg.create(
    role_arn=role,
    s3_uri="s3://{}/{}".format(data_bucket, prefix),
    record_identifier_name="rings",
    event_time_feature_name="TimeStamp",
    enable_online_store=False,
)

# Wait here until the group is no longer in the "Creating" state.
check_feature_group_status(fg)

### Ingest Data into the Feature Group

In [None]:
# Ingest the processed rows using 5 workers and wait for the call to finish.
fg.ingest(
    data_frame=processed_data,
    wait=True,
    max_workers=5,
)

# Then poll S3 until objects appear under the group's offline store prefix.
check_data_availability(fg, data_bucket)

### Describe the Feature Group

>__NOTE:__ Make sure to capture the name of the Feature Group _(FeatureGroupName)_, as we will be using this later.

In [None]:
# Display the Feature Group description; as the last expression in the cell,
# its result is rendered as the cell output.
fg.describe()