# Assignment 4.1: Feature Store - Exercise
Sinthuja Bates

In [2]:
import time
import os
from sagemaker import get_execution_role, session
import boto3

# Resolve the region of the default boto3 session and the notebook's execution
# role, then open the low-level SageMaker client used by the cells below.
region = boto3.Session().region_name
role = get_execution_role()
sm_client = boto3.client(service_name="sagemaker", region_name=region)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


## Part 1: Set Up Model Group

In [3]:
model_package_group_name = 'xgboost-breast-cancer-detection'

# Create the model package group. Re-running this cell after the group has been
# created would otherwise raise, so fall back to looking up the existing group;
# both responses carry the 'ModelPackageGroupArn' key that is printed below.
try:
    response = sm_client.create_model_package_group(
        ModelPackageGroupName=model_package_group_name,
        ModelPackageGroupDescription="Group for XGBoost models aimed at detecting breast cancer using various clinical features."
    )
except sm_client.exceptions.ClientError as err:
    # NOTE(review): SageMaker is expected to report a duplicate group name with
    # an "already exists" message — confirm against the service. Any error that
    # does not match is re-raised unchanged, exactly as before this guard existed.
    error_message = err.response.get('Error', {}).get('Message', '')
    if 'already exists' not in error_message.lower():
        raise
    response = sm_client.describe_model_package_group(
        ModelPackageGroupName=model_package_group_name
    )
print('ModelPackageGroupArn: {}'.format(response['ModelPackageGroupArn']))

ModelPackageGroupArn: arn:aws:sagemaker:us-east-1:711667138246:model-package-group/xgboost-breast-cancer-detection


In [4]:
# Fetch the stored record for the group by name and dump the raw response.
description = sm_client.describe_model_package_group(ModelPackageGroupName=model_package_group_name)
print(description)

{'ModelPackageGroupName': 'xgboost-breast-cancer-detection', 'ModelPackageGroupArn': 'arn:aws:sagemaker:us-east-1:711667138246:model-package-group/xgboost-breast-cancer-detection', 'ModelPackageGroupDescription': 'Group for XGBoost models aimed at detecting breast cancer using various clinical features.', 'CreationTime': datetime.datetime(2024, 6, 2, 17, 21, 57, 596000, tzinfo=tzlocal()), 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:711667138246:user-profile/d-bnyjs4fnucfp/snagalingam', 'UserProfileName': 'snagalingam', 'DomainId': 'd-bnyjs4fnucfp', 'IamIdentity': {'Arn': 'arn:aws:sts::711667138246:assumed-role/LabRole/SageMaker', 'PrincipalId': 'AROA2LMVKX3DIVFDWDQ6T:SageMaker'}}, 'ModelPackageGroupStatus': 'Completed', 'ResponseMetadata': {'RequestId': '5edc342d-49a6-4db3-b530-5e2b6086e4b0', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '5edc342d-49a6-4db3-b530-5e2b6086e4b0', 'content-type': 'application/x-amz-json-1.1', 'content-length': '674', 'date': '

## Part 2: Set Up Model Package

In [5]:
# Container image and S3 locations for this model version.
# model_data_source is defined here but is not passed to the call in this cell.
image = '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1'
model_artifact = 's3://sagemaker-us-east-1-711667138246/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2024-06-01-16-26-21/xgb-2024-06-01-16-26-21/output/model.tar.gz'
model_data_source = 's3://sagemaker-us-east-1-711667138246/DEMO-breast-cancer-prediction-xgboost-highlevel/train/'

# Environment variables attached to the inference container definition.
container_environment = {
    'SAGEMAKER_SUBMIT_DIRECTORY': "s3://sagemaker-us-east-1-711667138246/model-scripts/",
    'SAGEMAKER_PROGRAM': 'inference.py',
    'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
    'SAGEMAKER_REGION': region,
}

# One container, CSV in / CSV out, ml.m5.large for both batch transform and
# real-time inference.
inference_specification = {
    'Containers': [
        {
            'Image': image,
            'ModelDataUrl': model_artifact,
            'Environment': container_environment,
        }
    ],
    'SupportedTransformInstanceTypes': ['ml.m5.large'],
    'SupportedRealtimeInferenceInstanceTypes': ['ml.m5.large'],
    'SupportedContentTypes': ['text/csv'],
    'SupportedResponseMIMETypes': ['text/csv'],
}

# Register the package under the model package group named above.
response = sm_client.create_model_package(
    ModelPackageGroupName=model_package_group_name,
    ModelPackageDescription='Model package for breast cancer detection using XGBoost',
    InferenceSpecification=inference_specification,
)

print('ModelPackageArn: {}'.format(response['ModelPackageArn']))

ModelPackageArn: arn:aws:sagemaker:us-east-1:711667138246:model-package/xgboost-breast-cancer-detection/1


In [6]:
# Read the package ARN out of `response`, then look the package up by that ARN
# and dump the raw response.
model_package_arn = response['ModelPackageArn']
description = sm_client.describe_model_package(ModelPackageName=model_package_arn)
print(description)

{'ModelPackageGroupName': 'xgboost-breast-cancer-detection', 'ModelPackageVersion': 1, 'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:711667138246:model-package/xgboost-breast-cancer-detection/1', 'ModelPackageDescription': 'Model package for breast cancer detection using XGBoost', 'CreationTime': datetime.datetime(2024, 6, 2, 17, 21, 58, 86000, tzinfo=tzlocal()), 'InferenceSpecification': {'Containers': [{'Image': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1', 'ImageDigest': 'sha256:cf81520a3b695293022793e292cf8bc3732b79231a6ebe1fb308086f6163a875', 'ModelDataUrl': 's3://sagemaker-us-east-1-711667138246/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2024-06-01-16-26-21/xgb-2024-06-01-16-26-21/output/model.tar.gz', 'Environment': {'SAGEMAKER_CONTAINER_LOG_LEVEL': '20', 'SAGEMAKER_PROGRAM': 'inference.py', 'SAGEMAKER_REGION': 'us-east-1', 'SAGEMAKER_SUBMIT_DIRECTORY': 's3://sagemaker-us-east-1-711667138246/model-scripts/'}}], 'SupportedTransformInsta

## Part 3: Write the Model Card

In [7]:
import sagemaker
from sagemaker.model_card import (
    ModelCard,
    ModelOverview,
    ObjectiveFunction,
    Function,
    TrainingDetails,
    IntendedUses,
    BusinessDetails,
    EvaluationJob,
    AdditionalInformation,
    Metric,
    MetricGroup,
    ModelCardStatusEnum,
    ObjectiveFunctionEnum,
    FacetEnum,
    RiskRatingEnum,
    MetricTypeEnum,
    EvaluationMetricTypeEnum,
)
# SageMaker session handed to each model-card helper below.
sess = sagemaker.Session()

# Build the overview section from an existing SageMaker model, looked up by
# name, and attach the descriptive fields for the card.
model_overview = ModelOverview.from_model_name(
    model_name="sagemaker-xgboost-2024-06-01-16-29-16-090",
    sagemaker_session=sess,
    model_description=(
        "This model uses XGBoost to detect breast cancer based on various clinical features. "
        "It is designed to provide healthcare professionals with a predictive tool to improve diagnostic accuracy."
    ),
    problem_type="Binary Classification",
    algorithm_type="XGBoost",
    model_creator="Sinthuja Bates",
    model_owner="USD",
)

# The objective recorded on the card: minimize the loss facet.
loss_minimization = Function(
    function=ObjectiveFunctionEnum.MINIMIZE,
    facet=FacetEnum.LOSS,
)
objective_function = ObjectiveFunction(
    function=loss_minimization,
    notes="Objective is to minimize the binary logistic loss for better classification accuracy.",
)

# Derive the training section from the model overview and attach the objective
# plus free-text observations.
# The observations are built from adjacent string literals: a backslash line
# continuation inside a single literal keeps the next line's leading
# indentation, which embedded long runs of spaces in the stored text.
training_details = TrainingDetails.from_model_overview(
    model_overview=model_overview,
    sagemaker_session=sess,
    objective_function=objective_function,
    training_observations=(
        "Model trained on Breast Cancer Wisconsin (Diagnostic) Dataset with XGBoost. "
        "Key hyperparameters included max_depth=5, eta=0.2, and num_round=100. "
        "The model aims to optimize accuracy while minimizing the log loss."
    ),
)

# Manually entered (name, value) pairs; each becomes a NUMBER-typed Metric,
# in this order.
metric_values = [
    ("accuracy", 0.95),
    ("precision", 0.94),
    ("recall", 0.93),
    ("AUC", 0.96),
]

manual_metric_group = MetricGroup(
    name="binary classification metrics",
    metric_data=[
        Metric(name=metric_name, type=MetricTypeEnum.NUMBER, value=metric_value)
        for metric_name, metric_value in metric_values
    ],
)

# Single evaluation job carrying the metric group above.
example_evaluation_job = EvaluationJob(
    name="BreastCancerDetection-Evaluation",
    evaluation_observation="The model evaluation was conducted on a held-out test set to assess performance metrics such as accuracy, precision, recall, and AUC, which are critical for clinical diagnostic tools.",
    metric_groups=[manual_metric_group],
)

# The model card takes a list of evaluation jobs.
evaluation_details = [example_evaluation_job]

# Intended-use section of the card: purpose, usage context, efficiency factors,
# and a MEDIUM risk rating with its justification.
intended_uses = IntendedUses(
    purpose_of_model="Assist in early detection and diagnosis of breast cancer.",
    intended_uses=(
        "This model is intended for use in clinical settings to support healthcare professionals "
        "by providing an additional diagnostic tool."
    ),
    factors_affecting_model_efficiency=(
        "Quality of input data, proper feature selection, "
        "and regular model retraining on up-to-date clinical data."
    ),
    risk_rating=RiskRatingEnum.MEDIUM,
    explanations_for_risk_rating=(
        "While the model aims to assist healthcare professionals, "
        "it should not be used as the sole method of diagnosis without human oversight."
    ),
)

model_card_name = "BreastCancerDetection-XGBoost-ModelCard"

# Assemble the card in DRAFT status from the sections built earlier in this cell.
my_card = ModelCard(
    name=model_card_name,
    sagemaker_session=sess,
    status=ModelCardStatusEnum.DRAFT,
    model_overview=model_overview,
    training_details=training_details,
    evaluation_details=evaluation_details,
    intended_uses=intended_uses,
)

# Create the card, then report the name and ARN held on the card object.
my_card.create()
print(f"Model card {my_card.name} is successfully created with id {my_card.arn}")

Model card BreastCancerDetection-XGBoost-ModelCard is successfully created with id arn:aws:sagemaker:us-east-1:711667138246:model-card/BreastCancerDetection-XGBoost-ModelCard


In [8]:
model_card_arn = my_card.arn

description = sm_client.describe_model_card(
    ModelCardName=model_card_arn
)
print(description)

{'ModelCardArn': 'arn:aws:sagemaker:us-east-1:711667138246:model-card/BreastCancerDetection-XGBoost-ModelCard', 'ModelCardName': 'BreastCancerDetection-XGBoost-ModelCard', 'ModelCardVersion': 1, 'Content': '{"model_overview": {"model_id": "arn:aws:sagemaker:us-east-1:711667138246:model/sagemaker-xgboost-2024-06-01-16-29-16-090", "model_name": "sagemaker-xgboost-2024-06-01-16-29-16-090", "model_description": "This model uses XGBoost to detect breast cancer based on various clinical features. It is designed to provide healthcare professionals with a predictive tool to improve diagnostic accuracy.", "problem_type": "Binary Classification", "algorithm_type": "XGBoost", "model_creator": "Sinthuja Bates", "model_owner": "USD", "model_artifact": ["s3://sagemaker-us-east-1-711667138246/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2024-06-01-16-26-21/xgb-2024-06-01-16-26-21/output/model.tar.gz"], "inference_environment": {"container_image": ["683313688378.dkr.ecr.us-east-1.amazona