This notebook demonstrates:
1. Creating a Model Group for versioned models
2. Registering a Model Package with deployment specifications
3. Creating a comprehensive Model Card with qualitative information

**Student Name:** [Your Name Here]  
**Date:** January 28, 2026  
**Course:** AAI-540

In [21]:
import sagemaker

sess = sagemaker.Session()
region = sess.boto_region_name
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

print(region, bucket, role)


us-east-1 sagemaker-us-east-1-424015581839 arn:aws:iam::424015581839:role/LabRole


In [22]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.model_metrics import MetricsSource, ModelMetrics
from datetime import datetime
import json


# Initialize SageMaker session and clients
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
role = get_execution_role()
bucket = sagemaker_session.default_bucket()

# Boto3 clients
sm_client = boto3.client('sagemaker', region_name=region)

print(f"Region: {region}")
print(f"Role: {role}")
print(f"Default Bucket: {bucket}")

Region: us-east-1
Role: arn:aws:iam::424015581839:role/LabRole
Default Bucket: sagemaker-us-east-1-424015581839


## Model Training and Deployment
### (Using Lab 4.1 as reference)

This section trains and deploys a model. If you already have a trained model from Lab 4.1 or your final project, you can skip this section and update the variables below with your existing model information.

In [23]:
# Example: XGBoost model for breast cancer detection
# Update these variables with your actual model information

# If you're using an existing model, set these variables:
# model_data_url = "s3://your-bucket/path/to/model.tar.gz"
# training_job_name = "your-training-job-name"
# Training completed!
# Training job name: sagemaker-xgboost-2026-01-30-16-20-06-633
# Model artifact location: s3://sagemaker-us-east-1-424015581839/breast-cancer-xgboost/output/sagemaker-xgboost-2026-01-30-16-20-06-633/output/model.tar.gz

# For this example, let's use XGBoost for breast cancer classification
from sagemaker.image_uris import retrieve

# Get XGBoost container image
container = retrieve('xgboost', region, version='1.5-1')
print(f"XGBoost container image: {container}")

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


XGBoost container image: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.5-1


In [24]:
# Prepare training data (example with breast cancer dataset)
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Load dataset
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Save to CSV for XGBoost (label first column)
train_data = pd.concat([y_train, X_train], axis=1)
test_data = pd.concat([y_test, X_test], axis=1)

# Save locally
train_data.to_csv('train.csv', index=False, header=False)
test_data.to_csv('test.csv', index=False, header=False)

print(f"Training data shape: {train_data.shape}")
print(f"Test data shape: {test_data.shape}")

Training data shape: (455, 31)
Test data shape: (114, 31)


In [25]:
# Upload data to S3
prefix = 'breast-cancer-xgboost'
train_path = sagemaker_session.upload_data('train.csv', bucket=bucket, key_prefix=f'{prefix}/data')
test_path = sagemaker_session.upload_data('test.csv', bucket=bucket, key_prefix=f'{prefix}/data')

print(f"Training data uploaded to: {train_path}")
print(f"Test data uploaded to: {test_path}")

Training data uploaded to: s3://sagemaker-us-east-1-424015581839/breast-cancer-xgboost/data/train.csv
Test data uploaded to: s3://sagemaker-us-east-1-424015581839/breast-cancer-xgboost/data/test.csv


In [26]:
# Configure and train the model
xgb_estimator = Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    output_path=f's3://{bucket}/{prefix}/output',
    sagemaker_session=sagemaker_session
)

# Set hyperparameters
xgb_estimator.set_hyperparameters(
    objective='binary:logistic',
    num_round=100,
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.8,
    eval_metric='auc'
)

print("Starting training job...")
print("This may take 5-10 minutes.")

Starting training job...
This may take 5-10 minutes.


In [28]:
# Train the model
# xgb_estimator.fit({'train': train_path, 'validation': test_path})

from sagemaker.inputs import TrainingInput

train_input = TrainingInput(
    s3_data=train_path,
    content_type="text/csv"
)

validation_input = TrainingInput(
    s3_data=test_path,
    content_type="text/csv"
)

xgb_estimator.fit({
    "train": train_input,
    "validation": validation_input
})


training_job_name = xgb_estimator.latest_training_job.name
model_data_url = xgb_estimator.model_data

print(f"\nTraining completed!")
print(f"Training job name: {training_job_name}")
print(f"Model artifact location: {model_data_url}")

INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2026-01-30-16-20-06-633


2026-01-30 16:20:06 Starting - Starting the training job......
2026-01-30 16:21:06 Downloading - Downloading input data...
2026-01-30 16:21:27 Downloading - Downloading the training image......
2026-01-30 16:22:41 Training - Training image download completed. Training in progress.
2026-01-30 16:22:41 Uploading - Uploading generated training model
2026-01-30 16:22:41 Completed - Training job completed
  from pandas import MultiIndex, Int64Index[0m
[34m[2026-01-30 16:22:24.796 ip-10-0-81-173.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2026-01-30 16:22:24.822 ip-10-0-81-173.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2026-01-30:16:22:25:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2026-01-30:16:22:25:INFO] Failed to parse hyperparameter eval_metric value auc to Json.[0m
[34mReturning the value itself[0m
[34m[2026-01-30:16:22:25:INFO] Failed to parse hyperparameter objective val

In [29]:
# Deploy model to endpoint
predictor = xgb_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.xlarge',
    endpoint_name=f'breast-cancer-endpoint-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
)

endpoint_name = predictor.endpoint_name
print(f"\nModel deployed to endpoint: {endpoint_name}")

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2026-01-30-16-23-01-710
INFO:sagemaker:Creating endpoint-config with name breast-cancer-endpoint-20260130-162301
INFO:sagemaker:Creating endpoint with name breast-cancer-endpoint-20260130-162301


------!
Model deployed to endpoint: breast-cancer-endpoint-20260130-162301


In [34]:
# Define Model Group details
model_package_group_name = 'xgboost-breast-cancer-detection'
model_package_group_description = (
    "XGBoost binary classifier for breast cancer detection using Wisconsin dataset. "
    "Predicts malignant vs benign tumors based on cell nucleus measurements. "
    "Tracks all model versions and experiments."
)

print(f"Model Package Group Name: {model_package_group_name}")
print(f"Description: {model_package_group_description}")

Model Package Group Name: xgboost-breast-cancer-detection
Description: XGBoost binary classifier for breast cancer detection using Wisconsin dataset. Predicts malignant vs benign tumors based on cell nucleus measurements. Tracks all model versions and experiments.


In [35]:
# Create Model Package Group
try:
    create_model_package_group_response = sm_client.create_model_package_group(
        ModelPackageGroupName=model_package_group_name,
        ModelPackageGroupDescription=model_package_group_description
    )
    print("Model Package Group created successfully!")
    print(f"ARN: {create_model_package_group_response['ModelPackageGroupArn']}")
except sm_client.exceptions.ResourceInUse:
    print(f"Model Package Group '{model_package_group_name}' already exists.")
    print("Using existing group.")

In [36]:
# Describe Model Package Group (SCREENSHOT THIS OUTPUT)
describe_model_package_group_response = sm_client.describe_model_package_group(
    ModelPackageGroupName=model_package_group_name
)

print("="*80)
print("PART 1: MODEL PACKAGE GROUP DESCRIPTION")
print("="*80)
print(json.dumps(describe_model_package_group_response, indent=2, default=str))
print("="*80)

PART 1: MODEL PACKAGE GROUP DESCRIPTION
{
  "ModelPackageGroupName": "xgboost-breast-cancer-detection",
  "ModelPackageGroupArn": "arn:aws:sagemaker:us-east-1:424015581839:model-package-group/xgboost-breast-cancer-detection",
  "ModelPackageGroupDescription": "XGBoost binary classifier for breast cancer detection using Wisconsin dataset. Predicts malignant vs benign tumors based on cell nucleus measurements. Tracks all model versions and experiments.",
  "CreationTime": "2026-01-30 16:06:55.973000+00:00",
  "CreatedBy": {
    "UserProfileArn": "arn:aws:sagemaker:us-east-1:424015581839:user-profile/d-jcworzprebpn/default-1768838248240",
    "UserProfileName": "default-1768838248240",
    "DomainId": "d-jcworzprebpn",
    "IamIdentity": {
      "Arn": "arn:aws:sts::424015581839:assumed-role/LabRole/SageMaker",
      "PrincipalId": "AROAWFOKMKKH6OVU4TZIO:SageMaker"
    }
  },
  "ModelPackageGroupStatus": "Completed",
  "ResponseMetadata": {
    "RequestId": "34941c62-722c-40d7-9a0e-94b04d

In [37]:
# Prepare inference specification
inference_specification = {
    'InferenceSpecification': {
        'Containers': [
            {
                'Image': container,
                'ModelDataUrl': model_data_url,
                'Environment': {
                    'SAGEMAKER_PROGRAM': 'inference.py',
                    'SAGEMAKER_SUBMIT_DIRECTORY': model_data_url
                }
            }
        ],
        'SupportedContentTypes': ['text/csv', 'application/json'],
        'SupportedResponseMIMETypes': ['text/csv', 'application/json'],
        'SupportedRealtimeInferenceInstanceTypes': [
            'ml.t2.medium',
            'ml.m5.large',
            'ml.m5.xlarge',
            'ml.m5.2xlarge'
        ],
        'SupportedTransformInstanceTypes': [
            'ml.m5.large',
            'ml.m5.xlarge',
            'ml.m5.2xlarge'
        ]
    }
}

print("Inference specification prepared.")

Inference specification prepared.


In [38]:
# Create Model Package
training_job_name = "sagemaker-xgboost-2026-01-30-16-20-06-633"
model_package_description = (
    "XGBoost breast cancer classifier v1.0. "
    "Trained on 455 samples with 30 features. "
    f"Training job: {training_job_name}. "
    "Hyperparameters: max_depth=5, eta=0.2, num_round=100. "
    "Evaluation metric: AUC."
)

create_model_package_input_dict = {
    'ModelPackageGroupName': model_package_group_name,
    'ModelPackageDescription': model_package_description,
    'ModelApprovalStatus': 'PendingManualApproval',
    **inference_specification
}

create_model_package_response = sm_client.create_model_package(**create_model_package_input_dict)
model_package_arn = create_model_package_response['ModelPackageArn']

print("Model Package created successfully!")
print(f"Model Package ARN: {model_package_arn}")

Model Package created successfully!
Model Package ARN: arn:aws:sagemaker:us-east-1:424015581839:model-package/xgboost-breast-cancer-detection/1


In [39]:
# Wait for model package to be created
import time

print("Waiting for model package to be created...")
while True:
    response = sm_client.describe_model_package(ModelPackageName=model_package_arn)
    status = response['ModelPackageStatus']
    if status == 'Completed':
        print("Model package created successfully!")
        break
    elif status == 'Failed':
        print(f"Model package creation failed: {response.get('ModelPackageStatusDetails')}")
        break
    else:
        print(f"Status: {status}... waiting...")
        time.sleep(5)

Waiting for model package to be created...
Model package created successfully!


In [40]:
# Describe Model Package (SCREENSHOT THIS OUTPUT)
model_package_arn = "arn:aws:sagemaker:us-east-1:424015581839:model-package/xgboost-breast-cancer-detection/1"
describe_model_package_response = sm_client.describe_model_package(
    ModelPackageName=model_package_arn
)

print("="*80)
print("PART 2: MODEL PACKAGE DESCRIPTION")
print("="*80)
print(json.dumps(describe_model_package_response, indent=2, default=str))
print("="*80)

PART 2: MODEL PACKAGE DESCRIPTION
{
  "ModelPackageGroupName": "xgboost-breast-cancer-detection",
  "ModelPackageVersion": 1,
  "ModelPackageRegistrationType": "Registered",
  "ModelPackageArn": "arn:aws:sagemaker:us-east-1:424015581839:model-package/xgboost-breast-cancer-detection/1",
  "ModelPackageDescription": "XGBoost breast cancer classifier v1.0. Trained on 455 samples with 30 features. Training job: sagemaker-xgboost-2026-01-30-16-20-06-633. Hyperparameters: max_depth=5, eta=0.2, num_round=100. Evaluation metric: AUC.",
  "CreationTime": "2026-01-30 16:29:27.967000+00:00",
  "InferenceSpecification": {
    "Containers": [
      {
        "Image": "683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.5-1",
        "ImageDigest": "sha256:c764382b16cd0c921f1b2e66de8684fb999ccbd0c042c95679f0b69bc9cdd12c",
        "ModelDataUrl": "s3://sagemaker-us-east-1-424015581839/breast-cancer-xgboost/output/sagemaker-xgboost-2026-01-30-16-20-06-633/output/model.tar.gz",
        "En

# Part 3: Write the Model Card
---

The Model Card contains qualitative information about our model including:
- Model details and algorithm
- Intended use cases
- Training details and hyperparameters
- Evaluation results
- Ethical considerations
- Model owner and contact information

In [41]:
# Create comprehensive Model Card content
model_card_name = f'breast-cancer-xgboost-model-card'

model_card_content = {
    "model_overview": {
        "model_description": (
            "XGBoost binary classifier for breast cancer detection. "
            "The model predicts whether a tumor is malignant (1) or benign (0) "
            "based on measurements of cell nuclei from fine needle aspirate (FNA) images."
        ),
        "model_id": model_package_arn,
        "model_name": "Breast Cancer XGBoost Classifier v1.0",
        "model_owner": "[Your Name] - AAI-540 Student",
        "model_version": "1.0.0",
        "problem_type": "Binary Classification",
        "algorithm_type": "XGBoost (Gradient Boosting)",
        "model_creator": "AWS SageMaker XGBoost Algorithm",
        "model_artifact": model_data_url
    },
    "intended_uses": {
        "purpose_of_model": (
            "Assist in early detection of breast cancer by classifying tumors "
            "as malignant or benign based on cell characteristics."
        ),
        "intended_uses": (
            "Support medical decision-making; NOT for standalone diagnosis. "
            "Designed for research and educational purposes in ML model deployment."
        ),
        "out_of_scope_uses": (
            "Should NOT be used as sole diagnostic tool. "
            "Not validated for clinical use. "
            "Not intended for deployment without regulatory approval."
        )
    },
    "training_details": {
        "training_data": (
            "Wisconsin Breast Cancer Dataset (569 samples, 30 features). "
            "Features include radius, texture, perimeter, area, smoothness, "
            "compactness, concavity, concave points, symmetry, and fractal dimension "
            "for cell nuclei (mean, SE, and worst values)."
        ),
        "training_job_name": training_job_name,
        "training_samples": 455,
        "validation_samples": 114,
        "input_features": 30,
        "target_variable": "Binary (0=Benign, 1=Malignant)",
        "preprocessing": "None - raw features from dataset",
        "training_duration": "~5-10 minutes",
        "training_instance": "ml.m5.xlarge"
    },
    "hyperparameters": {
        "objective": "binary:logistic",
        "num_round": 100,
        "max_depth": 5,
        "eta": 0.2,
        "gamma": 4,
        "min_child_weight": 6,
        "subsample": 0.8,
        "eval_metric": "auc"
    },
    "evaluation_details": {
        "evaluation_metric": "AUC (Area Under ROC Curve)",
        "performance_goal": "AUC > 0.95 for clinical relevance",
        "testing_data": "20% holdout from Wisconsin dataset (114 samples)",
        "evaluation_notes": (
            "Model evaluated using AUC metric. "
            "Additional metrics (precision, recall, F1) should be computed "
            "for comprehensive evaluation."
        )
    },
    "deployment_details": {
        "deployment_endpoint": endpoint_name if 'endpoint_name' in locals() else "Not yet deployed",
        "inference_instance": "ml.m5.xlarge",
        "supported_content_types": ["text/csv", "application/json"],
        "latency_requirements": "Real-time inference (< 100ms)",
        "deployment_date": datetime.now().strftime("%Y-%m-%d")
    },
    "considerations": {
        "ethical_considerations": (
            "Medical diagnosis must involve licensed healthcare professionals. "
            "Model predictions should be used only as supplementary information. "
            "Patient privacy and data security must be maintained."
        ),
        "caveats_and_recommendations": (
            "Model trained on limited dataset (569 samples). "
            "May not generalize to all populations or imaging technologies. "
            "Requires validation on diverse patient cohorts before clinical use. "
            "Regular retraining recommended with new data."
        ),
        "fairness_and_bias": (
            "Dataset demographics unknown - potential for bias. "
            "Should evaluate performance across different patient groups. "
            "Monitor for disparate impact on protected groups."
        )
    },
    "additional_information": {
        "model_card_authors": "[Your Name]",
        "model_card_contact": "[Your Email]",
        "creation_date": datetime.now().strftime("%Y-%m-%d"),
        "last_updated": datetime.now().strftime("%Y-%m-%d"),
        "license": "Educational Use Only",
        "citations": (
            "Wisconsin Diagnostic Breast Cancer (WDBC) dataset. "
            "Wolberg, W.H., Street, W.N., and Mangasarian, O.L. (1995)"
        ),
        "feedback": "For questions or issues, contact model owner"
    }
}

# Convert to JSON string for Model Card
model_card_content_json = json.dumps(model_card_content, indent=2)

print("Model Card content prepared.")
print(f"Model Card Name: {model_card_name}")

Model Card content prepared.
Model Card Name: breast-cancer-xgboost-model-card


In [57]:
from sagemaker.model_card import ModelCard

model_card = ModelCard(
    name=model_card_name,
    status="Draft"
)

model_card.content = model_card_content  # dict

model_card.create(
    
)

print("Model Card created successfully!")
print(model_card)


INFO:sagemaker.model_card.model_card:Creating model card with name: breast-cancer-xgboost-model-card


In [60]:
print(model_card.content)

{'model_overview': {'model_description': 'XGBoost binary classifier for breast cancer detection. The model predicts whether a tumor is malignant (1) or benign (0) based on measurements of cell nuclei from fine needle aspirate (FNA) images.', 'model_id': 'arn:aws:sagemaker:us-east-1:424015581839:model-package/xgboost-breast-cancer-detection/1', 'model_name': 'Breast Cancer XGBoost Classifier v1.0', 'model_owner': '[Your Name] - AAI-540 Student', 'model_version': '1.0.0', 'problem_type': 'Binary Classification', 'algorithm_type': 'XGBoost (Gradient Boosting)', 'model_creator': 'AWS SageMaker XGBoost Algorithm', 'model_artifact': 's3://sagemaker-us-east-1-424015581839/breast-cancer-xgboost/output/sagemaker-xgboost-2026-01-30-16-20-06-633/output/model.tar.gz'}, 'intended_uses': {'purpose_of_model': 'Assist in early detection of breast cancer by classifying tumors as malignant or benign based on cell characteristics.', 'intended_uses': 'Support medical decision-making; NOT for standalone di