In [1]:
# Update SageMaker to the latest version
%pip install -U sagemaker



In [2]:
# Import libraries and set up the sessions and names shared by later cells

import os

import boto3
import sagemaker
from sagemaker.workflow.pipeline_context import PipelineSession

# Regular session: used in this notebook for the region and default-bucket lookup.
sagemaker_session = sagemaker.session.Session()
region = sagemaker_session.boto_region_name

# Role ARN handed to the processor, the model and pipeline.upsert().
# Set SAGEMAKER_ROLE_ARN to run the notebook against another account; the
# original role remains the fallback, so existing runs are unaffected.
role = (
    os.environ.get("SAGEMAKER_ROLE_ARN")
    or "arn:aws:iam::619071320705:role/service-role/AmazonSageMaker-ExecutionRole-20250204T212210"
)

# Pipeline session: passed to the processor and the model in later cells.
pipeline_session = PipelineSession()
default_bucket = sagemaker_session.default_bucket()
model_package_group_name = "AbaloneModelPackageGroupName"



sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/julienlook/Library/Application Support/sagemaker/config.yaml


In [3]:
# Create directories for data and code
# Pure-Python equivalent of `mkdir -p`, so the cell does not depend on a POSIX
# shell; re-running it when the directories already exist is a no-op.
from pathlib import Path

for directory in ("data", "code"):
    Path(directory).mkdir(parents=True, exist_ok=True)

In [4]:
# Fetch the Abalone CSV from the regional example-files bucket, then stage it in our bucket
local_path = "data/abalone-dataset.csv"

s3 = boto3.resource("s3")
# The source bucket name embeds the session's region.
example_bucket = s3.Bucket(f"sagemaker-example-files-prod-{region}")
example_bucket.download_file(
    Key="datasets/tabular/uci_abalone/abalone.csv",
    Filename=local_path,
)

# Upload the local copy under the abalone/ prefix of the session's default bucket.
base_uri = f"s3://{default_bucket}/abalone"
input_data_uri = sagemaker.s3.S3Uploader.upload(local_path, base_uri)
print(input_data_uri)

s3://sagemaker-ap-southeast-1-619071320705/abalone/abalone-dataset.csv


In [5]:
# Define pipeline parameters
from sagemaker.workflow.parameters import ParameterFloat, ParameterInteger, ParameterString

# Dataset location; defaults to the URI returned by the upload cell.
input_data = ParameterString(name="InputData", default_value=input_data_uri)

# Instance count consumed by the scikit-learn processor.
processing_instance_count = ParameterInteger(
    name="ProcessingInstanceCount",
    default_value=1,
)

# NOTE(review): no step in the cells shown reads `instance_type` or
# `mse_threshold`; they are only listed as pipeline parameters — confirm they are still needed.
instance_type = ParameterString(
    name="TrainingInstanceType",
    default_value="ml.m5.xlarge",
)
mse_threshold = ParameterFloat(name="MseThreshold", default_value=6.0)

# Approval status passed to model.register().
model_approval_status = ParameterString(
    name="ModelApprovalStatus",
    default_value="PendingManualApproval",
)

In [6]:
# Configure the scikit-learn processor that runs the preprocessing script
from sagemaker.sklearn.processing import SKLearnProcessor

framework_version = "1.2-1"

# Collected as keyword arguments so the configuration reads as one table.
processor_settings = dict(
    framework_version=framework_version,
    role=role,
    instance_type="ml.m5.xlarge",
    instance_count=processing_instance_count,  # pipeline parameter, default 1
    base_job_name="sklearn-abalone-process",
    sagemaker_session=pipeline_session,
)
sklearn_processor = SKLearnProcessor(**processor_settings)

In [7]:
# Define processing step
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep

# Named outputs and the container-side directory each one is collected from.
output_dirs = {
    "train": "/opt/ml/processing/train",
    "validation": "/opt/ml/processing/validation",
    "test": "/opt/ml/processing/test",
}
processing_inputs = [
    ProcessingInput(source=input_data, destination="/opt/ml/processing/input"),
]
processing_outputs = [
    ProcessingOutput(output_name=name, source=path) for name, path in output_dirs.items()
]

# NOTE(review): code/preprocessing.py must already exist locally; no cell shown
# in this notebook creates it.
processor_args = sklearn_processor.run(
    inputs=processing_inputs,
    outputs=processing_outputs,
    code="code/preprocessing.py",
)

step_process = ProcessingStep(name="AbaloneProcess", step_args=processor_args)



In [1]:
# Define training step
# Both helpers are imported from the `steps.training_step` module, which is not
# part of this notebook. The cell needs `sagemaker_session` and
# `pipeline_session` from the setup cell; run on a fresh kernel before that
# cell, it fails with NameError.
from steps.training_step import init_training_step, setup_xgboost_model
from sagemaker.workflow.steps import TrainingStep

# NOTE(review): presumably builds the XGBoost training arguments — confirm in steps/training_step.py
train_args = setup_xgboost_model(sagemaker_session,pipeline_session)
# `step_train` is used later for the model artifact location and as a pipeline step.
step_train = init_training_step(train_args)

NameError: name 'sagemaker_session' is not defined

In [10]:
# Create model
from sagemaker.model import Model

# `image_uri` was not assigned by any cell shown in this notebook, so this cell
# raised NameError on a fresh kernel. Take the container image from the
# training step instead, so the model uses the image it was trained with.
# NOTE(review): assumes `step_train` is a TrainingStep and that its training
# image can also serve inference (as with the built-in XGBoost image) — confirm
# against steps/training_step.py.
image_uri = step_train.properties.AlgorithmSpecification.TrainingImage

model = Model(
    image_uri=image_uri,
    # Resolved from the training step's output when the pipeline runs.
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    sagemaker_session=pipeline_session,
    role=role,
)

In [11]:
# Wrap model creation in a pipeline step
from sagemaker.inputs import CreateModelInput
from sagemaker.workflow.model_step import ModelStep

# NOTE(review): "ml.eia1.medium" is an Elastic Inference accelerator type —
# confirm that offering is still available and that the argument is wanted.
create_model_args = model.create(
    instance_type="ml.m5.large",
    accelerator_type="ml.eia1.medium",
)
step_create_model = ModelStep(name="AbaloneCreateModel", step_args=create_model_args)

In [12]:
# Build the model-registration step (no model metrics are attached in this cell)
register_args = model.register(
    model_package_group_name=model_package_group_name,
    approval_status=model_approval_status,  # pipeline parameter
    content_types=["text/csv"],
    response_types=["text/csv"],
    inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
)
# NOTE(review): the pipeline cell in this notebook does not list `step_register`
# among its steps — confirm whether registration is meant to run.
step_register = ModelStep(name="AbaloneRegisterModel", step_args=register_args)

In [13]:
# Assemble the pipeline from its parameters and steps
from sagemaker.workflow.pipeline import Pipeline

pipeline_name = "AbalonePipeline4"

# Every parameter object declared for the pipeline.
pipeline_parameters = [
    processing_instance_count,
    instance_type,
    model_approval_status,
    input_data,
    mse_threshold,
]

# NOTE(review): `step_register` is not in this list — confirm the omission is intentional.
pipeline_steps = [step_process, step_train, step_create_model]

pipeline = Pipeline(
    name=pipeline_name,
    parameters=pipeline_parameters,
    steps=pipeline_steps,
)

In [14]:
# Render the pipeline definition and parse it as JSON (nothing is displayed here)
import json

definition_json = pipeline.definition()
definition = json.loads(definition_json)

In [15]:
# Upsert pipeline
# Called with the same role ARN that the processor and model were given.
# As the cell's last expression, the call's return value is shown as its output.
pipeline.upsert(role_arn=role)


{'PipelineArn': 'arn:aws:sagemaker:ap-southeast-1:619071320705:pipeline/AbalonePipeline4',
 'ResponseMetadata': {'RequestId': 'ea371c1c-9a11-4911-a361-08fa54a08752',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'ea371c1c-9a11-4911-a361-08fa54a08752',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '89',
   'date': 'Thu, 06 Feb 2025 07:58:22 GMT'},
  'RetryAttempts': 0}}

In [16]:
# Start pipeline execution
# No parameter overrides are passed here; `execution` keeps whatever start()
# returns for later use.
execution = pipeline.start()
