# Electronics Sales Pipeline - Multi-Category

Runs Optuna-driven XGBoost hyperparameter optimization for each product category, with every category executed as its own notebook-job step in a SageMaker pipeline.

In [5]:
import os
import sagemaker
from sagemaker.workflow.notebook_job_step import NotebookJobStep
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_context import LocalPipelineSession, PipelineSession

In [6]:
# Resolve the SageMaker Distribution container image that the notebook-job
# steps run in. `sagemaker` is already imported in the imports cell at the top
# of the notebook, so it is not re-imported here.
# NOTE(review): version="latest" floats over time (the recorded run resolved
# to the 3.0.0-cpu tag); consider pinning a version for reproducible runs.
image_uri = sagemaker.image_uris.retrieve(
    framework="sagemaker-distribution",
    region="us-east-1",
    version="latest",
    instance_type="ml.m5.xlarge",  # determines cpu vs gpu
)
image_uri

'885854791233.dkr.ecr.us-east-1.amazonaws.com/sagemaker-distribution-prod:3.0.0-cpu'

## Configuration

In [None]:
# Pipeline settings
pipeline_name = "electronics-sales-pipeline"  # name the pipeline is upserted under
train_notebook = "fm_train.ipynb"             # notebook executed by every training step
kernel_name = "python3"                       # kernel used to run the notebook

# AWS settings - UPDATE THESE
bucket = "sm-mlflow-optuna"
region = "us-east-1"
# `image_uri` is resolved by the image lookup cell above and is used as-is;
# the former `image_uri = image_uri` self-assignment was a no-op and is removed.
notebook_artifacts = f"s3://{bucket}/electronics-training"  # S3 root for notebook-job artifacts
instance_type = "ml.m5.xlarge"


In [8]:
# Per-category training settings.
# Every top-level key is one product category, and each category is run as a
# separate pipeline step. The inner keys are forwarded to the training notebook
# as job parameters, so edit a single entry to tune just that category.
category_configs = {
    "smartphones": dict(
        n_trials=50,
        experiment_name="electronics-smartphones",
        test_size=0.25,
    ),
    "laptops": dict(
        n_trials=50,
        experiment_name="electronics-laptops",
        test_size=0.25,
    ),
    "tablets": dict(
        n_trials=50,
        experiment_name="electronics-tablets",
        test_size=0.25,
    ),
    "accessories": dict(
        n_trials=50,
        experiment_name="electronics-accessories",
        test_size=0.25,
    ),
}



## Define Pipeline Steps

In [9]:
# Build one NotebookJobStep per product category.
#
# The execution role is identical for every step, so it is looked up once here
# instead of once per loop iteration.
execution_role = sagemaker.get_execution_role()

pipeline_steps = []
for category, config in category_configs.items():
    # All notebook-job parameters are sent as strings: the category is encoded
    # as a one-element JSON-style list and every config value is stringified.
    nb_job_params = {
        "category": f'["{category}"]',
        **{k: str(v) for k, v in config.items()},
    }
    print(nb_job_params)

    # The same identifier is reused for the step name, job name and display name.
    step_name = f"electronics-train-{category}"

    nb_step = NotebookJobStep(
        name=step_name,
        description=f"XGBoost training for {category}",
        notebook_job_name=step_name,
        image_uri=image_uri,
        kernel_name=kernel_name,
        display_name=step_name,
        role=execution_role,
        s3_root_uri=notebook_artifacts,
        # NOTE(review): absolute path tied to the `sagemaker-user` home
        # directory; confirm it exists wherever this notebook is run.
        additional_dependencies=[
            "/home/sagemaker-user/sagemaker_mlflow_optuna/scripts"
        ],
        initialization_script="nb_job_init.sh",
        input_notebook=train_notebook,
        instance_type=instance_type,
        parameters=nb_job_params,
        max_runtime_in_seconds=3600,
        max_retry_attempts=2,
    )
    pipeline_steps.append(nb_step)



{'category': '["smartphones"]', 'n_trials': '50', 'experiment_name': 'electronics-smartphones', 'test_size': '0.25'}
{'category': '["laptops"]', 'n_trials': '50', 'experiment_name': 'electronics-laptops', 'test_size': '0.25'}
{'category': '["tablets"]', 'n_trials': '50', 'experiment_name': 'electronics-tablets', 'test_size': '0.25'}
{'category': '["accessories"]', 'n_trials': '50', 'experiment_name': 'electronics-accessories', 'test_size': '0.25'}


## SageMaker Pipeline Execution

In [10]:
# Resolve the IAM role and open a pipeline session for this run.
role = sagemaker.get_execution_role()
session = PipelineSession()

# Assemble the pipeline from the per-category steps collected in `pipeline_steps`.
pipeline = Pipeline(name=pipeline_name, steps=pipeline_steps, sagemaker_session=session)

# Register (create or update) the pipeline definition, then start an execution.
pipeline.upsert(role_arn=role)
execution = pipeline.start()

# Report which pipeline was started and the ARN of the new execution.
print(f"Pipeline: {pipeline_name}")
print(f"Execution: {execution.arn}")

Pipeline: electronics-sales-pipeline
Execution: arn:aws:sagemaker:us-east-1:376337229415:pipeline/electronics-sales-pipeline/execution/l82ihlepz6sh


In [11]:
# Monitor execution: fetch the current description of the execution started
# above. It is the cell's last expression, so the returned dict (status,
# timestamps, creator details) is rendered as the cell output.
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:376337229415:pipeline/electronics-sales-pipeline',
 'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:376337229415:pipeline/electronics-sales-pipeline/execution/l82ihlepz6sh',
 'PipelineExecutionDisplayName': 'execution-1767487393682',
 'PipelineExecutionStatus': 'Executing',
 'CreationTime': datetime.datetime(2026, 1, 4, 0, 43, 13, 612000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2026, 1, 4, 0, 43, 13, 612000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:376337229415:user-profile/d-ruitxitoqhuw/ryan',
  'UserProfileName': 'ryan',
  'DomainId': 'd-ruitxitoqhuw',
  'IamIdentity': {'Arn': 'arn:aws:sts::376337229415:assumed-role/fm-gambling-recommender-dev-role/SageMaker',
   'PrincipalId': 'AROAVPH3SMZTYVDXJKNCS:SageMaker'}},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:376337229415:user-profile/d-ruitxitoqhuw/ryan',
  'UserProfileName': 'ryan',
  'DomainId': 'd-ruit

In [12]:
# Wait for completion (optional).
# Each training step is allowed up to 3600 s and may be retried twice, so the
# polling budget is set explicitly rather than relying on the waiter's default
# (NOTE(review): believed to be delay=30 s x max_attempts=60, i.e. ~30 min --
# confirm against the installed SDK). 60 s x 200 attempts = 12,000 s, which
# covers 3 x 3600 s of step runtime plus start-up overhead.
execution.wait(delay=60, max_attempts=200)