# FM Optuna Pipeline - Multi-Brand

Build and run a SageMaker pipeline that performs Factorization Machines (FM) hyperparameter optimization, with one notebook-job step per gambling brand.

In [21]:
import os
import sagemaker
from sagemaker.workflow.notebook_job_step import NotebookJobStep
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_context import LocalPipelineSession, PipelineSession

In [22]:
def define_brand_pipeline_steps(
    brand_configs: dict,
    image_uri: str,
    notebook_artifacts: str,
    input_notebook_name: str,
    kernel_name: str = "python3",
    instance_type: str = "ml.m5.xlarge",
    role: str = None,
    max_runtime_in_seconds: int = 7200,
    max_retry_attempts: int = 2,
) -> list:
    """
    Build one ``NotebookJobStep`` per brand.

    Args:
        brand_configs: Mapping of brand name -> dict of notebook parameters.
            Every parameter value is converted with ``str()`` before being
            handed to the step. A ``None`` config is treated as empty.
        image_uri: Image passed to each step as ``image_uri``.
        notebook_artifacts: Passed to each step as ``s3_root_uri``.
        input_notebook_name: Notebook passed to each step as ``input_notebook``.
        kernel_name: Kernel name passed to each step.
        instance_type: Instance type passed to each step.
        role: Role passed to each step. When ``None`` it is resolved with
            ``sagemaker.get_execution_role()``.
        max_runtime_in_seconds: Per-step runtime limit (default 7200 = 2 hours).
        max_retry_attempts: Per-step retry limit (default 2).

    Returns:
        A list of ``NotebookJobStep`` objects, one per entry of
        ``brand_configs`` in iteration order. Each step is named
        ``fm-train-<brand>``.
    """
    if role is None:
        role = sagemaker.get_execution_role()

    pipeline_steps = []
    for brand, config in brand_configs.items():
        # The mapping key is the authoritative brand: it is applied last so a
        # stray "brand" entry inside the config cannot make the notebook
        # parameter disagree with the step name.
        nb_job_params = {
            **{k: str(v) for k, v in (config or {}).items()},
            "brand": brand,
        }

        step_name = f"fm-train-{brand}"

        nb_step = NotebookJobStep(
            name=step_name,
            description=f"FM training for {brand}",
            notebook_job_name=step_name,
            image_uri=image_uri,
            kernel_name=kernel_name,
            display_name=step_name,
            role=role,
            s3_root_uri=notebook_artifacts,
            input_notebook=input_notebook_name,
            instance_type=instance_type,
            parameters=nb_job_params,
            max_runtime_in_seconds=max_runtime_in_seconds,
            max_retry_attempts=max_retry_attempts,
        )
        pipeline_steps.append(nb_step)

    return pipeline_steps

## Configuration

In [23]:
# --- AWS location -----------------------------------------------------------
region = "us-east-1"
bucket = "fm-gambling-recommender-dev-376337229415"

# --- Pipeline identity and inputs -------------------------------------------
pipeline_name = "fm-brand-pipeline"
train_notebook = "fm_train.ipynb"
instance_type = "ml.m5.xlarge"

# --- Values derived from the settings above ---------------------------------
notebook_artifacts = f"s3://{bucket}/fm-training"
image_uri = f"arn:aws:sagemaker:{region}:885854791233:image/sagemaker-distribution-cpu"

# --- MLflow -----------------------------------------------------------------
# Exported through the process environment rather than kept as a notebook variable.
os.environ.update(
    {
        "MLFLOW_TRACKING_URI": "arn:aws:sagemaker:us-east-1:376337229415:mlflow-tracking-server/fm-gambling-recommender-dev-mlflow",
    }
)

In [None]:
# MLflow server details, exported through the process environment.
#
# NOTE(review): this cell was originally headed "Brand-specific training
# configurations" (data size static at 500 users / 50 games, only training
# params vary per brand), but no brand configuration is defined here; the cell
# only sets MLflow environment variables.
# NOTE(review): MLFLOW_TRACKING_URI is also set in the "Pipeline settings"
# cell to a different ARN (mlflow-tracking-server/...). The assignment below
# overwrites it; confirm which ARN is intended and remove the other.
os.environ.update(
    {
        "MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING": "true",
        "MLFLOW_TRACKING_URI": "arn:aws:sagemaker:us-east-1:376337229415:mlflow-app/app-JZITH5VWKAWZ",
    }
)



## Define Pipeline Steps

In [24]:
# Per-brand notebook parameters. Each top-level key yields one pipeline step
# named "fm-train-<key>"; the inner values are stringified and handed to the
# training notebook as parameters.
#
# NOTE(review): an earlier revision of this cell also built a shared `params`
# dict from `max_trials`, `experiment_name` and `early_stopping`. Those names
# are not defined anywhere in this notebook and `define_brand_pipeline_steps`
# accepts no such keyword arguments, so the cell failed on a fresh kernel.
# Shared defaults, if still wanted, belong inside each entry below.
config_dict = {
    "small": {
        "n_users": "1000",
        "n_games": "50",
        "n_days": "90",
    },
    "luckyspin": {
        "max_trials": 8,
        "early_stopping": 3,
        "num_factors_min": 8,
        "num_factors_max": 32,
        "epochs_min": 5,
        "epochs_max": 20,
        "experiment_name": "fm_luckyspin",
    },
    "royalbet": {
        "max_trials": 12,
        "early_stopping": 4,
        "num_factors_min": 32,
        "num_factors_max": 128,
        "epochs_min": 15,
        "epochs_max": 40,
        "experiment_name": "fm_royalbet",
    },
}

# Keyword arguments keep the call aligned with the function signature.
# `kernel_name` is left at the function's default ("python3") because no
# kernel name is configured in this notebook.
pipeline_steps = define_brand_pipeline_steps(
    brand_configs=config_dict,
    image_uri=image_uri,
    notebook_artifacts=notebook_artifacts,
    input_notebook_name=train_notebook,
    instance_type=instance_type,
)

pipeline_steps

[<sagemaker.workflow.notebook_job_step.NotebookJobStep at 0x7fc158394bc0>,
 <sagemaker.workflow.notebook_job_step.NotebookJobStep at 0x7fc151ae3b60>,
 <sagemaker.workflow.notebook_job_step.NotebookJobStep at 0x7fc151ae29c0>]

## Local Mode Execution (Testing)

Use local mode to test the pipeline before running on SageMaker.

In [20]:

# Build the pipeline against a local session, register it, and start a run.
session = LocalPipelineSession()
role = sagemaker.get_execution_role()

pipeline = Pipeline(
    name=pipeline_name,
    steps=pipeline_steps,
    sagemaker_session=session,
)
pipeline.create(role_arn=role)

execution = pipeline.start()

INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.local.entities:Starting execution for pipeline fm-optuna-pipeline. Execution ID is 74b37890-6f17-4dd7-bcd6-aa53f1e5ca70
INFO:sagemaker.local.entities:Starting pipeline step: 'fm-train-large'
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#

## SageMaker Pipeline Execution

In [None]:
# Create or update the pipeline definition in SageMaker, then launch a run.
role = sagemaker.get_execution_role()
session = PipelineSession()
pipeline = Pipeline(name=pipeline_name, steps=pipeline_steps, sagemaker_session=session)

pipeline.upsert(role_arn=role)
execution = pipeline.start()

# Report which pipeline was started and the ARN of the resulting execution.
print(f"Pipeline: {pipeline_name}")
print(f"Execution: {execution.arn}")

In [None]:
# Monitor execution: display the description of the most recently started
# `execution` (whichever of the run cells above was executed last).
execution.describe()

In [None]:
# Wait for completion (optional)
# execution.wait()