In [None]:
# --- Configuration (Run after the standard SageMaker Setup block) ---
import sagemaker
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
import os

# --- 1. Define ECR Repository and Image Tag ---
# The account id is read from a freshly created SageMaker session.
account_id = sagemaker.Session().account_id()
ecr_repo_name = 'sagemaker-byoc-titanic-sklearn'
tag = ':latest'  # the leading colon is part of the value and is appended verbatim below
# Resulting URI shape: <account_id>.dkr.ecr.<region>.amazonaws.com/<repo_name>:<tag>
# NOTE(review): `region` is not defined in this cell; presumably it comes from
# the SageMaker setup block that must run first -- confirm.
full_ecr_uri = "{}.dkr.ecr.{}.amazonaws.com/{}{}".format(
    account_id, region, ecr_repo_name, tag
)

# --- 2. Build and Push Docker Image (Requires local Docker on your Mac mini) ---
# NOTE: This step is run in the SageMaker Studio Terminal or your local Mac terminal.
# If running locally, ensure you have AWS CLI configured and Docker running.

print("--- Step 2a: Log into ECR ---")
# Get the docker login command for ECR
!aws ecr get-login-password --region {region} | docker login --username AWS --password-stdin {account_id}.dkr.ecr.{region}.amazonaws.com

print("\n--- Step 2b: Build the image ---")
# Ensure the Dockerfile is built from the root of the repo
!docker build -t {ecr_repo_name} container/

print("\n--- Step 2c: Tag the image ---")
!docker tag {ecr_repo_name} {full_ecr_uri}

print("\n--- Step 2d: Push to ECR ---")
!docker push {full_ecr_uri}

# --- 3. Configure and Launch Training Job (Using the custom image URI) ---

# S3 location of the processed data.
# NOTE(review): `bucket`, `prefix` and `role` are not defined in this cell;
# presumably they come from the SageMaker setup block -- confirm.
INPUT_S3_URI = "s3://{}/{}/processed".format(bucket, prefix)

# Estimator that runs the BYOC image referenced by `full_ecr_uri`
custom_estimator = Estimator(
    base_job_name='byoc-titanic-sklearn',
    image_uri=full_ecr_uri,
    role=role,
    instance_type='ml.c5.xlarge',
    instance_count=1,
    hyperparameters=dict(n_estimators=250, max_depth=7),
)

# One input channel named "train", pointing at the processed-data prefix as CSV
inputs = dict(
    train=TrainingInput(
        s3_data=INPUT_S3_URI,
        s3_data_type='S3Prefix',
        content_type='text/csv',
        distribution='FullyReplicated',
    ),
)

# Start the job with wait=False, then report the name of the job that was created
print("\nLaunching BYOC training job in SageMaker...")
custom_estimator.fit(inputs, wait=False)
print("Training job launched: {}".format(custom_estimator.latest_training_job.job_name))