# 03 — PyTorch Experimentation
Local experiments using PyTorch.

In [None]:
import sagemaker
import boto3
import pandas as pd
import numpy as np

# --- 1. SageMaker Session & Role ---
# Key prefix used to group this project's objects in S3.
prefix = 'titanic-ml'

# The session object provides the default bucket and the boto region name;
# the execution role is fetched through a separate SDK call.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = sagemaker_session.default_bucket()
region = sagemaker_session.boto_region_name

# Echo the resolved settings, one per line.
print(
    f"SageMaker Role ARN: {role}",
    f"S3 Bucket: {bucket}",
    f"S3 Prefix: {prefix}",
    sep="\n",
)

In [None]:
# --- Configuration (run after the "SageMaker Session & Role" cell above, which defines role, bucket and prefix) ---
from sagemaker.pytorch import PyTorch
from sagemaker.inputs import TrainingInput

# Processed data from the previous notebook is expected under this S3 prefix.
INPUT_S3_URI = f"s3://{bucket}/{prefix}/processed"

# --- 1. Configure PyTorch Estimator ---
pytorch_estimator = PyTorch(
    base_job_name='sagemaker-titanic-pytorch',
    role=role,
    # Training script and the directory that contains it.
    source_dir='src',
    entry_point='model_pytorch.py',
    # PyTorch release and Python runtime to train with.
    framework_version='1.13',
    py_version='py39',
    # Compute: one instance; swap the type as needed
    # (e.g., ml.g4dn.xlarge for GPU).
    instance_type='ml.m5.large',
    instance_count=1,
    hyperparameters={
        'epochs': 20,
        'batch-size': 64,
        'learning-rate': 1e-4,
    },
)

# --- 2. Prepare Data Input Channel ---
# A single 'train' channel that reads CSV content from the processed prefix.
inputs = dict(
    train=TrainingInput(
        s3_data=INPUT_S3_URI,
        s3_data_type='S3Prefix',
        content_type='text/csv',
        distribution='FullyReplicated',
    ),
)

# --- 3. Launch Training Job ---
print("Launching PyTorch training job in SageMaker...")
# wait=False returns without blocking on the job; pass wait=True for
# synchronous execution.
pytorch_estimator.fit(inputs=inputs, wait=False)
print(f"Training job launched: {pytorch_estimator.latest_training_job.job_name}")