In [1]:
# Create new notebook: notebooks/02_sagemaker_deployment.ipynb
# Cell 1: Upload yesterday's model to S3

import boto3
import os
from datetime import datetime
import joblib
import json

# Initialize S3 client
s3 = boto3.client('s3')

# Create S3 bucket for SageMaker (if you didn't yesterday)
bucket_name = f"ml-learning-sagemaker-{datetime.now().strftime('%Y%m%d')}"
region = 'ap-southeast-2'

try:
    # Create bucket
    s3.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={'LocationConstraint': region}
    )
    print(f"✅ Created S3 bucket: {bucket_name}")
except Exception as e:
    if 'BucketAlreadyExists' in str(e):
        print(f"📁 Bucket {bucket_name} already exists")
    else:
        print(f"⚠️ Error creating bucket: {e}")

# Find your saved model from yesterday
model_files = [f for f in os.listdir('../models') if f.endswith('.pkl')]
if not model_files:
    print("❌ No model files found. Please complete Day 1 first.")
else:
    latest_model = max(model_files, key=lambda f: os.path.getctime(f'../models/{f}'))
    model_path = f'../models/{latest_model}'
    
    # Upload model to S3
    s3_model_key = f'models/{latest_model}'
    s3.upload_file(model_path, bucket_name, s3_model_key)
    
    # Upload metadata too
    metadata_file = latest_model.replace('.pkl', '_metadata.json')
    metadata_path = f'../models/{metadata_file}'
    if os.path.exists(metadata_path):
        s3_metadata_key = f'models/{metadata_file}'
        s3.upload_file(metadata_path, bucket_name, s3_metadata_key)
    
    print(f"✅ Uploaded model to s3://{bucket_name}/{s3_model_key}")
    
    # Save these for later use
    with open('../config.json', 'w') as f:
        json.dump({
            'bucket_name': bucket_name,
            'model_s3_key': s3_model_key,
            'region': region
        }, f)

✅ Created S3 bucket: ml-learning-sagemaker-20250922
✅ Uploaded model to s3://ml-learning-sagemaker-20250922/models/iris_classifier_20250922_141057.pkl


In [10]:
# Cell 2: Set up SageMaker IAM role

import boto3
import json

iam = boto3.client('iam')
role_name = 'SageMakerExecutionRole-MLLearning'

# Define the trust policy for SageMaker
trust_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "sagemaker.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}

# Define permissions policy
permissions_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:GetObject",
                "s3:PutObject",
                "s3:DeleteObject",
                "s3:ListBucket"
            ],
            "Resource": [
                f"arn:aws:s3:::{bucket_name}",
                f"arn:aws:s3:::{bucket_name}/*"
            ]
        },
        {
            "Effect": "Allow",
            "Action": [
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents",
                "cloudwatch:PutMetricData"
            ],
            "Resource": "*"
        }
    ]
}

try:
    # Create the role
    role_response = iam.create_role(
        RoleName=role_name,
        AssumeRolePolicyDocument=json.dumps(trust_policy),
        Description='SageMaker execution role for ML learning'
    )
    role_arn = role_response['Role']['Arn']
    print(f"✅ Created role: {role_arn}")
    
    # Create and attach policy
    policy_response = iam.create_policy(
        PolicyName='SageMakerMLLearningPolicy',
        PolicyDocument=json.dumps(permissions_policy)
    )
    
    iam.attach_role_policy(
        RoleName=role_name,
        PolicyArn=policy_response['Policy']['Arn']
    )
    print("✅ Attached permissions policy")
    
except Exception as e:
    if 'EntityAlreadyExists' in str(e):
        # Role already exists, get its ARN
        role_response = iam.get_role(RoleName=role_name)
        role_arn = role_response['Role']['Arn']
        print(f"📁 Using existing role: {role_arn}")
    else:
        print(f"⚠️ Error creating role: {e}")
        # Fallback to using SageMaker's default service role
        role_arn = f"arn:aws:iam::{boto3.client('sts').get_caller_identity()['Account']}:role/service-role/AmazonSageMaker-ExecutionRole-*"

# Save role ARN for later
config = json.load(open('../config.json'))
config['role_arn'] = role_arn
with open('../config.json', 'w') as f:
    json.dump(config, f)
    
print(f"🎯 SageMaker setup complete!")
print(f"   Bucket: {bucket_name}")
print(f"   Role: {role_arn}")

📁 Using existing role: arn:aws:iam::409633134924:role/SageMakerExecutionRole-MLLearning
🎯 SageMaker setup complete!
   Bucket: ml-learning-sagemaker-20250922
   Role: arn:aws:iam::409633134924:role/SageMakerExecutionRole-MLLearning


In [5]:
# Cell 3: Initialize SageMaker

import sagemaker
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.sklearn.model import SKLearnModel

# Load configuration
config = json.load(open('../config.json'))

# Create SageMaker session
sagemaker_session = sagemaker.Session(default_bucket=config['bucket_name'])

print(f"📋 SageMaker Session Information:")
print(f"   Default bucket: {sagemaker_session.default_bucket()}")
print(f"   Region: {sagemaker_session.boto_region_name}")
print(f"   Role ARN: {config['role_arn']}")

# Test SageMaker access
try:
    # List any existing models
    sm_client = boto3.client('sagemaker')
    models = sm_client.list_models(MaxResults=5)
    print(f"✅ SageMaker access confirmed")
    print(f"   Found {len(models['Models'])} existing models in account")
except Exception as e:
    print(f"⚠️ SageMaker access issue: {e}")

print("\n🚀 Ready to deploy your first model to SageMaker!")

sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/jean/Library/Application Support/sagemaker/config.yaml
📋 SageMaker Session Information:
   Default bucket: ml-learning-sagemaker-20250922
   Region: ap-southeast-2
   Role ARN: arn:aws:iam::409633134924:role/SageMakerExecutionRole-MLLearning
✅ SageMaker access confirmed
   Found 0 existing models in account

🚀 Ready to deploy your first model to SageMaker!


In [6]:
# Cell 4: Create model inference script

# SageMaker requires specific entry point scripts
# Create the script that SageMaker will use to load and run your model

inference_script = '''
import joblib
import json
import numpy as np
import pandas as pd
from io import StringIO

def model_fn(model_dir):
    """Load the model from the model_dir. This is called once per worker."""
    import os
    model_path = os.path.join(model_dir, 'iris_model.pkl')
    model = joblib.load(model_path)
    return model

def input_fn(request_body, request_content_type):
    """Parse input data for inference."""
    if request_content_type == 'application/json':
        # Parse JSON input
        input_data = json.loads(request_body)
        
        # Handle different input formats
        if isinstance(input_data, list):
            # Direct list of features: [5.1, 3.5, 1.4, 0.2]
            return np.array([input_data])
        elif isinstance(input_data, dict):
            if 'instances' in input_data:
                # Batch format: {"instances": [[5.1, 3.5, 1.4, 0.2], [...]]}
                return np.array(input_data['instances'])
            else:
                # Named features: {"sepal_length": 5.1, "sepal_width": 3.5, ...}
                features = [
                    input_data.get('sepal_length', 0),
                    input_data.get('sepal_width', 0),
                    input_data.get('petal_length', 0),
                    input_data.get('petal_width', 0)
                ]
                return np.array([features])
    
    elif request_content_type == 'text/csv':
        # Parse CSV input
        df = pd.read_csv(StringIO(request_body), header=None)
        return df.values
    
    else:
        raise ValueError(f"Unsupported content type: {request_content_type}")

def predict_fn(input_data, model):
    """Make prediction using the loaded model."""
    predictions = model.predict(input_data)
    probabilities = model.predict_proba(input_data)
    
    # Return both prediction and confidence
    results = []
    class_names = ['setosa', 'versicolor', 'virginica']
    
    for i, pred in enumerate(predictions):
        results.append({
            'predicted_class': class_names[pred],
            'predicted_class_id': int(pred),
            'confidence': float(max(probabilities[i])),
            'probabilities': {
                class_names[j]: float(probabilities[i][j]) 
                for j in range(len(class_names))
            }
        })
    
    return results

def output_fn(prediction, content_type):
    """Format the prediction output."""
    if content_type == 'application/json':
        return json.dumps(prediction), content_type
    else:
        raise ValueError(f"Unsupported content type: {content_type}")
'''

# Save the inference script
os.makedirs('../sagemaker_code', exist_ok=True)
with open('../sagemaker_code/inference.py', 'w') as f:
    f.write(inference_script)

print("✅ Created SageMaker inference script")
print("📄 Script includes:")
print("   - model_fn: Loads your trained model")
print("   - input_fn: Parses incoming requests")
print("   - predict_fn: Makes predictions")
print("   - output_fn: Formats responses")

✅ Created SageMaker inference script
📄 Script includes:
   - model_fn: Loads your trained model
   - input_fn: Parses incoming requests
   - predict_fn: Makes predictions
   - output_fn: Formats responses


In [7]:
# Cell to create iris_processed.csv
from sklearn.datasets import load_iris
import pandas as pd
import os

print("📊 Creating iris dataset...")

# Load iris dataset from sklearn
iris = load_iris()

# Create DataFrame with features
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Rename columns to simpler names
df.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

# Add target and species names
df['target'] = iris.target
df['species'] = df['target'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})

# Create data directory if it doesn't exist
os.makedirs('data', exist_ok=True)

# Save the processed data
df.to_csv('data/iris_processed.csv', index=False)

print("✅ Created data/iris_processed.csv")
print(f"📁 Shape: {df.shape}")
print(f"\nFirst few rows:")
print(df.head())
print(f"\nTarget distribution:")
print(df['target'].value_counts().sort_index())

📊 Creating iris dataset...
✅ Created data/iris_processed.csv
📁 Shape: (150, 6)

First few rows:
   sepal_length  sepal_width  petal_length  petal_width  target species
0           5.1          3.5           1.4          0.2       0  setosa
1           4.9          3.0           1.4          0.2       0  setosa
2           4.7          3.2           1.3          0.2       0  setosa
3           4.6          3.1           1.5          0.2       0  setosa
4           5.0          3.6           1.4          0.2       0  setosa

Target distribution:
target
0    50
1    50
2    50
Name: count, dtype: int64


In [None]:
# Combined training and deployment in SageMaker
from sagemaker.sklearn.estimator import SKLearn
from datetime import datetime
import json
import numpy as np
import pandas as pd
import boto3
import os

# Load configuration
config = json.load(open('../config.json'))

print("🚀 Training and deploying directly in SageMaker...")

# Prepare training data in S3
train_path = f"s3://{config['bucket_name']}/data/train/iris.csv"

# Upload your training data if not already there
df = pd.read_csv('../data/iris_processed.csv')
df.to_csv('/tmp/iris.csv', index=False)
s3 = boto3.client('s3')
s3.upload_file('/tmp/iris.csv', config['bucket_name'], 'data/train/iris.csv')

# Create training script
os.makedirs('../source', exist_ok=True)
with open('../source/train.py', 'w') as f:
    f.write('''
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import os

if __name__ == '__main__':
    # Read data
    input_path = '/opt/ml/input/data/train'
    df = pd.read_csv(os.path.join(input_path, 'iris.csv'))
    
    # Prepare features
    X = df.drop(['target'], axis=1, errors='ignore')
    y = df['target']
    
    # Train
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
    model.fit(X_train, y_train)
    
    # Save model
    model_path = '/opt/ml/model'
    joblib.dump(model, os.path.join(model_path, 'model.joblib'))
''')

# Train in SageMaker
sklearn_estimator = SKLearn(
    entry_point='train.py',
    source_dir='../source',
    framework_version='1.2-1',
    instance_type='ml.m5.xlarge',
    role=config['role_arn'],
    sagemaker_session=sagemaker_session
)

sklearn_estimator.fit({'train': train_path})

# Deploy
endpoint_name = f'iris-classifier-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
predictor = sklearn_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.large',
    endpoint_name=endpoint_name
)

print(f"✅ Endpoint deployed: {endpoint_name}")

# Save config
config['endpoint_name'] = endpoint_name
with open('../config.json', 'w') as f:
    json.dump(config, f)

In [None]:
# Cell 7: Test the deployed endpoint
import numpy as np
import joblib

# Test data - different iris flower measurements
test_cases = [
    {
        "name": "Typical Setosa",
        "features": [5.1, 3.5, 1.4, 0.2],
        "expected": "setosa"
    },
    {
        "name": "Typical Versicolor", 
        "features": [6.0, 2.8, 4.5, 1.3],
        "expected": "versicolor"
    },
    {
        "name": "Typical Virginica",
        "features": [7.2, 3.0, 5.8, 2.3],
        "expected": "virginica"
    },
    {
        "name": "Edge case",
        "features": [5.8, 2.7, 4.1, 1.0],
        "expected": "versicolor or virginica"
    }
]

print("🧪 Testing deployed model with various inputs...")
print("=" * 60)

if 'endpoint_name' in config:
    for test_case in test_cases:
        try:
            # Make prediction
            result = predictor.predict(test_case["features"])
            
            print(f"\n🌸 Test: {test_case['name']}")
            print(f"   Input: {test_case['features']}")
            print(f"   Expected: {test_case['expected']}")
            print(f"   Predicted: {result[0]['predicted_class']}")
            print(f"   Confidence: {result[0]['confidence']:.2%}")
            
            # Show all probabilities
            print("   Probabilities:")
            for species, prob in result[0]['probabilities'].items():
                print(f"     {species}: {prob:.2%}")
                
        except Exception as e:
            print(f"❌ Test failed: {e}")
else:
    print("⚠️ No endpoint available. Testing with local model instead...")
    # Fallback to local testing
    # Find the latest model file
    import os
    import glob
    model_files = glob.glob('../models/*.pkl')
    if model_files:
        latest_model = max(model_files, key=os.path.getctime)
        local_model = joblib.load(latest_model)
        class_names = ['setosa', 'versicolor', 'virginica']
        
        for test_case in test_cases:
            features = np.array([test_case['features']])
            prediction = local_model.predict(features)[0]
            probabilities = local_model.predict_proba(features)[0]
            
            print(f"\n🌸 Test: {test_case['name']}")
            print(f"   Input: {test_case['features']}")
            print(f"   Expected: {test_case['expected']}")
            print(f"   Predicted: {class_names[prediction]}")
            print(f"   Confidence: {max(probabilities):.2%}")
    else:
        print("❌ No local model files found either.")

print("\n✅ Model testing completed!")