# Deploy Hugging Face Model to AWS SageMaker

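This notebook demonstrates how to deploy a Hugging Face model to AWS SageMaker: it downloads the model and tokenizer, packages them with a custom inference script into a `model.tar.gz` archive, uploads the archive to S3, deploys it to a real-time endpoint, sends a test request, and shows how to delete the endpoint afterwards.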

In [None]:
# Install required packages
!pip install sagemaker transformers torch boto3

In [None]:
# Import libraries
import os
import tempfile
import shutil
import tarfile
import sagemaker
import boto3
from sagemaker import get_execution_role
from sagemaker.huggingface import HuggingFaceModel
from transformers import AutoTokenizer, AutoModel

In [None]:
# Configuration
# Everything a reader is expected to tune lives in this cell.

# Hugging Face Hub model to deploy, and the endpoint instance size.
MODEL_ID = "microsoft/DialoGPT-medium"  # Change to your desired model
INSTANCE_TYPE = "ml.m5.large"  # Adjust based on your needs

# Framework versions passed to HuggingFaceModel when the endpoint is created.
TRANSFORMERS_VERSION = "4.26.0"
PYTORCH_VERSION = "1.13.1"
PY_VERSION = "py39"

print(
    f"Model ID: {MODEL_ID}",
    f"Instance Type: {INSTANCE_TYPE}",
    sep="\n",
)

In [None]:
# Get SageMaker role and session
# First try the execution role attached to the current SageMaker environment;
# if that lookup fails, fall back to the SAGEMAKER_ROLE environment variable.
try:
    role = get_execution_role()
    print(f"SageMaker execution role: {role}")
except Exception as e:  # broad on purpose: any lookup failure triggers the fallback
    print(f"Error getting execution role: {e}")
    # If not in SageMaker, use environment variable
    role = os.environ.get('SAGEMAKER_ROLE')
    if not role:
        # Only show the remediation hint when the fallback is actually missing.
        print("Please set SAGEMAKER_ROLE environment variable or ensure you're in a SageMaker instance")
        # Chain the original failure so the traceback shows why the lookup failed.
        raise ValueError("Please set SAGEMAKER_ROLE environment variable") from e
    print(f"Using role from environment: {role}")

# Session used by the upload cell, plus the bucket and region it resolves to.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
region = sagemaker_session.boto_region_name

print(f"AWS Region: {region}")
print(f"Default S3 bucket: {bucket}")

In [None]:
# Create inference script
# The string below is the full source of inference.py. It is not executed in
# this notebook; it is written to disk by the packaging cell and defines the
# four handler functions (model_fn, input_fn, predict_fn, output_fn).
inference_code = '''
import json
import torch
from transformers import AutoTokenizer, AutoModel

def model_fn(model_dir):
    \"\"\"Load the model and tokenizer stored in model_dir.\"\"\"
    model = AutoModel.from_pretrained(model_dir)
    tokenizer = AutoTokenizer.from_pretrained(model_dir, padding_side='left')
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return {"model": model, "tokenizer": tokenizer}

def input_fn(request_body, request_content_type):
    \"\"\"Parse the request body.

    JSON bodies may be an object with an "inputs" key, or a bare string or
    list; any other content type is passed through unchanged.
    \"\"\"
    if request_content_type == 'application/json':
        input_data = json.loads(request_body)
        # Only dicts have .get(); bare strings / lists are already the inputs.
        if isinstance(input_data, dict):
            return input_data.get('inputs', input_data)
        return input_data
    return request_body

def predict_fn(input_data, model_artifacts):
    \"\"\"Return one mean-pooled hidden-state vector per input text.\"\"\"
    model = model_artifacts["model"]
    tokenizer = model_artifacts["tokenizer"]

    if isinstance(input_data, str):
        input_data = [input_data]

    inputs = tokenizer(input_data, return_tensors="pt", padding=True, truncation=True, max_length=512)

    with torch.no_grad():
        outputs = model(**inputs)
        hidden = outputs.last_hidden_state
        # Average over real tokens only: padding positions are zeroed out via
        # the attention mask so a text's vector does not depend on how much
        # padding the rest of the batch forced onto it.
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1)  # avoid division by zero
        predictions = ((hidden * mask).sum(dim=1) / token_counts).tolist()

    return predictions

def output_fn(prediction, content_type):
    \"\"\"Serialize predictions as a JSON object.\"\"\"
    return json.dumps({"predictions": prediction})
'''

print("Inference script created!")

In [None]:
# Load and prepare model
# Both objects are fetched by MODEL_ID and saved to disk by the packaging cell.
print("Loading model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, padding_side='left')
model = AutoModel.from_pretrained(MODEL_ID)

# Reuse the end-of-sequence token for padding when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Model loaded successfully!")

In [None]:
# Package model for SageMaker
# Layout produced under temp_dir:
#   model/                  <- tokenizer + model files (root of the archive)
#   model/code/inference.py <- custom inference handlers
temp_dir = tempfile.mkdtemp()
model_dir = os.path.join(temp_dir, "model")
code_dir = os.path.join(model_dir, "code")
os.makedirs(code_dir, exist_ok=True)  # also creates model_dir

print("Saving model artifacts...")
tokenizer.save_pretrained(model_dir)
model.save_pretrained(model_dir)

# Save inference script
# The Hugging Face inference toolkit looks for a custom handler at
# code/inference.py inside the model archive, so the script must live in the
# code/ subdirectory rather than at the archive root.
with open(os.path.join(code_dir, "inference.py"), 'w', encoding="utf-8") as f:
    f.write(inference_code)

print("Model artifacts prepared!")

In [None]:
# Create tar.gz archive and upload to S3
# The archive is written next to (not inside) model_dir so it does not end up
# containing itself.
tar_path = os.path.join(temp_dir, "model.tar.gz")

print("Creating model archive...")
with tarfile.open(tar_path, "w:gz") as tar:
    # arcname="." places the contents of model_dir at the root of the archive.
    tar.add(model_dir, arcname=".")

print("Uploading model to S3...")
# Build the key prefix once so the upload location and the recorded key
# cannot drift apart ('/' in the model id is not wanted inside the prefix).
model_s3_prefix = f"model-artifacts/{MODEL_ID.replace('/', '-')}"
model_s3_key = f"{model_s3_prefix}/model.tar.gz"

# upload_data returns the S3 URI of the uploaded file; use it directly instead
# of reconstructing the URI by hand.
model_s3_path = sagemaker_session.upload_data(
    path=tar_path,
    bucket=bucket,
    key_prefix=model_s3_prefix
)
print(f"Model uploaded to: {model_s3_path}")

# Cleanup temporary files
# After this point temp_dir is gone: re-run the packaging cell before
# re-running this one.
shutil.rmtree(temp_dir, ignore_errors=True)

In [None]:
# Create and deploy HuggingFace model
# The model object ties together the uploaded archive, the IAM role and the
# framework versions chosen in the configuration cell.
# NOTE(review): entry_point is given as a bare file name — confirm the SDK can
# resolve "inference.py" at deploy time.
print("Creating SageMaker model...")
huggingface_model = HuggingFaceModel(
    entry_point="inference.py",
    model_data=model_s3_path,
    role=role,
    py_version=PY_VERSION,
    pytorch_version=PYTORCH_VERSION,
    transformers_version=TRANSFORMERS_VERSION,
)

print(f"Deploying to SageMaker endpoint (instance: {INSTANCE_TYPE})...")
predictor = huggingface_model.deploy(
    instance_type=INSTANCE_TYPE,
    initial_instance_count=1,
)

print("\n✅ Deployment successful!")
print(f"Endpoint name: {predictor.endpoint_name}")

In [None]:
# Test the endpoint
# Send a single sentence wrapped in the {"inputs": ...} envelope and print
# whatever the endpoint returns.
test_input = "Hello, how are you?"
print(f"Testing endpoint with input: '{test_input}'")

payload = {"inputs": test_input}
response = predictor.predict(payload)
print(f"Response: {response}")

print("\n✅ Model deployed and tested successfully!")

In [None]:
# Cleanup function
def cleanup_endpoint(endpoint_name=None):
    """Request deletion of a SageMaker endpoint.

    Args:
        endpoint_name: Name of the endpoint to delete. When None, the name is
            taken from the notebook-level ``predictor``.

    Only ``delete_endpoint`` is called; no other resource is removed here.
    """
    target = predictor.endpoint_name if endpoint_name is None else endpoint_name

    sm_client = boto3.client('sagemaker')
    print(f"Deleting endpoint: {target}")
    sm_client.delete_endpoint(EndpointName=target)
    print(f"Endpoint {target} deletion initiated.")

# Uncomment the line below to delete the endpoint
# cleanup_endpoint()

# Print a reminder together with the exact call that removes this endpoint.
print(
    "\n⚠️  Remember to delete the endpoint when done to avoid charges:",
    f"cleanup_endpoint('{predictor.endpoint_name}')",
    sep="\n",
)