In [12]:
import boto3
import json
import numpy as np
import os
from PIL import Image
import sagemaker
import shutil
import tarfile
import torch
from torchvision import models, transforms
import urllib.request
from urllib.error import HTTPError
import zipfile

# S3 bucket used by the upload and deployment cells further down.
bucket = "aipi510-ml-deployment-bucket"

# High-level SageMaker session plus a low-level boto3 SageMaker client.
sess = sagemaker.Session()
sm_boto3 = boto3.client("sagemaker")

# Region name taken from the session's underlying boto session.
region = sess.boto_session.region_name

print("Using Bucket: " + bucket)

Using Bucket: aipi510-ml-deployment-bucket


### Download The Dataset

In [5]:
# Github URL where the dataset is stored for this tutorial
base_url = "https://raw.githubusercontent.com/phlippe/saved_models/main/tutorial10/"

# Local directory that holds the downloaded archive and its extracted contents
DATASET_PATH = "./data"
os.makedirs(DATASET_PATH, exist_ok=True)

file_name = "TinyImageNet.zip"
file_path = os.path.join(DATASET_PATH, file_name)
# Directory the archive is expected to unpack into (archive name without ".zip");
# the loading cell looks for the dataset at this location.
extracted_path = os.path.join(DATASET_PATH, os.path.splitext(file_name)[0])

# Download the archive only when it is not already on disk.
freshly_downloaded = False
if not os.path.isfile(file_path):
    file_url = base_url + file_name
    print(f"Downloading {file_url}...")
    try:
        urllib.request.urlretrieve(file_url, file_path)
        freshly_downloaded = True
    except HTTPError as e:
        print("Something went wrong. Please try to download the file from the GDrive folder, or contact the author with the full output including the following error:\n", e)

# Extract after a fresh download, or when an earlier run left the archive behind
# without its extracted directory. The isfile() check keeps a failed download
# from turning into a confusing "file not found" error inside zipfile.
needs_extract = freshly_downloaded or not os.path.isdir(extracted_path)
if file_name.endswith(".zip") and os.path.isfile(file_path) and needs_extract:
    print("Unzipping file...")
    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        # os.path.dirname is separator-aware, unlike splitting the path on "/"
        zip_ref.extractall(os.path.dirname(file_path))
    print("Unzip complete")

Downloading https://raw.githubusercontent.com/phlippe/saved_models/main/tutorial10/TinyImageNet.zip...
Unzipping file...
Unzip complete


### Load the Dataset and Label Names

In [6]:
# Locate the extracted TinyImageNet directory and fail early if it is missing
imagenet_path = os.path.join(DATASET_PATH, "TinyImageNet/")
assert os.path.isdir(imagenet_path), (
    f"Could not find the ImageNet dataset at expected path \"{imagenet_path}\". "
    f"Please make sure to have downloaded the ImageNet dataset here, or change the {DATASET_PATH=} variable."
)

# Label names translate a predicted class index into a human-readable name
label_list_file = os.path.join(imagenet_path, "label_list.json")
with open(label_list_file, "r") as label_file:
    label_names = json.load(label_file)

# Sub-directories of the dataset, sorted by name, used to look up pictures by label
folders = sorted(
    entry
    for entry in os.listdir(imagenet_path)
    if os.path.isdir(os.path.join(imagenet_path, entry))
)

### Load the Pretrained Model

In [None]:
# Load the ResNet-50 model pre-trained on ImageNet
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
model.eval()

# Define file paths
pth_path = './resnet50.pth'
tar_path = './model.tar.gz'

# Save the weights (state_dict only) unless a previous run already wrote them
if not os.path.exists(pth_path):
    print(f"Saving model to {pth_path}")
    torch.save(model.state_dict(), pth_path)
    print("Model saved as .pth file")

# Build the model archive unless a previous run already wrote it
if not os.path.exists(tar_path):
    print(f"Creating {tar_path}")
    with tarfile.open(tar_path, "w:gz") as tar:
        # arcname places the file at the archive root, so no temporary copy is needed
        tar.add(pth_path, arcname='resnet50.pth')

        # script.py's model_fn opens "label_list.json" from the model directory,
        # so ship the label list alongside the weights when it is available.
        label_list_path = os.path.join(imagenet_path, "label_list.json")
        if os.path.isfile(label_list_path):
            tar.add(label_list_path, arcname='label_list.json')
    print("Model saved as .tar.gz file")

### Create The Transform

In [8]:
# Per-channel statistics handed to Normalize
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing applied to an image before it is fed to the model, in this order
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
img_transform = transforms.Compose(preprocessing_steps)

### Get All Images For a Specific Label

In [9]:
def get_images(label_name):
    '''
    Gets a list of images in RGB format by label name from the TinyImageNet dataset.

    Uses the notebook-level names ``label_names``, ``folders`` and
    ``imagenet_path``; the folder at the label's index in ``folders`` is taken
    to hold that label's pictures.

    Inputs:
        label_name (str): the label for which to retrieve the images

    Return:
        images (list): the retrieved images converted to RGB, ordered by file
            name so that repeated runs return them in the same order

    Raises:
        ValueError: if ``label_name`` is not in ``label_names``
    '''
    # Position of the label in label_list.json selects the folder
    index = label_names.index(label_name)
    folder = os.path.join(imagenet_path, folders[index])

    # os.listdir returns entries in arbitrary order; sort for reproducibility
    image_names = sorted(
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
    )

    images = []
    for image_name in image_names:
        # convert() returns a new image, so the source file can be closed right away
        with Image.open(os.path.join(folder, image_name)) as img:
            images.append(img.convert('RGB'))

    return images

### Predict on an Image

In [10]:
# Take the first 'wallaby' picture and turn it into a model input;
# unsqueeze(0) adds the leading dimension the model call below is given.
images_rgb = get_images('wallaby')
img = images_rgb[0]
img_tensor = img_transform(img).unsqueeze(0)

# Forward pass without gradient tracking
with torch.no_grad():
    output = model(img_tensor)

# Softmax over the first output row, then pick the index of the largest probability
probabilities = torch.nn.functional.softmax(output[0], dim=0)
predicted_class = probabilities.max(0)[1]
predicted_index = predicted_class.item()
print(label_names[predicted_index])

wallaby


### Send Data to S3

In [None]:
# Key prefixes inside the bucket for the dataset and for the model artifacts
sm_data_prefix = "sagemaker/data"
sm_model_prefix = "sagemaker/model"

# (local path, key prefix) pairs, uploaded in this order
uploads = [
    ("./data", sm_data_prefix),
    ("model.tar.gz", sm_model_prefix),
    ("resnet50.pth", sm_model_prefix),
]

# `data` ends up holding the return value of the last upload
for local_path, key_prefix in uploads:
    data = sess.upload_data(
        path=local_path,
        bucket=bucket,
        key_prefix=key_prefix
    )

### Write The Script.py File to Deploy To SageMaker

In [None]:
%%writefile script.py

import json
import os
import torch
from PIL import Image
from torchvision import models, transforms

# File names the weights may be stored under inside the model directory: the
# notebook packages them as "resnet50.pth"; "model.pth" is the name this script
# originally looked for and is kept as a fallback.
_WEIGHT_FILE_NAMES = ("resnet50.pth", "model.pth")

# Location of the extracted TinyImageNet dataset when the script runs next to it.
_LOCAL_DATASET_DIR = os.path.join("./data", "TinyImageNet/")


def _first_existing_file(paths):
    """Return the first path in ``paths`` that is an existing file, else None."""
    return next((p for p in paths if os.path.isfile(p)), None)


def model_fn(model_dir):
    """Load the ResNet-50 weights and the label names.

    Inputs:
        model_dir (str): directory containing the weights file (and, ideally,
            ``label_list.json``). The path of the weights file itself is also
            accepted, in which case its parent directory is used for the
            label lookup.

    Return:
        tuple: ``(model, folders, label_names)`` where ``model`` is in eval
            mode, ``folders`` is the sorted list of dataset sub-directories
            (an empty list when the dataset directory is not present), and
            ``label_names`` is the list loaded from ``label_list.json``.

    Raises:
        FileNotFoundError: if no weights file or no ``label_list.json`` can be
            found.
    """
    # Resolve the weights file before building the network so a packaging
    # mistake fails fast with a readable message.
    if os.path.isfile(model_dir):
        weights_path = model_dir
        model_dir = os.path.dirname(model_dir) or "."
    else:
        weights_path = _first_existing_file(
            os.path.join(model_dir, name) for name in _WEIGHT_FILE_NAMES
        )
        if weights_path is None:
            raise FileNotFoundError(
                f"No weights file ({', '.join(_WEIGHT_FILE_NAMES)}) found in {model_dir!r}"
            )

    # Load model once and set to evaluation mode
    model = models.resnet50()
    # map_location keeps loading independent of the device the weights were saved from
    model.load_state_dict(torch.load(weights_path, map_location="cpu"))
    model.eval()

    # Label names: prefer the copy shipped with the model, else the local dataset copy
    label_path = _first_existing_file([
        os.path.join(model_dir, "label_list.json"),
        os.path.join(_LOCAL_DATASET_DIR, "label_list.json"),
    ])
    if label_path is None:
        raise FileNotFoundError(
            f"label_list.json not found in {model_dir!r} or {_LOCAL_DATASET_DIR!r}"
        )
    with open(label_path, "r") as f:
        label_names = json.load(f)

    # The dataset folders only exist when the script runs next to the dataset
    if os.path.isdir(_LOCAL_DATASET_DIR):
        folders = sorted(
            entry for entry in os.listdir(_LOCAL_DATASET_DIR)
            if os.path.isdir(os.path.join(_LOCAL_DATASET_DIR, entry))
        )
    else:
        folders = []

    # Return model, dataset folders and label names together
    return model, folders, label_names

def get_images(imagenet_path, folders, label_name, label_names=None):
    '''
    Gets a list of images in RGB format by label name from the TinyImageNet dataset.

    Inputs:
        imagenet_path (str): directory of the extracted dataset
        folders (list): dataset sub-directory names; the entry at the label's
            index is taken to hold that label's pictures
        label_name (str): the label for which to retrieve the images
        label_names (list, optional): list of label names; when omitted it is
            read from ``label_list.json`` inside ``imagenet_path``

    Return:
        images (list): the retrieved images converted to RGB, ordered by file name

    Raises:
        ValueError: if ``label_name`` is not in the label list
    '''
    # Do not depend on a module-level label list that only exists under __main__
    if label_names is None:
        with open(os.path.join(imagenet_path, "label_list.json"), "r") as f:
            label_names = json.load(f)

    # Position of the label in the label list selects the folder
    index = label_names.index(label_name)
    folder = os.path.join(imagenet_path, folders[index])

    # os.listdir returns entries in arbitrary order; sort for reproducibility
    image_names = sorted(
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
    )

    images = []
    for image_name in image_names:
        # convert() returns a new image, so the source file can be closed right away
        with Image.open(os.path.join(folder, image_name)) as img:
            images.append(img.convert('RGB'))

    return images

def get_transform():
    """Build the preprocessing pipeline applied to an image before inference.

    Return:
        transforms.Compose: resize, center crop, tensor conversion and
            per-channel normalization, in that order
    """
    steps = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(steps)

def predict_fn(input_data, model, label_names=None):
    """Run the model on ``input_data`` and return the predicted label name.

    Inputs:
        input_data: batch tensor passed straight to the model; only the first
            row of the output is used
        model: either the network itself (then ``label_names`` must be given),
            or the ``(model, folders, label_names)`` tuple returned by
            ``model_fn`` -- the form a two-argument ``predict_fn(data, model)``
            call receives
        label_names (list, optional): label names indexed by class id

    Return:
        the entry of ``label_names`` at the index of the highest probability

    Raises:
        TypeError: if ``label_names`` is omitted and ``model`` is not the
            3-item result of ``model_fn``
    """
    if label_names is None:
        # Two-argument call: unpack what model_fn returned
        if not isinstance(model, (tuple, list)) or len(model) != 3:
            raise TypeError(
                "predict_fn needs label_names, or the (model, folders, label_names) "
                "tuple returned by model_fn as its model argument"
            )
        model, _folders, label_names = model

    with torch.no_grad():
        output = model(input_data)
        probabilities = torch.nn.functional.softmax(output[0], dim=0)
        _, predicted_class = probabilities.max(0)

        # Return the label corresponding to the predicted class
        return label_names[predicted_class.item()]

if __name__ == "__main__":
    # Local smoke test: predict the label of one 'wallaby' picture from the dataset.

    # Where the extracted TinyImageNet dataset lives relative to this script
    DATASET_PATH = "./data"
    imagenet_path = os.path.join(DATASET_PATH, "TinyImageNet/")

    # Load the model together with the dataset folders and label names.
    # NOTE(review): the argument is the path of the weights file itself, not a
    # directory -- confirm model_fn accepts that.
    model, folders, label_names = model_fn("./resnet50.pth")

    # Pick the first image for the label and preprocess it into a batch of one
    images_rgb = get_images(imagenet_path=imagenet_path, folders=folders, label_name='wallaby')
    img = images_rgb[0]
    img_transform = get_transform()
    input_data = img_transform(img).unsqueeze(0)

    # Run prediction
    predicted_label = predict_fn(input_data, model, label_names)
    print("Predicted label:", predicted_label)


Writing script.py


In [33]:
from sagemaker.pytorch import PyTorchModel

# S3 location of the packaged model, derived from the same bucket and key prefix
# the upload cell wrote to, so the two cannot drift apart.
model_artifact = f"s3://{bucket}/{sm_model_prefix}/model.tar.gz"
# IAM role passed to the PyTorchModel constructor below
role = "arn:aws:iam::567126052638:role/service-role/AmazonSageMaker-ExecutionRole-20241001T175836"

# Create the SageMaker PyTorchModel
pytorch_model = PyTorchModel(
    model_data=model_artifact,
    role=role,
    entry_point="script.py",  # Your inference script
    framework_version="1.9",  # Update based on your PyTorch version
    py_version="py38"
)

In [34]:
# Deploy the model to an endpoint backed by a single ml.m5.large instance
endpoint_settings = {
    "initial_instance_count": 1,
    "instance_type": 'ml.m5.large',
}
predictor = pytorch_model.deploy(**endpoint_settings)

-------!

In [35]:
# Build the request payload the same way as the local prediction: first
# 'wallaby' image, preprocessed, with a leading batch dimension added.
images_rgb = get_images('wallaby')
img = images_rgb[0]
input_data = img_transform(img).unsqueeze(0)

# Send the tensor to the deployed endpoint and show whatever it returns
prediction = predictor.predict(input_data)
print(prediction)

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (0) from primary with message "Your invocation timed out while waiting for a response from container primary. Review the latency metrics for each container in Amazon CloudWatch, resolve the issue, and try again.". See https://us-east-2.console.aws.amazon.com/cloudwatch/home?region=us-east-2#logEventViewer:group=/aws/sagemaker/Endpoints/pytorch-inference-2024-11-01-19-38-07-465 in account 567126052638 for more information.

In [36]:
# Tear down the endpoint created by deploy() once it is no longer needed
predictor.delete_endpoint()