In [2]:
import numpy as np
import pandas as pd
import requests
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import sagemaker
# One shared session object is used for every SageMaker call in this notebook.
sagemaker_session = sagemaker.Session()
# Hand the existing session to get_execution_role so it does not construct a
# second Session internally just to look up the role.
role = sagemaker.get_execution_role(sagemaker_session=sagemaker_session)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [4]:
import boto3
import io
import pandas as pd

s3 = boto3.resource('s3')

# Resolve the bucket and region from the active SageMaker session so the
# notebook does not depend on an account-specific bucket name.
bucket = sagemaker_session.default_bucket()
print("Default Bucket: ",bucket)

# `bucket_name` used to be a hard-coded literal; it is kept as an alias of the
# session bucket so the name stays defined for any code that refers to it.
bucket_name = bucket

region = sagemaker_session.boto_region_name
print("AWS Region: ",region)

print("RoleArn: ",role)

Default Bucket:  sagemaker-us-east-1-529165531209
AWS Region:  us-east-1
RoleArn:  arn:aws:iam::529165531209:role/SagemakerFullAccess


In [5]:
# Look up the container image URI of the built-in 'image-classification'
# algorithm for the current region.
image_classification_model_image = sagemaker.image_uris.retrieve(
    framework='image-classification',
    region=region,
    version='latest',
)

# S3 location later passed to the estimator as its output_path.
s3_path = f"s3://{bucket}/isic_classification_model"

# Display the resolved image URI as the cell output.
image_classification_model_image

Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: latest.


'811284229777.dkr.ecr.us-east-1.amazonaws.com/image-classification:1'

In [6]:
# Estimator describing the training job: which container to run, under which
# role and session, on what hardware, and where the output is written.
image_classification_model = sagemaker.estimator.Estimator(
    # What to run and who runs it
    image_uri=image_classification_model_image,
    role=role,
    sagemaker_session=sagemaker_session,
    # Training hardware
    instance_type="ml.g4dn.xlarge",
    instance_count=1,
    volume_size=50,
    # Job limits and data handling
    # NOTE(review): max_run is presumably in seconds (360000 s = 100 h) - confirm this cap is intended
    max_run=360000,
    input_mode="File",
    output_path=s3_path,
)

In [7]:
# Hyperparameters for the built-in image-classification algorithm, collected
# in one mapping and then applied to the estimator in a single call.
image_classification_hyperparameters = dict(
    # Data description
    image_shape='3,224,224',
    num_classes=8,
    # NOTE(review): presumably must equal the number of images in the train channel - verify
    num_training_samples=1336,
    # NOTE(review): multi-label mode is switched on, while the notebook later
    # reads the prediction as the single highest-probability class - confirm this is intended
    multi_label=1,
    # Network and optimisation settings
    num_layers=18,
    use_pretrained_model=1,
    learning_rate=0.001,
    epochs=5,
)

image_classification_model.set_hyperparameters(**image_classification_hyperparameters)

In [14]:
from sagemaker.debugger import Rule, rule_configs
from sagemaker.session import TrainingInput
# Channel name -> location of that channel's data under s3://<bucket>/isic/.
# The image folders and their .lst listing files are separate channels.
channel_locations = {
    "train": "train/",
    "validation": "validation/",
    "train_lst": "train.lst",
    "validation_lst": "validation.lst",
}

# Every channel uses the same content type, so build the inputs in one pass.
model_inputs = {
    channel: sagemaker.inputs.TrainingInput(
        s3_data=f"s3://{bucket}/isic/{location}",
        content_type="application/x-image",
    )
    for channel, location in channel_locations.items()
}

In [15]:
# Start the training job using the channels defined in model_inputs; the
# job's progress log is printed below this cell.
image_classification_model.fit(model_inputs)

INFO:sagemaker:Creating training-job with name: image-classification-2024-03-18-02-03-12-104


2024-03-18 02:03:12 Starting - Starting the training job...
2024-03-18 02:03:27 Starting - Preparing the instances for training...
2024-03-18 02:03:59 Downloading - Downloading input data...
2024-03-18 02:04:24 Downloading - Downloading the training image..................
2024-03-18 02:07:34 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34mNvidia gpu devices, drivers and cuda toolkit versions (only available on hosts with GPU):[0m
[34mMon Mar 18 02:07:45 2024       [0m
[34m+---------------------------------------------------------------------------------------+[0m
[34m| NVIDIA-SMI 535.104.12             Driver Version: 535.104.12   CUDA Version: 12.2     |[0m
[34m|-----------------------------------------+----------------------+----------------------+[0m
[34m| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Unco

In [16]:
from sagemaker.serializers import IdentitySerializer

# Deploy the trained model to an endpoint. The identity serializer sends the
# request payload as-is, labelled with the given content type.
deployment = image_classification_model.deploy(
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
    serializer=IdentitySerializer(content_type="application/x-image"),
)

# Keep the endpoint name; later cells use it to call the endpoint.
endpoint = deployment.endpoint_name
print(endpoint)

INFO:sagemaker:Creating model with name: image-classification-2024-03-18-02-09-11-583
INFO:sagemaker:Creating endpoint-config with name image-classification-2024-03-18-02-09-11-583
INFO:sagemaker:Creating endpoint with name image-classification-2024-03-18-02-09-11-583


--------!image-classification-2024-03-18-02-09-11-583


The cell below randomly selects an object from the test folder in S3 and downloads it to a local `tmp` folder so that it can be used for making predictions.

In [17]:
import os
import boto3
import random

# Make sure the local scratch folder exists (no-op when it is already there).
os.makedirs('tmp', exist_ok=True)

# The test images live under this prefix of `bucket`. `bucket` is the session
# default bucket resolved earlier in the notebook, so it is deliberately not
# overwritten here with an account-specific name.
test_prefix = "isic/test"

# Initialize S3 client
s3_client = boto3.client('s3')

# A single list call returns at most 1000 keys, so page through the whole
# listing. Keys ending in '/' are zero-byte "folder" placeholders, not images:
# skipping them avoids picking one and ending up with the local path 'tmp/'.
paginator = s3_client.get_paginator('list_objects_v2')
test_keys = [
    entry['Key']
    for page in paginator.paginate(Bucket=bucket, Prefix=test_prefix)
    for entry in page.get('Contents', [])
    if not entry['Key'].endswith('/')
]

if test_keys:
    # Select a random object key
    obj_key = random.choice(test_keys)
    print('File selected:', obj_key)

    # Download the selected file into 'tmp' under its own file name
    local_file_path = os.path.join('tmp', os.path.basename(obj_key))
    s3_client.download_file(bucket, obj_key, local_file_path)
    print(f"File downloaded to: {local_file_path}")
else:
    print("No objects found in the specified prefix.")


File selected: isic/test/ISIC_0031442.jpg
File downloaded to: tmp/ISIC_0031442.jpg


In [18]:
from sagemaker.serializers import IdentitySerializer
import boto3
import random
import os

# Pick a random key under isic/test, this time through the boto3 resource API,
# and download it with the client API.
s3_resource = boto3.resource('s3')
s3_client = boto3.client('s3')

objects = s3_resource.Bucket(bucket).objects.filter(Prefix="isic/test")
candidate_keys = [summary.key for summary in objects]
obj = random.choice(candidate_keys)
print('File selected ',obj)

# NOTE(review): the destination is a fixed file name, so the local copy is
# always called ISIC_0031442.jpg regardless of which key was selected above.
s3_client.download_file(bucket, obj, 'tmp/ISIC_0031442.jpg')
    

File selected  isic/test/ISIC_0031479.jpg


Create the predictor object for making predictions by passing the endpoint name and the session as parameters. `IdentitySerializer` sends the input bytes to the inference endpoint unchanged, and this is where we specify the content type of the request. We already specified a serializer in the `deploy` call above; both approaches are acceptable.

The predictor returns the probabilities for all classes as its output. The predicted class label for the input image corresponds to the index of the highest probability value in the list.

In [19]:
# Attach a predictor to the endpoint that is already running.
predictor = sagemaker.predictor.Predictor(
    endpoint_name=endpoint,
    sagemaker_session=sagemaker_session,
)

# Send the raw bytes using the same generic image content type as the deploy
# call, rather than declaring PNG for what is a .jpg file.
predictor.serializer = IdentitySerializer(content_type="application/x-image")

# Read the file recorded in `local_file_path` by the download step instead of
# a hard-coded file name, so the prediction always matches that download.
with open(local_file_path, "rb") as f:
    image = f.read()

# The endpoint replies with bytes; decode them to show the probabilities.
result = predictor.predict(image)
print(result.decode('utf-8'))

[0.011411451734602451, 0.0200594961643219, 0.00980268232524395, 0.0987604483962059, 0.7711982131004333, 0.04661271348595619, 0.03543773666024208, 0.008181829005479813]


There is an alternative way to get predictions, which is handy when the model is called from a Lambda function: use the `invoke_endpoint` method of the SageMaker runtime client.

In [21]:
import json
# Low-level runtime client, addressed by its canonical boto3 service name.
runtime = boto3.client('sagemaker-runtime')

# Same request as the predictor makes: raw image bytes, labelled with the
# generic image content type that was also used when deploying the endpoint.
response = runtime.invoke_endpoint(
    EndpointName=endpoint,
    ContentType='application/x-image',
    Body=image,
)

# The response body is a stream containing a JSON array of class probabilities.
result = json.loads(response['Body'].read().decode())

print(result)

[0.011411451734602451, 0.0200594961643219, 0.00980268232524395, 0.0987604483962059, 0.7711982131004333, 0.04661271348595619, 0.03543773666024208, 0.008181829005479813]


In [23]:
# Class names, listed in the order of the probabilities in `result`.
classes = [
    'actinic keratoses',
    'basal cell carcinoma',
    'dermatofibroma',
    'keratosis',
    'melanoma',
    'nevus',
    'squamous cell carcinoma',
    'vascular skin',
]

# Print every class with its probability as a percentage, all on one line.
for position, label in enumerate(classes):
    percentage = round(result[position]*100,2)
    print(label, percentage, end="% ")

actinic keratoses 1.14% basal cell carcinoma 2.01% dermatofibroma 0.98% keratosis 9.88% melanoma 77.12% nevus 4.66% squamous cell carcinoma 3.54% vascular skin 0.82% 

# delete endpoint

In [25]:
# Delete the endpoint through the session this notebook already holds rather
# than constructing a new Session only for this call.
sagemaker_session.delete_endpoint(predictor.endpoint_name)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


INFO:sagemaker:Deleting endpoint with name: image-classification-2024-03-18-02-09-11-583
