## Modernizing ML Workloads with AWS SageMaker

### Install and import dependencies

In [1]:
!pip install transformers torch scipy --quiet

In [2]:
import time
import os
import torch
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from scipy.special import softmax
import boto3
import sagemaker
from sagemaker.utils import name_from_base

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


### Load Model and tokenizer from pretrained weights

In [4]:
# Build tokenizer and model from pretrained weights
# tokenizer = AutoTokenizer.from_pretrained("model/tokenizer")
# model = AutoModelForSequenceClassification.from_pretrained("model")


### Create a model.tar.gz file to be used by SageMaker endpoint 


In [5]:
# Enter the model directory so the archive is built from its contents
%cd model

/home/ec2-user/SageMaker/Github/model


In [6]:
# The model folder holds pytorch_model.bin and config.json for the model, plus a tokenizer/ folder with the tokenizer files
!ls

config.json  pytorch_model.bin	tokenizer


In [7]:
!tar zcvf model.tar.gz *

config.json
pytorch_model.bin
tokenizer/
tokenizer/tokenizer.json
tokenizer/config.json
tokenizer/vocab.json
tokenizer/special_tokens_map.json
tokenizer/tokenizer_config.json
tokenizer/merges.txt
tokenizer/vocab.txt


In [9]:
# model.tar.gz now sits next to the files it packages (structure as listed by tar above)
!ls

config.json  model.tar.gz  pytorch_model.bin  tokenizer


In [10]:
# Return to the parent directory (the working directory before `%cd model`)
%cd ..

/home/ec2-user/SageMaker/Github


### Upload the model to S3

In [11]:
# SageMaker session and the values derived from it
sess = sagemaker.Session()
region = sess.boto_region_name
bucket = sess.default_bucket()

# IAM execution role and a boto3 SageMaker client
role = sagemaker.get_execution_role()
sm_client = boto3.client('sagemaker')

In [12]:
# S3 object key and the matching s3:// URI of the packaged model
model_key = '{}/model/model.tar.gz'.format('locobuzzmodel')
model_path = 's3://{}/{}'.format(bucket, model_key)

# Copy the local archive into the session's default bucket
boto3.resource('s3').Bucket(bucket).upload_file('model/model.tar.gz', model_key)

print("Uploaded model to S3:", model_path, sep="\n")

Uploaded model to S3:
s3://sagemaker-ap-south-1-128015641074/locobuzzmodel/model/model.tar.gz


### Define a HuggingFace model and deploy

In [13]:
# Display the custom inference script (code/inference.py) that is later passed as the model's entry point
%cat code/inference.py

import os
import json
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import numpy
from scipy.special import softmax

def allot(prediction):
    """
    Translate a list of class scores into a sentiment label.

    The position of the highest score selects the label:
    0 -> "negative", 1 -> "neutral", 2 -> "positive"; any other
    position gives "unknown". On ties the first position wins,
    because ``list.index`` returns the earliest match.
    """
    labels = {0: "negative", 1: "neutral", 2: "positive"}
    top_index = prediction.index(max(prediction))
    return labels.get(top_index, "unknown")

def model_fn(model_dir):
    """
    Build the objects needed for inference from the model directory.

    The tokenizer is read from the ``tokenizer/`` sub-folder of
    ``model_dir`` and the sequence-classification model from
    ``model_dir`` itself. Returns a dict with the keys ``'model'``
    and ``'tokenizer'``.
    """
    # Tokenizer files are kept in their own sub-folder.
    tokenizer_dir = os.path.join(model_dir, 'tokenizer/')
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)

    # Model files are read directly from model_dir.
    classifier = AutoModelForSequenceClassification.from_pretrained(model_dir)

    return {'model': classifier, 'tokenizer': tokenizer}

def predict_fn(input_data, model):
    """
    Apply model to the incoming request
    """

    tokenizer = model['tokenizer']
    bert_model = model['model']
    

    encoded_input = tokenizer(input_data, tru

In [14]:
from sagemaker.huggingface import HuggingFaceModel

# Settings handed to the model through its `env` argument
hub = {"HF_TASK": "text-classification"}

# Describe the model for SageMaker: artifact location, IAM role,
# container versions and the custom inference script
huggingface_model = HuggingFaceModel(
    model_data=model_path,            # S3 URI of the uploaded model.tar.gz
    role=role,                        # IAM role with permissions to create an Endpoint
    entry_point="code/inference.py",  # path to the inference.py file
    transformers_version="4.26",      # transformers version used
    pytorch_version="1.13",           # PyTorch version used
    py_version="py39",                # Python version of the DLC
    env=hub,
)

In [15]:
%%time
# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
   initial_instance_count=1,
   instance_type="ml.m5.xlarge"
)

---!CPU times: user 35.9 s, sys: 4.24 s, total: 40.1 s
Wall time: 2min 47s


### Testing the deployed Model for real time inferencing

In [19]:
%%time
data = {
"inputs": "Sagemaker makes machine learning very efficient."
}

output = predictor.predict(data)

CPU times: user 4.4 ms, sys: 0 ns, total: 4.4 ms
Wall time: 128 ms


In [20]:
# Show the response's Python type followed by its content
print(type(output), output, sep="\n")

<class 'dict'>
{'result': 'positive', 'scores': [0.0016569155268371105, 0.11447102576494217, 0.8838720917701721]}


### Testing real-time inference with the boto3 runtime client

In [25]:
import json

# boto3 runtime client, used here as an alternative to predictor.predict
sagemaker_runtime = boto3.client("sagemaker-runtime")
endpoint_name = predictor.endpoint_name

# Request body, serialised to JSON
data = {"inputs": "This is very good product, comes with great packaging as well"}
payload = json.dumps(data)

# Call the endpoint with the JSON payload
response = sagemaker_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=payload,
    ContentType="application/json",
)

# Read the response body, decode it as UTF-8 and parse the JSON document
result = response["Body"].read().decode("utf-8")
final_result = json.loads(result)
final_result

{'result': 'positive',
 'scores': [0.000438973045675084, 0.000360532954800874, 0.9992005228996277]}

### Deleting the endpoint

In [26]:
# Clean up everything the deployment created, not only the endpoint:
# delete the SageMaker model first, then the endpoint, so no orphaned
# model resource is left in the account
predictor.delete_model()
predictor.delete_endpoint()