# Test the endpoint

In [1]:
import json
import pandas as pd
import numpy as np

In [2]:
import boto3
import sagemaker

client = boto3.client('sagemaker-runtime')

## Run Inference on deployed endpoint

In [45]:
df = pd.read_csv("data/mtsamples.csv")

idx = np.random.randint(len(df))
input_txt = df.loc[idx].transcription
category = df.loc[idx].medical_specialty
print(category)
print(input_txt)

 Surgery
PREOPERATIVE DIAGNOSES:,1.  Right ankle trimalleolar fracture.,2.  Right distal tibia plafond fracture with comminuted posterolateral impacted fragment.,OPERATIVE PROCEDURE:  ,Delayed open reduction internal fixation with plates and screws, 6-hole contoured distal fibular plate and screws reducing posterolateral malleolar fragment, as well as medial malleolar fragment.,POSTOPERATIVE DIAGNOSES:,1.  Right ankle trimalleolar fracture.,2.  Right distal tibia plafond fracture with comminuted posterolateral impacted fragment.,TOURNIQUET TIME: , 80 minutes.,HISTORY: , This 50-year-old gentleman was from the area and riding his motorcycle in Kentucky.,The patient lost control of his motorcycle when he was traveling approximately 40 mile per hour.  He was on a curve and lost control.  He is unsure what exactly happened, but he thinks his right ankle was pinned underneath the motorcycle while he was sliding.  There were no other injuries.  He was treated in Kentucky.  A close reduction 

In [46]:
endpoint_name = "31-2023-04-05-07-46-59-107"
content_type = "text/csv" # "application/json" #
accept = "application/json" #"text/csv"
# payload = "test"#json.dumps({"instances":["test"]})

response = client.invoke_endpoint(
    EndpointName=endpoint_name,
    # CustomAttributes=custom_attributes,
    ContentType=content_type,
    Accept=accept,
    Body=input_txt,
)

json.loads(response["Body"].read())


[' Orthopedic']

## Run batch inference (Transformer)

In [47]:
def get_latest_approved_model(model_package_group_name):
    sm_client = boto3.client('sagemaker')
    df = pd.DataFrame(sm_client.list_model_packages(
        ModelPackageGroupName=model_package_group_name)["ModelPackageSummaryList"])
    
    return df.loc[df.ModelApprovalStatus == "Approved"].iloc[0].ModelPackageArn

In [48]:
session = sagemaker.Session()
default_bucket = session.default_bucket()

model_name = "22-2023-04-03-13-43-05-767"
input_data_path = f"s3://{default_bucket}/data/test.csv"
output_data_path = f"s3://{default_bucket}/data/out"
CONTENT_TYPE_CSV = 'text/csv'

iam = boto3.client('iam')
role_arn = iam.get_role(RoleName=f'101436505502-sagemaker-exec')['Role']['Arn']
model_package_group_name="training-pipelineModelGroup"

# model_package_arn = "arn:aws:sagemaker:eu-west-3:101436505502:" \
#                         f"model-package/training-pipelineModelGroup/latest"

model_package_arn = get_latest_approved_model(model_package_group_name)

# load model
model = sagemaker.ModelPackage(role=role_arn, model_package_arn=model_package_arn,
                         sagemaker_session=session)

# create transformer
transformer =model.transformer(
    instance_count = 1,
    instance_type = 'ml.g4dn.xlarge',
    strategy = 'SingleRecord',
    assemble_with = 'Line',
    output_path = output_data_path,
    accept = CONTENT_TYPE_CSV,
)


INFO:sagemaker:Creating model with name: 31-2023-04-05-08-34-50-498


In [49]:
transformer.transform(data = input_data_path, 
                        content_type = CONTENT_TYPE_CSV, 
                        split_type = 'Line')

INFO:sagemaker:Creating transform job with name: 31-2023-04-05-08-35-01-493


ResourceLimitExceeded: An error occurred (ResourceLimitExceeded) when calling the CreateTransformJob operation: The account-level service limit 'ml.g4dn.xlarge for transform job usage' is 0 Instances, with current utilization of 0 Instances and a request delta of 1 Instances. Please contact AWS support to request an increase for this limit.

In [None]:
# c = boto3.client('sagemaker')
# c.list_model_packages(ModelPackageGroupName="training-pipelineModelGroup")


# transform_job = sagemaker.transformer.Transformer(
#     model_name = model_name,
#     instance_count = 1,
#     instance_type = 'ml.g4dn.xlarge', # ml.g4dn.xlarge
#     strategy = 'SingleRecord',
#     assemble_with = 'Line',
#     output_path = output_data_path,
#     base_transform_job_name='inference-pipelines-batch',
#     sagemaker_session=session,
#     accept = CONTENT_TYPE_CSV)

# transform_job.transform(data = input_data_path, 
#                         content_type = CONTENT_TYPE_CSV, 
#                         split_type = 'Line')

# Test local

In [9]:
class MyTokenizer:
    def __init__(self, model_name="distilbert-base-uncased") -> None:
        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)

    def tokenize(self, txt_input):
        return self.tokenizer.encode(txt_input, padding="max_length", truncation=True)

In [20]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

model = AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased",
        num_labels=40,
    )

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'classifier.weight', 'pre_clas

In [66]:
input["attention_mask"].shape

torch.Size([1, 512])

In [71]:
tok = MyTokenizer()
# input = tok.tokenizer(input_txt, padding="max_length", return_tensors='pt', truncation=True)
input = tok.tokenizer(input_txt, return_tensors='pt')

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
input.to(device)
outputs = model(**input)
y_pred = torch.argmax(outputs.logits.cpu(), dim=1)
y_pred
# [config.MEDICAL_CATEGORIES[i.item()] for i in y_pred]

Token indices sequence length is longer than the specified maximum sequence length for this model (886 > 512). Running this sequence through the model will result in indexing errors


RuntimeError: The size of tensor a (886) must match the size of tensor b (512) at non-singleton dimension 1