### >  initialize parameters

In [2]:
import sagemaker
import time
import boto3
import json
from sagemaker.utils import name_from_base

# Fixed identifiers used throughout the notebook.
prefix = 'whisper-sm-js'
model_name = "whisper-large-v3"
version = "v1.0.0"

# SageMaker session: provides the default S3 bucket; the execution role is
# looked up through the SDK helper.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Plain boto3 session, used here only to read the configured region name.
session = boto3.session.Session()
region = session.region_name

# Low-level boto3 clients: `sm` for SageMaker resource management calls,
# `sm_runtime` for endpoint invocations (real-time and async), `s3` for
# object uploads, copies and listings.
sm = boto3.client("sagemaker")
sm_runtime = boto3.client("sagemaker-runtime")
s3 = boto3.client('s3')

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


### > download model artifacts from jumpstart
Download the artifacts locally so we can inspect their components.

In [None]:
# Local directory that receives a copy of the JumpStart model artifacts.
save_directory = "model"
!mkdir -p {save_directory}

# S3 prefix of the prepackaged inference artifacts, built from the `region`,
# `model_name` and `version` values set in the initialization cell.
js_model_artifacts = f"s3://jumpstart-cache-prod-{region}/huggingface-asr/huggingface-asr-{model_name}/artifacts/inference-prepack/{version}/"

# Sync everything under that prefix into the local directory.
!aws s3 sync {js_model_artifacts} {save_directory}

download: s3://jumpstart-cache-prod-us-east-1/huggingface-asr/huggingface-asr-whisper-large-v3/artifacts/inference-prepack/v1.0.0/__model_info__.json to model/__model_info__.json
download: s3://jumpstart-cache-prod-us-east-1/huggingface-asr/huggingface-asr-whisper-large-v3/artifacts/inference-prepack/v1.0.0/code/requirements.txt to model/code/requirements.txt
download: s3://jumpstart-cache-prod-us-east-1/huggingface-asr/huggingface-asr-whisper-large-v3/artifacts/inference-prepack/v1.0.0/code/__init__.py to model/code/__init__.py
download: s3://jumpstart-cache-prod-us-east-1/huggingface-asr/huggingface-asr-whisper-large-v3/artifacts/inference-prepack/v1.0.0/code/constants/__init__.py to model/code/constants/__init__.py
download: s3://jumpstart-cache-prod-us-east-1/huggingface-asr/huggingface-asr-whisper-large-v3/artifacts/inference-prepack/v1.0.0/code/__script_info__.json to model/code/__script_info__.json
download: s3://jumpstart-cache-prod-us-east-1/huggingface-asr/huggingface-asr-whi

### > upload uncompressed file to my s3

SageMaker now allows you to deploy model files in uncompressed format, which avoids the time spent tarring and untarring large model files.

In [10]:
uncompressed_path = f"s3://{bucket}/{prefix}/uncompressed/model/"
!aws s3 sync {save_directory} {uncompressed_path}

upload: model/__model_info__.json to s3://sagemaker-us-east-1-376678947624/whisper-sm-js/uncompressed/model/__model_info__.json
upload: model/code/__init__.py to s3://sagemaker-us-east-1-376678947624/whisper-sm-js/uncompressed/model/code/__init__.py
upload: model/code/constants/__pycache__/__init__.cpython-310.pyc to s3://sagemaker-us-east-1-376678947624/whisper-sm-js/uncompressed/model/code/constants/__pycache__/__init__.cpython-310.pyc
upload: model/code/.ipynb_checkpoints/requirements-checkpoint.txt to s3://sagemaker-us-east-1-376678947624/whisper-sm-js/uncompressed/model/code/.ipynb_checkpoints/requirements-checkpoint.txt
upload: model/code/constants/__pycache__/constants.cpython-310.pyc to s3://sagemaker-us-east-1-376678947624/whisper-sm-js/uncompressed/model/code/constants/__pycache__/constants.cpython-310.pyc
upload: model/code/.ipynb_checkpoints/inference-checkpoint.py to s3://sagemaker-us-east-1-376678947624/whisper-sm-js/uncompressed/model/code/.ipynb_checkpoints/inference-ch

### > Create Model using Uncompressed style

In [5]:
# Hugging Face PyTorch GPU inference image, pulled from the regional ECR registry.
ecr_registry = f"763104351884.dkr.ecr.{region}.amazonaws.com"
container = f"{ecr_registry}/huggingface-pytorch-inference:2.1.0-transformers4.37.0-gpu-py310-cu118-ubuntu20.04"

In [11]:
# Describe the model artifacts as an uncompressed S3 prefix (no tarball):
# every object under `uncompressed_path` is treated as part of the model.
s3_data_source = {
    'S3Uri': uncompressed_path,
    'S3DataType': 'S3Prefix',
    'CompressionType': 'None',
}
model_data = {'S3DataSource': s3_data_source}

In [14]:
from sagemaker.model import Model

# Keep the SageMaker model resource name in its own variable. `model_name`
# holds the JumpStart model id that the download cell uses to build the
# artifact S3 path; overwriting it here would make that cell fail on re-run.
sm_model_name = name_from_base(f"{prefix}-model")

# Model definition: inference container + uncompressed artifacts in S3.
model = Model(
    image_uri=container,
    model_data=model_data,
    role=role,
    env={
        "ENDPOINT_SERVER_TIMEOUT": "3600",
        "MODEL_CACHE_ROOT": "/opt/ml/model",
        "SAGEMAKER_ENV": "1",
        # Entry-point script; presumably the inference.py shipped under
        # model/code in the downloaded artifacts - verify against the package.
        "SAGEMAKER_PROGRAM": "inference.py",
    },
    name=sm_model_name,
)

In [16]:
# Stand up a real-time endpoint for the model on a single ml.g5.4xlarge instance.
endpoint_name = name_from_base(f"{prefix}-endpoint")
predictor = model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.g5.4xlarge",
    initial_instance_count=1,
)



---------!

### > invoke the realtime endpoint

In [32]:
def load_audio(file_name):
    """Read the file at ``file_name`` in binary mode and return its bytes."""
    with open(file_name, mode="rb") as audio_fh:
        return audio_fh.read()

In [40]:
audio_file = "synthetic-voice.wav"

# Send the raw WAV bytes to the real-time endpoint.
audio_bytes = load_audio(audio_file)
response = sm_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='audio/wav',
    Body=audio_bytes,
)

# The body is UTF-8 JSON; show the first entry of its "text" field.
payload = json.loads(response["Body"].read().decode('utf-8'))
output = payload['text'][0]
output

' Hello, sky.'

### > Delete endpoint

In [None]:
# Delete whichever endpoint `endpoint_name` currently refers to.
# NOTE(review): `endpoint_name` is rebound to the async endpoint later in the
# notebook, so the endpoint removed here depends on cell execution order.
sm.delete_endpoint(EndpointName=endpoint_name)

### > Batch Transform Inference

Batch Transform doesn't support the uncompressed model format yet (TBD).

####> Create a transformer
whisper_transformer = model.transformer(
    instance_count=1,
    instance_type="ml.g4dn.xlarge",
    output_path=f"s3://{bucket}/{prefix}/output/batch-transform/",
    max_payload=100,  # per-request payload cap; presumably in MB - confirm in SDK docs
)

input_data = f"s3://{bucket}/{prefix}/input/"

#### > Define request data and job name
job_name = name_from_base(f"{prefix}-transform-job")

#### > Start batch transform job
whisper_transformer.transform(
    data=input_data,
    job_name=job_name,
    wait=False,  # return immediately instead of blocking until the job ends
)

### > Asynchronous Inference 

In [41]:
## Upload the sample audio file to the async input prefix.
# The local path and the object key are both derived from `audio_file`, so
# changing the sample file in one place keeps them in sync.
s3_key = f"{prefix}/input/{audio_file}"
s3.upload_file(audio_file, bucket, s3_key)

In [43]:
num_duplicates = 10

# Split the key once, outside the loop. Only a dot in the final path component
# is treated as the extension separator, and a key with no extension gets the
# numeric suffix appended at the end instead of raising IndexError.
key_dir, slash, key_file = s3_key.rpartition('/')
file_stem, dot, file_ext = key_file.rpartition('.')
if not dot:
    file_stem, file_ext = key_file, ''

# Every copy is made from the same source object.
copy_source = {
    'Bucket': bucket,
    'Key': s3_key
}

# Create duplicate copies named <stem>_1.<ext> ... <stem>_N.<ext>
for i in range(num_duplicates):
    duplicate_key = f"{key_dir}{slash}{file_stem}_{i+1}{dot}{file_ext}"
    s3.copy_object(Bucket=bucket, Key=duplicate_key, CopySource=copy_source)
    print(f"Created duplicate copy s3://{bucket}/{duplicate_key}")

Created duplicate copy s3://sagemaker-us-east-1-376678947624/whisper-sm-js/input/synthetic-voice_1.wav
Created duplicate copy s3://sagemaker-us-east-1-376678947624/whisper-sm-js/input/synthetic-voice_2.wav
Created duplicate copy s3://sagemaker-us-east-1-376678947624/whisper-sm-js/input/synthetic-voice_3.wav
Created duplicate copy s3://sagemaker-us-east-1-376678947624/whisper-sm-js/input/synthetic-voice_4.wav
Created duplicate copy s3://sagemaker-us-east-1-376678947624/whisper-sm-js/input/synthetic-voice_5.wav
Created duplicate copy s3://sagemaker-us-east-1-376678947624/whisper-sm-js/input/synthetic-voice_6.wav
Created duplicate copy s3://sagemaker-us-east-1-376678947624/whisper-sm-js/input/synthetic-voice_7.wav
Created duplicate copy s3://sagemaker-us-east-1-376678947624/whisper-sm-js/input/synthetic-voice_8.wav
Created duplicate copy s3://sagemaker-us-east-1-376678947624/whisper-sm-js/input/synthetic-voice_9.wav
Created duplicate copy s3://sagemaker-us-east-1-376678947624/whisper-sm-j

In [54]:
from sagemaker.async_inference import AsyncInferenceConfig

# Async inference settings: results are written under the output prefix, and
# each instance handles at most 4 invocations concurrently.
async_config = AsyncInferenceConfig(
    output_path=f"s3://{bucket}/{prefix}/output",
    max_concurrent_invocations_per_instance=4,
    # Optional notification configuration (left disabled):
    # notification_config = {
            #   "SuccessTopic": "arn:aws:sns:us-east-2:123456789012:MyTopic",
            #   "ErrorTopic": "arn:aws:sns:us-east-2:123456789012:MyTopic",
    # },
)

# Deploy the same model behind an asynchronous endpoint.
# `endpoint_name` is rebound here and now refers to the async endpoint.
endpoint_name = name_from_base(f"{prefix}-async-endpoint")
async_predictor = model.deploy(
    endpoint_name=endpoint_name,
    instance_type='ml.g5.2xlarge',
    initial_instance_count=1,
    async_inference_config=async_config,
)



-----------!

### > invoke async endpoint

In [61]:
# Queue one async inference request for every object under the input prefix.
# A paginator is used because a single list_objects_v2 call returns at most
# 1,000 keys; it also keeps the listing result separate from `response`, which
# holds the invocation result.
input_prefix = f"{prefix}/input/"
paginator = s3.get_paginator("list_objects_v2")

for page in paginator.paginate(Bucket=bucket, Prefix=input_prefix):
    # Pages for an empty prefix carry no 'Contents' entry.
    for obj in page.get('Contents', []):
        key = obj['Key']
        input_path = f"s3://{bucket}/{key}"

        response = sm_runtime.invoke_endpoint_async(
            EndpointName=endpoint_name,
            InputLocation=input_path,
            ContentType='audio/wav',
            InvocationTimeoutSeconds=3600  # Set a 1 hour timeout
        )

        # The response includes the S3 locations where the result or the
        # failure report for this request will be written.
        print(response)

{'ResponseMetadata': {'RequestId': '4d0d05e2-64a1-4b6c-bfd0-142e58943256', 'HTTPStatusCode': 202, 'HTTPHeaders': {'x-amzn-requestid': '4d0d05e2-64a1-4b6c-bfd0-142e58943256', 'x-amzn-sagemaker-outputlocation': 's3://sagemaker-us-east-1-376678947624/whisper-sm-js/output/c7990873-51f2-4dd7-8062-4ab4a17c515c.out', 'x-amzn-sagemaker-failurelocation': 's3://sagemaker-us-east-1-376678947624/async-endpoint-failures/whisper-sm-js-model-2024-05-11-17-39-37-749-1715451133-76a8/c7990873-51f2-4dd7-8062-4ab4a17c515c-error.out', 'date': 'Sat, 11 May 2024 18:39:32 GMT', 'content-type': 'application/json', 'content-length': '54', 'connection': 'keep-alive'}, 'RetryAttempts': 0}, 'OutputLocation': 's3://sagemaker-us-east-1-376678947624/whisper-sm-js/output/c7990873-51f2-4dd7-8062-4ab4a17c515c.out', 'FailureLocation': 's3://sagemaker-us-east-1-376678947624/async-endpoint-failures/whisper-sm-js-model-2024-05-11-17-39-37-749-1715451133-76a8/c7990873-51f2-4dd7-8062-4ab4a17c515c-error.out', 'InferenceId': 'c

### > Quick load test

In [None]:
# Number of async requests to enqueue; lower this for a quick smoke test.
num_requests = 200000

# Every request targets the same uploaded sample, so build its location once.
# Reusing `s3_key` (instead of a hard-coded key) keeps the load test pointed at
# the right object if `prefix` or `audio_file` is changed.
key = s3_key
input_path = f"s3://{bucket}/{key}"

for x in range(num_requests):
    response = sm_runtime.invoke_endpoint_async(
        EndpointName=endpoint_name,
        InputLocation=input_path,
        ContentType='audio/wav',
        InvocationTimeoutSeconds=3600  # Set a 1 hour timeout
    )

    print(f"Execute audio file: {x+1}")

Execute audio file: 1
Execute audio file: 2
Execute audio file: 3
Execute audio file: 4
Execute audio file: 5
Execute audio file: 6
Execute audio file: 7
Execute audio file: 8
Execute audio file: 9
Execute audio file: 10
Execute audio file: 11
Execute audio file: 12
Execute audio file: 13
Execute audio file: 14
Execute audio file: 15
Execute audio file: 16
Execute audio file: 17
Execute audio file: 18
Execute audio file: 19
Execute audio file: 20
Execute audio file: 21
Execute audio file: 22
Execute audio file: 23
Execute audio file: 24
Execute audio file: 25
Execute audio file: 26
Execute audio file: 27
Execute audio file: 28
Execute audio file: 29
Execute audio file: 30
Execute audio file: 31
Execute audio file: 32
Execute audio file: 33
Execute audio file: 34
Execute audio file: 35
Execute audio file: 36
Execute audio file: 37
Execute audio file: 38
Execute audio file: 39
Execute audio file: 40
Execute audio file: 41
Execute audio file: 42
Execute audio file: 43
Execute audio file: 