### Deploy SageMaker Endpoint
The following section deploys the tortoise-tts model to SageMaker as an async inference endpoint.


__Prerequisites__
- The model artifacts must be packaged in a `model.tar.gz` that contains the fine-tuned autoregressive model together with the other models tortoise-tts needs to run
- The execution role must have S3 read/write access to the input and output buckets

In [1]:
import copy
import json

import boto3
from IPython.display import Audio, display
from sagemaker import get_execution_role
from sagemaker.async_inference import AsyncInferenceConfig
from sagemaker.deserializers import JSONDeserializer
from sagemaker.pytorch import PyTorchModel
from sagemaker.serializers import JSONSerializer
from sagemaker.utils import name_from_base

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
# Restore variables that were persisted with %store in an earlier notebook/session:
#   model_s3    - passed as model_data when the model is created below
#   prefix      - used as the S3 key prefix and as the base of the endpoint name
#   bucket_name - bucket used for all S3 URIs in this notebook
# Keep comments on their own lines: text after a line magic is passed to the magic.
%store -r model_s3
%store -r prefix
%store -r bucket_name

# Endpoint name derived from the prefix via sagemaker.utils.name_from_base
# (presumably adds a unique suffix - confirm against the SDK docs).
endpoint_name = name_from_base(prefix)

In [3]:
# Async endpoint configuration: results are written under this S3 prefix.
async_output_uri = f"s3://{bucket_name}/{prefix}/async_inference/output"
async_config = AsyncInferenceConfig(output_path=async_output_uri)

# Model definition: the artifacts at model_s3 are served with the code in
# ./source, using inference.py as the entry point.
model = PyTorchModel(
    model_data=model_s3,
    role=get_execution_role(),
    source_dir="source",
    entry_point="inference.py",
    framework_version="2.1",
    py_version="py310",
    # Presumably the model server's response timeout in seconds - TODO confirm.
    env={"SAGEMAKER_TS_RESPONSE_TIMEOUT": "900"},
)

# Deploy a single GPU instance as an async endpoint; requests and responses
# are (de)serialized as JSON.
predictor = model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.g5.2xlarge",
    initial_instance_count=1,
    async_inference_config=async_config,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
    model_data_download_timeout=1800,
)

----------!

## AutoScaling

In [4]:
# Application Auto Scaling is the shared scaling service used for SageMaker
# endpoint variants (amongst other AWS services).
client = boto3.client("application-autoscaling")

# Format in which Application Auto Scaling references an endpoint variant:
# endpoint/<endpoint-name>/variant/<variant-name>
resource_id = f"endpoint/{endpoint_name}/variant/AllTraffic"

# Register the variant's instance count as a scalable target, between 1 and 5 instances.
# MinCapacity=1 keeps one instance running at all times, i.e. this endpoint does NOT
# scale in to zero.
response = client.register_scalable_target(
    ServiceNamespace="sagemaker",
    ResourceId=resource_id,
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",
    MinCapacity=1,
    MaxCapacity=5,
)

response = client.put_scaling_policy(
    # Derive the policy name from the endpoint name so that re-running this cell
    # updates the same policy. Generating a fresh timestamped name on every run
    # would attempt to add a second target-tracking policy on the same metric.
    PolicyName=f"{endpoint_name}-invoc-scaling",
    ServiceNamespace="sagemaker",  # The namespace of the AWS service that provides the resource.
    ResourceId=resource_id,  # Endpoint variant registered above
    ScalableDimension="sagemaker:variant:DesiredInstanceCount",  # SageMaker supports only Instance Count
    PolicyType="TargetTrackingScaling",  # 'StepScaling'|'TargetTrackingScaling'
    TargetTrackingScalingPolicyConfiguration={
        # Target value for the tracked metric below (ApproximateBacklogSizePerInstance),
        # i.e. the queued-request backlog per instance the policy tries to maintain.
        "TargetValue": 5.0,
        "CustomizedMetricSpecification": {
            "MetricName": "ApproximateBacklogSizePerInstance",
            "Namespace": "AWS/SageMaker",
            "Dimensions": [{"Name": "EndpointName", "Value": endpoint_name}],
            "Statistic": "Average",
        },
        # The cooldown periods prevent further scaling activity before the effects of
        # the previous activity are visible. Tune them to the instance startup time.
        # ScaleInCooldown - seconds after a scale-in activity completes before another
        # scale-in activity can start.
        "ScaleInCooldown": 300,
        # ScaleOutCooldown - seconds after a scale-out activity completes before another
        # scale-out activity can start.
        "ScaleOutCooldown": 300,
        # 'DisableScaleIn': True|False - indicates whether scale in by the target tracking
        # policy is disabled. If true, the policy won't remove capacity from the scalable resource.
    },
)

## >  Test endpoint

In [17]:
# Audio and display are imported in the notebook's import cell at the top, so that
# every preview cell works after a kernel restart regardless of which one runs first.

# Local reference recording of the first voice; also uploaded to S3 further down.
audio_file = "samples/adam/adam-interview.wav"
audio = Audio(audio_file, autoplay=False)

# Render an inline player so the sample can be checked before it is used.
display(audio)

## Upload Adam sample audio

In [29]:
s3 = boto3.client('s3')

# Object key: <prefix>/voice-samples/adam/<file name of the local sample>
s3_key = f"{prefix}/voice-samples/adam/{audio_file.split('/')[-1]}"

# Upload the file to S3
try:
    s3.upload_file(audio_file, bucket_name, s3_key)
except Exception as e:
    print(f'Error uploading file: {e}')
    # Stop here instead of continuing silently: the requests built below point at
    # this sample, so a failed upload must not go unnoticed.
    raise
else:
    print(f'File uploaded successfully to s3://{bucket_name}/{s3_key}')

# S3 "folder" holding the voice samples; passed as voice_samples_s3_uri in each request.
sample_s3 = f"s3://{bucket_name}/{prefix}/voice-samples/adam"

File uploaded successfully to s3://sagemaker-us-east-1-372703588567/tortoise-tts/voice-samples/adam/swami-interview.wav


In [30]:
# Text to synthesize, one request per list entry.
# The acronym is written with spaces ("A W S") - presumably so the TTS model spells
# it out letter by letter; the spelling is kept consistent across all sentences.
story = [
    "In a sleek, modern office setting, a group of A W S employees gather around a conference table, laptops open and energy palpable.",
    "The lead engineer, a confident woman, stands before them, her presentation deck loaded.",
    "The Azure team, led by a suave executive, strides into the room, exuding an air of casual arrogance.",
    "They take their seats, exchanging taunting glances with the A W S team. The tension is thick enough to cut with a knife.",
    "The A W S engineer launches into her presentation, highlighting the superiority of their cloud services with intricate diagrams and impressive statistics.",
    "The Azure team members roll their eyes and scribble snarky comments on their notepads.",
    "The Azure executive stands up, his fingers tapping his tablet in a show of bravado.",
    "He begins a counterattack, boasting about Azure's scalability and cost-effectiveness. The A W S team leans back, arms crossed, unimpressed.",
    "The battle rages on, with each side one-upping the other's claims, using increasingly outlandish metaphors and analogies.",
    "The tension gives way to absurdity as the arguments become more and more exaggerated.",
]

In [31]:
# Skeleton of one inference request. The payload-building cells copy it and fill in
# text, voice_samples_s3_uri, input_s3_uri and destination_s3_uri per sentence;
# model_id and inference_params are left at these empty defaults by this notebook.
request_template = dict(
    text="",
    voice_samples_s3_uri="",
    input_s3_uri="",
    destination_s3_uri="",
    model_id="",
    inference_params={},
)

In [32]:
# Build one request payload per story sentence for the "adam" voice.
payloads = []

for i, s in enumerate(story):
    # deepcopy rather than dict.copy(): a shallow copy would share the nested
    # "inference_params" dict between the template and every payload, so tweaking
    # one request's parameters would silently change all of them.
    payload = copy.deepcopy(request_template)
    payload["text"] = s

    # Note: input files are numbered from 0, output files from 1.
    input_file = f"s3://{bucket_name}/{prefix}/inputs/adam-input-part-{i}.json"
    output_file = f"s3://{bucket_name}/{prefix}/outputs/adam-output-{i+1}.wav"

    payload["voice_samples_s3_uri"] = sample_s3
    payload["input_s3_uri"] = input_file
    payload["destination_s3_uri"] = output_file

    payloads.append(payload)

payloads

[{'text': 'In a sleek, modern office setting, a group of A W S employees gather around a conference table, laptops open and energy palpable.',
  'voice_samples_s3_uri': 's3://sagemaker-us-east-1-372703588567/tortoise-tts/voice-samples/adam',
  'input_s3_uri': 's3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/adam-input-part-0.json',
  'destination_s3_uri': 's3://sagemaker-us-east-1-372703588567/tortoise-tts/outputs/adam-output-1.wav',
  'model_id': '',
  'inference_params': {}},
 {'text': 'The lead engineer, a confident woman, stands before them, her presentation deck loaded.',
  'voice_samples_s3_uri': 's3://sagemaker-us-east-1-372703588567/tortoise-tts/voice-samples/adam',
  'input_s3_uri': 's3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/adam-input-part-1.json',
  'destination_s3_uri': 's3://sagemaker-us-east-1-372703588567/tortoise-tts/outputs/adam-output-2.wav',
  'model_id': '',
  'inference_params': {}},
 {'text': 'The Azure team, led by a suave executive, str

In [33]:
# Resource-style S3 handle: the upload loops below call s3.Object(...).put(...).
# NOTE: this rebinds `s3`, which the sample-upload cell bound to a low-level client.
s3 = boto3.resource('s3')
# Runtime client used below for invoke_endpoint_async.
# NOTE(review): the variable name `sagemaker` is the same as the SDK package the
# top-level imports come from; it is kept because later cells reference it.
sagemaker = boto3.client('sagemaker-runtime')

In [34]:
# Submit every payload: store the request JSON in S3, then queue an async invocation
# that points at it.
for payload in payloads:
    request_uri = payload['input_s3_uri']
    print(f"Uploading {request_uri}")

    # Everything after "s3://<bucket>/" is the object key.
    key = "/".join(request_uri.split("/", 3)[3:])
    request_body = json.dumps(payload).encode('utf-8')
    s3_object = s3.Object(bucket_name, key)
    s3_object.put(Body=request_body)

    # Queue the request on the async endpoint; the input is read from S3.
    response = sagemaker.invoke_endpoint_async(
        InputLocation=request_uri,
        EndpointName=endpoint_name,
        ContentType='application/json',
        InvocationTimeoutSeconds=3600,
    )

Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/adam-input-part-0.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/adam-input-part-1.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/adam-input-part-2.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/adam-input-part-3.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/adam-input-part-4.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/adam-input-part-5.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/adam-input-part-6.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/adam-input-part-7.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/adam-input-part-8.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/adam-input-part-9.json


## Upload Swami sample audio

In [35]:
# Local reference recording of the second voice.
# NOTE: rebinds audio_file / audio from the earlier preview cell; Audio and display
# must already be imported when this cell runs.
audio_file = "samples/swami/swami-interview.wav"
audio = Audio(audio_file, autoplay=False)

# Render an inline player for the sample.
display(audio)

In [36]:
s3 = boto3.client('s3')

sample_s3 = f"s3://{bucket_name}/{prefix}/voice-samples/swami"

input_folder = "samples/swami"

!aws s3 sync {input_folder} {sample_s3}

In [37]:
# Build one request payload per story sentence for the "swami" voice.
payloads = []

for i, s in enumerate(story):
    # deepcopy rather than dict.copy(): a shallow copy would share the nested
    # "inference_params" dict between the template and every payload, so tweaking
    # one request's parameters would silently change all of them.
    payload = copy.deepcopy(request_template)
    payload["text"] = s

    # Note: input files are numbered from 0, output files from 1.
    input_file = f"s3://{bucket_name}/{prefix}/inputs/swami-input-part-{i}.json"
    output_file = f"s3://{bucket_name}/{prefix}/outputs/swami-output-{i+1}.wav"

    payload["voice_samples_s3_uri"] = sample_s3
    payload["input_s3_uri"] = input_file
    payload["destination_s3_uri"] = output_file

    payloads.append(payload)

payloads

[{'text': 'In a sleek, modern office setting, a group of A W S employees gather around a conference table, laptops open and energy palpable.',
  'voice_samples_s3_uri': 's3://sagemaker-us-east-1-372703588567/tortoise-tts/voice-samples/swami',
  'input_s3_uri': 's3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/swami-input-part-0.json',
  'destination_s3_uri': 's3://sagemaker-us-east-1-372703588567/tortoise-tts/outputs/swami-output-1.wav',
  'model_id': '',
  'inference_params': {}},
 {'text': 'The lead engineer, a confident woman, stands before them, her presentation deck loaded.',
  'voice_samples_s3_uri': 's3://sagemaker-us-east-1-372703588567/tortoise-tts/voice-samples/swami',
  'input_s3_uri': 's3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/swami-input-part-1.json',
  'destination_s3_uri': 's3://sagemaker-us-east-1-372703588567/tortoise-tts/outputs/swami-output-2.wav',
  'model_id': '',
  'inference_params': {}},
 {'text': 'The Azure team, led by a suave executiv

In [39]:
# Resource-style handle, needed for s3.Object(...) below.
s3 = boto3.resource('s3')

# Submit every payload: store the request JSON in S3, then queue an async invocation
# that points at it.
for payload in payloads:
    request_uri = payload['input_s3_uri']
    print(f"Uploading {request_uri}")

    # Everything after "s3://<bucket>/" is the object key.
    key = "/".join(request_uri.split("/", 3)[3:])
    request_body = json.dumps(payload).encode('utf-8')
    s3_object = s3.Object(bucket_name, key)
    s3_object.put(Body=request_body)

    # Queue the request on the async endpoint; the input is read from S3.
    response = sagemaker.invoke_endpoint_async(
        InputLocation=request_uri,
        EndpointName=endpoint_name,
        ContentType='application/json',
        InvocationTimeoutSeconds=3600,
    )

Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/swami-input-part-0.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/swami-input-part-1.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/swami-input-part-2.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/swami-input-part-3.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/swami-input-part-4.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/swami-input-part-5.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/swami-input-part-6.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/swami-input-part-7.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/swami-input-part-8.json
Uploading s3://sagemaker-us-east-1-372703588567/tortoise-tts/inputs/swami-input-part-9.json
