# GPT-SoVITS on SageMaker

## build image

In [9]:
# Make the helper scripts in this directory executable, then run build_and_push.sh.
# NOTE(review): the script presumably builds the inference image and pushes it to ECR —
# confirm the push succeeded before deploying.
!chmod +x ./*.sh && ./build_and_push.sh 

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Cloning into 'GPT-SoVITS'...
remote: Enumerating objects: 2486, done.[K
remote: Counting objects: 100% (591/591), done.[K
remote: Compressing objects: 100% (145/145), done.[K
remote: Total 2486 (delta 492), reused 461 (delta 446), pack-reused 1895[K
Receiving objects: 100% (2486/2486), 6.35 MiB | 23.72 MiB/s, done.
Resolving deltas: 100% (1363/1363), done.
Sending build context to Docker daemon  44.71MB
Step 1/22 : FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04
2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04: Pulling from cnstark/pytorch

[1B351b9876: Pulling fs layer 
[1B8c13fa41: Pulling fs layer 
[1Bbc26d7ad: Pulling fs layer 
[1Bdafc0d69: Pulling fs layer 
[1BDigest: sha256:73676a4baa9ea475d39b84f3844028850945777d6a6b6b34c1b82f0260a456d4[3A[2K[3A[2K[3A[2K[5A[2K[2A[2K[4A[2K[2A[2K[4A[2K[2A[2K[1A[2K[2A[2K[4A[2K[4A[2K[2A[2K[4A[2K[4A[2K[4A[2K[4A[2K

In [10]:
import boto3
import sagemaker
from sagemaker import Model, image_uris, serializers, deserializers

# Resolve the execution context (role, session, region, account, bucket) and the
# AWS clients that the rest of the notebook relies on.
role = sagemaker.get_execution_role()  # execution role for the endpoint
sess = sagemaker.session.Session()  # sagemaker session for interacting with different AWS APIs
# Read the region through the public attribute rather than the private `_region_name`.
region = sess.boto_region_name  # region name of the current SageMaker Studio environment
account_id = sess.account_id()  # account_id of the current SageMaker Studio environment
bucket = sess.default_bucket()
image = "gpt-sovits-inference"  # repository name used in the ECR image URI below
s3_client = boto3.client("s3")
sm_client = boto3.client("sagemaker")
smr_client = boto3.client("sagemaker-runtime")

full_image_uri = f"{account_id}.dkr.ecr.{region}.amazonaws.com/{image}:latest"
print(full_image_uri)


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
687912291502.dkr.ecr.us-west-2.amazonaws.com/gpt-sovits-inference:latest


## remote debug test

In [11]:
## empty model data for byoc with webserver
!touch dummy
!tar czvf model.tar.gz dummy
assets_dir = 's3://{0}/{1}/assets/'.format(bucket, 'gpt_sovits')
model_data = 's3://{0}/{1}/assets/model.tar.gz'.format(bucket, 'gpt_sovits')
!aws s3 cp model.tar.gz $assets_dir
!rm -f dummy model.tar.gz

dummy
upload: ./model.tar.gz to s3://sagemaker-us-west-2-687912291502/gpt_sovits/assets/model.tar.gz


In [12]:
from sagemaker_ssh_helper.wrapper import SSHModelWrapper
# Wrap the custom inference image in a SageMaker Model and ship the SSH helper's
# dependency directory with it (used by the remote debug test).
model = Model(
    image_uri=full_image_uri,
    model_data=model_data,
    role=role,
    dependencies=[SSHModelWrapper.dependency_dir()],
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [13]:
from sagemaker_ssh_helper.wrapper import SSHModelWrapper
instance_type = "ml.g5.xlarge"
endpoint_name = sagemaker.utils.name_from_base("gpt-sovits-inference")

# Attach the SSH helper to the model before it is deployed.
ssh_wrapper = SSHModelWrapper.create(model, connection_wait_time_seconds=0)  # <--NEW--

# No type annotation on this assignment: `Predictor` is never imported in this
# notebook, so annotating it raised NameError as soon as deploy() returned.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    endpoint_name=endpoint_name,
    wait=True,
)

# This is an inference endpoint, not a training job, so point the user at the
# inference CLI and the endpoint name.
print(f"To connect over SSH run: sm-local-ssh-inference connect {endpoint_name}")
instance_ids = ssh_wrapper.get_instance_ids(timeout_in_sec=900)  # <--NEW--
if instance_ids:
    print(f"To connect over SSM run: aws ssm start-session --target {instance_ids[0]}")
else:
    # Avoid an IndexError when no instance registered in time.
    print("No SSM instance IDs were reported within the timeout; check the endpoint logs.")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


ClientError: An error occurred (ValidationException) when calling the CreateModel operation: Requested image 687912291502.dkr.ecr.us-west-2.amazonaws.com/gpt-sovits-inference:latest not found.

## SM endpoint test

### create sagemaker model

In [None]:
import boto3
import re
import os
import json
import uuid
import boto3
import sagemaker
from time import gmtime, strftime
## for debug only
from sagemaker_ssh_helper.wrapper import SSHModelWrapper
# Low-level SageMaker client used by the create_* helpers below.
sm_client = boto3.client("sagemaker")



def create_model():
    """Register the inference image as a SageMaker model.

    Reads the notebook-level ``full_image_uri`` and ``role``, and names the
    model ``gpt-sovits-sagemaker-<UTC timestamp>``. The raw API response is
    printed.

    Returns:
        str: The name of the model that was created.
    """
    container = {"Image": full_image_uri}
    timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
    model_name = "gpt-sovits-sagemaker-" + timestamp
    response = sm_client.create_model(
        ModelName=model_name,
        ExecutionRoleArn=role,
        Containers=[container],
    )
    print(response)
    return model_name

In [None]:
# Create the model and keep its name for the endpoint configuration.
model_name = create_model()


### create endpoint configuration

In [None]:
# Configuration name is fixed when this cell runs (UTC timestamp suffix).
endpointConfigName = f"gpt-sovits-sagemaker-configuration-{strftime('%Y-%m-%d-%H-%M-%S', gmtime())}"


def create_endpoint_configuration():
    """Create the endpoint configuration for the model named by ``model_name``.

    A single production variant is defined: one ``ml.g5.xlarge`` instance with
    1200-second model-download and container-startup health-check timeouts.
    The raw API response is printed.

    Returns:
        str: ``endpointConfigName``, the name of the configuration.
    """
    variant = {
        "ModelName": model_name,
        "VariantName": "gpt-sovits-sagemaker-variant",
        "InstanceType": "ml.g5.xlarge",  # run on a g5.xlarge instance
        "InitialInstanceCount": 1,
        "ModelDataDownloadTimeoutInSeconds": 1200,
        "ContainerStartupHealthCheckTimeoutInSeconds": 1200,
    }
    response = sm_client.create_endpoint_config(
        EndpointConfigName=endpointConfigName,
        ProductionVariants=[variant],
    )
    print(response)
    return endpointConfigName


In [None]:
# Create the endpoint configuration; the returned configuration name is the cell's output.
create_endpoint_configuration()


### create endpoint

In [None]:
# Endpoint name is fixed when this cell runs (UTC timestamp suffix).
endpointName="gpt-sovits-sagemaker-endpoint"+strftime("%Y-%m-%d-%H-%M-%S", gmtime())
def create_endpoint():
    """Create the endpoint from ``endpointConfigName`` and block until it is in service.

    Prints the endpoint ARN, the status right after creation, and the status
    once the ``endpoint_in_service`` waiter has returned.

    Raises:
        Whatever the SageMaker client or waiter raises, e.g. when creation
        fails or the waiter gives up.
    """
    create_endpoint_response = sm_client.create_endpoint(
        EndpointName=endpointName,
        EndpointConfigName=endpointConfigName,
    )
    print("Endpoint Arn: " + create_endpoint_response["EndpointArn"])
    resp = sm_client.describe_endpoint(EndpointName=endpointName)
    print("Endpoint Status: " + resp["EndpointStatus"])
    # Report the real (timestamped) endpoint name, not a hard-coded base name.
    print("Waiting for {} endpoint to be in service".format(endpointName))
    waiter = sm_client.get_waiter("endpoint_in_service")
    waiter.wait(EndpointName=endpointName)
    # Show the final status so the cell output confirms the outcome of the wait.
    resp = sm_client.describe_endpoint(EndpointName=endpointName)
    print("Endpoint Status: " + resp["EndpointStatus"])

In [None]:
# Create the endpoint; this call blocks until the endpoint_in_service waiter returns.
create_endpoint()

## Real-time inference with SageMaker endpoint

In [None]:
import json
# Runtime client used to invoke the deployed endpoint.
runtime_sm_client = boto3.client("sagemaker-runtime")
#endpointName="gpt-sovits-sagemaker-endpoint2024-04-03-23-49-44"
# JSON payload sent to the endpoint. Keys are string literals: as bare names
# they were looked up as undefined variables, and the missing comma after
# "text_language" made the whole cell a syntax error.
request = {
    "refer_wav_path": "s3://sagemaker-us-west-2-687912291502/gpt_sovits_input/wav/Brigida.wav",
    "prompt_text": "",
    "prompt_language": "zh",
    "text": "my queue, my love ,my wife。",
    "text_language": "zh",
    "output_s3uri": "s3://sagemaker-us-west-2-687912291502/gpt_sovits_output/wav/",
}

def invoke_endpoint():
    """Send the notebook-level ``request`` to the endpoint and return its reply.

    The request is serialized to JSON, posted to ``endpointName`` through
    ``runtime_sm_client``, and both the payload and the decoded response body
    are printed.

    Returns:
        str: The decoded response body, so the caller can keep it
        (previously nothing was returned and the caller received ``None``).
    """
    content_type = "application/json"
    payload = json.dumps(request)
    print(payload)
    response = runtime_sm_client.invoke_endpoint(
        EndpointName=endpointName,
        ContentType=content_type,
        Body=payload,
    )
    # The body is a stream; read it once and decode the bytes to text.
    result = response['Body'].read().decode()
    print('返回：',result)
    return result

In [None]:
# Call the endpoint; invoke_endpoint prints the payload and the response body.
response = invoke_endpoint()