# Train a PyTorch model and deploy it with the Amazon SageMaker Python SDK

## Setup

In [1]:


import os
import numpy as np
import pandas as pd
import sagemaker

# One SageMaker session is shared by the upload, training and deployment cells.
sagemaker_session = sagemaker.Session()

# Key prefix under which this notebook stores its data and model artifacts.
prefix = "sagemaker/DEMO-pytorch-rea"
# The session's default bucket is used for every S3 path built below.
bucket = sagemaker_session.default_bucket()

# Execution role handed to the estimator and the model objects later on.
role = sagemaker.get_execution_role()



## Upload data to S3

In [3]:
# Both pickles are uploaded under the same "<prefix>/train" key prefix, which is
# the location the training channel points at later.
train_key_prefix = prefix + '/train'
inputs_list = sagemaker_session.upload_data(
    "./data/list_seq.pickle",
    bucket=bucket,
    key_prefix=train_key_prefix,
)
inputs_dict = sagemaker_session.upload_data(
    "./data/dict_loc.pickle",
    bucket=bucket,
    key_prefix=train_key_prefix,
)

In [5]:
# Display the value returned by upload_data for list_seq.pickle (an S3 URI).
inputs_list

's3://sagemaker-us-east-1-761655029435/sagemaker/DEMO-pytorch-rea/train/list_seq.pickle'

## SageMaker PyTorch Estimator - train your model

In [7]:
from sagemaker.pytorch import PyTorch

# S3 location where the training job writes its model artifact.
output_path = "s3://{}/{}/output/".format(bucket, prefix)

# Hyperparameters forwarded to the training entry point (src/train.py).
training_hyperparameters = dict(
    embedding_dims=128,
    initial_lr=0.025,
    epochs=3,
    batch_size=16,
    n_workers=16,
)

# Single "training" channel pointing at the S3 prefix the pickles were uploaded to.
training_channels = {"training": "s3://{}/{}/train".format(bucket, prefix)}

estimator = PyTorch(
    entry_point="train.py",
    source_dir="src",
    role=role,
    framework_version="1.7.1",
    py_version="py3",
    instance_count=1,
    instance_type="ml.c5.xlarge",
    output_path=output_path,
    hyperparameters=training_hyperparameters,
)

# Launch the training job; the job log is streamed into the cell output.
estimator.fit(training_channels)


2021-04-29 15:33:26 Starting - Starting the training job...
2021-04-29 15:33:28 Starting - Launching requested ML instances.........
2021-04-29 15:35:01 Starting - Preparing the instances for training......
2021-04-29 15:36:01 Downloading - Downloading input data...
2021-04-29 15:36:49 Training - Training image download completed. Training in progress.[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2021-04-29 15:36:50,457 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2021-04-29 15:36:50,459 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-04-29 15:36:50,468 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2021-04-29 15:36:50,477 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2021-04-29 15:36:58,170 sagemaker-train

## PyTorchModel - Deploy endpoint

In [8]:
# Display the S3 URI of the model artifact reported by the latest training job.
estimator.latest_training_job.describe()['ModelArtifacts']['S3ModelArtifacts']

's3://sagemaker-us-east-1-761655029435/sagemaker/DEMO-pytorch-rea/output/pytorch-training-2021-04-29-15-33-25-836/output/model.tar.gz'

In [11]:
# Keep the S3 URI of the trained model artifact for the deployment cells below.
training_job_description = estimator.latest_training_job.describe()
model_path = training_job_description['ModelArtifacts']['S3ModelArtifacts']

In [12]:
from sagemaker.pytorch.model import PyTorchModel

# Deployable model: the trained artifact plus the inference entry point in src/.
pytorch_model = PyTorchModel(
    model_data=model_path,
    role=role,
    entry_point="inference.py",
    source_dir="src",
    framework_version="1.7.1",
    py_version="py3",
)

In [13]:
# Create a single-instance real-time endpoint with a fixed, explicit name.
predictor = pytorch_model.deploy(
    initial_instance_count=1,
    instance_type="ml.c5.xlarge",
    endpoint_name="pytorch-inference",
)

-------------!

## Invoke endpoint

In [15]:
import json
import boto3


# Target the endpoint created by the deploy cell instead of a hard-coded,
# timestamped name from an earlier session, so the notebook works on a fresh run.
endpoint_name = predictor.endpoint_name

# Request payload: one location id and the number of results requested.
single_test = json.dumps({"locationIDInput": ['mycty_51549'], "count": 5})

# 'sagemaker-runtime' is the canonical boto3 service name for InvokeEndpoint.
runtime_client = boto3.client('sagemaker-runtime')
response = runtime_client.invoke_endpoint(EndpointName=endpoint_name,
                                          ContentType='application/json',
                                          Body=single_test)

# Decode as UTF-8 (a superset of ASCII) so a non-ASCII character in the
# response does not raise UnicodeDecodeError.
result = response['Body'].read().decode('utf-8')
print('Predicted label is {}.'.format(result))

Predicted label is [
  {
    "id": 26,
    "score": 0.0,
    "global_id": "mycty_51549"
  },
  {
    "id": 214,
    "score": 0.2046120132840531,
    "global_id": "mycty_51540"
  },
  {
    "id": 515,
    "score": 0.22188108672158147,
    "global_id": "mycty_51576"
  },
  {
    "id": 201,
    "score": 0.24459431622790895,
    "global_id": "mycty_51723"
  },
  {
    "id": 11,
    "score": 0.2571738318739417,
    "global_id": "mycty_51387"
  },
  {
    "id": 522,
    "score": 0.270982997163975,
    "global_id": "mycty_51565"
  }
].


## Deploy a multi-model endpoint

In [16]:
from sagemaker.multidatamodel import MultiDataModel

In [48]:
# S3 prefix the multi-model endpoint (MME) reads its model archives from.
model_data_prefix = "s3://{}/{}/mme-artifacts/".format(bucket, prefix)

In [49]:
# Display the resolved S3 prefix for the multi-model artifacts.
model_data_prefix

's3://sagemaker-us-east-1-761655029435/sagemaker/DEMO-pytorch-rea/mme-artifacts/'

In [32]:
# Passing the existing PyTorchModel supplies the container image the endpoint
# needs; the individual model archives are read from model_data_prefix.
mme = MultiDataModel(
    name="pytorch-mme-v1",
    model_data_prefix=model_data_prefix,
    model=pytorch_model,
    sagemaker_session=sagemaker_session,
)



In [33]:

# Deploy the multi-model endpoint; note this rebinds `predictor` to the MME.
predictor = mme.deploy(
    endpoint_name="pytorch-mme-v1",
    instance_type="ml.c5.xlarge",
    initial_instance_count=1,
)

-------------!

In [34]:
# No model has been added under the MME prefix yet, so this lists nothing.
list(mme.list_models())



[]

In [45]:
# Both MME entries are copies of the single artifact produced by the training
# job above, so the job is described once and the URI is reused.
artifact_path1 = estimator.latest_training_job.describe()['ModelArtifacts']['S3ModelArtifacts']
artifact_path2 = artifact_path1

# add_model copies the artifact into the MME's S3 prefix under the given name.
mme.add_model(model_data_source=artifact_path1, model_data_path='Model1.tar.gz')

# Kept as the last expression so the cell still displays the returned S3 path.
mme.add_model(model_data_source=artifact_path2, model_data_path='Model2.tar.gz')



's3://sagemaker-us-east-1-761655029435/sagemaker/DEMO-pytorch-rea/mme-artifactsv2/Model2.tar.gz'

In [46]:
# List the objects under the MME prefix with the AWS CLI to confirm the copies.
!aws s3 ls $model_data_prefix

2021-04-29 16:58:40    2353140 Model1.tar.gz
2021-04-29 16:58:40    2353140 Model2.tar.gz


In [47]:
# The two archives added above should now be listed by the MME.
list(mme.list_models())

['Model1.tar.gz', 'Model2.tar.gz']

In [44]:
import json
import boto3


# Name given to the multi-model endpoint in the mme.deploy(...) cell.
endpoint_name = "pytorch-mme-v1"

# Same request payload as for the single-model endpoint.
single_test = json.dumps({"locationIDInput": ['mycty_51549'], "count": 5})

# 'sagemaker-runtime' is the canonical boto3 service name for InvokeEndpoint.
runtime_client = boto3.client('sagemaker-runtime')

# TargetModel selects which archive under the MME prefix serves this request.
# NOTE(review): the recorded run of this cell failed with InvalidModelException
# "Model version is not defined." -- possibly the serving container for
# framework_version 1.7.1 does not support multi-model loading; confirm against
# the endpoint's CloudWatch logs before relying on this cell.
response = runtime_client.invoke_endpoint(EndpointName=endpoint_name,
                                          ContentType='application/json',
                                          TargetModel='Model1.tar.gz',
                                          Body=single_test)

# Decode as UTF-8 (a superset of ASCII) so a non-ASCII character in the
# response does not raise UnicodeDecodeError.
result = response['Body'].read().decode('utf-8')

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from model with message "{
  "code": 400,
  "type": "InvalidModelException",
  "message": "Model version is not defined."
}
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/pytorch-mme-v1 in account 761655029435 for more information.