In [1]:
import os
from io import BytesIO

import json
import pandas as pd
import numpy as np

import boto3
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import CSVSerializer
from sagemaker.serverless import ServerlessInferenceConfig

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
# Control-plane client: used below to describe/delete endpoints and endpoint configs.
sagemaker_client = boto3.client('sagemaker')
# Data-plane client for invoking endpoints. 'sagemaker-runtime' is the documented
# boto3 service name; 'runtime.sagemaker' is only a legacy alias for it.
runtime = boto3.client('sagemaker-runtime')

In [3]:
# Bucket that the training channels read from and the estimator writes its output to.
bucket = "movielens-recommendation"
# Low-level S3 client (not referenced by the cells visible here; presumably used elsewhere).
s3 = boto3.client("s3")

In [4]:
# Settings shared by both input channels: CSV objects addressed by S3 prefix,
# with the full dataset copied to every training instance.
_csv_channel_settings = {
    "distribution": "FullyReplicated",
    "content_type": "text/csv",
    "s3_data_type": "S3Prefix",
}

# NOTE(review): the training channel points at All.csv while evaluation uses
# Valid.csv — confirm that the validation sessions are not also part of All.csv.
train_channel = sagemaker.session.TrainingInput(
    s3_data=f"s3://{bucket}/data/gru4rec/All.csv",
    **_csv_channel_settings,
)
eval_channel = sagemaker.session.TrainingInput(
    s3_data=f"s3://{bucket}/data/gru4rec/Valid.csv",
    **_csv_channel_settings,
)

In [5]:
# Entry-point script and the local directory that is uploaded with it.
# NOTE(review): the entry point is named inference.py but is also used as the
# training script by the estimator — confirm this is intentional.
script_path = "inference.py"
source_dir = "scripts"

# Create the session first and reuse it when resolving the execution role, so
# get_execution_role() does not have to construct a second, throwaway Session.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role(sagemaker_session=sagemaker_session)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [6]:
# PyTorch estimator for the GRU4Rec training job: a single ml.m5.2xlarge
# instance, with artifacts written under the project bucket's output/ prefix.
estimator = PyTorch(
    # What to run
    entry_point=script_path,
    source_dir=source_dir,
    # Container selection
    framework_version="1.5",
    py_version="py3",
    # Where to run it
    instance_count=1,
    instance_type="ml.m5.2xlarge",
    # Permissions, session and output location
    role=role,
    sagemaker_session=sagemaker_session,
    output_path=f"s3://{bucket}/output",
)

In [7]:
# Launch the training job and block until it finishes; the channel names
# ("train", "eval") are the keys the training script receives its data under.
estimator.fit(
    inputs={
        "train": train_channel,
        "eval": eval_channel,
    }
)

Using provided s3_resource


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: pytorch-training-2024-01-07-15-21-46-311


2024-01-07 15:21:46 Starting - Starting the training job...
2024-01-07 15:22:01 Starting - Preparing the instances for training......
2024-01-07 15:23:13 Downloading - Downloading input data...
2024-01-07 15:23:43 Downloading - Downloading the training image...
2024-01-07 15:24:09 Training - Training image download completed. Training in progress...[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2024-01-07 15:24:23,981 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2024-01-07 15:24:23,984 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-01-07 15:24:23,994 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2024-01-07 15:24:23,997 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2024-01-07 15:24:24,183 sagemaker-training

[34mfinish epoch 2[0m
[34mloss 0.13837623442231292[0m
[34mfinish epoch 3[0m
[34mloss 0.13842271616466648[0m
[34mepoch metrics:  [0.14014519552722962, 0.13837623442231292, 0.13842271616466648][0m
[34maverage metric:  0.13898138203806967[0m
[34m### SAVING MODEL ###[0m
[34msaved model  GRU4REC(
  (h2o): Linear(in_features=100, out_features=20571, bias=True)
  (final_activation): Tanh()
  (gru): GRU(20571, 100, num_layers=3, dropout=0.5)[0m
[34m)[0m
[34m### SAVING CODE ###[0m
[34msaved code in  /opt/ml/model/code/[0m
[34m### TRAINING END ###[0m
[34m#015  0%|          | 0/199646 [00:00<?, ?it/s]#015  0%|          | 0/199646 [00:19<?, ?it/s]#015  0%|          | 342/199646 [00:20<3:14:32, 17.08it/s]#015  0%|          | 344/199646 [00:20<3:14:19, 17.09it/s]#015  0%|          | 346/199646 [00:20<3:14:03, 17.12it/s]#015  0%|          | 348/199646 [00:20<3:13:43, 17.15it/s]#015  0%|          | 350/199646 [00:20<3:13:11, 17.19it/s]#015  0%|          | 352/199646 [00:20<3:1

[34m1:04:55<1:23:18, 22.69it/s]#015 43%|████▎     | 86223/199646 [1:04:55<1:25:13, 22.18it/s]#015 43%|████▎     | 86226/199646 [1:04:55<1:22:44, 22.85it/s]#015 43%|████▎     | 86229/199646 [1:04:55<1:22:27, 22.93it/s]#015 43%|████▎     | 86232/199646 [1:04:55<1:18:24, 24.11it/s]#015 43%|████▎     | 86235/199646 [1:04:56<1:20:23, 23.51it/s]#015 43%|████▎     | 86238/199646 [1:04:56<1:22:24, 22.93it/s]#015 43%|████▎     | 86241/199646 [1:04:56<1:21:23, 23.22it/s]#015 43%|████▎     | 86244/199646 [1:04:56<1:24:37, 22.34it/s]#015 43%|████▎     | 86247/199646 [1:04:56<1:27:00, 21.72it/s]#015 43%|████▎     | 86250/199646 [1:04:56<1:24:19, 22.41it/s]#015 43%|████▎     | 86253/199646 [1:04:56<1:19:29, 23.78it/s]#015 43%|████▎     | 86256/199646 [1:04:56<1:16:08, 24.82it/s]#015 43%|████▎     | 86259/199646 [1:04:57<1:20:08, 23.58it/s]#015 43%|████▎     | 86262/199646 [1:04:57<1:23:47, 22.55it/s]#015 43%|████▎     | 86265/199646 [1:04:57<1:25:17, 22.16it/s]#015 43%|████▎     | 86268/199646 [1:0

[34m.98it/s]#015 85%|████████▍ | 169446/199646 [2:00:04<17:41, 28.46it/s]#015 85%|████████▍ | 169449/199646 [2:00:05<17:26, 28.86it/s]#015 85%|████████▍ | 169452/199646 [2:00:05<17:49, 28.23it/s]#015 85%|████████▍ | 169455/199646 [2:00:05<17:37, 28.56it/s]#015 85%|████████▍ | 169458/199646 [2:00:05<17:24, 28.89it/s]#015 85%|████████▍ | 169461/199646 [2:00:05<17:16, 29.13it/s]#015 85%|████████▍ | 169464/199646 [2:00:05<17:47, 28.27it/s]#015 85%|████████▍ | 169467/199646 [2:00:05<18:34, 27.09it/s]#015 85%|████████▍ | 169470/199646 [2:00:05<19:33, 25.72it/s]#015 85%|████████▍ | 169473/199646 [2:00:05<18:47, 26.77it/s]#015 85%|████████▍ | 169476/199646 [2:00:06<18:56, 26.55it/s]#015 85%|████████▍ | 169479/199646 [2:00:06<20:02, 25.09it/s]#015 85%|████████▍ | 169482/199646 [2:00:06<20:41, 24.29it/s]#015 85%|████████▍ | 169485/199646 [2:00:06<19:48, 25.37it/s]#015 85%|████████▍ | 169488/199646 [2:00:06<18:57, 26.51it/s]#015 85%|████████▍ | 169491/199646 [2:00:06<18:19, 27.42it/s]#015 85%|██

[34m�       | 59963/199646 [44:39<1:50:34, 21.05it/s]#015 30%|███       | 59966/199646 [44:39<1:49:28, 21.26it/s]#015 30%|███       | 59969/199646 [44:39<1:49:48, 21.20it/s]#015 30%|███       | 59972/199646 [44:39<1:48:24, 21.47it/s]#015 30%|███       | 59975/199646 [44:39<1:40:53, 23.07it/s]#015 30%|███       | 59978/199646 [44:39<1:35:42, 24.32it/s]#015 30%|███       | 59981/199646 [44:40<1:34:51, 24.54it/s]#015 30%|███       | 59984/199646 [44:40<1:31:21, 25.48it/s]#015 30%|███       | 59987/199646 [44:40<1:29:00, 26.15it/s]#015 30%|███       | 59990/199646 [44:40<1:28:28, 26.31it/s]#015 30%|███       | 59993/199646 [44:40<1:34:06, 24.73it/s]#015 30%|███       | 59996/199646 [44:40<1:36:25, 24.14it/s]#015 30%|███       | 59999/199646 [44:40<1:38:33, 23.61it/s]#015 30%|███       | 60002/199646 [44:40<1:39:36, 23.37it/s]#015 30%|███       | 60005/199646 [44:41<1:43:06, 22.57it/s]#015 30%|███       | 60008/199646 [44:41<1:45:27, 22.07it/s]#015 30%|███       | 60011/199646 [44:41<1:46:

[34ms]#015 20%|██        | 40696/199646 [30:01<1:56:44, 22.69it/s]#015 20%|██        | 40699/199646 [30:01<1:50:12, 24.04it/s]#015 20%|██        | 40702/199646 [30:02<1:45:48, 25.04it/s]#015 20%|██        | 40705/199646 [30:02<1:42:35, 25.82it/s]#015 20%|██        | 40708/199646 [30:02<1:45:11, 25.18it/s]#015 20%|██        | 40711/199646 [30:02<1:42:22, 25.88it/s]#015 20%|██        | 40714/199646 [30:02<1:44:55, 25.24it/s]#015 20%|██        | 40717/199646 [30:02<1:51:00, 23.86it/s]#015 20%|██        | 40720/199646 [30:02<1:52:52, 23.47it/s]#015 20%|██        | 40723/199646 [30:02<1:56:29, 22.74it/s]#015 20%|██        | 40726/199646 [30:03<1:51:15, 23.81it/s]#015 20%|██        | 40729/199646 [30:03<1:46:34, 24.85it/s]#015 20%|██        | 40732/199646 [30:03<1:43:08, 25.68it/s]#015 20%|██        | 40735/199646 [30:03<1:41:11, 26.17it/s]#015 20%|██        | 40738/199646 [30:03<1:43:13, 25.66it/s]#015 20%|██        | 40741/199646 [30:03<1:49:14, 24.24it/s]#015 20%|██        | 40744/199646


2024-01-07 22:02:06 Uploading - Uploading generated training model
2024-01-07 22:02:06 Completed - Training job completed
Training seconds: 23932
Billable seconds: 23932


In [8]:
# Serverless hosting settings: 2048 MB of memory per invocation environment
# and at most 3 concurrent invocations.
serverless_config = ServerlessInferenceConfig(
    memory_size_in_mb=2048,
    max_concurrency=3,
)

In [9]:
# Name shared by the endpoint and its endpoint configuration; the cleanup and
# deploy cells below all key off this value.
endpoint_name = "GRU4Rec"

In [10]:
# Remove any endpoint left over from a previous run so the name can be reused.
try:
    sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
except sagemaker_client.exceptions.ClientError as e:
    # A missing endpoint surfaces here as a ValidationException; anything else
    # (permissions, throttling, ...) is a real failure and must propagate.
    if e.response['Error']['Code'] != 'ValidationException':
        raise
    print(f"No existing endpoint found with name: {endpoint_name}")
else:
    # Kept outside the try block so that a failure of the delete call itself is
    # never misreported as "no existing endpoint".
    print(f"Deleting existing endpoint: {endpoint_name}")
    sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
    # delete_endpoint returns before the endpoint is actually gone; wait for the
    # deletion to finish so the deploy cell can recreate the same name safely.
    sagemaker_client.get_waiter('endpoint_deleted').wait(EndpointName=endpoint_name)

No existing endpoint found with name: GRU4Rec


In [11]:
# Remove a stale endpoint configuration with the same name, if there is one, so
# the deploy cell can create a fresh configuration under that name.
try:
    sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_name)
    print(f"Deleted endpoint configuration: {endpoint_name}")
except sagemaker_client.exceptions.ClientError as e:
    # Same policy as the endpoint cleanup: a ValidationException is treated as
    # "nothing to delete"; every other error is re-raised instead of being
    # printed and ignored, so it cannot resurface later as a confusing deploy
    # failure.
    if e.response['Error']['Code'] != 'ValidationException':
        raise
    print(f"No existing endpoint configuration found with name: {endpoint_name}")

Deleted endpoint configuration: GRU4Rec


In [12]:
# Deploy the trained model to a serverless endpoint. Capacity comes entirely
# from serverless_config, so no instance type or instance count is passed:
# those arguments describe instance-based hosting and would only mislead here.
# Requests are serialized as CSV and responses are parsed as JSON.
deployed_model = estimator.deploy(
    endpoint_name=endpoint_name,
    serverless_inference_config=serverless_config,
    serializer=CSVSerializer(),
    deserializer=JSONDeserializer(),
)

INFO:sagemaker.image_uris:Defaulting to CPU type when using serverless inference
INFO:sagemaker:Repacking model artifact (s3://movielens-recommendation/output/pytorch-training-2024-01-07-15-21-46-311/output/model.tar.gz), script artifact (s3://movielens-recommendation/pytorch-training-2024-01-07-15-21-46-311/source/sourcedir.tar.gz), and dependencies ([]) into single tar.gz file located at s3://sagemaker-ap-northeast-1-382555373892/pytorch-training-2024-01-07-22-02-19-239/model.tar.gz. This may take some time depending on model size...
INFO:sagemaker:Creating model with name: pytorch-training-2024-01-07-22-02-19-239
INFO:sagemaker:Creating endpoint-config with name GRU4Rec
INFO:sagemaker:Creating endpoint with name GRU4Rec


----!