In [123]:
import os
import json 
import logging
from datetime import datetime

import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri # get the Docker image which contains the image classif agorithm

In [124]:
boto3.set_stream_logger(name="botocore.credentials", level=logging.WARNING)

In [125]:
session = sagemaker.Session()
region = session.boto_region_name

eu-west-1


In [126]:
arn_role = sagemaker.get_execution_role()

In [127]:
bucket_name = "smart-factory-sagemaker"
data_path = "stock-inference"

In [138]:
nclasses = 3
nimgs_train = 16
nepochs = 50
mini_batch_size = 4

In [139]:
train_instance = "ml.g4dn.xlarge"
job_name_prefix = "smart-factory-wh-stock-inference"

In [140]:
train_image_uri = sagemaker.image_uris.retrieve(
    #### TRY HUGGING FACE AFTER ###
    ## IF THE FRAMEWORK IS 3RD PARTY. THE IMAGE SCOPE IS DIFFERENT FOR TRAINING AND INFERENCE ##
    framework="image-classification",
    region=region,
    image_scope="training"
)
print(train_image_uri)

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


685385470294.dkr.ecr.eu-west-1.amazonaws.com/image-classification:1


In [141]:
s3_output_path = "s3://{}/{}/{}".format(bucket_name, data_path, "model_output")

In [142]:
clf_estimator = sagemaker.estimator.Estimator(
    image_uri=train_image_uri,
    role=arn_role,
    instance_count=1,
    instance_type=train_instance,
    volume_size=50,
    max_run=3600,
    input_mode="File",
    output_path=s3_output_path,
    sagemaker_session=session
)

In [143]:
clf_estimator.set_hyperparameters(
    num_classes=nclasses,
    epochs=nepochs,
    num_training_samples=nimgs_train,
    mini_batch_size=nimgs_train,
    num_layers=18,
    use_pretrained_model=1,
    multi_label=0
)

In [144]:
s3_training_imgs = "s3://{}/{}/{}".format(bucket_name, data_path, "train_imgs")
s3_valid_imgs = "s3://{}/{}/{}".format(bucket_name, data_path, "valid_imgs")
s3_training_annot = "s3://{}/{}/{}".format(bucket_name, data_path, "train_annots")
s3_valid_annot = "s3://{}/{}/{}".format(bucket_name, data_path, "valid_annots")


In [145]:
train_imgs = sagemaker.inputs.TrainingInput(
    s3_training_imgs,
    distribution="FullyReplicated",
    content_type="application/jpeg",
    s3_data_type="S3Prefix"
)

valid_imgs = sagemaker.inputs.TrainingInput(
    s3_valid_imgs,
    distribution="FullyReplicated",
    content_type="application/jpeg",
    s3_data_type="S3Prefix"
)

train_annot = sagemaker.inputs.TrainingInput(
    s3_training_annot,
    distribution="FullyReplicated",
    content_type="application/jpeg",
    s3_data_type="S3Prefix"
)

valid_annot = sagemaker.inputs.TrainingInput(
    s3_valid_annot,
    distribution="FullyReplicated",
    content_type="application/jpeg",
    s3_data_type="S3Prefix"
)


data_channels = {
    "train": train_imgs,
    "validation": valid_imgs,
    "train_lst": train_annot,
    "validation_lst": valid_annot
}

In [146]:
timestamp = (
    str(datetime.now().replace(microsecond=0)).replace(" ", "-").replace(":", "-")
)
job_name = job_name_prefix + "-" +timestamp
print(job_name)
clf_estimator.fit(inputs=data_channels, logs=True, job_name=job_name)

INFO:sagemaker:Creating training-job with name: smart-factory-wh-stock-inference-2023-05-07-14-51-38


smart-factory-wh-stock-inference-2023-05-07-14-51-38
2023-05-07 14:51:38 Starting - Starting the training job...
2023-05-07 14:51:53 Starting - Preparing the instances for training......
2023-05-07 14:52:48 Downloading - Downloading input data...
2023-05-07 14:53:13 Training - Downloading the training image............
2023-05-07 14:55:13 Training - Training image download completed. Training in progress.[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34mNvidia gpu devices, drivers and cuda toolkit versions (only available on hosts with GPU):[0m
[34mSun May  7 14:55:24 2023       [0m
[34m+-----------------------------------------------------------------------------+[0m
[34m| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |[0m
[34m|-------------------------------+----------------------+----------------------+[0m
[34m| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatil

[34m[05/07/2023 14:55:36 INFO 139676819011392] Epoch[15] Train-accuracy=1.000000[0m
[34m[05/07/2023 14:55:36 INFO 139676819011392] Epoch[15] Time cost=0.048[0m
[34m[05/07/2023 14:55:36 INFO 139676819011392] Epoch[15] Validation-accuracy=0.500000[0m
[34m[05/07/2023 14:55:36 INFO 139676819011392] Epoch[16] Train-accuracy=1.000000[0m
[34m[05/07/2023 14:55:36 INFO 139676819011392] Epoch[16] Time cost=0.046[0m
[34m[05/07/2023 14:55:36 INFO 139676819011392] Epoch[16] Validation-accuracy=nan[0m
[34m[05/07/2023 14:55:37 INFO 139676819011392] Epoch[17] Train-accuracy=1.000000[0m
[34m[05/07/2023 14:55:37 INFO 139676819011392] Epoch[17] Time cost=0.045[0m
[34m[05/07/2023 14:55:37 INFO 139676819011392] Epoch[17] Validation-accuracy=0.500000[0m
[34m[05/07/2023 14:55:37 INFO 139676819011392] Epoch[18] Train-accuracy=1.000000[0m
[34m[05/07/2023 14:55:37 INFO 139676819011392] Epoch[18] Time cost=0.045[0m
[34m[05/07/2023 14:55:37 INFO 139676819011392] Epoch[18] Validation-accurac


2023-05-07 14:56:05 Uploading - Uploading generated training model
2023-05-07 14:56:05 Completed - Training job completed
Training seconds: 197
Billable seconds: 197


In [150]:
infer_instance_type = "ml.t2.medium"
model_name = job_name
endpoint_name = "smart-factory-wh-stock-inference"

In [151]:
clf_predictor = clf_estimator.deploy(
    initial_instance_count=1,
    instance_type=infer_instance_type,
    endpoint_name=endpoint_name,
    model_name=model_name
)

INFO:sagemaker:Creating model with name: smart-factory-wh-stock-inference-2023-05-07-14-51-38
INFO:sagemaker:Creating endpoint-config with name smart-factory-wh-stock-inference
INFO:sagemaker:Creating endpoint with name smart-factory-wh-stock-inference


-----------------------!

In [None]:
sgmkr_runtime = boto3.client("runtime.sagemaker")