using:
    https://github.com/aws/amazon-sagemaker-examples/blob/0efd885ef2a5c04929d10c5272681f4ca17dac17/advanced_functionality/custom-training-containers/basic-training-container/notebook/basic_training_container.ipynb
    and https://github.com/aws/amazon-sagemaker-examples/blob/0efd885ef2a5c04929d10c5272681f4ca17dac17/advanced_functionality/custom-training-containers/framework-container/notebook/framework-container.ipynb
    for permissions:  https://aws.amazon.com/blogs/machine-learning/using-the-amazon-sagemaker-studio-image-build-cli-to-build-container-images-from-your-studio-notebooks/
    consider also:  https://github.com/aws/amazon-sagemaker-examples/blob/0efd885ef2a5c04929d10c5272681f4ca17dac17/advanced_functionality/pipe_bring_your_own/pipe_bring_your_own.ipynb

In [5]:
!pip install sagemaker-studio-image-build

Collecting sagemaker-studio-image-build
  Using cached sagemaker_studio_image_build-0.6.0-py3-none-any.whl
Installing collected packages: sagemaker-studio-image-build
Successfully installed sagemaker-studio-image-build-0.6.0
[0m

In [2]:
#import json for handelling hyperparameters
import pprint
import json

def print_json_object(json_object):
    pprint.pprint(json_object)


def load_json_object(json_file_path):
    with open(json_file_path) as json_file:
        return json.load(json_file)


In [3]:
#standered set of top lines for using sagemaker with custom container
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator

from sagemaker.tuner import (
IntegerParameter,
CategoricalParameter,
ContinuousParameter,
HyperparameterTuner,
)

ecr_namespace = "sagemaker-training-containers/"
prefix = "basic-training-container"

ecr_repository_name = ecr_namespace + prefix
role = get_execution_role()
account_id = role.split(":")[4]
region = boto3.Session().region_name
sagemaker_session = sagemaker.session.Session()
bucket = sagemaker_session.default_bucket()

print(account_id)
print(region)
print(role)
print(bucket)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
419483997767
us-east-2
arn:aws:iam::419483997767:role/service-role/AmazonSageMaker-ExecutionRole-20240615T151259
sagemaker-us-east-2-419483997767


In [6]:
#switch to dockerfile directory and build container with "framework"
%cd ml-docker-app
!sm-docker build -t ml-docker-app .

[Errno 2] No such file or directory: 'ml-docker-app'
/root/Docker_MLC/DockerMLC/ml-docker-app
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
....................[Container] 2024/09/27 13:35:52.157866 Running on CodeBuild On-demand

[Container] 2024/09/27 13:35:52.157878 Waiting for agent ping
[Container] 2024/09/27 13:35:52.359311 Waiting for DOWNLOAD_SOURCE
[Container] 2024/09/27 13:35:52.714784 Phase is DOWNLOAD_SOURCE
[Container] 2024/09/27 13:35:52.750286 CODEBUILD_SRC_DIR=/codebuild/output/src4148401815/src
[Container] 2024/09/27 13:35:52.752041 YAML location is /codebuild/output/src4148401815/src/buildspec.yml
[Container] 2024/09/27 13:35:52.753986 Setting HTTP client timeout to higher timeout for S3 source
[Container] 2024/09/27 13:35:52.754151 Processing environment variables
[Container] 2024/09/27 13:35:52.789385 No runtime versi

In [36]:
#pull container name for use in estimator
container_image_uri = "{0}.dkr.ecr.{1}.amazonaws.com/{2}:latest".format(
    account_id, region, ecr_repository_name
)
print(container_image_uri)

419483997767.dkr.ecr.us-east-2.amazonaws.com/sagemaker-training-containers/basic-training-container:latest


In [7]:
#turns out i'm not figuring out how to push containers properly to where I want in ECR; the sm-docker build puts them in the same place with the same name
#i need to figure that out, but fur now, hard code name from ECR
container_image_uri= "419483997767.dkr.ecr.us-east-2.amazonaws.com/sagemaker-studio-d-srdvznxntmpy:default-1718479071871"

In [15]:
#for some reason, to use hyperparmeters correctly need to put traying file/entry point python script into S3 as a zip file
#this and the next block take the *app.py file (entry point/training script), turn it into a zip, store in S3,
#and then create a reference to that zip file so estimator/sagemaker can pull it from S3 later.  That's just how sagemaker works apprently
import tarfile
import os


def create_tar_file(source_files, target=None):
    if target:
        filename = target
    else:
        _, filename = tempfile.mkstemp()

    with tarfile.open(filename, mode="w:gz") as t:
        for sf in source_files:
            # Add all files from the directory into the root of the directory structure of the tar
            t.add(sf, arcname=os.path.basename(sf))
    return filename


create_tar_file(["app.py"], "sourcedir.tar.gz")

'sourcedir.tar.gz'

In [16]:
sources = sagemaker_session.upload_data("sourcedir.tar.gz", bucket, prefix + "/code")
print(sources)
! rm sourcedir.tar.gz

s3://sagemaker-us-east-2-419483997767/basic-training-container/code/sourcedir.tar.gz


In [11]:
#test estimator 
import sagemaker
import json

# JSON encode hyperparameters.
def json_encode_hyperparameters(hyperparameters):
    return {str(k): json.dumps(v) for (k, v) in hyperparameters.items()}

hyperparameters = json_encode_hyperparameters(
    {
        "sagemaker_program": "app.py",
        "sagemaker_submit_directory": sources,
        "Ni": 50,
        "Ne": 3,
        "maxi": 5,
    }
)

est = sagemaker.estimator.Estimator(
    container_image_uri,
    role,
    instance_count=1,
    #instance_type="local",  # use local mode
    instance_type='ml.c4.xlarge',
    base_job_name=prefix,
    hyperparameters=hyperparameters,
)

est.fit()

INFO:sagemaker:Creating training-job with name: basic-training-container-2024-09-27-13-41-47-019


2024-09-27 13:41:47 Starting - Starting the training job...
2024-09-27 13:42:02 Starting - Preparing the instances for training...
2024-09-27 13:42:36 Downloading - Downloading the training image..[34m2024-09-27 13:42:56,708 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-09-27 13:42:56,708 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2024-09-27 13:42:56,719 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-09-27 13:42:56,720 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2024-09-27 13:42:56,729 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-09-27 13:42:56,730 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2024-09-27 13:42:56,739 sagemaker-training-toolkit INFO     Invoking user script[0m

In [79]:
logger = logging.getLogger(__name__)
maxJ=-103.4
ProcessTime=4.35
k=8
logger.info("Reward: {:.4f}, ProccesTime:  {:.6f}, Iteration:  {:.0f} \n".format(maxJ, ProcessTime, k))
metric_definitions = [{"Name": "Reward", "Regex": "Reward: ([0-9\\.]+)"}]
print(metric_definitions)

INFO:__main__:Reward: -103.4000, ProccesTime:  4.350000, Iteration:  8 



[{'Name': 'Reward', 'Regex': 'Reward: ([0-9\\.]+)'}]


In [17]:
#execute hyperparameter tuning job

objective_metric_name = "Reward"
objective_type = "Maximize"
metric_definitions = [
    {
        "Name": "Reward",
        "Regex": "Reward: (-[0-9\\.]+)",
    }
]


hyperparameter_ranges = {
"Nn": ContinuousParameter(0.2, 0.5),
"Pr": ContinuousParameter(0.05, 0.25),
"Pm": ContinuousParameter(0.3, 0.6),
}

tuner = HyperparameterTuner(
est,
objective_metric_name,
hyperparameter_ranges,
metric_definitions,
max_jobs=21,
max_parallel_jobs=3,
objective_type=objective_type,
early_stopping_type="Auto",
strategy = "Bayesian",
base_tuning_job_name = 'GPrun1',
)

tuner.fit()

INFO:sagemaker:Creating hyperparameter tuning job with name: GPrun1-240927-1348


................................................................................!
