In [1]:
import boto3
import sagemaker
from sagemaker import get_execution_role
import os

prefix = 'train_data'
ecr_repository_name = 'neural_network_spam_classifier'
role = get_execution_role()
account_id = role.split(':')[4]
region = boto3.Session().region_name
bucket = 'sagemaker-neural-net'
sagemaker_session = sagemaker.session.Session(default_bucket=bucket)

print(account_id)
print(region)
print(role)
print(bucket)

540748271236
us-east-1
arn:aws:iam::540748271236:role/service-role/AmazonSageMaker-ExecutionRole-20200831T143512
sagemaker-neural-net


In [11]:
! pygmentize ./container/build.sh

%%sh

[37m# The name of our algorithm[39;49;00m
[31malgorithm_name[39;49;00m=neural_network_spam_classifier

[37m#cd container[39;49;00m

chmod +x source_dir/train.py

[37m#aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-training:2.2.0-cpu-py37-ubuntu18.04[39;49;00m
[37m#[39;49;00m
[37m#docker pull 763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-training:2.2.0-cpu-py37-ubuntu18.04[39;49;00m

[31maccount[39;49;00m=[34m$([39;49;00maws sts get-caller-identity --query Account --output text[34m)[39;49;00m

[31mregion[39;49;00m=us-east-1

[31mfullname[39;49;00m=[33m"[39;49;00m[33m${[39;49;00m[31maccount[39;49;00m[33m}[39;49;00m[33m.dkr.ecr.[39;49;00m[33m${[39;49;00m[31mregion[39;49;00m[33m}[39;49;00m[33m.amazonaws.com/[39;49;00m[33m${[39;49;00m[31malgorithm_name[39;49;00m[33m}[39;49;00m[33m:latest[39;49;00m[33m"[39;49;00m

[37m# If th

In [12]:
%%capture
! ./scripts/build_and_push.sh 540748271236 us-east-1 sagemaker-training-containers/script-mode-container

In [13]:
container_image_uri = '{0}.dkr.ecr.{1}.amazonaws.com/{2}:version4'.format(account_id, region, ecr_repository_name)
print(container_image_uri)

540748271236.dkr.ecr.us-east-1.amazonaws.com/neural_network_spam_classifier:version4


In [32]:
print(sagemaker_session.upload_data(os.path.join(os.getcwd(),'container','source_dir','mids.csv'), bucket, prefix + '/train'))
print(sagemaker_session.upload_data(os.path.join(os.getcwd(),'container','source_dir','mids.csv'), bucket, prefix + '/validation'))

s3://sagemaker-neural-net/train_data/train/mids.csv
s3://sagemaker-neural-net/train_data/validation/mids.csv


In [14]:
import sagemaker
import json

# JSON encode hyperparameters
def json_encode_hyperparameters(hyperparameters):
    return {str(k): json.dumps(v) for (k, v) in hyperparameters.items()}

hyperparameters = json_encode_hyperparameters({
    "learning-rate": 0.02,
    "epochs": 100})

est = sagemaker.estimator.Estimator(container_image_uri,
                                    role, 
                                    train_instance_count=1, 
                                    train_instance_type='local', # we use local mode
                                    #train_instance_type='ml.m5.xlarge',
                                    base_job_name='neural_net_spam_classifier',
                                    output_path='s3://sagemaker-neural-net/neural-network/',
                                    hyperparameters=hyperparameters)

train_config = sagemaker.session.s3_input('s3://{0}/{1}/train/'.format(bucket, prefix), content_type='text/csv')
val_config = sagemaker.session.s3_input('s3://{0}/{1}/validation/'.format(bucket, prefix), content_type='text/csv')

est.fit({'train': train_config, 'validation': val_config })

Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.
's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.
's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


Creating tmph8snsvvd_algo-1-bbkjw_1 ... 
[1BAttaching to tmph8snsvvd_algo-1-bbkjw_12mdone[0m
[36malgo-1-bbkjw_1  |[0m 2020-09-24 08:40:57,367 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:
[36malgo-1-bbkjw_1  |[0m /usr/bin/python3 -m pip install -r requirements.txt
[36malgo-1-bbkjw_1  |[0m You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
[36malgo-1-bbkjw_1  |[0m 2020-09-24 08:40:59,284 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-bbkjw_1  |[0m 2020-09-24 08:40:59,306 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-bbkjw_1  |[0m 2020-09-24 08:40:59,320 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-bbkjw_1  |[0m 2020-09-24 08:40:59,331 sagemaker-training-toolkit INFO     Invoking user script
[36malgo-1-bbkjw_1  |[0m 
[36malgo-1-bbkjw_1  |[0m T

In [49]:
import json
from time import gmtime, strftime

tuning_job_name = 'SpamClassifierNN-HPTuning'

print(tuning_job_name)

tuning_job_config = {
    "ParameterRanges": {
      "CategoricalParameterRanges": [],
      "ContinuousParameterRanges": [
        {
          "MaxValue": "0.001",
          "MinValue": "0.0001",
          "Name": "learning-rate",          
        }
      ],
      "IntegerParameterRanges": []
    },
    "ResourceLimits": {
      "MaxNumberOfTrainingJobs": 9,
      "MaxParallelTrainingJobs": 3
    },
    "Strategy": "Bayesian",
    "HyperParameterTuningJobObjective": {
      "MetricName": "loss",
      "Type": "Minimize"
    }
  }

SpamClassifierNN-HPTuning


In [50]:
training_image = container_image_uri


training_job_definition = {
    "AlgorithmSpecification": {
      "MetricDefinitions": [
        {
          "Name": "loss",
          "Regex": "loss: ([0-9\\.]+)"
        }
      ],
      "TrainingImage": training_image,
      "TrainingInputMode": "File"
    },
    "InputDataConfig": [
        {
            "ChannelName": "train",
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": 's3://{0}/{1}/train/'.format(bucket, prefix),
                    "S3DataDistributionType": "FullyReplicated"
                }
            },
            "CompressionType": "None",
            "RecordWrapperType": "None"
        },
        {
            "ChannelName": "validation",
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": 's3://{0}/{1}/validation/'.format(bucket, prefix),
                    "S3DataDistributionType": "FullyReplicated"
                }
            },            
            "CompressionType": "None",
            "RecordWrapperType": "None"            
        }
    ],
    "OutputDataConfig": {
      "S3OutputPath": "s3://{0}/{1}/output".format(bucket,prefix)
    },
    "ResourceConfig": {
      "InstanceCount": 1,
      "InstanceType": "ml.m5.xlarge",
      "VolumeSizeInGB": 5
    },
    "RoleArn": role,
    "StoppingCondition": {
      "MaxRuntimeInSeconds": 3600
    }
}

In [51]:
smclient = boto3.client('sagemaker')
smclient.create_hyper_parameter_tuning_job(HyperParameterTuningJobName = tuning_job_name,
                                               HyperParameterTuningJobConfig = tuning_job_config,
                                               TrainingJobDefinition = training_job_definition)

{'HyperParameterTuningJobArn': 'arn:aws:sagemaker:us-east-1:540748271236:hyper-parameter-tuning-job/spamclassifiernn-hptuning',
 'ResponseMetadata': {'RequestId': 'dff79e1e-2feb-476c-b9bf-2509deac18d2',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'dff79e1e-2feb-476c-b9bf-2509deac18d2',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '126',
   'date': 'Thu, 24 Sep 2020 11:04:58 GMT'},
  'RetryAttempts': 0}}

In [54]:
smclient.describe_hyper_parameter_tuning_job(HyperParameterTuningJobName = tuning_job_name)['HyperParameterTuningJobStatus']

'Completed'

2020-09-24
24-11-04
