# Training Job in AWS

Installing packages

In [None]:
import distro 
import os
if distro.id() in ["debian", "ubuntu"]:
    os.system("sudo apt-get update")
    os.system("sudo apt-get install ffmpeg libsm6 libxext6 -y")

import sys
!{sys.executable} -m pip install opencv-python
!{sys.executable} -m pip install --upgrade mxnet
!{sys.executable} -m pip install numpy==1.23.5

!sudo apt-get update && apt-get install -y libgl1-mesa-glx

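Resize settings: the target image size and the dataset directory (the resize itself is applied by `im2rec.py` in the next step)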

In [None]:
# Value handed to im2rec's --resize option in the conversion cell.
RESIZE_SIZE: int = 256
# Dataset root; the conversion cell reads images from "<BASE_DIR>/images/".
BASE_DIR: str = "PlasticDetection/"

Converting the .lst files to .rec (RecordIO) files for train and test

In [None]:
!python tools/im2rec.py --resize $RESIZE_SIZE --pack-label test $BASE_DIR/images/
!python tools/im2rec.py --resize $RESIZE_SIZE --pack-label train $BASE_DIR/images/

Uploading rec files to S3 bucket

In [None]:
import sagemaker
from sagemaker import get_execution_role

# Destination bucket and the key prefix shared by everything uploaded here.
bucket = "object-detection-udemy"
prefix = "DEMO-objectDetection"

# Execution role and SageMaker session reused by the later cells.
role = get_execution_role()
sess = sagemaker.Session()

# Key prefixes (inside the bucket) for the two data channels.
train_channel = prefix + "/train"
validation_channel = prefix + "/validation"

# One template for every "s3://<bucket>/<key prefix>" URI built below.
s3_uri_template = "s3://{}/{}"

# Training set: upload train.rec, then record the prefix it lives under.
sess.upload_data(path="train.rec", bucket=bucket, key_prefix=train_channel)
s3_train_data = s3_uri_template.format(bucket, train_channel)

# Validation set: test.rec is uploaded under the validation prefix.
sess.upload_data(path="test.rec", bucket=bucket, key_prefix=validation_channel)
s3_validation_data = s3_uri_template.format(bucket, validation_channel)

# Location the estimator is told to write its output to.
s3_output_location = "s3://{}/{}/output".format(bucket, prefix)
print(s3_output_location)

Retrieving the container image URI for the AWS object detection algorithm

In [None]:
from sagemaker import image_uris

# Look up the object-detection container image URI for the region the
# session is bound to.
training_image = image_uris.retrieve(
    framework="object-detection",
    region=sess.boto_region_name,
    version="1",
)

print(training_image)

Creating the object detection model (SageMaker estimator)

In [None]:
# Estimator wrapping the object-detection training image; every later cell
# (hyperparameters, tuner) operates on this object.
od_model = sagemaker.estimator.Estimator(
    image_uri=training_image,
    role=role,
    instance_count=1,
    instance_type="ml.p3.2xlarge",  # GPU instance
    volume_size=50,
    max_run=360000,
    input_mode="File",
    output_path=s3_output_location,
    sagemaker_session=sess,
)

Setting Hyperparameters

Source: https://docs.aws.amazon.com/sagemaker/latest/dg/object-detection-api-config.html

In [None]:
def set_hyperparameters(num_epochs, lr_steps, num_classes=1, num_training_samples=898):
    """Apply the fixed (non-tuned) hyperparameters to the global ``od_model``.

    Args:
        num_epochs: Number of passes the training loop makes over the images.
        lr_steps: Comma-separated epochs at which the learning rate is
            multiplied by ``lr_scheduler_factor``, e.g. ``"50,70,80,90,95"``.
        num_classes: Number of object classes; defaults to 1 (plastic only).
        num_training_samples: Number of images in the training set;
            defaults to 898.

    Returns:
        None. ``od_model`` is modified in place.
    """
    od_model.set_hyperparameters(
        base_network = "resnet-50",  # CNN backbone architecture
        use_pretrained_model = 1,    # transfer learning
        num_classes = num_classes,
        epochs = num_epochs,
        lr_scheduler_step = lr_steps,  # epochs at which the lr is reduced
        lr_scheduler_factor = 0.1,     # factor applied at each reduction
        momentum = 0.9,                # used by the sgd optimizer
        weight_decay = 0.0005,
        nms_threshold = 0.45,
        # NOTE(review): images are packed with --resize 256 (RESIZE_SIZE) but
        # image_shape is 512 here - confirm this is intended.
        image_shape = 512,
        # Previously a literal 898 here shadowed an unused local of the same name.
        num_training_samples = num_training_samples,
    )

set_hyperparameters(100, "50,70,80,90,95")

### Hyperparameter Tuning

In [None]:
from sagemaker.tuner import CategoricalParameter, ContinuousParameter, HyperparameterTuner

# Search space explored by the tuner, one entry per tuned hyperparameter.
learning_rate_range = ContinuousParameter(0.001, 0.1)
batch_size_choices = CategoricalParameter([8, 16])
optimizer_choices = CategoricalParameter(["sgd", "adam"])

hyperparameter_ranges = {
    "learning_rate": learning_rate_range,
    "mini_batch_size": batch_size_choices,
    "optimizer": optimizer_choices,
}

Setting up the tuner: mAP (mean average precision) is used as the evaluation metric

Source: https://jonathan-hui.medium.com/map-mean-average-precision-for-object-detection-45c121a31173

In [None]:
# Objective: maximise mAP measured on the validation set (not the training set).
objective_metric_name = "validation:mAP"
objective_type = "Maximize"

# Job budget: each tuning job is billed, so max_jobs can be lowered to cut cost.
max_jobs = 8
max_parallel_jobs = 1

tuner = HyperparameterTuner(
    estimator=od_model,
    objective_metric_name=objective_metric_name,
    objective_type=objective_type,
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=max_jobs,
    max_parallel_jobs=max_parallel_jobs,
)

Starting training job

In [None]:
def _recordio_input(s3_uri):
    """Return a TrainingInput for RecordIO data stored under ``s3_uri``."""
    return sagemaker.inputs.TrainingInput(
        s3_uri,
        distribution="FullyReplicated",
        content_type="application/x-recordio",  # RecordIO format
        s3_data_type="S3Prefix",
    )


# Both channels share the same settings and differ only in their S3 location.
train_data = _recordio_input(s3_train_data)
validation_data = _recordio_input(s3_validation_data)

data_channels = {"train": train_data, "validation": validation_data}

In [None]:
# Launch the tuning job. Once the training jobs are running on SageMaker,
# the notebook kernel instance can be switched off.
# NOTE(review): confirm that `logs` has an effect for tuning jobs and whether
# this call blocks until the job finishes in the installed SDK version.
tuner.fit(
    inputs=data_channels,
    logs=True,
)

After this, the tuning job is monitored in the AWS SageMaker AI console, and the model from the best training job is used for the subsequent steps.