# Train an OD Model using our Ground Truth Labels

We have a labeled dataset, and we will now use it to train an object detection (OD) model. As training input we'll use the augmented manifest produced as the output of one of our OD labeling jobs.


In [None]:
import sagemaker
import boto3
import numpy as np
import json
from PIL import Image, ImageDraw
from io import BytesIO

# SageMaker session for this notebook, plus the region and execution role derived from it.
sm_session = sagemaker.session.Session()
region = sm_session.boto_region_name
role = sagemaker.get_execution_role()

# Low-level boto3 clients: one for the SageMaker API, one for S3.
sagemaker_client = boto3.client("sagemaker")
s3_client = boto3.client("s3")

# S3 location (session default bucket + key prefix) used for everything this notebook writes.
BUCKET = sm_session.default_bucket()
PREFIX = "groundtruth_demo/training"

## Download Output Manifest

In [None]:
# Name of the Ground Truth labeling job whose labels we train on.
job_name = "ground-truth-od-demo-1660034162"

# Look the job up and read the S3 URI of its output manifest from the response.
job_desc = sagemaker_client.describe_labeling_job(LabelingJobName=job_name)
labeling_job_output = job_desc["LabelingJobOutput"]
output_manifest = labeling_job_output["OutputDatasetS3Uri"]

We install the command-line utility `jq` for displaying JSON, and download the labeling job's output manifest.

In [None]:
%%capture
# The %%capture cell magic above captures this cell's output instead of displaying it.
# NOTE(review): that also hides error output, e.g. from a failed download - confirm
# the manifest file exists before moving on.
# Refresh the package index, then install jq without prompting (-y).
!apt-get update
!apt-get install jq -y
# Copy the labeling job's output manifest from S3 into the current working directory.
!aws s3 cp {output_manifest} .

Let's have a look at the manifest

In [None]:
!cat 'output.manifest' | jq .

## Splitting Data Into Training and Validation sets

Our output manifest is in JSON Lines format, meaning each line is a valid JSON object representing our labels for a particular image.

In [None]:
# Parse the output manifest (JSON Lines: one labeled image per line).
# Blank lines - e.g. a trailing empty line at the end of the file - are skipped,
# because json.loads raises on an empty string.
with open("output.manifest", "r") as f:
    output = [json.loads(line) for line in f if line.strip()]

# Shuffle output in place so the split below is random rather than following
# the order of the records in the manifest.
np.random.shuffle(output)

# 80/20 train/validation split.
dataset_size = len(output)
train_test_split_index = round(dataset_size * 0.8)

train_data = output[:train_test_split_index]
validation_data = output[train_test_split_index:]

num_training_samples = len(train_data)
num_val_samples = len(validation_data)

# Fail early with a clear message instead of writing (and later uploading) an
# empty manifest; with very small datasets the 80/20 rounding can leave one
# side of the split empty.
if num_training_samples == 0 or num_val_samples == 0:
    raise ValueError(
        "output.manifest has {} usable record(s), which is too few to build "
        "non-empty training and validation sets".format(dataset_size)
    )

# Write each subset back out in the same JSON Lines layout.
with open("train.manifest", "w") as f:
    f.writelines(json.dumps(record) + "\n" for record in train_data)

with open("validation.manifest", "w") as f:
    f.writelines(json.dumps(record) + "\n" for record in validation_data)

print("Split the data into {} training and {} validation samples".format(num_training_samples, num_val_samples))

## Upload training and validation data to S3

Let's upload our training and validation manifests to S3.

In [None]:
# Upload both manifests under s3://{BUCKET}/{PREFIX}/, keeping their local file names.
!aws s3 cp train.manifest s3://{BUCKET}/{PREFIX}/train.manifest
!aws s3 cp validation.manifest s3://{BUCKET}/{PREFIX}/validation.manifest

## Setup Training Job with SageMaker SDK

In [None]:
# S3 prefix under which the training job writes its output.
TRAINING_OUTPUT = "s3://{}/{}/output".format(BUCKET, PREFIX)

# Container image of the "object-detection" framework for the session's region.
training_image = sagemaker.image_uris.retrieve(
    region=region, framework="object-detection", version="1"
)
print("Training with image {}".format(training_image))

# Estimator describing the training infrastructure: a single ml.p3.2xlarge
# instance with a 50 GB volume, a max_run of 36000 seconds, and Pipe input mode.
od_model = sagemaker.estimator.Estimator(
    training_image,
    role,
    instance_count=1,
    instance_type="ml.p3.2xlarge",
    volume_size=50,
    max_run=36000,
    input_mode="Pipe",
    output_path=TRAINING_OUTPUT,
    sagemaker_session=sm_session,
)

# Number of training epochs.
epochs = 30
# Epochs at which the learning rate is multiplied by lr_scheduler_factor.
# The two steps are placed at 1/3 and 2/3 of the run and derived from `epochs`,
# so they always fall inside the run: steps beyond the last epoch (such as
# "33,67" with 30 epochs) would never take effect.
lr_scheduler_step = "{},{}".format(epochs // 3, 2 * epochs // 3)

od_model.set_hyperparameters(
    base_network="resnet-50",
    use_pretrained_model=1,
    num_classes=1,
    mini_batch_size=1,
    epochs=epochs,
    learning_rate=0.001,
    lr_scheduler_step=lr_scheduler_step,
    lr_scheduler_factor=0.1,
    optimizer="sgd",
    momentum=0.9,
    weight_decay=0.0005,
    overlap_threshold=0.5,
    nms_threshold=0.45,
    image_shape=300,
    label_width=350,
    # Size of the training split computed when train.manifest was written.
    num_training_samples=str(num_training_samples),
)

## Setup Training Job Inputs

Let's find the data attribute name which holds our labels. This was configured to be the same as the labeling job name; if it is not, find it in the output manifest file.

In [None]:
def _augmented_manifest_input(manifest_uri):
    """Return the TrainingInput for one augmented-manifest channel.

    Both channels share the same settings and differ only in the S3 URI of
    their manifest. Each record supplies the image reference ("source-ref")
    and the label attribute, which is named after the labeling job.
    """
    return sagemaker.inputs.TrainingInput(
        manifest_uri,
        distribution="FullyReplicated",
        content_type="application/x-recordio",
        s3_data_type="AugmentedManifestFile",
        attribute_names=["source-ref", job_name],
        record_wrapping="RecordIO",
    )


# S3 locations of the two manifests uploaded earlier.
s3_train_data = "s3://{}/{}/train.manifest".format(BUCKET, PREFIX)
s3_validation_data = "s3://{}/{}/validation.manifest".format(BUCKET, PREFIX)

# NOTE: this rebinds train_data / validation_data, which held the lists of
# manifest records until now, to the channel definitions.
train_data = _augmented_manifest_input(s3_train_data)
validation_data = _augmented_manifest_input(s3_validation_data)

# Channel name -> input definition, as passed to the estimator's fit().
data_channels = {"train": train_data, "validation": validation_data}

## Launch Training Job

In [None]:
%%time
# Launch the training job on the train/validation channels; logs=True asks the SDK to show the job's logs.
od_model.fit(inputs=data_channels, logs=True)

## Deploy Model

In [None]:
%%time
# Deploy the trained model to an endpoint backed by one ml.m4.xlarge instance.
object_detector = od_model.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")
# Keep the endpoint's name for reference.
endpoint_name = object_detector.endpoint_name

## Generate Predictions

In [None]:
# Pick an image the model has not seen during training and download its raw bytes from S3.
test_img_id = "12354d7c2e6dcf4b"
test_img_key = "test/{}.jpg".format(test_img_id)
test_img_object = s3_client.get_object(Bucket="open-images-dataset", Key=test_img_key)
image_bytes = test_img_object["Body"].read()

In [None]:
# Send the raw image bytes to the endpoint, declaring the payload's content type as application/x-image.
response = object_detector.predict(image_bytes, initial_args={'ContentType': 'application/x-image'})

In [None]:
# The endpoint's response is JSON; the detections live under its "prediction" key.
predictions = json.loads(response)["prediction"]
# Order the detections from most to least confident (the confidence score is
# the second field of each detection).
sorted_predictions = sorted(
    predictions,
    key=lambda detection: detection[1],
    reverse=True,
)

In [None]:
# Decode the downloaded test image and prepare to draw on it.
img = Image.open(BytesIO(image_bytes))
width, height = img.size
draw = ImageDraw.Draw(img)

# Draw the bounding box of the most confident detection, if there is one.
# Without this guard an empty prediction list would raise an IndexError.
if sorted_predictions:
    # Fields 2..5 of a detection are the box corners; they are multiplied by
    # the image width/height to obtain pixel coordinates.
    xmin, ymin, xmax, ymax = sorted_predictions[0][2:6]
    draw.rectangle(
        ((xmin * width, ymin * height), (xmax * width, ymax * height)),
        outline="red",
    )
else:
    print("The model returned no detections for this image")

In [None]:
# A bare expression as the last line of a cell displays its value: the test image with the box drawn on it.
img

## Clean Endpoint

Delete the endpoint when you are done with it, to stop incurring cost.

In [None]:
# Delete the endpoint that was created by od_model.deploy(...).
object_detector.delete_endpoint()