In [None]:
import os
import boto3
import sagemaker

# SageMaker SDK session object.
sagemaker_session = sagemaker.session.Session()

# One boto3 session serves both the region lookup below and the
# Secrets Manager client built from `session` later in the notebook.
session = boto3.session.Session()
region = session.region_name  # must be in the same region as the S3 data!
bucket = "sagemaker-bitty-magazines"  # pipeline steps use S3 storage extensively
model_package_group_name = "BittiModelPackageGroupName"  # enables model versioning
prefix = "sagemaker_pipelines_bitti"
turicreate_logs_path = f"s3://{bucket}/{prefix}/logs"

print(f"SageMaker ver: {sagemaker.__version__}")

In [None]:
# The execution role and the account id are read from Secrets Manager
# instead of being written into the notebook.
secret_role = "bitti-pipeline-execution-role"
secret_account_id = "bitti-pipeline-account-id"

client_secrets = session.client(service_name="secretsmanager", region_name=region)

# Each secret is read as a plain string from the response's "SecretString" field.
role_secret = client_secrets.get_secret_value(SecretId=secret_role)
account_secret = client_secrets.get_secret_value(SecretId=secret_account_id)
role = role_secret["SecretString"]
account_id = account_secret["SecretString"]

### Pipeline parameters

Define the step caching policy and the parameters (input data location, instance types, training hyperparameters, container image URIs, and model approval settings) that the pipeline steps below rely on.

In [None]:
from sagemaker.workflow.parameters import ParameterInteger, ParameterString, ParameterFloat
from sagemaker.workflow.steps import CacheConfig


# Step caching is switched on, with cached results expiring after 30 days.
cache_config = CacheConfig(enable_caching=True, expire_after="30d")
source_dir = "pipelines/bitti"  # local scripts are taken from this directory

# --- Data location -----------------------------------------------------------
input_data = ParameterString(
    name="InputData",
    default_value=f"s3://{bucket}/bitti-data-yolo-format/",
)

# --- Processing resources ----------------------------------------------------
processing_instance_count = ParameterInteger(
    name="ProcessingInstanceCount",
    default_value=1,
)

processing_instance_type = ParameterString(
    name="ProcessingInstanceType",
    default_value="ml.m5.large",  # ml.t3.medium runs out of RAM on eval stage
)

# --- Training resources and hyperparameters ----------------------------------
training_instance_type = ParameterString(
    name="TrainingInstanceType",
    default_value="ml.g4dn.4xlarge",
)

training_batch_size = ParameterInteger(
    name="TrainingBatchSize",
    # alternative: default_value=0  # will be determined automatically
    default_value=32,
)

training_max_iterations = ParameterInteger(
    name="MaxIterations",
    # alternative: default_value=0  # will be determined automatically
    default_value=18000,
)

# --- Container images --------------------------------------------------------
# NOTE(review): the ECR region is hard-coded to eu-central-1 although `region`
# is read from the boto3 session - confirm this is intentional.
# NOTE(review): neither URI parameter is listed in the Pipeline's `parameters`;
# the later cells consume them through str(...).
turicreate_training_uri = ParameterString(
    name="TuriCreateTrainingURI",
    default_value=f"{account_id}.dkr.ecr.eu-central-1.amazonaws.com/tf-turicreate:latest",
)

turicreate_processing_uri = ParameterString(
    name="TuriCreateProcessingURI",
    default_value=f"{account_id}.dkr.ecr.eu-central-1.amazonaws.com/tf-turicreate:processing",
)

# --- Model registration ------------------------------------------------------
model_approval_status = ParameterString(
    name="ModelApprovalStatus",
    # alternative: default_value="PendingManualApproval"
    default_value="Approved",
)

# Threshold compared against the evaluation mAP in the condition step.
model_approval_map_threshold = ParameterFloat(
    name="ModelApprovalmAPThreshold",
    default_value=0.7,
)

### Data augmentation step

The data set that was collected is small, just around fifty images. Furthermore, all the magazines are placed with the same top-down view with labels horizontally aligned. We are going to change that.

In [None]:
from sagemaker.processing import ScriptProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep


# Processor that runs the augmentation script with python3 inside the
# Turi Create processing image.
magazine_augmentor = ScriptProcessor(
    role=role,
    base_job_name="script-magazine-augmentation",
    image_uri=str(turicreate_processing_uri),
    command=["python3"],
    instance_count=processing_instance_count,
    instance_type=processing_instance_type,
    # env={"RotationAngle": "15"},  # TODO: add configurability
)

# The raw dataset (the `InputData` parameter) is mounted as the job input;
# whatever the script writes to the output directory becomes "augmented".
augmentation_inputs = [
    ProcessingInput(source=input_data, destination="/opt/ml/processing/input"),
]
augmentation_outputs = [
    ProcessingOutput(output_name="augmented", source="/opt/ml/processing/output"),
]

step_data_augmentation = ProcessingStep(
    name="DataAugmentation",
    processor=magazine_augmentor,
    inputs=augmentation_inputs,
    outputs=augmentation_outputs,
    code=f"{source_dir}/augmentation.py",
    cache_config=cache_config,
)

### Preprocessing step

Download the dataset, convert it into Turi Create's `SFrame` object, and save the output on S3 in a train/test split.

In [None]:
# Processor that runs the SFrame conversion script.
# NOTE(review): the variable name is misspelled ("preproessor"); it is kept
# unchanged here in case it is referenced elsewhere.
sframes_preproessor = ScriptProcessor(
    role=role,
    base_job_name="script-sframe-conversion",
    image_uri=str(turicreate_processing_uri),
    command=["python3"],
    instance_count=processing_instance_count,
    instance_type=processing_instance_type,
    env={"TrainSplitFraction": "0.9"},  # TODO: make it a pipeline param
)

# The input is the "augmented" output of the data augmentation step.
augmented_s3_uri = (
    step_data_augmentation.properties.ProcessingOutputConfig.Outputs["augmented"].S3Output.S3Uri
)
sframe_inputs = [
    ProcessingInput(source=augmented_s3_uri, destination="/opt/ml/processing/input"),
]

# Two named outputs, one per split, each read from its own directory.
sframe_outputs = [
    ProcessingOutput(output_name="train", source="/opt/ml/processing/output_train"),
    ProcessingOutput(output_name="test", source="/opt/ml/processing/output_test"),
]

step_sframe_process = ProcessingStep(
    name="BittiDataProcessing",
    processor=sframes_preproessor,
    inputs=sframe_inputs,
    outputs=sframe_outputs,
    code=f"{source_dir}/preprocessing.py",
    cache_config=cache_config,
)

### Now for the main bit - the training step

In [None]:
from sagemaker.tensorflow import TensorFlow


# S3 location where the training job writes its model artifacts.
model_path = f"s3://{bucket}/output_model"

# Regular expressions are a pain, use the playground here: https://regex101.com/r/kopij0/1
# The pattern is a raw string: "\|" is not a valid Python escape sequence and
# triggers a warning in a regular string literal. The regex itself is unchanged.
turicreate_metrics = [
    {
        "Name": "train:loss",
        "Regex": r"'train:loss': (?:\| [0-9]+ \| )([0-9]+[.][0-9]+)",
    }
]

# NOTE(review): int(...)/str(...) turn the pipeline parameters into plain
# Python values when this cell runs, so execution-time overrides of
# MaxIterations, TrainingBatchSize and TrainingInstanceType presumably never
# reach the training job - confirm against the installed SageMaker SDK version.
tf_train = TensorFlow(
    base_job_name="bitti-turicreate-pipelines",
    entry_point="training.py",
    source_dir=source_dir,
    output_path=model_path,  # don't use model_dir hyperparam!
    role=role,
    image_uri=str(turicreate_training_uri),
    hyperparameters={
        "max-iterations": int(training_max_iterations),
        "batch-size": int(training_batch_size),
    },
    instance_count=1,
    instance_type=str(training_instance_type),
    metric_definitions=turicreate_metrics,
    input_mode="File",
)

In [None]:
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep

# TODO: change into Pipe - but that would need additional read f-ions in training.py

# Both training channels are fed from the SFrame conversion step's outputs.
sframe_step_outputs = step_sframe_process.properties.ProcessingOutputConfig.Outputs

train_channel = TrainingInput(
    sframe_step_outputs["train"].S3Output.S3Uri,
    # content_type="application/octet-stream",  # Tested, not needed for File mode
    input_mode="File",
)
test_channel = TrainingInput(
    sframe_step_outputs["test"].S3Output.S3Uri,
    # content_type="application/octet-stream",
    input_mode="File",
)

step_train = TrainingStep(
    name="ModelTraining",
    estimator=tf_train,
    inputs={"train": train_channel, "test": test_channel},
    cache_config=cache_config,
)

### Define the evaluation step

We have trained the model, but we need to validate it too. The first time around, because of EXIF tags, half of the images were not rotated properly relative to their labels. Needless to say, that led to a mAP score close to nil. That's a good example of why the validation step is needed - only models that *work* should be carried forward.

In [None]:
from sagemaker.processing import ScriptProcessor
from sagemaker.workflow.properties import PropertyFile


# Single-instance processor that runs the evaluation script.
script_eval = ScriptProcessor(
    role=role,
    base_job_name="script-bitti-eval",
    image_uri=str(turicreate_processing_uri),
    command=["python3"],
    instance_count=1,
    instance_type=processing_instance_type,
)

# Exposes evaluation.json from the "evaluation" output so later steps can
# read values out of it.
evaluation_report = PropertyFile(
    name="EvaluationReport",
    output_name="evaluation",
    path="evaluation.json",
)

# The evaluation job receives the trained model artifacts and the test split.
eval_inputs = [
    ProcessingInput(
        source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        destination="/opt/ml/processing/model",
    ),
    ProcessingInput(
        source=step_sframe_process.properties.ProcessingOutputConfig.Outputs["test"].S3Output.S3Uri,
        destination="/opt/ml/processing/test",
    ),
]
eval_outputs = [
    ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
]

step_eval = ProcessingStep(
    name="ModelEvaluation",
    processor=script_eval,
    inputs=eval_inputs,
    outputs=eval_outputs,
    code=f"{source_dir}/evaluation.py",
    property_files=[evaluation_report],
)

In [None]:
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
from sagemaker.workflow.condition_step import ConditionStep, JsonGet


# mAP value read from the evaluation step's report (evaluation.json).
evaluated_map = JsonGet(
    step=step_eval,
    property_file=evaluation_report,
    json_path="regression_metrics.mAP.value",
)

# Holds when the evaluated mAP is greater than or equal to the approval threshold.
cond_map = ConditionGreaterThanOrEqualTo(
    left=evaluated_map,
    right=model_approval_map_threshold,
)

In [None]:
from sagemaker.model_metrics import MetricsSource, ModelMetrics 
from sagemaker.workflow.step_collections import RegisterModel


# S3 prefix of the evaluation step's first (and only) declared output.
evaluation_s3_uri = step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]

# Attach the evaluation report to the registered model as its statistics.
model_metrics = ModelMetrics(
    model_statistics=MetricsSource(
        s3_uri=f"{evaluation_s3_uri}/evaluation.json",
        content_type="application/json",
    )
)

# Register the trained artifacts in the model package group.
step_register = RegisterModel(
    name="BittiRegisterModel",
    estimator=tf_train,
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    model_package_group_name=model_package_group_name,
    approval_status=model_approval_status,
    model_metrics=model_metrics,
    content_types=["application/octet-stream"],
    response_types=["application/octet-stream"],
    inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
)

In [None]:
# Single-instance processor that runs the publishing script.
script_publish = ScriptProcessor(
    role=role,
    base_job_name="script-bitti-publish",
    image_uri=str(turicreate_processing_uri),
    command=["python3"],
    instance_count=1,
    instance_type=processing_instance_type,
)

# The only input is the trained model; the step declares no outputs.
publish_inputs = [
    ProcessingInput(
        source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        destination="/opt/ml/processing/model",
    ),
]

step_publish = ProcessingStep(
    name="PublishViaAPI",
    processor=script_publish,
    inputs=publish_inputs,
    code=f"{source_dir}/publish_to_api.py",
)

In [None]:
# Steps that run when the mAP condition holds; nothing runs otherwise.
steps_when_map_ok = [step_register, step_publish]

step_cond = ConditionStep(
    name="BittymAPcheck",
    conditions=[cond_map],
    if_steps=steps_when_map_ok,
    else_steps=[],
)

In [None]:
from sagemaker.workflow.pipeline import Pipeline


pipeline_name = "BittiPipeline"

# Parameters that are declared on the pipeline.
pipeline_parameters = [
    processing_instance_count,
    processing_instance_type,
    training_instance_type,
    training_batch_size,
    training_max_iterations,
    model_approval_status,
    input_data,
    model_approval_map_threshold,
]

# Top-level steps; registration and publishing are nested inside the
# condition step.
pipeline_steps = [
    step_data_augmentation,
    step_sframe_process,
    step_train,
    step_eval,
    step_cond,
]

pipeline = Pipeline(
    name=pipeline_name,
    parameters=pipeline_parameters,
    steps=pipeline_steps,
)

In [None]:
# Create or update the pipeline definition, using the execution role read from Secrets Manager.
pipeline.upsert(role_arn=role)

In [None]:
# Start a pipeline execution; no parameter overrides are passed here.
execution = pipeline.start()

### Generate a CloudFormation template out of it all (WIP)

In [None]:
# Pipeline description; the name, display name and definition body are
# copied from it into the template.
desc = pipeline.describe()

# The Designer metadata refers to the resource by this id in two places.
designer_id = "5728c514-4c16-42af-915b-1f8be2094346"

pipeline_properties = {
    "PipelineName": desc["PipelineName"],
    "PipelineDisplayName": desc["PipelineDisplayName"],
    "PipelineDescription": "Magazine logo detection for AIGA project",
    "PipelineDefinition": {"PipelineDefinitionBody": desc["PipelineDefinition"]},
}

# Layout of the resource on the CloudFormation Designer canvas.
designer_layout = {
    "size": {"width": 60, "height": 60},
    "position": {"x": 70, "y": 90},
    "z": 1,
    "embeds": [],
}

cf_template = {
    "Resources": {
        "SagemakerObjectDetectionPipeline": {
            "Type": "AWS::SageMaker::Pipeline",
            "Properties": pipeline_properties,
            "Metadata": {"AWS::CloudFormation::Designer": {"id": designer_id}},
        }
    },
    "Metadata": {"AWS::CloudFormation::Designer": {designer_id: designer_layout}},
}