In [None]:
import os
import boto3
import sagemaker

# SageMaker session object for this notebook.
sagemaker_session = sagemaker.session.Session()

# Execution role as reported by the SageMaker SDK.
role = sagemaker.get_execution_role()

# Must be in the same region as the S3 data!
region = boto3.Session().region_name

# Pipeline steps use S3 storage extensively.
bucket = "sagemaker-bitty-magazines"
prefix = "sagemaker_pipelines_bitti"

# Enables model versioning.
model_package_group_name = "BittiModelPackageGroupName"

turicreate_logs_path = f"s3://{bucket}/{prefix}/logs"

# Echo the key settings so they are visible in the cell output.
print(f"Bucket: {bucket}")
print(f"Execution role: {role}")
print(f"SageMaker ver: {sagemaker.__version__}")

### Preprocessing step

Download the dataset, convert it into Turi Create's `SFrame` object, and save the output on S3 in a train/test split.

In [6]:
# Locations shared by the pipeline cells that follow.

# Local directory that holds the step scripts (preprocessing.py, training.py, ...).
source_dir = "pipelines/bitti"

# S3 prefix with the input data; used as the default of the "InputData" parameter.
s3_input = "s3://{}/bitti-data-yolo-format/".format(bucket)

In [27]:
from sagemaker.workflow.parameters import ParameterInteger, ParameterString, ParameterFloat
from sagemaker.workflow.steps import CacheConfig

# Step caching is switched on, with a 30-day expiry.
cache_config = CacheConfig(enable_caching=True, expire_after="30d")

# ---- Processing resources ---------------------------------------------------

processing_instance_count = ParameterInteger(
    name="ProcessingInstanceCount",
    default_value=1,
)

# ml.t3.medium runs out of RAM on eval stage
processing_instance_type = ParameterString(
    name="ProcessingInstanceType",
    default_value="ml.t3.large",
)

# ---- Training resources and hyperparameters ---------------------------------

# TODO: get the GPU tested and running
training_instance_type = ParameterString(
    name="TrainingInstanceType",
    default_value="ml.c5.9xlarge",
)

training_batch_size = ParameterInteger(
    name="TrainingBatchSize",
    default_value=32,
)

training_max_iterations = ParameterInteger(
    name="MaxIterations",
    default_value=300,
)

training_n_cores = ParameterInteger(
    name="NumPyLambdaWorkers",
    default_value=36,
)

# this will also be used for preprocessing - TC runs on TF now
training_instance_tf_version = ParameterString(
    name="TrainingInstanceTFVersion",
    default_value="2.3",
)

# ---- Model registration -----------------------------------------------------

# Alternative default that was tried here: "PendingManualApproval"
model_approval_status = ParameterString(
    name="ModelApprovalStatus",
    default_value="Approved",
)

# Compared against the evaluated mAP in the condition defined further down.
model_approval_map_threshold = ParameterFloat(
    name="ModelApprovalmAPThreshold",
    default_value=0.7,
)

# ---- Input data -------------------------------------------------------------

input_data = ParameterString(
    name="InputData",
    default_value=s3_input,
)

In [28]:
from sagemaker.processing import ScriptProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep


# The container image URI is looked up right here, while the pipeline is being
# defined, so it cannot follow a value supplied when an execution starts.
# Read the parameters' defaults explicitly instead of casting the parameter
# objects with str(): the cast hides that only the default is ever used, and
# newer SDK releases reject str() on pipeline variables.
preprocessing_image_uri = sagemaker.image_uris.retrieve(
    framework="tensorflow",
    image_scope="inference",
    region=region,
    version=training_instance_tf_version.default_value,
    py_version="py37",
    instance_type=processing_instance_type.default_value)

# Script processor that runs the preprocessing script in the image above.
# Instance type and count stay pipeline parameters, so they can be overridden
# per execution.
sframes_preproessor = ScriptProcessor(
    image_uri=preprocessing_image_uri,
    command=["python3"],
    instance_type=processing_instance_type,
    instance_count=processing_instance_count,
    env={"TrainSplitFraction": "0.9"},  # TODO: make it a pipeline param
    base_job_name="script-sframe-conversion",
    role=role)

# Processing step: takes the input data location and exposes two named
# outputs, "train" and "test", that the later steps refer to.
step_sframe_process = ProcessingStep(
    name="BittiDataProcessing",
    processor=sframes_preproessor,
    inputs=[
        ProcessingInput(source=input_data, destination="/opt/ml/processing/input"),
    ],
    outputs=[
        ProcessingOutput(output_name="train", source="/opt/ml/processing/output_train"),
        ProcessingOutput(output_name="test", source="/opt/ml/processing/output_test")
    ],
    code=f"{source_dir}/preprocessing.py",
    cache_config=cache_config
)

### Now for the main bit - the training step

In [30]:
from sagemaker.tensorflow import TensorFlow


# S3 location handed to the estimator as its output path.
model_path = f"s3://{bucket}/output_model"

# The training image is resolved while the pipeline is being defined, so the
# lookup uses the parameters' default values explicitly.
# NOTE(review): overriding TrainingInstanceType / TrainingInstanceTFVersion for
# a single execution does not change this image - confirm that is acceptable
# before switching to GPU instances.
image_uri = sagemaker.image_uris.retrieve(
    framework="tensorflow",
    image_scope="training",
    region=region,
    version=training_instance_tf_version.default_value,
    py_version="py37",
    instance_type=training_instance_type.default_value)

# Regular expressions are a pain, use the playground here: https://regex101.com/r/kopij0/1
# TODO: works for reporting, but how to publish it via CloudWatch metrics as well?
turicreate_metrics = [{'Name': 'train:loss', 'Regex': r"(?:\| [0-9]+ \| )([0-9]+[.][0-9]+)"}]

# Hand the pipeline parameters to the estimator as they are. Casting them with
# int()/str() collapses them to their default values while the notebook runs
# (and fails outright on newer SDK releases), which makes the MaxIterations,
# TrainingBatchSize, NumPyLambdaWorkers and TrainingInstanceType parameters
# impossible to override when an execution is started.
tf_train = TensorFlow(base_job_name='bitti-turicreate-pipelines',
                      entry_point='training.py',
                      source_dir=source_dir,
                      output_path=model_path,  # don't use model_dir hyperparam!
                      role=role,
                      image_uri=image_uri,
                      hyperparameters={'max-iterations': training_max_iterations,
                                       'batch-size': training_batch_size,
                                       'number-pylambda-workers': training_n_cores
                                      },
                      instance_count=1,
                      instance_type=training_instance_type,
                      metric_definitions=turicreate_metrics,
                      input_mode='File')

In [31]:
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep

# TODO: change into Pipe - but that would need additional read functions in training.py

# S3 locations of the "train" and "test" outputs of the preprocessing step.
processing_outputs = step_sframe_process.properties.ProcessingOutputConfig.Outputs
train_s3_uri = processing_outputs["train"].S3Output.S3Uri
test_s3_uri = processing_outputs["test"].S3Output.S3Uri

# Training step: feeds both preprocessing outputs to the estimator as the
# "train" and "test" channels.
step_train = TrainingStep(
    name="ModelTraining",
    estimator=tf_train,
    inputs={
        # content_type="application/octet-stream" was tested and is not needed for File mode
        "train": TrainingInput(train_s3_uri, input_mode="File"),
        "test": TrainingInput(test_s3_uri, input_mode="File"),
    },
    cache_config=cache_config,
)

### Define the evaluation step

We have trained the model, but we need to validate it too. The first time around, because of the EXIF tags, half of the images were not rotated properly relative to their labels. Needless to say, that left the mAP score close to nil. That's a good example of why the validation step is needed - only models that *work* should be carried forward.

In [33]:
from sagemaker.processing import ScriptProcessor
from sagemaker.workflow.properties import PropertyFile


# Processor for the evaluation script; it reuses the `image_uri` container image.
script_eval = ScriptProcessor(
    role=role,
    image_uri=image_uri,
    command=["python3"],
    instance_count=1,
    instance_type=processing_instance_type,
    base_job_name="script-bitti-eval",
)

# Describes evaluation.json inside the "evaluation" output so that later
# pipeline steps can read values from it.
evaluation_report = PropertyFile(
    name="EvaluationReport",
    output_name="evaluation",
    path="evaluation.json",
)

# The evaluation job receives the trained model artifacts and the test split.
eval_inputs = [
    ProcessingInput(
        source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        destination="/opt/ml/processing/model",
    ),
    ProcessingInput(
        source=step_sframe_process.properties.ProcessingOutputConfig.Outputs["test"].S3Output.S3Uri,
        destination="/opt/ml/processing/test",
    ),
]

eval_outputs = [
    ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
]

step_eval = ProcessingStep(
    name="ModelEvaluation",
    processor=script_eval,
    inputs=eval_inputs,
    outputs=eval_outputs,
    code=f"{source_dir}/evaluation.py",
    property_files=[evaluation_report],
)

In [34]:
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
from sagemaker.workflow.condition_step import ConditionStep, JsonGet


# Value at regression_metrics.mAP.value in the evaluation report of the
# evaluation step.
evaluated_map = JsonGet(
    step=step_eval,
    property_file=evaluation_report,
    json_path="regression_metrics.mAP.value",
)

# Condition: evaluated mAP >= the approval threshold pipeline parameter.
cond_map = ConditionGreaterThanOrEqualTo(
    left=evaluated_map,
    right=model_approval_map_threshold,
)

In [35]:
from sagemaker.model_metrics import MetricsSource, ModelMetrics 
from sagemaker.workflow.step_collections import RegisterModel


# S3 URI of the first (and only) output declared on the evaluation step.
evaluation_output_uri = step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]

# Attach the evaluation report to the registered model as its statistics.
model_metrics = ModelMetrics(
    model_statistics=MetricsSource(
        s3_uri=f"{evaluation_output_uri}/evaluation.json",
        content_type="application/json",
    )
)

# Registers the trained model in the model package group, with the approval
# status taken from the pipeline parameter.
step_register = RegisterModel(
    name="BittiRegisterModel",
    estimator=tf_train,
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    model_package_group_name=model_package_group_name,
    approval_status=model_approval_status,
    model_metrics=model_metrics,
    content_types=["application/octet-stream"],
    response_types=["application/octet-stream"],
    inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
)

In [37]:
# Processor for the publishing script; it reuses the `image_uri` container image.
script_publish = ScriptProcessor(
    role=role,
    image_uri=image_uri,
    command=["python3"],
    instance_count=1,
    instance_type=processing_instance_type,
    base_job_name="script-bitti-publish",
)

# The publishing job only needs the trained model artifacts as input.
publish_inputs = [
    ProcessingInput(
        source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        destination="/opt/ml/processing/model",
    ),
]

step_publish = ProcessingStep(
    name="PublishViaAPI",
    processor=script_publish,
    inputs=publish_inputs,
    code=f"{source_dir}/publish_to_api.py",
)

In [38]:
# Steps that run only when the mAP condition holds; nothing is scheduled
# for the opposite outcome.
steps_when_map_ok = [step_register, step_publish]
steps_when_map_low = []

step_cond = ConditionStep(
    name="BittymAPcheck",
    conditions=[cond_map],
    if_steps=steps_when_map_ok,
    else_steps=steps_when_map_low,
)

In [39]:
from sagemaker.workflow.pipeline import Pipeline


pipeline_name = "BittiPipeline"

# Every parameter declared for the pipeline; the list order is kept as is.
pipeline_parameters = [
    processing_instance_count,
    processing_instance_type,
    training_instance_type,
    training_batch_size,
    training_max_iterations,
    training_n_cores,
    training_instance_tf_version,
    model_approval_status,
    input_data,
    model_approval_map_threshold,
]

# Top-level steps; registration and publishing are nested inside step_cond.
pipeline_steps = [step_sframe_process, step_train, step_eval, step_cond]

pipeline = Pipeline(
    name=pipeline_name,
    parameters=pipeline_parameters,
    steps=pipeline_steps,
)

In [None]:
# Submit the pipeline definition using the notebook's execution role
# (presumably creates the pipeline or updates an existing one - confirm in the SDK docs).
pipeline.upsert(role_arn=role)

In [41]:
# Start a pipeline run; no parameter overrides are passed here.
execution = pipeline.start()

In [None]:
# Describe the run started above; the result is shown as the cell output.
execution.describe()

In [None]:
# List the steps of the run started above; the result is shown as the cell output.
execution.list_steps()

In [None]:
# Describe the pipeline itself (as opposed to a single run); shown as the cell output.
pipeline.describe()