# Automate Training with the AWS Step Functions Data Science SDK

## Install SDK

In [None]:
!pip install --upgrade -q stepfunctions

## Setup environment

In [None]:
import json
import uuid
import sagemaker
from sagemaker.pytorch import PyTorch as PyTorchEstimator
from sagemaker.processing import Processor, ProcessingInput, ProcessingOutput
import stepfunctions
from stepfunctions.inputs import ExecutionInput
from stepfunctions.workflow import Workflow, cloudformation
from stepfunctions.steps import Chain, ProcessingStep, TrainingStep, Catch, Fail, Succeed


# Session object used below to look up the default S3 bucket.
sagemaker_session = sagemaker.Session()

# One IAM role serves both as the SageMaker execution role and as the
# execution role handed to the Step Functions workflow.
# NOTE(review): presumably this role must also be assumable by Step Functions
# for the workflow to run -- confirm against the account's IAM setup.
role = workflow_execution_role = sagemaker.get_execution_role()

# Default bucket of the session; assign an explicit bucket name here instead
# if a specific bucket should be used.
bucket = sagemaker_session.default_bucket()

## Execution input placeholders

In [None]:
# Unique hex identifier for this run; it is used as part of the S3 output
# prefix and later passed to the workflow as "JobName".
job_name = uuid.uuid1().hex

# S3 location of the raw data (fill in) and S3 prefix for the processed data.
processing_input = '<your-data-s3-path>'
processing_output = f's3://{bucket}/{job_name}/data/processed/'

# Schema of the JSON document that must be supplied to every workflow
# execution. Indexing execution_input (e.g. execution_input["JobName"]) yields
# placeholders that the workflow steps below take as arguments.
execution_input = ExecutionInput(schema={
    "JobName": str,
    "Processing": {"Input": str, "Output": str},
    "Training": {"Input": str},
})

## Create data processor

In [None]:
# Processor that runs the user-supplied container image (fill in the URI) on
# a single ml.m5.xlarge instance with a 30 GB volume and a 1200-second
# runtime limit.
data_processor = Processor(
    image_uri='<your-container-image-uri>',
    role=role,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    volume_size_in_gb=30,
    max_runtime_in_seconds=1200,
)

In [None]:
# Directories inside the processing container; they are also passed to the
# container as --input/--output arguments by the processing step.
input_folder = '/opt/ml/processing/input'
output_folder = '/opt/ml/processing/output'

# Single input: its source comes from the "Processing.Input" execution-input
# placeholder and is made available under input_folder.
inputs = [
    ProcessingInput(
        source=execution_input["Processing"]["Input"],
        destination=input_folder,
        input_name='input',
    ),
]

# Single output: whatever the container writes to output_folder goes to the
# destination given by the "Processing.Output" execution-input placeholder.
outputs = [
    ProcessingOutput(
        source=output_folder,
        destination=execution_input["Processing"]["Output"],
        output_name='preprocessed',
    ),
]

## Create estimator

In [None]:
# Hyperparameters handed to the estimator below.
# NOTE(review): data_folder presumably has to match the 'train' channel name
# used by the training step -- confirm against train.py.
hyperparameters = dict(
    model_name="bert-base-cased",
    data_folder='/opt/ml/input/data/train',
    output_folder='/opt/ml/model',
    epochs=1,
    learning_rate=2e-5,
    batch_size=64,
    seed=42,
    max_len=160,
)

# One metric, 'validation_accuracy', captured by a regex whose single group
# matches the digits/dots that follow "val_accuracy: ".
metric_definitions = [
    dict(Name='validation_accuracy', Regex=r'val_accuracy: ([0-9\.]+)'),
]

# PyTorch estimator that runs source_dir/train.py on one ml.p3.2xlarge
# instance with a 50 GB volume, using the hyperparameters and metric
# definitions declared above.
# NOTE(review): the train_instance_count / train_instance_type /
# train_volume_size keyword names are the SageMaker Python SDK v1 spellings;
# confirm which SDK version this notebook targets.
estimator = PyTorchEstimator(
    source_dir='source_dir',
    entry_point='train.py',
    framework_version='1.5.0',
    py_version='py3',
    role=role,
    train_instance_type='ml.p3.2xlarge',
    train_instance_count=1,
    train_volume_size=50,
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
)

## Define workflow steps

In [None]:
# Workflow state "Process Data": runs data_processor with the inputs/outputs
# defined above. The job name comes from the "JobName" execution input, the
# container receives the in-container folders as --input/--output arguments,
# and the step result is written to "$.Processing".
processing_step = ProcessingStep(
    state_id="Process Data",
    job_name=execution_input["JobName"],
    processor=data_processor,
    container_arguments=[f"--input={input_folder}", f"--output={output_folder}"],
    inputs=inputs,
    outputs=outputs,
    result_path="$.Processing",
)

In [None]:
# Workflow state "Train Model": runs the estimator with a single 'train'
# channel and writes the step result to "$.Training".
# NOTE(review): the channel points at the notebook variable processing_output,
# so the S3 path is fixed when the workflow is defined; the "Training.Input"
# execution input is not referenced here -- confirm this is intended.
training_step = TrainingStep(
    state_id="Train Model",
    job_name=execution_input["JobName"],
    estimator=estimator,
    data={'train': processing_output},
    result_path="$.Training",
)

In [None]:
# Terminal states of the workflow.
succeed = Succeed(state_id="Succeed")
failed = Fail(state_id="Failed")

# A single catch rule matching "States.ALL" sends either job step to the
# "Failed" state.
catch_failures = Catch(next_step=failed, error_equals=["States.ALL"])
processing_step.add_catch(catch_failures)
training_step.add_catch(catch_failures)

## Create workflow pipeline

In [None]:
# Linear pipeline: process the data, train the model, then succeed.
workflow_graph = Chain([processing_step, training_step, succeed])

# Workflow object named "TrainingWorkflow" built from the chain above, using
# the execution-input schema and the workflow execution role defined earlier.
workflow_pipeline = Workflow(
    name="TrainingWorkflow",
    role=workflow_execution_role,
    definition=workflow_graph,
    execution_input=execution_input,
)

In [None]:
# Print the workflow definition as pretty-printed JSON for inspection.
print(workflow_pipeline.definition.to_json(pretty=True))

In [None]:
# Render a diagram of the workflow graph (portrait orientation requested).
workflow_pipeline.render_graph(portrait=True)

## Create/update state machine and execute

In [None]:
# Create the state machine for the workflow defined above.
workflow_pipeline.create()
# To push a changed definition instead, call update() with the new definition,
# e.g. the chain built above (the name previously shown in this hint,
# workflow_pipeline_definition, is not defined in this notebook as shown):
# workflow_pipeline.update(definition=workflow_graph)

## Workflow pipeline inputs

In [None]:
# Concrete values for the execution-input schema: the job name, the raw and
# processed S3 locations for processing, and the processed location again as
# the training input.
execution_inputs = dict(
    JobName=job_name,
    Processing=dict(Input=processing_input, Output=processing_output),
    Training=dict(Input=processing_output),
)

# Start an execution of the workflow with these inputs.
workflow_pipeline.execute(inputs=execution_inputs)

## Generate CloudFormation template

In [None]:
# Print the CloudFormation template generated for this workflow.
print(workflow_pipeline.get_cloudformation_template())