# Transformers BERT using the SageMaker PyTorch Estimator

## Setup environment

In [5]:
import os
import sagemaker
from sagemaker.pytorch import PyTorch as PyTorchEstimator
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

# IAM role used for the training jobs; in this example it is the notebook instance's own role.
role = sagemaker.get_execution_role()

# Session wrapper around the SageMaker APIs, plus its default S3 bucket.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()

## Define data inputs from S3

In [6]:
# Training data channel(s) for the estimator, keyed by channel name.
# The S3 URI below is specific to the original author's account: point it at your own dataset.
inputs = dict(
    train='s3://sagemaker-eu-west-1-366243680492/data-processing-2020-06-12-08-37-01-419/output/preprocessed/MRPC',
)
print(inputs)

{'train': 's3://sagemaker-eu-west-1-366243680492/data-processing-2020-06-12-08-37-01-419/output/preprocessed/MRPC'}


## Train a BERT model

In [7]:
# Static hyperparameters forwarded to the training entry point (run_pl_glue.py).
hyperparameters = {
    "task": "mrpc",
    "model_name_or_path": "bert-base-cased",
    # SageMaker mounts the 'train' input channel at this path inside the training container.
    "data_dir": "/opt/ml/input/data/train",
    # Contents of this directory are uploaded to S3 as the model artifact.
    "output_dir": "/opt/ml/model",
    "num_train_epochs": 1,
    "learning_rate": 2e-5,
    "train_batch_size": 64,
    "seed": 42,
    "do_train": True,
    "do_predict": True,
}

# Regexes SageMaker applies to the training log to extract metrics.
# Each regex must contain exactly one capture group holding the numeric value.
metric_definitions = [
    # NOTE(review): 'mape' looks like a leftover from a regression example; kept so
    # existing consumers of this metric name are unaffected.
    {"Name": "mape", "Regex": r"'mape': ([0-9\.]+)"},
    {"Name": "validation_loss", "Regex": r"'val_loss': ([0-9\.]+)"},
    {"Name": "f1_score", "Regex": r"'f1': ([0-9\.]+)"},
    # The HPO section below tunes on the 'acc' objective, and a tuning job can only
    # optimise a metric that is declared here.
    # Assumes the training script logs accuracy as `'acc': <value>`, in the same
    # dict-style format as the other metrics -- TODO confirm against run_pl_glue.py.
    {"Name": "acc", "Regex": r"'acc': ([0-9\.]+)"},
]

In [8]:
# Estimator for a single training run of run_pl_glue.py on one GPU instance.
estimator = PyTorchEstimator(
    # Training code
    source_dir="source_dir",
    entry_point="run_pl_glue.py",
    # Container: PyTorch version and Python version
    framework_version="1.5.0",
    py_version="py3",
    # Infrastructure
    role=role,
    train_instance_type="ml.p3.2xlarge",
    train_instance_count=1,
    train_volume_size=50,
    # Job configuration
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
)

In [None]:
# Run the training job directly from the notebook, reading data from the S3 channels in `inputs`.
# The same estimator is also handed to the Step Functions pipeline further down.
estimator.fit(inputs)

## Generate predictions with Batch Transform 

## AWS Step Functions Data Science SDK

In [None]:
# import sys
# !{sys.executable} -m pip install --upgrade stepfunctions

In [9]:
import stepfunctions
import logging

from stepfunctions.template.pipeline import TrainingPipeline
# Show the Step Functions SDK's INFO-level log messages in the cell output.
stepfunctions.set_stream_logger(level=logging.INFO)

# ARN of the IAM role passed to the Step Functions pipeline as its execution role.
# This value is account-specific: replace it with your own StepFunctionsWorkflowExecutionRole ARN.
workflow_execution_role = "arn:aws:iam::366243680492:role/StepFunctionsWorkflowExecutionRole"

In [10]:
# Wrap the estimator in the SDK's ready-made training workflow template.
pipeline = TrainingPipeline(
    estimator=estimator,
    inputs=inputs,
    s3_bucket=bucket,
    role=workflow_execution_role,
)

In [15]:
# print(pipeline.workflow.definition.to_json(pretty=True))

In [12]:
# Render the pipeline's workflow graph as this cell's output (the rendering is not captured in this export).
pipeline.render_graph()

In [13]:
# Create the workflow on AWS Step Functions; the returned state machine ARN is shown as the cell output.
pipeline.create()

[32m[INFO] Workflow created successfully on AWS Step Functions.[0m


'arn:aws:states:eu-west-1:366243680492:stateMachine:training-pipeline-2020-06-12-10-23-19'

In [14]:
# Start an execution of the workflow on AWS Step Functions.
pipeline.execute()

[32m[INFO] Workflow execution started successfully on AWS Step Functions.[0m


## Hyperparameter optimization (HPO)

In [None]:
# Hyperparameters held constant across every tuning trial.
# num_train_epochs and learning_rate are absent here because they are searched over below.
hyperparameters = {
    "task": "mrpc",
    "model_name_or_path": "bert-base-cased",
    "data_dir": "/opt/ml/input/data/train",
    "output_dir": "/opt/ml/model",
    "train_batch_size": 64,
    "seed": 42,
    "do_train": True,
    "do_predict": True,
}

# Search space explored by the tuner.
hyperparameter_ranges = {
    "num_train_epochs": IntegerParameter(2, 8),
    # Log scaling for parameters whose range spans orders of magnitude.
    "learning_rate": ContinuousParameter(1e-5, 1e-3, scaling_type="Logarithmic"),
    "weight_decay": ContinuousParameter(0.0, 0.1),
    "adam_epsilon": ContinuousParameter(1e-8, 1e-6, scaling_type="Logarithmic"),
}

# Metric the tuner optimises, and the direction of optimisation.
objective_type = "Maximize"
objective_metric_name = "acc"

In [None]:
# Estimator used as the template for every tuning trial; it picks up the
# HPO-specific `hyperparameters` dict defined in the previous cell.
estimator = PyTorchEstimator(
    # Training code
    source_dir="source_dir",
    entry_point="run_pl_glue.py",
    # Container: PyTorch version and Python version
    framework_version="1.5.0",
    py_version="py3",
    # Infrastructure
    role=role,
    train_instance_type="ml.p3.2xlarge",
    train_instance_count=1,
    train_volume_size=50,
    # Job configuration
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
)

# Tuning job: at most 50 training jobs in total, 2 running at any one time.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    objective_type=objective_type,
    max_jobs=50,
    max_parallel_jobs=2,
)

# Launch the tuning job on the same S3 input channels as the single training run.
tuner.fit(inputs)