# Tune a sentiment analysis model using Transformers on SageMaker

## Setup environment

In [None]:
import os
import sagemaker
from sagemaker.pytorch import PyTorch as PyTorchEstimator
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

# IAM role the notebook is running under; handed to the training jobs below.
role = sagemaker.get_execution_role()

# SageMaker session and the default S3 bucket associated with it.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()

## Define data inputs from S3

In [None]:
# Replace the placeholder with the S3 path of your dataset.
train_data_s3_path = '<your-data-s3-path>'

# Input channels for the training job, keyed by channel name.
inputs = {'train': train_data_s3_path}
print(inputs)

## Tune

In [None]:
# Fixed hyperparameters passed to the training script on every job.
hyperparameters = {
    # Pretrained checkpoint to fine-tune.
    "model_name": "bert-base-cased",
    # Paths inside the training container (presumably the standard SageMaker
    # locations for the 'train' channel and the model artifacts -- confirm
    # against train.py).
    "data_folder": "/opt/ml/input/data/train",
    "output_folder": "/opt/ml/model",
    # Optimisation and preprocessing settings.
    "learning_rate": 2e-5,
    "batch_size": 64,
    "seed": 42,
    "max_len": 160,
}

# Metric named 'validation_accuracy'; the regex captures the number that
# follows "val_accuracy: ".
_validation_accuracy_metric = {
    'Name': 'validation_accuracy',
    'Regex': 'val_accuracy: ([0-9\\.]+)',
}
metric_definitions = [_validation_accuracy_metric]

In [None]:
# Settings for the PyTorch training job: train.py from source_dir, run on a
# single ml.p3.2xlarge instance with the PyTorch 1.5.0 / py3 framework.
# NOTE(review): the train_* keyword names are the SageMaker SDK v1 spellings;
# SDK v2 renamed them to instance_count / instance_type / volume_size.
# Confirm which SDK version this notebook targets before changing them.
estimator_settings = {
    'entry_point': 'train.py',
    'source_dir': 'source_dir',
    'role': role,
    'train_instance_count': 1,
    'train_instance_type': 'ml.p3.2xlarge',
    'train_volume_size': 50,
    'hyperparameters': hyperparameters,
    'metric_definitions': metric_definitions,
    'framework_version': '1.5.0',
    'py_version': 'py3',
}
estimator = PyTorchEstimator(**estimator_settings)

# Objective: maximize the metric named 'validation_accuracy'.
objective_type = 'Maximize'
objective_metric_name = 'validation_accuracy'

# Search space: only 'epochs' is tuned, as an integer parameter built from 1 and 3.
hyperparameter_ranges = {'epochs': IntegerParameter(1, 3)}

In [None]:
# Tuner configured for at most 3 training jobs, with at most 1 running in parallel.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    objective_type=objective_type,
    max_jobs=3,
    max_parallel_jobs=1,
)

# Start the tuning job with the configured inputs.
tuner.fit(inputs)