In [None]:
import sagemaker
from sagemaker import get_execution_role

In [None]:
# Session object for this notebook, and the execution role that is later
# handed to the estimator via `role=`.
sess = sagemaker.Session()
role = get_execution_role()

In [None]:
# Restore the `bucket` variable from IPython's %store database.
# NOTE(review): presumably saved by an earlier notebook with `%store bucket` — confirm.
%store -r bucket

In [None]:
# S3 locations of the training and validation data inside `bucket`.
training_input_path, val_input_path = (
    f's3://{bucket}/processing_output/train_data',
    f's3://{bucket}/processing_output/validation_data',
)

### Set up the Hugging Face training job

In [None]:
from sagemaker.huggingface import HuggingFace

# Values forwarded to the training job through the estimator's
# `hyperparameters=` argument.
hyperparameters = {
    'epochs': 2,
    'train_batch_size': 32,
    'model_name': 'distilbert-base-uncased',
}

# Enable the `smdistributed` data-parallel option for the job.
distribution = {
    'smdistributed': {
        'dataparallel': {'enabled': True},
    },
}

In [None]:
# Hugging Face estimator for the training job.
# - entry_point / source_dir: runs `train.py` taken from `./scripts`.
# - instance_type / instance_count: two ml.p3.16xlarge instances.
# - transformers_version / pytorch_version / py_version: framework versions
#   requested for the training container.
# - output_path: S3 prefix under `bucket` where the job output is written.
# - distribution: the smdistributed data-parallel setting defined earlier.
# - sagemaker_session: reuse the `sess` created at the top of the notebook so
#   the estimator does not silently build a second default session.
huggingface_estimator = HuggingFace(
    entry_point='train.py',
    source_dir='./scripts',
    instance_type='ml.p3.16xlarge',
    instance_count=2,
    role=role,
    transformers_version='4.6',
    pytorch_version='1.7',
    py_version='py36',
    output_path=f's3://{bucket}/training_output/',
    base_job_name="az-ade-training",
    hyperparameters=hyperparameters,
    disable_profiler=True,
    distribution=distribution,
    sagemaker_session=sess,
)

In [None]:
# Launch the training job. Each key in `inputs` names a data channel and maps
# it to the S3 location of the corresponding dataset.
huggingface_estimator.fit(
    inputs={
        'train': training_input_path,
        'val': val_input_path,
    }
)

### Save the training job name for the next session

In [None]:
# Look up the estimator's most recent training job and keep its name;
# the bare expression on the last line echoes it as the cell output.
latest_job = huggingface_estimator.latest_training_job
training_job_name = latest_job.name
training_job_name

In [None]:
# Persist `training_job_name` with %store so a later kernel or notebook can
# restore it using `%store -r training_job_name`.
%store training_job_name