## Classifying news with HuggingFace and PyTorch on Amazon SageMaker

In [None]:
# Install the SageMaker Python SDK, upgrading it (-U) if already present, before
# the `sagemaker.huggingface` import below.
# NOTE(review): presumably needed because that module only ships in recent SDK
# releases -- confirm the minimum version.
!pip install -U sagemaker

In [None]:
import sagemaker
from sagemaker.huggingface import HuggingFace

In [None]:
# Shared SageMaker context used by the cells below:
#   sagemaker_session - SDK session object
#   bucket            - the session's default S3 bucket
#   prefix            - key prefix under which the dataset lives in that bucket
#   role              - execution role handed to the estimator
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
prefix = "news"
role = sagemaker.get_execution_role()

In [None]:
# Arguments forwarded to the run_glue.py entry point of the training job.
# The /opt/ml/input/data/<channel>/ paths line up with the "training" and
# "testing" channel names passed to fit().
hyperparameters = {
    'model_name_or_path': 'amazon/bort',
    'output_dir': '/opt/ml/model',
    'train_file': '/opt/ml/input/data/training/train.csv',
    # run_glue.py evaluates (do_eval) on `validation_file`; `test_file` is only
    # read when do_predict is set, so the held-out CSV has to be passed under
    # this key for the evaluation step to find it.
    'validation_file': '/opt/ml/input/data/testing/test.csv',
    'do_train': True,
    'do_eval': True,
    'num_train_epochs': 1,
    # add your remaining hyperparameters
    # more info here https://github.com/huggingface/transformers/tree/v4.4.2/examples/text-classification
}

In [None]:
# Where the training job fetches the fine-tuning script from: the transformers
# repository, checked out at v4.4.2.
git_config = {
    'repo': 'https://github.com/huggingface/transformers.git',
    'branch': 'v4.4.2',
}

In [None]:
# Build the Hugging Face estimator that describes the fine-tuning job.
huggingface_estimator = HuggingFace(
    # Script to run, located relative to the root of the repo in git_config.
    git_config=git_config,
    source_dir='./examples/text-classification',
    entry_point='run_glue.py',
    hyperparameters=hyperparameters,
    # Framework versions for the training container.
    transformers_version='4.4.2',
    pytorch_version='1.6.0',
    py_version='py36',
    # Infrastructure: one ml.p3.2xlarge instance, run under the notebook's role.
    instance_type='ml.p3.2xlarge',
    instance_count=1,
    role=role,
    # Profiler is switched off for this job.
    disable_profiler=True,
)

In [None]:
# S3 locations of the two input channels, both under s3://<bucket>/<prefix>/.
training_path = f's3://{bucket}/{prefix}/training'
testing_path = f's3://{bucket}/{prefix}/testing'

# Launch the training job; the dict keys are the channel names.
huggingface_estimator.fit(
    {
        "training": training_path,
        "testing": testing_path,
    }
)