In [1]:
#!pip install wget

In [2]:
import sagemaker, os, pandas as pd
import numpy as np

# Key prefix under which this notebook's files are stored in the bucket.
prefix = 'sagemaker/DEMO-pytorch-bert'

# A single session is reused by the cells below; data goes to its default bucket.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()

# IAM role passed to the training job and to the hosted model.
role = sagemaker.get_execution_role()

# Prepare training data

### Download data

In [3]:
url = 'https://nyu-mll.github.io/CoLA/cola_public_1.1.zip'
if not os.path.exists('./cola_public_1.1.zip'):
    #!wget.download(url, './cola_public_1.1.zip')
    !curl -o ./cola_public_1.1.zip https://nyu-mll.github.io/CoLA/cola_public_1.1.zip
if not os.path.exists('./cola_public/'):
    !unzip cola_public_1.1.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  249k  100  249k    0     0  4155k      0 --:--:-- --:--:-- --:--:-- 4155k


### Get sentences and labels

In [4]:
# Load the in-domain training split. The file has no header row; only columns
# 1 and 3 are read, and they are named 'label' and 'sentence'.
df = pd.read_csv(
    './cola_public/raw/in_domain_train.tsv',
    sep='\t',
    header=None,
    usecols=[1, 3],
    names=['label', 'sentence'],
)

# Pull both columns out as arrays for quick inspection.
sentences = df['sentence'].values
labels = df['label'].values

In [5]:
# Show the same five rows from both arrays: sentences first, then their labels.
preview = slice(20, 25)
print(sentences[preview])
print(labels[preview])

['The professor talked us.' 'We yelled ourselves hoarse.'
 'We yelled ourselves.' 'We yelled Harry hoarse.'
 'Harry coughed himself into a fit.']
[0 1 0 0 1]


In [6]:
from sklearn.model_selection import train_test_split

# Split with train_test_split's default proportions. The seed is fixed so the
# split, and therefore the CSVs written below, is identical on every run.
train, test = train_test_split(df, random_state=42)

# index=False keeps the row index out of the files.
train.to_csv('./cola_public/train.csv', index=False)
test.to_csv('./cola_public/test.csv', index=False)

In [7]:
# Upload both CSVs under the shared bucket/prefix. The values returned by
# upload_data are later handed to estimator.fit as the channel inputs.
inputs_train, inputs_test = [
    sagemaker_session.upload_data(csv_path, bucket=bucket, key_prefix=prefix)
    for csv_path in ('./cola_public/train.csv', './cola_public/test.csv')
]

# Run training

In [11]:
from sagemaker.pytorch import PyTorch

# Configure the training job: code/train_deploy.py is the entry point.
estimator = PyTorch(
    entry_point='train_deploy.py',
    source_dir='code',
    role=role,
    framework_version='1.3.1',
    # The training script only supports distributed training on GPU instances.
    train_instance_count=2,
    train_instance_type='ml.p3.2xlarge',
    hyperparameters={
        'epochs': 1,
        'num_labels': 2,
        'backend': 'gloo',
    },
)

# Start training; the 'training' and 'testing' channels point at the uploaded CSVs.
estimator.fit({'training': inputs_train, 'testing': inputs_test})

2020-01-31 16:36:49 Starting - Starting the training job...
2020-01-31 16:36:51 Starting - Launching requested ML instances......
2020-01-31 16:37:56 Starting - Preparing the instances for training......
2020-01-31 16:39:01 Downloading - Downloading input data...
2020-01-31 16:39:38 Training - Downloading the training image.......[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2020-01-31 16:40:50,512 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2020-01-31 16:40:50,537 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[35mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[35mbash: no job control in this shell[0m
[35m2020-01-31 16:40:53,280 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[35m2020-01-31 16:40:53,306 sagemaker_pytorch_co


[35mDistributed training - True[0m
[34mDistributed training - True[0m
[34mNumber of gpus available - 1[0m
[34mINFO:__main__:Average training loss: 0.5166329507734261[0m
[34mInitialized the distributed environment: 'gloo' backend on 2 nodes. Current host rank is 0. Number of gpus: 1
[0m
[34mGet train data loader[0m
[35mNumber of gpus available - 1[0m
[35mINFO:__main__:Average training loss: 0.5512401800529629[0m
[35mInitialized the distributed environment: 'gloo' backend on 2 nodes. Current host rank is 1. Number of gpus: 1
[0m
[35mGet train data loader[0m
[35mINFO:__main__:Test set: Accuracy: 0.7608695652173914[0m
[35mProcesses 3207/6413 (50%) of train data
[0m
[35mProcesses 2138/2138 (100%) of test data[0m
[35mINFO:__main__:Saving tuned model.[0m
[35mStarting BertForSequenceClassification[0m
[35mINFO:transformers.configuration_utils:Configuration saved in /opt/ml/model/config.json
[0m
[35mINFO:transformers.modeling_utils:Model weights saved in /opt/ml


2020-01-31 16:41:58 Uploading - Uploading generated training model
2020-01-31 16:43:39 Completed - Training job completed
Training seconds: 556
Billable seconds: 556


# Host

In [12]:
# Deploy the trained estimator to an endpoint with a single ml.m4.xlarge instance.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    content_type='application/json',
)

-------------------!

In [13]:
from sagemaker.predictor import json_deserializer, json_serializer

# Requests: serialize the payload as JSON and label it as such.
predictor.serializer = json_serializer
predictor.content_type = 'application/json'

# Responses: ask for JSON back and decode it.
predictor.deserializer = json_deserializer
predictor.accept = 'application/json'

In [15]:
# Send one sentence to the endpoint, then take the argmax along axis 1 of the
# response to get the index of the highest-scoring entry in each row.
result = predictor.predict("Somebody just left - guess who.")
predicted_labels = np.argmax(result, axis=1)
print(predicted_labels)

[1]


# Use a model that has already been trained

In [6]:
from sagemaker.pytorch.model import PyTorchModel

# S3 URI of the model.tar.gz to host. This uses the artifact of the training job
# run by `estimator` instead of a URI hard-coded to one AWS account's bucket,
# which other users cannot read. To host an artifact from an earlier job,
# assign its s3://.../model.tar.gz URI here instead.
model_data_uri = estimator.model_data

# Rebuild a deployable model from the stored artifact, using the same inference code.
pytorch_model = PyTorchModel(
    model_data=model_data_uri,
    role=role,
    framework_version='1.3.1',
    source_dir='code',
    entry_point='train_deploy.py',
)

# NOTE: this rebinds `predictor`. If an endpoint created earlier is still running
# and no longer needed, delete it first so it does not keep accruing charges.
predictor = pytorch_model.deploy(instance_type='ml.c4.xlarge', initial_instance_count=1)

---------------------------------------------------------------------------------------------------------------!