Using kernel `conda_pytorch_latest_p36`

In [1]:
import sys
# Add the directory three levels up to the module search path
# (presumably the repository root, so that the `deep` package imported below resolves — TODO confirm).
sys.path.extend(['../../../'])

In [2]:
from pathlib import Path
import os
import random
import json


In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
import torch
import boto3
from transformers import (
    AutoTokenizer, 
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
    TrainingArguments, 
    Trainer
)
import sagemaker
from sagemaker import get_execution_role
from sagemaker.transformer import Transformer
from sagemaker.pytorch import PyTorchModel

In [4]:
from deep.constants import *

In [5]:
%load_ext autoreload
%autoreload 2

## SageMaker Prep

In [6]:
# SageMaker session bound to an explicitly named default bucket.
# NOTE(review): `sess` is not passed to any call in the visible cells — confirm it is still needed.
default_bucket = 'deep-experiments-sagemaker-bucket'
sess = sagemaker.Session(default_bucket=default_bucket)

# IAM role that SageMaker uses to access AWS resources (S3, CloudWatch) on our behalf.
# It is echoed so the value in use is visible in the cell output.
role = 'AmazonSageMaker-ExecutionRole-20210519T102514'
print(role)

# Bucket used for the batch-transform output and the key prefix for stored data
# (change the prefix if the data should live elsewhere).
# NOTE(review): SAGEMAKER_BUCKET presumably comes from `deep.constants`; it may duplicate
# `default_bucket` above — verify and keep a single source of truth.
bucket, prefix = SAGEMAKER_BUCKET, "huggingface/first"


AmazonSageMaker-ExecutionRole-20210519T102514


### Bucket upload

In [7]:
# S3 URI of the packaged model archive (model.tar.gz) to deploy.
# Judging by the key, this is presumably the output of an earlier SageMaker PyTorch
# training job — TODO confirm which run produced it.
pt_mnist_model_data = (
    's3://sagemaker-us-east-1-961104659532/'
    'pytorch-training-2021-05-26-13-34-05-285/'
    'output/model.tar.gz'
)

In [8]:
# Settings describing the model.
# NOTE(review): this dict is not passed to PyTorchModel below or to any other call in
# the visible cells; if the inference script needs these values they must be forwarded
# explicitly — confirm whether the dict is still needed.
hyperparameters = {
    'train_batch_size': 32,
    'model_name': 'distilbert-base-uncased',
}

# Wrap the model archive in a SageMaker PyTorchModel so that a batch transformer can be
# created from it. `entry_point` is the inference script, looked up inside `source_dir`.
# The previous version of this call contained a stray, value-less `output_dir` token,
# which made the whole cell a SyntaxError; it has been removed.
model = PyTorchModel(
    entry_point="batch_inference.py",
    source_dir=str(SCRIPTS_TRAINING_PATH / 'stefano/pytorch_estimator_base'),
    role=role,
    model_data=pt_mnist_model_data,
    framework_version="1.8.1",
    py_version="py3",
)

In [9]:
# then create transformer from PyTorchModel object
transformer = model.transformer(
    instance_count=1, 
    instance_type='ml.m5.large', 
    strategy='MultiRecord',
    assemble_with='Line',
    output_path='s3://{}/batch_transform_output'.format(bucket),
)

In [10]:
# Two-sentence smoke-test input, written to S3 as a CSV for the batch-transform job.
# NOTE(review): to_csv with default arguments also writes the header row and the index
# column; with line-based splitting these would be sent to the model as well — verify
# that the inference script expects this layout.
dummy_sentences = [
    'There are many health problems',
    'There are many more health problems',
]
dummy_data = pd.DataFrame({"inputs": dummy_sentences})
dummy_data.to_csv('s3://deep-experiments-sagemaker-bucket/test1/prova.csv')

In [12]:
# Start the batch-transform job over the CSV uploaded above; each line of the file is
# treated as one record. The call returns immediately because wait=False.
# NOTE(review): per the SageMaker SDK docs `logs` is only meaningful when `wait` is True,
# so logs=True has no visible effect here — confirm the intended combination.
batch_input_uri = 's3://deep-experiments-sagemaker-bucket/test1/prova.csv'

transformer.transform(
    batch_input_uri,
    split_type='Line',
    content_type='text/csv',
    wait=False,
    logs=True,
)