In [1]:
!pip -q install sagemaker transformers --upgrade

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
awscli 1.25.42 requires botocore==1.27.42, but you have botocore 1.27.66 which is incompatible.
aiobotocore 2.0.1 requires botocore<1.22.9,>=1.22.8, but you have botocore 1.27.66 which is incompatible.[0m[31m
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p38/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [80]:
import sagemaker

print(sagemaker.__version__)

role=sagemaker.get_execution_role()

2.108.0


In [81]:
# we're using the output from a SageMaker Processing job
input_path = 's3://sagemaker-eu-central-1-843182712965/sagemaker-scikit-learn-2022-09-03-18-17-21-834/output'

train_input_path = '{}/{}'.format(input_path, 'train_data')
valid_input_path = '{}/{}'.format(input_path, 'valid_data')
test_input_path  = '{}/{}'.format(input_path, 'test_data')

In [82]:
hyperparameters={
    'epochs': 3,
    'model_name': 'google/vit-base-patch16-224-in21k',
}

In [26]:
entry_point = 'train_hf_trainer.py'

In [27]:
from sagemaker.huggingface import HuggingFace

huggingface_estimator = HuggingFace(
    role=role,
    # Fine-tuning script
    entry_point=entry_point,
    hyperparameters=hyperparameters,
    # Infrastructure
    transformers_version='4.17.0',
    pytorch_version='1.10.2',
    py_version='py38',
    instance_type='ml.g4dn.2xlarge',
    instance_count=1
)

In [28]:
huggingface_estimator.fit(
    {'train': train_input_path, 
     'valid': valid_input_path,
     'test': test_input_path,
    })

2022-09-03 23:36:07 Starting - Starting the training job...
2022-09-03 23:36:32 Starting - Preparing the instances for trainingProfilerReport-1662248167: InProgress
......
2022-09-03 23:37:32 Downloading - Downloading input data...
2022-09-03 23:38:02 Training - Downloading the training image..............................
2022-09-03 23:43:04 Training - Training image download completed. Training in progress..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2022-09-03 23:43:07,049 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2022-09-03 23:43:07,074 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2022-09-03 23:43:07,080 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2022-09-03 23:43:07,550 sagemaker-training-toolkit INFO     Invoking user script[0m
[34mTrainin

In [29]:
huggingface_estimator.model_data

's3://sagemaker-eu-central-1-843182712965/huggingface-pytorch-training-2022-09-03-23-36-07-225/output/model.tar.gz'

In [None]:
%%bash -s $huggingface_estimator.model_data
aws s3 cp $1 model-hf.tar.gz
tar tvfz model-hf.tar.gz

### HuggingFace container doesn't support deployment for image classification tasks so we have to use pytorch to deploy

In [83]:
entry_point = 'train_pytorch_lightning.py'

In [85]:
from sagemaker.huggingface import HuggingFace

huggingface_estimator = HuggingFace(
    role=role,
    # Fine-tuning script
    entry_point=entry_point,
    hyperparameters=hyperparameters,
    # Infrastructure
    transformers_version='4.17.0',  # Need >= 4.10 because of https://github.com/huggingface/transformers/issues/12904
    pytorch_version='1.10.2',
    py_version='py38',
    instance_type='ml.g4dn.2xlarge',
    instance_count=1
)

In [86]:
huggingface_estimator.fit(
    {'train': train_input_path, 
     'valid': valid_input_path,
     'test': test_input_path,
    })

2022-09-10 18:02:18 Starting - Starting the training job...
2022-09-10 18:02:42 Starting - Preparing the instances for trainingProfilerReport-1662832938: InProgress
......
2022-09-10 18:03:42 Downloading - Downloading input data...
2022-09-10 18:04:17 Training - Downloading the training image.................................
2022-09-10 18:09:44 Training - Training image download completed. Training in progress.[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2022-09-10 18:09:38,270 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2022-09-10 18:09:38,296 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2022-09-10 18:09:38,306 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2022-09-10 18:09:39,265 sagemaker-training-toolkit INFO     Invoking user script[0m
[34mTrain

In [214]:
huggingface_estimator.model_data

's3://sagemaker-eu-central-1-843182712965/huggingface-pytorch-training-2022-09-10-18-02-18-006/output/model.tar.gz'

In [38]:
%%bash -s $huggingface_estimator.model_data
aws s3 cp $1 model-pl.tar.gz
tar tvfz model-pl.tar.gz

download: s3://sagemaker-eu-central-1-843182712965/huggingface-pytorch-training-2022-09-04-12-16-25-136/output/model.tar.gz to ./model-pl.tar.gz
-rw-r--r-- 0/0      1032158795 2022-09-04 12:25 vit.ckpt


In [215]:
rt_predictor = huggingface_estimator.deploy(initial_instance_count=1,
                                      instance_type='ml.m5.large', 
                                      endpoint_name='HuggingFace-ViT',
                                      wait=True)

-----!

### Process image to pass to endpoint

In [76]:
import argparse, os, subprocess, sys, ast, pickle, boto3
import numpy as np
from PIL import Image
from io import BytesIO

In [102]:
from transformers import ViTFeatureExtractor
# Feature Extractor
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')

In [107]:
!pip install datasets
from datasets import Dataset, Features, ClassLabel, Array3D

In [164]:
image = Image.open('Validation/Farm/farmstrain-153_jpg.rf.ab54108710ba9006c6cfb037ec43b1b5.jpg')
image = image.resize((224,224))
image = np.array(image, dtype=np.uint8)
image = np.moveaxis(image, source=-1, destination=0) # channels first for PyTorch

In [209]:
#this can be converted to json
inputs = feature_extractor(image)
pixel_values = inputs["pixel_values"]
pixel_values = np.array(pixel_values)
pixel_values = pixel_values.tolist()

In [267]:
#still seem to have issues with using AWS endpoints for HuggingFace image classification. We will use Heroku instead
rt_predictor.predict({"inputs": values })

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from primary with message "{
  "code": 400,
  "type": "InternalServerException",
  "message": "(\"You need to define one of the following [\u0027audio-classification\u0027, \u0027automatic-speech-recognition\u0027, \u0027feature-extraction\u0027, \u0027text-classification\u0027, \u0027token-classification\u0027, \u0027question-answering\u0027, \u0027table-question-answering\u0027, \u0027fill-mask\u0027, \u0027summarization\u0027, \u0027translation\u0027, \u0027text2text-generation\u0027, \u0027text-generation\u0027, \u0027zero-shot-classification\u0027, \u0027zero-shot-image-classification\u0027, \u0027conversational\u0027, \u0027image-classification\u0027, \u0027image-segmentation\u0027, \u0027object-detection\u0027] as env \u0027HF_TASK\u0027.\", 403)"
}
". See https://eu-central-1.console.aws.amazon.com/cloudwatch/home?region=eu-central-1#logEventViewer:group=/aws/sagemaker/Endpoints/HuggingFace-ViT in account 843182712965 for more information.

In [269]:
#delete endpoint to not incure costs
rt_predictor.delete_model()
rt_predictor.delete_endpoint()