In [None]:
import os

# The Google Cloud Notebook product has specific requirements
IS_GOOGLE_CLOUD_NOTEBOOK = os.path.exists("/opt/deeplearning/metadata/env_version")

# Google Cloud Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_GOOGLE_CLOUD_NOTEBOOK:
    USER_FLAG = "--user"

In [1]:
!pip -q install --user --upgrade transformers
!pip -q install --user --upgrade datasets
!pip -q install --user --upgrade tqdm
!pip -q install --user --upgrade cloudml-hypertune

In [1]:
import base64
import json
import os
import random
import sys

import google.auth
from google.cloud import aiplatform
from google.cloud.aiplatform import gapic as aip
from google.cloud.aiplatform import hyperparameter_tuning as hpt
from google.protobuf.json_format import MessageToDict

In [2]:
import datasets
import numpy as np
import pandas as pd
import torch
import transformers
from datasets import ClassLabel, Sequence, load_dataset
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          EvalPrediction, Trainer, TrainingArguments,
                          default_data_collator)

In [3]:
print(f"Notebook runtime: {'GPU' if torch.cuda.is_available() else 'CPU'}")
print(f"PyTorch version : {torch.__version__}")
print(f"Transformers version : {datasets.__version__}")
print(f"Datasets version : {transformers.__version__}")

Notebook runtime: GPU
PyTorch version : 1.11.0
Transformers version : 2.1.0
Datasets version : 4.18.0


In [3]:
APP_NAME = "xlm-roberta-large-xnli"

In [4]:
datasets = load_dataset("imdb")



  0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
batch_size = 16
max_seq_length = 128
model_name_or_path = "joeddav/xlm-roberta-large-xnli"

In [9]:
! pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m28.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.96


In [6]:
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path
)

In [None]:
# Dataset loading repeated here to make this cell idempotent
# Since we are over-writing datasets variable
datasets = load_dataset("imdb")

# Mapping labels to ids
# NOTE: We can extract this automatically but the `Unique` method of the datasets
# is not reporting the label -1 which shows up in the pre-processing.
# Hence the additional -1 term in the dictionary
label_to_id = {1: 1, 0: 0, -1: 0}


def preprocess_function(examples):
    """
    Tokenize the input example texts
    NOTE: The same preprocessing step(s) will be applied
    at the time of inference as well.
    """
    args = (examples["text"],)
    result = tokenizer(
        *args, padding="max_length", max_length=max_seq_length, truncation=True
    )

    # Map labels to IDs (not necessary for GLUE tasks)
    if label_to_id is not None and "label" in examples:
        result["label"] = [label_to_id[example] for example in examples["label"]]

    return result


# apply preprocessing function to input examples
datasets = datasets.map(preprocess_function, batched=True, load_from_cache_file=True)

In [8]:
model = AutoModelForSequenceClassification.from_pretrained(
    model_name_or_path
)

Downloading:   0%|          | 0.00/2.09G [00:00<?, ?B/s]

Some weights of the model checkpoint at joeddav/xlm-roberta-large-xnli were not used when initializing XLMRobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
args = TrainingArguments(
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=1,
    weight_decay=0.01,
    output_dir="/tmp/cls",
)

In [10]:
def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    preds = np.argmax(preds, axis=1)
    return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()}

In [11]:
trainer = Trainer(
    model,
    args,
    train_dataset=datasets["train"],
    eval_dataset=datasets["test"],
    data_collator=default_data_collator,
    tokenizer=AutoTokenizer.from_pretrained('joeddav/xlm-roberta-large-xnli'),
    compute_metrics=compute_metrics,
)

In [12]:
trainer.train()

The following columns in the training set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 25000
  Num Epochs = 1
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 1563


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2558,0.236516,0.90296


Saving model checkpoint to /tmp/cls/checkpoint-500
Configuration saved in /tmp/cls/checkpoint-500/config.json
Model weights saved in /tmp/cls/checkpoint-500/pytorch_model.bin
tokenizer config file saved in /tmp/cls/checkpoint-500/tokenizer_config.json
Special tokens file saved in /tmp/cls/checkpoint-500/special_tokens_map.json
Saving model checkpoint to /tmp/cls/checkpoint-1000
Configuration saved in /tmp/cls/checkpoint-1000/config.json
Model weights saved in /tmp/cls/checkpoint-1000/pytorch_model.bin
tokenizer config file saved in /tmp/cls/checkpoint-1000/tokenizer_config.json
Special tokens file saved in /tmp/cls/checkpoint-1000/special_tokens_map.json
Saving model checkpoint to /tmp/cls/checkpoint-1500
Configuration saved in /tmp/cls/checkpoint-1500/config.json
Model weights saved in /tmp/cls/checkpoint-1500/pytorch_model.bin
tokenizer config file saved in /tmp/cls/checkpoint-1500/tokenizer_config.json
Special tokens file saved in /tmp/cls/checkpoint-1500/special_tokens_map.json
The

TrainOutput(global_step=1563, training_loss=0.30403682198649556, metrics={'train_runtime': 2959.1032, 'train_samples_per_second': 8.449, 'train_steps_per_second': 0.528, 'total_flos': 5824590700800000.0, 'train_loss': 0.30403682198649556, 'epoch': 1.0})

In [13]:
saved_model_local_path = "./models"
!mkdir ./models

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [14]:
trainer.save_model(saved_model_local_path)

Saving model checkpoint to ./models
Configuration saved in ./models/config.json
Model weights saved in ./models/pytorch_model.bin
tokenizer config file saved in ./models/tokenizer_config.json
Special tokens file saved in ./models/special_tokens_map.json


In [15]:
history = trainer.evaluate()

The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 25000
  Batch size = 16


In [16]:
history

{'eval_loss': 0.2365160882472992,
 'eval_accuracy': 0.9029600024223328,
 'eval_runtime': 666.262,
 'eval_samples_per_second': 37.523,
 'eval_steps_per_second': 2.346,
 'epoch': 1.0}

In [17]:
label_text = {0: "Negative", 1: "Positive"}
saved_model_path = saved_model_local_path


def predict(input_text, saved_model_path):
    # initialize tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

    # preprocess and encode input text
    tokenizer_args = (input_text,)
    predict_input = tokenizer(
        *tokenizer_args,
        padding="max_length",
        max_length=128,
        truncation=True,
        return_tensors="pt",
    )

    # load trained model
    loaded_model = AutoModelForSequenceClassification.from_pretrained(saved_model_path)

    # get predictions
    output = loaded_model(predict_input["input_ids"])

    # return labels
    label_id = torch.argmax(*output.to_tuple(), dim=1)

    print(f"Review text: {input_text}")
    print(f"Sentiment : {label_text[label_id.item()]}\n")

In [18]:
# example #1
review_text = (
    """Jaw dropping visual affects and action! One of the best I have seen to date."""
)
predict_input = predict(review_text, saved_model_path)

loading configuration file https://huggingface.co/joeddav/xlm-roberta-large-xnli/resolve/main/config.json from cache at /home/jupyter/.cache/huggingface/transformers/da1b485a5537cdbf9955a680c80c4231fde7151b2293c7a5c774f9eb80f18d8c.a708a6640cf3d751b88753b1e3e696d15385fa78532782f3703b2acb724495e1
Model config XLMRobertaConfig {
  "_name_or_path": "joeddav/xlm-roberta-large-xnli",
  "architectures": [
    "XLMRobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "contradiction",
    "1": "neutral",
    "2": "entailment"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "contradiction": 0,
    "entailment": 2,
    "neutral": 1
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num

Review text: Jaw dropping visual affects and action! One of the best I have seen to date.
Sentiment : Positive



In [19]:
# example #2
review_text = """Take away the CGI and the A-list cast and you end up with film with less punch."""
predict_input = predict(review_text, saved_model_path)

loading configuration file https://huggingface.co/joeddav/xlm-roberta-large-xnli/resolve/main/config.json from cache at /home/jupyter/.cache/huggingface/transformers/da1b485a5537cdbf9955a680c80c4231fde7151b2293c7a5c774f9eb80f18d8c.a708a6640cf3d751b88753b1e3e696d15385fa78532782f3703b2acb724495e1
Model config XLMRobertaConfig {
  "_name_or_path": "joeddav/xlm-roberta-large-xnli",
  "architectures": [
    "XLMRobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "contradiction",
    "1": "neutral",
    "2": "entailment"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "contradiction": 0,
    "entailment": 2,
    "neutral": 1
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num

Review text: Take away the CGI and the A-list cast and you end up with film with less punch.
Sentiment : Negative



In [21]:
%%writefile predictor/custom_handler.py

import os
import json
import logging

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from ts.torch_handler.base_handler import BaseHandler

logger = logging.getLogger(__name__)


class TransformersClassifierHandler(BaseHandler):
    """
    The handler takes an input string and returns the classification text 
    based on the serialized transformers checkpoint.
    """
    def __init__(self):
        super(TransformersClassifierHandler, self).__init__()
        self.initialized = False

    def initialize(self, ctx):
        """ Loads the model.pt file and initialized the model object.
        Instantiates Tokenizer for preprocessor to use
        Loads labels to name mapping file for post-processing inference response
        """
        self.manifest = ctx.manifest

        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        self.device = torch.device("cuda:" + str(properties.get("gpu_id")) if torch.cuda.is_available() else "cpu")

        # Read model serialize/pt file
        serialized_file = self.manifest["model"]["serializedFile"]
        model_pt_path = os.path.join(model_dir, serialized_file)
        if not os.path.isfile(model_pt_path):
            raise RuntimeError("Missing the model.pt or pytorch_model.bin file")
        
        # Load model
        self.model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        self.model.to(self.device)
        self.model.eval()
        logger.debug('Transformer model from path {0} loaded successfully'.format(model_dir))
        
        # Ensure to use the same tokenizer used during training
        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')

        # Read the mapping file, index to object name
        mapping_file_path = os.path.join(model_dir, "index_to_name.json")

        if os.path.isfile(mapping_file_path):
            with open(mapping_file_path) as f:
                self.mapping = json.load(f)
        else:
            logger.warning('Missing the index_to_name.json file. Inference output will default.')
            self.mapping = {"0": "Negative",  "1": "Positive"}

        self.initialized = True

    def preprocess(self, data):
        """ Preprocessing input request by tokenizing
            Extend with your own preprocessing steps as needed
        """
        text = data[0].get("data")
        if text is None:
            text = data[0].get("body")
        sentences = text.decode('utf-8')
        logger.info("Received text: '%s'", sentences)

        # Tokenize the texts
        tokenizer_args = ((sentences,))
        inputs = self.tokenizer(*tokenizer_args,
                                padding='max_length',
                                max_length=128,
                                truncation=True,
                                return_tensors = "pt")
        return inputs

    def inference(self, inputs):
        """ Predict the class of a text using a trained transformer model.
        """
        prediction = self.model(inputs['input_ids'].to(self.device))[0].argmax().item()

        if self.mapping:
            prediction = self.mapping[str(prediction)]

        logger.info("Model predicted: '%s'", prediction)
        return [prediction]

    def postprocess(self, inference_output):
        return inference_output


Writing predictor/custom_handler.py


In [22]:
%%writefile ./predictor/index_to_name.json

{
    "0": "Negative", 
    "1": "Positive"
}

Writing ./predictor/index_to_name.json


In [4]:
%%bash -s $APP_NAME

APP_NAME=$1

cat << EOF > ./predictor/Dockerfile

FROM pytorch/torchserve:latest-cpu

# install dependencies
RUN python3 -m pip install --upgrade pip
RUN pip3 install transformers

USER model-server

# copy model artifacts, custom handler and other dependencies
COPY ./custom_handler.py /home/model-server/
COPY ./index_to_name.json /home/model-server/
COPY ./models/ /home/model-server/

# create torchserve configuration file
USER root
RUN printf "\nservice_envelope=json" >> /home/model-server/config.properties
RUN printf "\ninference_address=http://0.0.0.0:7080" >> /home/model-server/config.properties
RUN printf "\nmanagement_address=http://0.0.0.0:7081" >> /home/model-server/config.properties
USER model-server

# expose health and prediction listener ports from the image
EXPOSE 7080
EXPOSE 7081

# create model archive file packaging model artifacts and dependencies
RUN torch-model-archiver -f \
  --model-name=$APP_NAME \
  --version=1.0 \
  --serialized-file=/home/model-server/pytorch_model.bin \
  --handler=/home/model-server/custom_handler.py \
  --extra-files "/home/model-server/config.json,/home/model-server/tokenizer.json,/home/model-server/training_args.bin,/home/model-server/tokenizer_config.json,/home/model-server/special_tokens_map.json,/home/model-server/index_to_name.json" \
  --export-path=/home/model-server/model-store

# run Torchserve HTTP serve to respond to prediction requests
CMD ["torchserve", \
     "--start", \
     "--ts-config=/home/model-server/config.properties", \
     "--models", \
     "$APP_NAME=$APP_NAME.mar", \
     "--model-store", \
     "/home/model-server/model-store"]
EOF

echo "Writing ./predictor/Dockerfile"

Writing ./predictor/Dockerfile


In [5]:
PROJECT_ID="project-id"
CUSTOM_PREDICTOR_IMAGE_URI = f"gcr.io/{PROJECT_ID}/pytorch_predict_{APP_NAME}"
print(f"CUSTOM_PREDICTOR_IMAGE_URI = {CUSTOM_PREDICTOR_IMAGE_URI}")

CUSTOM_PREDICTOR_IMAGE_URI = gcr.io/mlconsole-poc/pytorch_predict_xlm-roberta-large-xnli


In [6]:
!docker build \
  --tag=$CUSTOM_PREDICTOR_IMAGE_URI \
  ./predictor

Sending build context to Docker daemon  2.262GB
Step 1/16 : FROM pytorch/torchserve:latest-cpu
 ---> 659d9f4840d5
Step 2/16 : RUN python3 -m pip install --upgrade pip
 ---> Using cache
 ---> 5dc616524ce9
Step 3/16 : RUN pip3 install transformers
 ---> Using cache
 ---> 0c1e2b18fb86
Step 4/16 : USER model-server
 ---> Using cache
 ---> 16689c0d7b03
Step 5/16 : COPY ./custom_handler.py /home/model-server/
 ---> Using cache
 ---> f9ac835ef856
Step 6/16 : COPY ./index_to_name.json /home/model-server/
 ---> Using cache
 ---> a5d54cec9a1e
Step 7/16 : COPY ./models/ /home/model-server/
 ---> Using cache
 ---> 7b7fc39a9989
Step 8/16 : USER root
 ---> Using cache
 ---> fb83b09e68a4
Step 9/16 : RUN printf "\nservice_envelope=json" >> /home/model-server/config.properties
 ---> Using cache
 ---> d0f7c0c1b91b
Step 10/16 : RUN printf "\ninference_address=http://0.0.0.0:7080" >> /home/model-server/config.properties
 ---> Using cache
 ---> 81c630f1c1ea
Step 11/16 : RUN printf "\nmanagement_address=htt

In [8]:
!docker run -dp 7080:7080 $CUSTOM_PREDICTOR_IMAGE_URI

5eb0137c30e30dcc132f7812fb25c5e4c5a90ce67960062ce76d963a5f001481


In [13]:
!curl http://localhost:7080/ping

{
  "status": "Healthy"
}


In [14]:
%%bash -s $APP_NAME

APP_NAME=$1

cat > ./predictor/instances.json <<END
{ 
   "instances": [
     { 
       "data": {
         "b64": "$(echo 'Take away the CGI and the A-list cast and you end up with film with less punch.' | base64 --wrap=0)"
       }
     }
   ]
}
END

curl -s -X POST \
  -H "Content-Type: application/json; charset=utf-8" \
  -d @./predictor/instances.json \
  http://localhost:7080/predictions/$APP_NAME/

{"predictions": ["Positive"]}

In [19]:
!docker push $CUSTOM_PREDICTOR_IMAGE_URI

Using default tag: latest
The push refers to repository [gcr.io/mlconsole-poc/pytorch_predict_xlm-roberta-large-xnli]

[1B31f4b8b8: Preparing 
[1B1d4e4250: Preparing 
[1B34f6ccaa: Preparing 
[1Bb4944353: Preparing 
[1Bc5b15422: Preparing 
[1B99f6a19b: Preparing 
[1Bc9cfb1ac: Preparing 
[1B25166418: Preparing 
[1B20ea0c3b: Preparing 
[1Bbf18a086: Preparing 
[1Bd2b4edc6: Preparing 
[1B4d0230ad: Preparing 
[1Bf03ffca8: Preparing 
[1Bf2874ea9: Preparing 
[1Ba1d7f3ba: Preparing 
[1Bd7c91a07: Preparing 
[1Bc3f5e5be: Preparing 
[1B512fd434: Preparing 
[1B31fc0e08: Preparing 
[20B1f4b8b8: Pushed   4.198GB/4.198GB[2K[20A[2K[20A[2K[16A[2K[20A[2K[16A[2K[20A[2K[16A[2K[20A[2K[16A[2K[20A[2K[16A[2K[20A[2K[20A[2K[16A[2K[20A[2K[20A[2K[16A[2K[20A[2K[16A[2K[20A[2K[19A[2K[20A[2K[15A[2K[16A[2K[13A[2K[20A[2K[16A[2K[13A[2K[16A[2K[13A[2K[16A[2K[20A[2K[16A[2K[20A[2K[13A[2K[16A[2K[13A[2K[16A[2K[20A[2K[16A[2K[20

In [None]:
import os
import sys
from google.cloud import aiplatform

# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your GCP account. This provides access to your
# Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

# The Google Cloud Notebook product has specific requirements
IS_GOOGLE_CLOUD_NOTEBOOK = os.path.exists("/opt/deeplearning/metadata/env_version")

# If on Google Cloud Notebooks, then don't execute this code
if IS_GOOGLE_CLOUD_NOTEBOOK:
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    # If you are running this notebook locally, replace the string below with the
    # path to your service account key and run this cell to authenticate your GCP
    # account.
    elif not os.getenv("IS_TESTING"): 
        %env GOOGLE_APPLICATION_CREDENTIALS "credetial_file_path"

In [26]:
PROJECT_ID='Project-ID' 
BUCKET_NAME = 'bucket-name'
MODEL_NAME = 'xlm-roberta-large-xnli'

aiplatform.init(project=PROJECT_ID, staging_bucket=BUCKET_NAME, location="us-central1", credentials = cred)

In [27]:
VERSION = 1
model_display_name = f"{MODEL_NAME}-v{VERSION}"
model_description = "HF ZSL Model Custom Container"

health_route = "/ping"

predict_route = f"/predictions/{MODEL_NAME}"
serving_container_ports = [7080]

In [None]:
model = aiplatform.Model.upload(
    display_name=model_display_name,
    description=model_description,
    serving_container_image_uri=CUSTOM_PREDICTOR_IMAGE_URI,
    serving_container_predict_route=predict_route,
    serving_container_health_route=health_route,
    serving_container_ports=serving_container_ports,
)

model.wait()

print(model.display_name)
print(model.resource_name)

In [None]:
endpoint_display_name = f"{MODEL_NAME}-endpoint"
filter = f'display_name="{endpoint_display_name}"'

for endpoint_info in aiplatform.Endpoint.list(filter=filter):
    print(
        f"Endpoint display name = {endpoint_info.display_name} resource id ={endpoint_info.resource_name} "
    )

endpoint = aiplatform.Endpoint(endpoint_info.resource_name)