<a href="https://colab.research.google.com/github/devasworski/pytorch-apple-silicon-benchmarks/blob/master/transformers_sequence_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!nvidia-smi

Wed May 25 13:01:10 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   68C    P8    11W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install transformers
import logging
import torch
from time import time
from argparse import ArgumentParser
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from random import randint
from tqdm import tqdm
from torch.optim import Adam

In [None]:
# Show INFO-level messages from the root logger so the benchmark report is visible.
logging.getLogger().setLevel(logging.INFO)

# Benchmark settings read by train(). The trailing `#@param` comments are
# Colab form annotations and must stay on the same line as their assignment.

# Checkpoint name passed to the tokenizer and model `from_pretrained` calls.
MODEL = "bert-base-cased" #@param ["bert-base-cased"]
# Device string handed to torch.device().
DEVICE = "cuda" #@param ["cuda", "cpu"]
# Number of sequences in the fake input batch.
BATCH_SIZE = 256 #@param ["16", "64", "256"] {type:"raw"}
# "training" enables the optimizer and backward pass; any other value runs
# the forward pass only, under torch.no_grad().
MODE = "training" #@param ["training", "inference"]
# Number of timed iterations.
STEPS = 100 #@param [100]
# Number of token ids per fake sequence. NOTE: the name is misspelled
# ("LENGHT" for "LENGTH"); it is kept because train() refers to it as is.
SEQUENCE_LENGHT = 512 #@param ["128", "512"] {type:"raw"}


def _sync_device(device):
    """Block until all work queued on *device* has finished.

    CUDA kernels are launched asynchronously, so the host clock must not be
    read until the device has drained its queue. Other device types are left
    untouched.
    """
    if device.type == "cuda":
        torch.cuda.synchronize(device)


def train():
    """Time ``STEPS`` iterations of ``MODEL`` on ``DEVICE`` and log the result.

    All settings come from the module-level constants ``MODEL``, ``DEVICE``,
    ``BATCH_SIZE``, ``MODE``, ``STEPS`` and ``SEQUENCE_LENGHT``. When ``MODE``
    is ``'training'`` every iteration runs forward, backward and an Adam
    step; otherwise only the forward pass runs, under ``torch.no_grad()``.

    The elapsed wall-clock time is reported through ``logging.info``; nothing
    is returned.
    """
    # instantiate model and tokenizer
    tok = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)

    # get the device and move the model onto it
    device = torch.device(DEVICE)
    model.to(device=device)

    do_backprop = MODE == 'training'

    # the optimizer is only needed (and only referenced) when training
    if do_backprop:
        optim = Adam(model.parameters(), lr=1e-04)

    # create fake inputs (performance does not depend on the input tokens, just on the sequence length)
    # note: `[...] * BATCH_SIZE` repeats one random row / one random label across the whole batch
    input_ids = [[randint(0, tok.vocab_size - 1) for _ in range(SEQUENCE_LENGHT)]] * BATCH_SIZE
    attention_mask = [[1] * SEQUENCE_LENGHT] * BATCH_SIZE
    labels = [randint(0, 1)] * BATCH_SIZE

    # build the input dict and turn every entry into a tensor
    inputs = dict(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
    inputs = {k: torch.tensor(v) for k, v in inputs.items()}

    logging.info("Input tensors size:")
    for k, v in inputs.items():
        logging.info(f" * {k}: {v.shape}")

    # make sure the model upload has finished so it is not billed to the timed region
    _sync_device(device)
    start_time = time()
    for _ in tqdm(range(STEPS), desc="Testing...", total=STEPS):
        # move inputs to the device inside the loop on purpose:
        # in a real-world scenario the data will be different at every step
        data = {k: v.to(device=device) for k, v in inputs.items()}

        if do_backprop:
            optim.zero_grad()
            res = model(**data)
            res.loss.backward()
            optim.step()
        else:
            with torch.no_grad():
                model(**data)

    # wait for kernels that are still queued; otherwise the clock stops
    # before the last steps have actually been executed on the GPU
    _sync_device(device)
    elapsed = time() - start_time

    logging.info(f"Model {MODEL} took {elapsed:.2f} seconds to do {STEPS} steps in {MODE} with batch size {BATCH_SIZE} on {DEVICE}.")
# Run the benchmark as soon as this cell is executed.
train()