# Pytorch HuggingFace GPU acceleration tests
<https://towardsdatascience.com/gpu-acceleration-comes-to-pytorch-on-m1-macs-195c399efcc1>


In [2]:
# Check if CUDA/MPS is available

import platform
import torch

print(f'platform.platform = {platform.platform()}')
if platform.platform().startswith('macOS'):
    if torch.has_mps:
        print('MPS is available')
    else:
        print('MPS is not available')
        exit(1)
else:
    if platform.platform().startswith('Windows'):
        if torch.has_cuda:
            print('CUDA is available')
        else:
            print('CUDA is not available')
            exit(1)

  from .autonotebook import tqdm as notebook_tqdm


platform.platform = Windows-10-10.0.22621-SP0
CUDA is available


In [3]:
from datasets import load_dataset  # pip install datasets

# load the first 1K rows of the TREC dataset
trec = load_dataset('trec', split='train[:1000]')
trec

Downloading builder script: 100%|██████████| 5.09k/5.09k [00:00<00:00, 1.70MB/s]
Downloading metadata: 100%|██████████| 3.34k/3.34k [00:00<00:00, 1.67MB/s]
Downloading readme: 100%|██████████| 10.6k/10.6k [00:00<00:00, 3.55MB/s]


TypeError: can only concatenate str (not "int") to str

In [5]:
trec[0]

{'text': 'How did serfdom develop in and then leave Russia ?',
 'coarse_label': 2,
 'fine_label': 26}

In [6]:
from transformers import AutoTokenizer, AutoModel  # pip install transformers

# initialize BERT tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModel.from_pretrained('bert-base-uncased')

# take the first 64 rows of the trec data
text = trec['text'][:64]

# tokenize text using the BERT tokenizer
tokens = tokenizer(
    text, max_length=512,
    truncation=True, padding=True,
    return_tensors='pt'
)

Downloading (…)okenizer_config.json: 100%|██████████| 28.0/28.0 [00:00<00:00, 11.1kB/s]
Downloading (…)lve/main/config.json: 100%|██████████| 570/570 [00:00<00:00, 240kB/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:01<00:00, 225kB/s]
Downloading (…)/main/tokenizer.json: 100%|██████████| 466k/466k [00:01<00:00, 363kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 440M/440M [01:06<00:00, 6.61MB/s] 
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenc

# Inference Tests
When processing these tokens on CPU we get an average processing time of 547ms. We can switch this to MPS by moving the tokens tensor and model to the MPS device.

In [7]:
device = torch.device('mps')
model.to(device)
tokens.to(device)
device

device(type='mps')

In [9]:
%%timeit
model(**tokens)

244 ms ± 1.84 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
