## Comparing Training and Evaluation Performance of a Question-Answering Model on CPU and GPU (MPS) using PyTorch
- comparing the training and evaluation performance of a question-answering model using the CPU and GPU (MPS) on a Mac.
- PyTorch's torch.device is used to switch between the CPU and GPU (MPS) for computation.


## Plan:
1. Model Definition: The model is defined using PyTorch's BertForQuestionAnswering from the transformers library.
2. Data Loading: Data is loaded using a custom Dataset class and DataLoader.
3. Training: For each epoch, the model processes the training data in batches. For each batch, data and target labels are moved to the specified device. The forward pass computes the output, the loss is computed using the model's loss function, the backward pass is performed to compute gradients, and the optimizer updates the model parameters.
4. Inference: During inference, the model processes the test data in batches. For each batch, data and target labels are moved to the specified device, and the model performs a forward pass to compute predictions. Additionally, metrics such as accuracy, inference time, throughput, CPU usage, and memory usage are calculated.

In [15]:
#import the necessary packages
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertForQuestionAnswering
from torch.utils.data import DataLoader, Dataset
import time
import psutil

from datasets import load_dataset



In [16]:
# Load the SQuAD dataset
dataset = load_dataset('squad')

# Use a smaller subset for testing
small_train_dataset = dataset['train'].select(range(100))  # Select first 100 examples
small_val_dataset = dataset['validation'].select(range(100))  # Select first 100 examples


In [17]:
#create class for Question Answering tasks
class QADataset(Dataset):
    #initializes the dataset w tokenizer, questions, contexts, answers and max length
    def __init__(self, tokenizer, questions, contexts, answers, max_length=512):
        self.tokenizer = tokenizer #to convert text to token ids
        self.questions = questions #list of question strs
        self.contexts = contexts #list of context strs
        self.answers = answers #list of answer dictionaries
        self.max_length = max_length

    #method to get total number of samples in dataset
    def __len__(self):
        return len(self.questions)

    #method to retreive a single sample (question-context pair)
    #returns a tuple containing the input ids, attention mask, start positions, and end positions
    def __getitem__(self, idx):
        #tokenize the pair at the given index
        encoding = self.tokenizer(
            self.questions[idx],  
            self.contexts[idx], 
            truncation=True, 
            padding='max_length', 
            max_length=self.max_length,
            return_tensors='pt' #returns pytorch tensors
        )
        
        #extracts and squeezes the input_ids and attention_mask tensors to remove unnecessary dimensions
        #ensures that the tensors are in the correct shape
        input_ids = encoding['input_ids'].squeeze()
        attention_mask = encoding['attention_mask'].squeeze()

        #convert the answer start and end position to PyTorch tensors
        #ensures that the can be used in PyTorch's graph for gradient compuationa and backpropagation
        start_positions = torch.tensor(self.answers[idx]['start'])
        end_positions = torch.tensor(self.answers[idx]['end'])

        #return as a tuple
        return input_ids, attention_mask, start_positions, end_positions


In [26]:
# #initializes tokenizer from pre-trained 'bert-base-uncased' model
# #converts text into token IDs for the model
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# #example data
# questions = ["What is AI?", "What is machine learning?"]
# contexts = ["Artificial Intelligence is the simulation of human intelligence processes by machines.", 
#             "Machine learning is a subset of AI that focuses on the development of computer programs."]
# answers = [{'start': 0, 'end': 2}, {'start': 0, 'end': 2}]

# # #load the SQuAD dataset
# # dataset = load_dataset('squad')

# # #extract the list of questions from the training split of the dataset
# # #dataset['train']] = list of dict, the list comprehension iterates over each example in the training split and extracts the value of the 'question' key
# # questions = [example['question'] for example in dataset['train']]
# # #extract the list of contexts (passages) from the training split of the dataset
# # #the list comprehension iterates over each example in the training split and extracts the value of the 'context' key
# # contexts = [example['context'] for example in dataset['train']]
# # #extract the list of answers from the training split of the dataset
# # answers = [{'start': example['answers']['answer_start'][0], 'end': example['answers']['answer_start'][0] + len(example['answers']['text'][0])} for example in dataset['train']]


# #instantiates the QADataset with the tokenizer, questions, contexts, and answers
# dataset = QADataset(tokenizer, questions, contexts, answers)
# #create a DataLoader to load data from the QADataset, which will handle batching the data and shuffling it at every epoch
# dataloader = DataLoader(dataset, batch_size=2, shuffle=True)



In [18]:
# Extract questions, contexts, and answers from the smaller subset
questions = [example['question'] for example in small_train_dataset]
contexts = [example['context'] for example in small_train_dataset]
answers = [{'start': example['answers']['answer_start'][0], 'end': example['answers']['answer_start'][0] + len(example['answers']['text'][0])} for example in small_train_dataset]

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Create the dataset and dataloader
qa_dataset = QADataset(tokenizer, questions, contexts, answers)
dataloader = DataLoader(qa_dataset, batch_size=8, shuffle=True, num_workers=4)  # Increase batch size and use 4 worker threads



In [19]:
#initialize BERT model 
model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')


Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


NameError: name 'GradScaler' is not defined

In [20]:
def train_and_evaluate(device):
    model.to(device)
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=3e-5)

    # Training
    start_time = time.time()
    for epoch in range(3):  # Let's run for 3 epochs for simplicity
        for batch in dataloader:
            input_ids, attention_mask, start_positions, end_positions = [b.to(device) for b in batch]

            optimizer.zero_grad()
            outputs = model(input_ids=input_ids, 
                            attention_mask=attention_mask,
                            start_positions=start_positions,
                            end_positions=end_positions)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

    training_time = time.time() - start_time

    # Evaluation
    model.eval()
    correct_predictions = 0
    total_predictions = 0
    inference_time = 0

    with torch.no_grad():
        for batch in dataloader:
            input_ids, attention_mask, start_positions, end_positions = [b.to(device) for b in batch]

            start = time.time()
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            inference_time += time.time() - start

            # Assuming simple accuracy for demonstration purposes
            start_pred = torch.argmax(outputs.start_logits, dim=-1)
            end_pred = torch.argmax(outputs.end_logits, dim=-1)

            correct_predictions += ((start_pred == start_positions) & (end_pred == end_positions)).sum().item()
            total_predictions += input_ids.size(0)

    accuracy = correct_predictions / total_predictions
    return training_time, inference_time, accuracy


In [None]:
# Run experiment on CPU
cpu_device = torch.device('cpu')
cpu_accuracy, cpu_inference_time = train_and_evaluate(cpu_device)
print(f"CPU - Accuracy: {cpu_accuracy:.2f}, Inference Time: {cpu_inference_time:.2f}s")


In [None]:
# Check if MPS is available and run on MPS
if torch.backends.mps.is_available():
    mps_device = torch.device('mps')
    mps_accuracy, mps_inference_time = train_and_evaluate(mps_device)
    print(f"MPS - Accuracy: {mps_accuracy:.2f}, Inference Time: {mps_inference_time:.2f}s")
else:
    print("No MPS device found")

In [21]:
#run on CPU
cpu_device = torch.device('cpu')
cpu_training_time, cpu_inference_time, cpu_accuracy = train_and_evaluate(cpu_device)
print(f"CPU - Training Time: {cpu_training_time:.2f}s, Inference Time: {cpu_inference_time:.2f}s, Accuracy: {cpu_accuracy:.2f}")



Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/opt/anaconda3/lib/python3.11/multiprocessing/spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/multiprocessing/spawn.py", line 132, in _main
    self = reduction.pickle.load(from_parent)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute 'QADataset' on <module '__main__' (built-in)>


KeyboardInterrupt: 

In [33]:
#check if GPU is available and run on MPS
if torch.backends.mps.is_available():
    mps_device = torch.device('mps')
    mps_training_time, mps_inference_time, mps_accuracy = train_and_evaluate(mps_device)
    print(f"GPU - Training Time: {mps_training_time:.2f}s, Inference Time: {mps_inference_time:.2f}s, Accuracy: {mps_accuracy:.2f}")
else:
    print("No GPU device found")

GPU - Training Time: 44.11s, Inference Time: 0.13s, Accuracy: 1.00
