# Train And Test - Classification XPU

## Classification

- Imports
  - standard libs
  - 3rd party libs
  - alphabetical or logical grouping
- Set random seed
- Config and Hyperparams
- Dataset and Dataloader
- Model definition/class
- Helper functions (training, eval, visualization)
- Then main code

Note: You can toggle torch.amp on and off for testing; this works on XPU. This is not a great example case for leveraging AMP, but it is functional for testing. The toggle is a setting in the hyperparameters.

In [6]:
import logging
from pathlib import Path
import random
import requests

import torch
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import make_circles
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Logging configuration: DEBUG level with timestamp, logger name and level on every record
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Hyperparameters plus the seed-randomisation and CPU-only switches; adjust as needed
hp = dict(
    n_samples=5000,
    test_size=0.2,
    learning_rate=0.025,
    noise=0.03,
    epochs=3000,
    input_features=2,
    output_features=1,
    hidden_units=24,
    random_seed=42,
    randomize_seed=True,
    cpu_only=False,
)

# Swap the fixed seed for a freshly drawn one when requested, then report the seed in use
if hp["randomize_seed"]:
    hp["random_seed"] = random.randint(0, 1000000000)
logging.info(f"Seed set to: {hp['random_seed']}")

def get_device():
    """
    Choose the torch device name for this run and log the choice.

    When hp['cpu_only'] equals False, an Intel XPU is preferred, then CUDA, and the CPU is the fallback.
    Any other hp['cpu_only'] value forces the CPU without probing for accelerators.

    Returns the device name as a string: "xpu", "cuda" or "cpu".
    """
    accelerators_allowed = hp['cpu_only'] == False
    if not accelerators_allowed:
        logging.info("Using CPU only")
        return "cpu"

    # CUDA is only probed when no XPU is available
    device = "xpu" if torch.xpu.is_available() else ("cuda" if torch.cuda.is_available() else "cpu")
    logging.info(f"Using device: {device}")
    return device

# Basic Classification Model with ReLU activations
class ClassificationModel(nn.Module):
    """
    Fully connected classifier: four Linear + ReLU stages followed by a Linear output layer.

    input_features: int, width of each input sample
    output_features: int, width of the raw (logit) output
    hidden_units: int, width of every hidden layer
    """

    def __init__(self, input_features=2, output_features=1, hidden_units=8):
        super().__init__()

        # Widths from the input through the last hidden layer; each consecutive pair becomes Linear + ReLU
        widths = [input_features] + [hidden_units] * 4
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(in_features=fan_in, out_features=fan_out))
            layers.append(nn.ReLU())

        # The output layer has no activation, so the model emits raw logits
        layers.append(nn.Linear(in_features=hidden_units, out_features=output_features))

        self.linear_layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run x through the layer stack and return the result."""
        return self.linear_layer_stack(x)

def generate_data(samples, noise, seed, size):
    """
    Build a make_circles dataset, plot it, and split it into train and test tensors.

    The generated points are drawn with plt.scatter, coloured by label, before the split.

    samples: int, number of samples to generate
    noise: float, standard deviation of Gaussian noise added to the data
    seed: int, random seed used for both the generation and the split
    size: float, size of the test set

    Returns X_train, X_test, y_train, y_test as float tensors.
    """
    # Raw numpy points and labels from sklearn
    points, labels = make_circles(samples, noise=noise, random_state=seed)

    # Scatter the two coordinates, coloured by class label
    plt.scatter(x=points[:, 0], y=points[:, 1], c=labels, cmap=plt.cm.RdYlBu)

    # Convert to float tensors before splitting
    features = torch.from_numpy(points).type(torch.float)
    targets = torch.from_numpy(labels).type(torch.float)

    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=size, random_state=seed
    )
    return X_train, X_test, y_train, y_test

# Calculate accuracy as the percentage of predictions that match the labels
def accuracy_fn(y_true, y_pred):
    """
    Return the percentage (0-100) of predictions that equal their labels.

    y_true: torch.Tensor, ground-truth labels
    y_pred: torch.Tensor, predicted labels, compared element-wise with y_true

    Returns a float. An empty y_pred yields 0.0 instead of raising ZeroDivisionError.
    """
    total = len(y_pred)
    if total == 0:
        # Nothing to score; avoid dividing by zero
        return 0.0
    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / total) * 100

def import_helper_func():
    """
    Make sure helper_functions.py exists in the working directory, downloading it if missing.

    After the file is in place the module is imported once to confirm that plot_predictions and
    plot_decision_boundary can be loaded. That import is local to this function, so callers must
    still import the names themselves.

    Raises:
        requests.RequestException: if the download times out, fails, or returns an HTTP error status.
        ImportError: if the helper module or either plotting function cannot be imported.
    """
    helper_path = Path("helper_functions.py")

    if helper_path.is_file():
        print("File exists, skipping download")
    else:
        print("Downloading")
        request = requests.get(
            "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py",
            timeout=30,  # do not hang forever on a stalled connection
        )
        # Fail loudly on HTTP errors; otherwise an error page would be saved as the module
        # and every later run would skip the download because the file exists.
        request.raise_for_status()
        with open(helper_path, "wb") as f:
            f.write(request.content)

    # Function-scope import: verifies the helpers load, but binds nothing at module level
    from helper_functions import plot_predictions, plot_decision_boundary

def set_seed(seed=hp['random_seed'], device="cpu"):
    """
    Seed torch for reproducibility and log the seed used.

    seed: value passed to torch.manual_seed; defaults to hp['random_seed'] as read when this function is defined
    device: str, when "xpu" or "cuda" the matching torch backend's manual_seed is called as well
    """
    torch.manual_seed(seed)
    if device in ("xpu", "cuda"):
        # Resolves to torch.xpu.manual_seed or torch.cuda.manual_seed
        getattr(torch, device).manual_seed(seed)
    logging.info(f"Seed has been set: {seed}")

def move_to_device(X_train, y_train, X_test, y_test, device="cpu"):
    """
    Transfer the train and test tensors to the target device and log where they ended up.

    Returns the moved tensors as a tuple in the order X_train, y_train, X_test, y_test.
    """
    # Same .to(device) call for each tensor, applied in argument order
    X_train, y_train, X_test, y_test = (
        tensor.to(device) for tensor in (X_train, y_train, X_test, y_test)
    )

    logging.info(f"X_train device: {X_train.device}, y_train device: {y_train.device}")
    logging.info(f"X_test device: {X_test.device}, y_test device: {y_test.device}")

    return X_train, y_train, X_test, y_test

def train_test_model(model, X_train, y_train, X_test, y_test, loss_fn, optimizer, epochs, device="cpu", scaler=None):
    """
    Train and test a PyTorch model, using the whole training set as a single batch each epoch.

    On any device other than "cpu" the forward passes run under torch.amp.autocast. Gradient
    scaling is applied only when a scaler is supplied; without one the plain backward/step is used.
    The final epoch's metrics are logged once after the loop.

    Args:
        model (torch.nn.Module): A PyTorch model.
        X_train (torch.Tensor): Training data (features).
        y_train (torch.Tensor): Training labels.
        X_test (torch.Tensor): Testing data (features).
        y_test (torch.Tensor): Testing labels.
        loss_fn (torch.nn.Module): An instance of a PyTorch loss function that takes raw logits.
        optimizer (torch.optim.Optimizer): An instance of a PyTorch optimizer.
        epochs (int): Number of epochs to train the model for.
        device (str): The target device to run the model on.
        scaler (torch.amp.GradScaler | None): Optional gradient scaler, ignored when device is "cpu".

    Returns:
        tuple: Training loss, testing loss from the final epoch as floats,
        or (None, None) when epochs is not positive and no training ran.
    """
    # Function-scope stdlib import so this block does not depend on the file's import section
    from contextlib import nullcontext

    use_amp = device != "cpu"
    use_scaler = use_amp and scaler is not None

    def autocast_context():
        # Fresh context per use; a no-op on CPU so both devices share one code path
        return torch.amp.autocast(device) if use_amp else nullcontext()

    if epochs <= 0:
        # Without this guard the final log line would hit unbound loop variables
        logging.warning("No training performed: epochs must be a positive integer")
        return None, None

    for epoch in tqdm(range(epochs), desc="Training"):
        model.train()

        # Forward pass and loss only; backward and the optimizer step stay outside autocast
        with autocast_context():
            y_logits = model(X_train).squeeze()
            y_pred = torch.round(torch.sigmoid(y_logits))
            loss = loss_fn(y_logits, y_train)
        acc = accuracy_fn(y_true=y_train, y_pred=y_pred)

        optimizer.zero_grad()
        if use_scaler:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        model.eval()

        with torch.inference_mode():
            with autocast_context():
                test_logits = model(X_test).squeeze()
                test_pred = torch.round(torch.sigmoid(test_logits))

            # Test loss is computed outside autocast, as before
            test_loss = loss_fn(test_logits, y_test)
            test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

    logging.info(f"Epoch: {epoch} | Loss: {loss:.5f} | Acc: {acc:.2f}% | Test Loss: {test_loss:.5f} | Test Acc: {test_acc:.2f}% | Device: {device}")

    return loss.item(), test_loss.item()

# Download the helper functions from the Learn PyTorch repo if they are not already downloaded.
# import_helper_func() performs the same existence check and download, so reuse it here
# instead of keeping a second copy of that logic.
import_helper_func()

# Then import the plotting helpers into the module namespace
from helper_functions import plot_predictions, plot_decision_boundary

# Main function to run training and testing of the model and plot the decision boundaries
def main():
    """
    Run the full workflow: pick a device, fetch the plotting helpers, seed torch, generate the
    data, build the model, train and test it, and plot the train/test decision boundaries.

    All settings are read from the module-level hp dictionary.
    """
    # Set the device
    device = get_device()

    # Download helper functions
    import_helper_func()

    # Seed before the model is built so the weight initialisation is reproducible from the logged seed
    set_seed(hp['random_seed'], device)

    # Call the function to generate the data
    X_train, X_test, y_train, y_test = generate_data(samples=hp['n_samples'], noise=hp['noise'], seed=hp['random_seed'], size=hp['test_size'])

    # Build the model from the hyperparameters and place it on the device
    model_0 = ClassificationModel(input_features=hp['input_features'], output_features=hp['output_features'], hidden_units=hp['hidden_units']).to(device)

    # Loss function that takes raw logits
    loss_fn = nn.BCEWithLogitsLoss()

    # Optimizer
    optimizer = torch.optim.SGD(params=model_0.parameters(), lr=hp['learning_rate'])

    X_train, y_train, X_test, y_test = move_to_device(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, device=device)

    # Sanity check on the untrained model: logits -> prediction probabilities -> predicted labels
    model_0.eval()  # evaluation mode for predictions
    with torch.inference_mode():  # no gradient tracking for predictions
        y_logits = model_0(X_test)[:5]
        y_pred_labels = torch.round(torch.sigmoid(y_logits)).squeeze()  # drop the extra dimension
    logging.debug(f"Untrained predicted labels (first 5): {y_pred_labels.tolist()}")

    # A gradient scaler is only useful when training runs under autocast, i.e. off the CPU
    scaler = torch.amp.GradScaler(device) if device != "cpu" else None
    train_test_model(model_0, X_train, y_train, X_test, y_test, loss_fn, optimizer, hp['epochs'], device, scaler)

    # Plot decision boundary of the model
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.title("Train")
    plot_decision_boundary(model_0, X_train, y_train)
    plt.subplot(1, 2, 2)
    plt.title("Test")
    plot_decision_boundary(model_0, X_test, y_test)

# Run the main function
if __name__ == '__main__':
    main()

2025-02-02 22:37:40,371 - root - INFO - Seed set to: 817593105
2025-02-02 22:37:40,372 - root - INFO - Using device: cuda
2025-02-02 22:37:40,397 - root - INFO - Seed has been set: 817593105
2025-02-02 22:37:40,458 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /distilbert/distilgpt2/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
2025-02-02 22:37:40,459 - filelock - DEBUG - Attempting to acquire lock 1345733461440 on C:\Users\user\.cache\huggingface\hub\.locks\models--distilbert--distilgpt2\be4d21d94f3b4687e5a54d84bf6ab46ed0f8defd.lock
2025-02-02 22:37:40,460 - filelock - DEBUG - Lock 1345733461440 acquired on C:\Users\user\.cache\huggingface\hub\.locks\models--distilbert--distilgpt2\be4d21d94f3b4687e5a54d84bf6ab46ed0f8defd.lock
2025-02-02 22:37:40,517 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "GET /distilbert/distilgpt2/resolve/main/tokenizer_config.json HTTP/1.1" 200 26
2025-02-02 22:37:40,520 - filelock - DEBUG - Attempting to release loc

ValueError: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})`.