# Train And Test - Classification XPU

## Classification

- Imports
  - standard libs
  - 3rd party libs
  - alphabetical or logical grouping
- Set random seed
- Config and Hyperparams
- Dataset and Dataloader
- Model definition/class
- Helper functions (training, eval, visualization)
- Then main code

Note: You can toggle `torch.amp` (automatic mixed precision) on and off for testing; it works on XPU. This is not a great showcase for AMP, but it is functional for testing. The toggle is the `use_amp` setting in the hyperparameters.

In [1]:
# Standard library imports
import random
from pathlib import Path

# Third-party library imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import torch
from sklearn.datasets import make_circles
from sklearn.model_selection import train_test_split
from torch import nn

# Seed configuration: keep RANDOM_SEED fixed for reproducible runs, or set
# RANDOMIZE_SEED = True to draw a fresh seed on every run
RANDOM_SEED = 42
RANDOMIZE_SEED = False

# When randomizing, replace the static seed and print it so the run can be repeated
# (uses the standard-library `random` module imported at the top of the file)
if RANDOMIZE_SEED:
    RANDOM_SEED = random.randint(0, 1_000_000_000)
    print(f"Using seed: {RANDOM_SEED}")

# Device-agnostic setup: use the XPU backend when it is available, otherwise the CPU.
# To also support CUDA in a mixed-GPU environment, extend the chain like this
# (tries xpu first, then cuda, then cpu):
# device = "xpu" if torch.xpu.is_available() else "cuda" if torch.cuda.is_available() else "cpu"
# getattr guards against PyTorch builds that do not expose a torch.xpu module at all
_xpu_backend = getattr(torch, "xpu", None)
device = "xpu" if _xpu_backend is not None and _xpu_backend.is_available() else "cpu"
print(f"Using device: {device}")

# Hyperparameters, grouped by what they control

# Dataset
n_samples = 5000  # number of points generated for the dataset
noise = 0.03  # noise added to the circles so they are not perfect circles
test_size = 0.2  # fraction held out for testing (0.2 = 20% test, 80% train)

# Model shape
input_features = 2
hidden_units = 24
output_features = 1

# Optimization
epochs = 5000
learning_rate = 0.02

# Automatic mixed precision toggle: set to True to train with torch.amp.
# Per the original author's note, this is less accurate but technically faster.
use_amp = True

# Build the circles dataset from the hyperparameters
X, y = make_circles(n_samples, noise=noise, random_state=RANDOM_SEED)

# Scatter the raw points, coloured by label (trailing ';' suppresses notebook output)
plt.scatter(x=X[:, 0], y=X[:, 1], c=y, cmap=plt.cm.RdYlBu);

# Convert both NumPy arrays to float tensors
X, y = (torch.from_numpy(array).type(torch.float) for array in (X, y))

# Let scikit-learn do the train/test split; the seed keeps the split repeatable
split = train_test_split(X, y, test_size=test_size, random_state=RANDOM_SEED)
X_train, X_test, y_train, y_test = split

# Build model - fully connected ReLU network that outputs raw logits
class ClassificationModel(nn.Module):
    """Stack of five linear layers with a ReLU between each consecutive pair.

    Args:
        input_features: Size of each input sample.
        output_features: Size of each output sample (raw logits, no activation).
        hidden_units: Width of every hidden layer.
    """

    def __init__(self, input_features=2, output_features=1, hidden_units=8):
        super().__init__()
        # Layer widths from input to output: four hidden layers of equal width
        widths = [input_features] + [hidden_units] * 4 + [output_features]
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(in_features=fan_in, out_features=fan_out))
            layers.append(nn.ReLU())
        layers.pop()  # the final linear layer is not followed by an activation
        self.linear_layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run x through the layer stack and return the raw logits."""
        return self.linear_layer_stack(x)

# Calculate accuracy as a percentage (0-100) of matching predictions
def accuracy_fn(y_true, y_pred):
    """Return the percentage of entries in y_pred that equal y_true.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels, compared element-wise with y_true.

    Returns:
        Accuracy as a float percentage; 0.0 when y_pred is empty.
    """
    total = len(y_pred)
    if total == 0:
        # Nothing to score; avoid dividing by zero
        return 0.0
    # .item() pulls the count out as a plain Python number (this also moves it to the CPU)
    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / total) * 100

# Seed the torch RNG before the model is built so the initial weights are reproducible
torch.manual_seed(RANDOM_SEED)

# Build the model from the hyperparameters and move it to the target device
model_0 = ClassificationModel(
    input_features=input_features,
    output_features=output_features,
    hidden_units=hidden_units,
).to(device)

# Make predictions with the untrained model
model_0.eval()  # use evaluation mode when making predictions
with torch.inference_mode():  # use inference mode when making predictions
    untrained_preds = model_0(X_test.to(device))  # pass test data to model/device

# Sigmoid activation is built into this loss, which is the numerically stable way
loss_fn = nn.BCEWithLogitsLoss()

# Optimizer
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=learning_rate)

# View the first 5 outputs of the forward pass on the test data
with torch.inference_mode():
    y_logits = model_0(X_test.to(device))[:5]

# Raw logits -> prediction probabilities
y_pred_probs = torch.sigmoid(y_logits)

# Prediction probabilities -> predicted labels
y_preds = torch.round(y_pred_probs)

# In full (logits -> pred probs -> pred labels); inference mode avoids
# building an autograd graph for a prediction-only forward pass
with torch.inference_mode():
    y_pred_labels = torch.round(torch.sigmoid(model_0(X_test.to(device))[:5]))

# Get rid of the extra dimension (squeeze is not in-place, so keep the result)
y_preds = y_preds.squeeze()

# Seed the RNGs right before training
torch.manual_seed(RANDOM_SEED)
if device == "xpu":
    # Only touch the XPU generator when the XPU backend is actually in use
    torch.xpu.manual_seed(RANDOM_SEED)

# Put data on target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

# Initialize GradScaler for mixed precision training; with enabled=False its
# scale()/step()/update() calls become pass-throughs, so the training loop can
# call them unconditionally whether or not use_amp is set
scaler = torch.amp.GradScaler(device, enabled=use_amp)

# Training and evaluation loop

# Report progress about ten times per run; integer arithmetic keeps the modulo
# check exact, and max() keeps the interval valid when epochs < 10
log_interval = max(1, epochs // 10)

for epoch in range(epochs):
    # Training
    model_0.train()

    # autocast is a no-op when use_amp is False, so one code path covers both modes.
    # The forward pass and the loss both run under autocast; backward runs outside it.
    with torch.amp.autocast(device, enabled=use_amp):
        y_logits = model_0(X_train).squeeze()  # raw logits
        y_pred = torch.round(torch.sigmoid(y_logits))  # logits -> pred probs -> pred labels
        loss = loss_fn(y_logits, y_train)

    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)
    optimizer.zero_grad()

    # Backward pass and optimizer step go through the GradScaler
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()

    # Testing
    model_0.eval()
    with torch.inference_mode(), torch.amp.autocast(device, enabled=use_amp):
        test_logits = model_0(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        test_loss = loss_fn(test_logits, y_test)

    test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Print output
    if epoch % log_interval == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f} | Acc: {acc:.2f}% | Test Loss: {test_loss:.5f} | Test Acc: {test_acc:.2f}% | Device: {device}")

# Download the helper functions from the Learn PyTorch repo if not already downloaded
# NOTE(review): this fetches Python source over the network and then imports
# (executes) it; pin a commit or vendor the file if that is a concern
helper_path = Path("helper_functions.py")
if helper_path.is_file():
    print("File exists, skipping download")
else:
    print("Downloading")
    response = requests.get(
        "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py",
        timeout=30,  # do not hang forever on a stalled connection
    )
    # Fail loudly on an HTTP error instead of saving an error page as a .py file
    response.raise_for_status()
    helper_path.write_bytes(response.content)

# Then import the file
from helper_functions import plot_predictions, plot_decision_boundary

# Plot the model's decision boundary for the train and test splits side by side
plt.figure(figsize=(12, 6))
panels = (("Train", X_train, y_train), ("Test", X_test, y_test))
for position, (title, features, labels) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(title)
    plot_decision_boundary(model_0, features, labels)



