In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch.nn.functional as F # import convolution functions like Relu

## CPU oder GPU?
Torch erlaubt das Ausführen (vor allem der aufwändigen Lernphase) des künstlichen neuronalen Netzes auf CPU oder GPU. Ob die GPU zur Verfügung steht, hängt von dem installierten Python-Paket und der Unterstützung der Grafikkarte durch Torch ab.

In [2]:

# Pick the compute backend: CUDA first, then MPS, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


## Künstliche neuronale Netzwerke in Torch
In Torch werden neuronale Netzwerke durch Python-Klassen repräsentiert, die von der Klasse nn.Module erben.
Diese beinhalten die Netztopologie sowie die Berechnungsart.

In [3]:
# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> (Linear, ReLU) x 2 -> Linear.

    The default arguments reproduce the original fixed topology
    (784 -> 512 -> 512 -> 10). The sub-module names are unchanged, so the
    ``state_dict`` keys stay the same as before.

    Args:
        in_features: Number of values per sample after flattening.
        hidden_features: Width of both hidden layers.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the raw class scores (logits) for the batch ``x``.

        ``x`` is flattened by ``self.flatten`` before it is passed through
        the linear stack.
        """
        return self.linear_relu_stack(self.flatten(x))

## Training des Netzes
Das Training eines Netzes definiert man am besten als eigene Funktion. Das Training folgt hierbei dem bereits aus der Vorlesung bekannten Backpropagation-Verfahren. Die Art der Optimierung, der ```optimizer```, kann durch verschiedene bereits implementierte Optimierer bestimmt werden. Siehe hierzu auch den Aufruf in der ```main()```-Funktion.

In [4]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training pass over ``dataloader``.

    For each batch the prediction error is computed and backpropagated, the
    optimizer takes one step, and the gradients are cleared. The loss and the
    number of samples seen so far are printed on every 100th batch, starting
    with the first one.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches that exposes
            a ``dataset`` attribute.
        model: Module to train; it is switched to training mode.
        loss_fn: Callable ``loss_fn(pred, targets)`` returning the loss tensor.
        optimizer: Optimizer that updates the model parameters.
    """
    size = len(dataloader.dataset)
    model.train()
    for step, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass and prediction error
        batch_loss = loss_fn(model(inputs), targets)

        # Backpropagation, parameter update, then clear the gradients
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Only report progress on every 100th batch
        if step % 100 != 0:
            continue
        loss = batch_loss.item()
        current = (step + 1) * len(inputs)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

## Testing
Auch das Testing ist am besten als Funktion zu kapseln. Hierbei wird das aktuelle ```model```, also das aktuelle künstliche neuronale Netz, mit den Testdaten geprüft. Diese dürfen nicht Teil des Trainings sein.

In [5]:
def test(dataloader, model, loss_fn):
    # Get the size of the dataset and the number of batches
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    
    # Set the model to evaluation mode
    model.eval()
    
    # Initialize variables to store test loss and correct predictions
    test_loss, correct = 0, 0
    
    # Lists to store details about misclassified samples
    misclassified_samples = []
    
    # Disable gradient computation during testing
    with torch.no_grad():
        # Iterate over batches in the dataloader
        for set, (X, y) in enumerate(dataloader, 1):  # Adding 1 to start indexing from 1
            # Move input data and labels to the device (e.g., GPU)
            X, y = X.to(device), y.to(device)
            
            # Forward pass to get predictions from the model
            pred = model(X)
            
            # Compute the test loss using the specified loss function
            test_loss += loss_fn(pred, y).item()
            
            # Count the number of correct predictions
            pred_argmax = pred.argmax(1)
            comparison = pred_argmax == y
            correct += (comparison).type(torch.float).sum().item()

            y_list = y.tolist()
            pred_list = pred.argmax(1).tolist()

            for i in range(len(y_list)):
                # Check if the prediction is correct
                if pred_list[i] != y_list[i]:
                    # Save details about misclassified sample
                    misclassified_samples.append({
                        'set': set,
                        'index': i,
                        'expected_label': y_list[i],
                        'predicted_label': pred_list[i]
                    })

    # Calculate average test loss and accuracy
    test_loss /= num_batches
    correct /= size
    
    # Print the test results
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    # Print details about misclassified samples
    if misclassified_samples:
        print("Misclassified Samples:")
        for sample in misclassified_samples:
            print(f"Set {sample['set']} Index {sample['index']}: Expected {sample['expected_label']}, Predicted {sample['predicted_label']}")
    else:
        print("All samples classified correctly.")


## Die Hauptroutine
Hier passiert alles:
1. Die MNIST-Daten werden geladen und in Trainings- und Testdaten aufgeteilt
2. Die Lernparameter und Optimierungsstrategien werden festgelegt
3. Das Lernen erfolgt, gefolgt von einer Testphase pro Epoche.
4. Das Netz wird abgespeichert (so kann es schnell wieder geladen werden, ohne dass ein erneutes Training erfolgt)

In [6]:
def main(epochs=10, batch_size=64, learning_rate=0.01, momentum=0.9, model_path="model.pth"):
    """Train, evaluate, save and reload the MNIST classifier.

    Steps:
      1. Download the MNIST training and test splits into ``data/``.
      2. Wrap both splits in data loaders.
      3. Train with SGD and cross-entropy loss, testing after every epoch.
      4. Save the model's ``state_dict`` and load it into a fresh model.

    Args:
        epochs: Number of train/test rounds.
        batch_size: Batch size used by both data loaders.
        learning_rate: Learning rate passed to SGD.
        momentum: Momentum passed to SGD.
        model_path: File the ``state_dict`` is written to and read back from.
    """
    # Download training data from open datasets.
    training_data = datasets.MNIST(
        root="data",
        train=True,
        download=True,
        transform=ToTensor(),
    )

    # Download test data from open datasets.
    test_data = datasets.MNIST(
        root="data",
        train=False,
        download=True,
        transform=ToTensor(),
    )

    # Create data loaders.
    train_dataloader = DataLoader(training_data, batch_size=batch_size)
    test_dataloader = DataLoader(test_data, batch_size=batch_size)

    # Show the shape of the first test batch only.
    for X, y in test_dataloader:
        print(f"Shape of X [N, C, H, W]: {X.shape}")
        print(f"Shape of y: {y.shape} {y.dtype}")
        break

    model = NeuralNetwork().to(device)
    print(model)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        train(train_dataloader, model, loss_fn, optimizer)
        test(test_dataloader, model, loss_fn)
    print("Done!")

    torch.save(model.state_dict(), model_path)
    print(f"Saved PyTorch Model State to {model_path}")

    # Reload the checkpoint into a fresh model. map_location places the
    # tensors on the currently selected device regardless of where they
    # were saved from.
    model = NeuralNetwork().to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    # The reloaded model is put into evaluation mode.
    model.eval()


In [7]:
# Run the whole pipeline: load MNIST, train and test per epoch, then save the model.
main()

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
Epoch 1
-------------------------------
loss: 2.305692  [   64/60000]


loss: 0.740919  [ 6464/60000]
loss: 0.402162  [12864/60000]
loss: 0.335466  [19264/60000]
loss: 0.233207  [25664/60000]
loss: 0.326106  [32064/60000]
loss: 0.220903  [38464/60000]
loss: 0.394687  [44864/60000]
loss: 0.370335  [51264/60000]
loss: 0.274210  [57664/60000]
Test Error: 
 Accuracy: 93.0%, Avg loss: 0.226574 

Misclassified Samples:
Set 1 Index 8: Expected 5, Predicted 6
Set 1 Index 33: Expected 4, Predicted 6
Set 1 Index 38: Expected 2, Predicted 3
Set 2 Index 47: Expected 7, Predicted 1
Set 2 Index 55: Expected 2, Predicted 8
Set 2 Index 60: Expected 7, Predicted 4
Set 3 Index 21: Expected 2, Predicted 9
Set 3 Index 44: Expected 2, Predicted 3
Set 4 Index 1: Expected 9, Predicted 4
Set 4 Index 25: Expected 6, Predicted 5
Set 4 Index 41: Expected 8, Predicted 7
Set 4 Index 49: Expected 9, Predicted 8
Set 4 Index 53: Expected 3, Predicted 5
Set 4 Index 57: Expected 2, Predicted 1
Set 5 Index 0: Expected 2, Predicted 1
Set 5 Index 1: Expected 8, Predicted 1
Set 5 Index 3: Expe