In [5]:
!pip install torchvision lightning tensorboard

Defaulting to user installation because normal site-packages is not writeable


In [7]:
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
import lightning as L
import torch.nn.functional as F

class MNISTModel(L.LightningModule):
    """Single-layer classifier that maps flattened 28x28 inputs to 10 logits.

    Training and validation both use cross-entropy loss; the optimizer is
    plain SGD.

    Args:
        learning_rate: Step size passed to the SGD optimizer. Defaults to
            0.1.
    """

    def __init__(self, learning_rate=0.1):
        super().__init__()
        # Model architecture: one linear layer, 784 inputs -> 10 outputs.
        self.linear = torch.nn.Linear(28 * 28, 10)
        self.learning_rate = learning_rate

    def forward(self, x):
        """Flatten every dimension after the first and apply the linear layer."""
        x = x.flatten(start_dim=1)
        return self.linear(x)

    def _forward_and_loss(self, batch):
        """Run the model on an ``(inputs, targets)`` batch.

        Returns:
            Tuple ``(output, targets, loss)`` where ``loss`` is the
            cross-entropy between ``output`` and ``targets``.
        """
        X, y = batch
        output = self(X)
        return output, y, F.cross_entropy(output, y)

    def training_step(self, batch, batch_idx):
        """Compute, log (as ``train_loss``) and return the loss for one batch."""
        _, _, loss = self._forward_and_loss(batch)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` and ``val_acc`` for one batch and return both."""
        output, y, loss = self._forward_and_loss(batch)
        acc = accuracy(output, y)
        self.log('val_loss', loss)
        self.log('val_acc', acc)
        return {'loss': loss, 'accuracy': acc}

    def configure_optimizers(self):
        """Return an SGD optimizer over all parameters of this module."""
        # self.parameters() (instead of self.linear.parameters()) keeps the
        # optimizer complete if further layers are added to the model.
        optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate)
        return optimizer

def accuracy(output, label):
    """Return the fraction of rows whose highest-scoring index equals the label.

    Args:
        output: Tensor of scores; the arg-max is taken along dimension 1.
        label: Tensor of target indices compared element-wise with the
            predicted indices.

    Returns:
        Scalar float tensor holding the mean of the per-row matches.
    """
    predicted = output.argmax(dim=1)
    hits = predicted.eq(label)
    return hits.float().mean()

# Data setup

# Seed the random number generators so weight initialisation and batch
# shuffling are repeatable across runs.
L.seed_everything(42)

transform = transforms.Compose([
    transforms.ToTensor()
    ])

# Training split (train=True), shuffled every epoch.
dataset = MNIST(root='./data', train=True,
                            download=True, transform=transform)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Held-out split (train=False), iterated in fixed order.
val_dataset = MNIST(root='./data', train=False,
                            download=True, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Model, Trainer and Training
model = MNISTModel()
# accelerator="auto" lets Lightning choose the available hardware, so the cell
# also runs on machines without a GPU.
# NOTE(review): the output recorded for this cell reports `max_epochs=3`,
# while the code sets max_epochs=1 -- confirm the intended value and re-run.
trainer = L.Trainer(max_epochs=1, log_every_n_steps=10, val_check_interval=500, accelerator="auto")
trainer.fit(model, train_loader, val_loader)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type   | Params | Mode 
------------------------------------------
0 | linear | Linear | 7.9 K  | train
------------------------------------------
7.9 K     Trainable params
0         Non-trainable params
7.9 K     Total params
0.031     Total estimated model params size (MB)
1         Modules in train mode
0         Modules in eval mode


                                                                            

/home/openai/source/workshop-ki-deepdive-09-10/.venv/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=5` in the `DataLoader` to improve performance.
/home/openai/source/workshop-ki-deepdive-09-10/.venv/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=5` in the `DataLoader` to improve performance.


Epoch 2: 100%|██████████| 1875/1875 [00:24<00:00, 76.04it/s, v_num=5]

`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch 2: 100%|██████████| 1875/1875 [00:24<00:00, 76.03it/s, v_num=5]


In [8]:
# Run one validation pass over val_loader; the returned list of metric
# dictionaries is displayed as the cell output.
trainer.validate(model, dataloaders=val_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 313/313 [00:02<00:00, 145.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         val_acc            0.9207000136375427
        val_loss            0.28309309482574463
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'val_loss': 0.28309309482574463, 'val_acc': 0.9207000136375427}]

## Wie können wir das Ergebnis verbessern?

* Länger trainieren (mehr Epochen)
* Sinnvollere Lernrate (ausprobieren!)
* Größeres Netz (mehr Schichten)
* Aktivierungsfunktionen (zwischen den Schichten!)
* Normalisierung (Batch, Layer, Input)
* Dropout
* SGD mit Momentum
* Adam Optimizer
* Data Augmentation
* ChatGPT fragen :)

In [9]:
%load_ext tensorboard
%tensorboard --logdir ./lightning_logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 13086), started 0:00:42 ago. (Use '!kill 13086' to kill it.)