# Copyright

<PRE>
This notebook was created as part of the "Deep learning / VITMMA19" class at
Budapest University of Technology and Economics, Hungary,
https://portal.vik.bme.hu/kepzes/targyak/VITMMA19

Any re-use or publication of any part of the notebook is only allowed with the
 written consent of the authors.

2024 (c) Mohammed Salah Al-Radhi and Tamás Gábor Csapó (malradhi@tmit.bme.hu)
</PRE>

In [1]:
### HYPEROPT: task during the class - we will do this together
# add WandB.ai integration to the code
# (help: https://docs.wandb.ai/guides/integrations/lightning )
# run at least 3 different trainings

In [2]:
# install pytorch lithening
!pip install pytorch-lightning --quiet
!pip install wandb --quiet

In [3]:
import pytorch_lightning as pl
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader,random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from pytorch_lightning.loggers import WandbLogger
import wandb


In [4]:
# create one class to deal with data
class CifarDataModule(pl.LightningDataModule):
  def __init__(self, batch_size, data_dir="./"):
    super().__init__()
    self.data_dir=data_dir
    self.batch_size=batch_size
    self.transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])
    self.num_classes=10

  def prepare_data(self):
    CIFAR10(self.data_dir,train=True,download=True)
    CIFAR10(self.data_dir,train=False,download=True)

  def setup(self, stage=None):
    if stage=='fit' or stage is None:
      cifar_full=CIFAR10(self.data_dir,train=True,transform=self.transform)
      self.cifar_train,self.cifar_val=random_split(cifar_full,[45000,5000])

    if stage=='test' or stage is None:
      self.cifar_test=CIFAR10(self.data_dir,train=False,transform=self.transform)

  def train_dataloader(self):
    return DataLoader(self.cifar_train,batch_size=self.batch_size,shuffle=True,num_workers=2)

  def val_dataloader(self):
    return DataLoader(self.cifar_val,batch_size=self.batch_size,shuffle=False,num_workers=2)

  def test_dataloader(self):
    return DataLoader(self.cifar_test,batch_size=self.batch_size,shuffle=False,num_workers=2)

In [5]:
class CIFAR10LitModel(pl.LightningModule):
    def __init__(self, input_shape, num_classes, neurons_FC1=512, neurons_FC2=128, learning_rate=1e-3, optimizer='adam'):
      super().__init__()
      self.save_hyperparameters()
      self.input_shape=input_shape
      self.learning_rate=learning_rate
      self.optimizer_choice = optimizer
      self.neurons_FC1 = neurons_FC1
      self.neurons_FC2 = neurons_FC2

      # model architecture
      self.conv1=nn.Conv2d(3,32,3,1)
      self.conv2=nn.Conv2d(32,32,3,1)
      self.conv3=nn.Conv2d(32,64,3,1)
      self.conv4=nn.Conv2d(64,64,3,1)
      self.pool1=nn.MaxPool2d(2)
      self.pool2=nn.MaxPool2d(2)

      n_sizes = self._get_output_shape(input_shape)
      self.fc1=nn.Linear(n_sizes, self.neurons_FC1)
      self.fc2=nn.Linear(self.neurons_FC1, self.neurons_FC2)
      self.fc3=nn.Linear(self.neurons_FC2, num_classes)

      self.train_acc=Accuracy(task='multiclass',num_classes=10)
      self.val_acc=Accuracy(task='multiclass',num_classes=10)
      self.test_acc=Accuracy(task='multiclass',num_classes=10)


    def _get_output_shape(self, shape):
          '''returns the size of the output tensor from the conv layers'''
          batch_size = 1
          input = torch.autograd.Variable(torch.rand(batch_size, *shape))
          output_feat = self._feature_extractor(input)
          n_size = output_feat.data.view(batch_size, -1).size(1)
          return n_size


  # conv1,relu, conv2,relu, maxpool,conv3,relu,conv4,relu,maxpool
    def _feature_extractor(self,x):
      x=F.relu(self.conv1(x))
      x=self.pool1(F.relu(self.conv2(x)))
      x=F.relu(self.conv3(x))
      x=self.pool2(F.relu(self.conv4(x)))
      return x


    def forward(self,x):
      x=self._feature_extractor(x)
      x=x.view(x.size(0),-1)
      x=F.relu(self.fc1(x))
      x=F.relu(self.fc2(x))
      x=F.log_softmax(self.fc3(x),dim=1)
      return x

    def training_step(self, batch, batch_idx):
      x, y = batch
      logits = self(x)
      loss = F.nll_loss(logits, y)
      # metric
      preds = torch.argmax(logits, dim=1)
      acc = self.train_acc(preds, y)
      self.log('train_loss', loss, on_step=True, on_epoch=True, logger=True)
      self.log('train_acc', acc, on_step=True, on_epoch=True, logger=True)
      return loss

    # validation loop
    def validation_step(self, batch, batch_idx):
      x, y = batch
      logits = self(x)
      loss = F.nll_loss(logits, y)
      preds = torch.argmax(logits, dim=1)
      acc = self.val_acc(preds, y)
      self.log('val_loss', loss, prog_bar=True)
      self.log('val_acc', acc, prog_bar=True)
      return loss

    # test loop
    def test_step(self,batch,batch_idx):
      x,y=batch
      logits=self(x)
      loss=F.nll_loss(logits,y)

      pred=torch.argmax(logits,dim=1)
      acc=self.test_acc(pred,y)
      self.log('test_loss',loss,on_epoch=True)
      self.log('test_acc',acc,on_epoch=True)
      return loss

    def configure_optimizers(self):
      if self.optimizer_choice == 'adam':
            optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
      elif self.optimizer_choice == 'sgd':
            optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate, momentum=0.9)
      return optimizer


In [6]:
# class for visualizing one batch of validation images along with predicted and rall class label
class ImagePredictionLogger(pl.Callback):
    def __init__(self, val_samples, num_samples=32):
        super().__init__()
        self.val_imgs, self.val_labels = val_samples
        self.val_imgs = self.val_imgs[:num_samples]
        self.val_labels = self.val_labels[:num_samples]

    def on_validation_epoch_end(self, trainer, pl_module):
        val_imgs = self.val_imgs.to(device=pl_module.device)
        logits = pl_module(val_imgs)
        preds = torch.argmax(logits, 1)

        trainer.logger.experiment.log({
            "examples": [wandb.Image(x, caption=f"Pred:{pred}, Label:{y}")
                            for x, pred, y in zip(val_imgs, preds, self.val_labels)],
            "global_step": trainer.global_step
            })

In [7]:
# Instantiate the cifar and model
cifar = CifarDataModule(batch_size=32)
cifar.prepare_data()
cifar.setup()

# Grab samples to log predictions on
samples = next(iter(cifar.val_dataloader()))

Files already downloaded and verified
Files already downloaded and verified


In [8]:
wandb.login(key='c5e6ab417acd849694f2c0db0cb79412d14620a3')
sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'val_acc',
        'goal': 'maximize'
    },
    'parameters': {
        'learning_rate': {
            'values': [1e-3, 1e-4, 1e-5]
        },
        'optimizer': {
            'values': ['adam', 'sgd']
        },
        'neurons_FC1': {
            'values': [256, 512]
        },
        'neurons_FC2': {
            'values': [128, 256]
        },
        'batch_size': {
            'values': [32, 64]
        }
    }
}


sweep_id = wandb.sweep(sweep_config, project="DeepLearning")


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33m736593829[0m ([33m736593829-budapest-university-of-technology-and-economics[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: gaz5ba63
Sweep URL: https://wandb.ai/736593829-budapest-university-of-technology-and-economics/DeepLearning/sweeps/gaz5ba63


In [9]:
### WandB, you have have an account(if you don't, create one)
def train_model():
    wandb.init()
    config=wandb.config
    wandb_logger = WandbLogger(project='DeepLearning', job_type='train', log_model="all")
    # instantiate classes
    dm = CifarDataModule(config.batch_size)
    dm.prepare_data()
    dm.setup()
    model = CIFAR10LitModel(
        input_shape=(3, 32, 32),
        num_classes=dm.num_classes,
        neurons_FC1=config.neurons_FC1,
        neurons_FC2=config.neurons_FC2,
        learning_rate=config.learning_rate,
        optimizer=config.optimizer
    )
    wandb_logger.watch(model)
    # Initialize Callbacks
    checkpoint_callback = pl.callbacks.ModelCheckpoint()
    early_stop_callback = pl.callbacks.EarlyStopping(monitor="val_acc", patience=3, verbose=False, mode="max")
    ### WandB
    trainer = pl.Trainer(max_epochs=5,
                     logger=wandb_logger,
                     callbacks=[checkpoint_callback, early_stop_callback,ImagePredictionLogger(samples)]
                    )
    # Train the model
    trainer.fit(model, dm)

    # Evaluate the model
    trainer.test(dataloaders=cifar.test_dataloader())
    # tell the WandB you have finished
    wandb.finish()
wandb.agent(sweep_id, function=train_model, count=10)

[34m[1mwandb[0m: Agent Starting Run: f7ytu3x5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	neurons_FC1: 512
[34m[1mwandb[0m: 	neurons_FC2: 256
[34m[1mwandb[0m: 	optimizer: sgd


Files already downloaded and verified
Files already downloaded and verified


/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 9.2 K  | train
2  | conv3     | Conv2d             | 18.5 K | train
3  | conv4     | Conv2d             | 36.9 K | train
4  | pool1     | MaxPool2d          | 0      | train
5  | pool2     | MaxPool2d          | 0      | train
6  | fc1       | Linear             | 819 K  | train
7  | fc2       | Linear             | 131 K  | train
8  | fc3       | Linear             | 2.6 K  | train
9  | train_acc | MulticlassAccuracy | 0      | train
10 | val_acc   | MulticlassAccuracy | 0      | train
11 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]



Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./DeepLearning/f7ytu3x5/checkpoints/epoch=3-step=2816.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./DeepLearning/f7ytu3x5/checkpoints/epoch=3-step=2816.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='31.540 MB of 31.540 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆█
global_step,▁▃▅▆█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▁▁▁
train_acc_step,▂▅▄▅▃▆▄▆▅▄▃▇▂▃▅▄▄▃▇▇▅▄▄▅▅▆▃▇▄█▅▂▇▇▂▂▅▆▁▃
train_loss_epoch,█▅▃▁
train_loss_step,▅▅▆▆▆▄▅▃▃▅▅▃▄▅▆▂▆▅▄▇▅▂▆▃▂▂▅▃▄▆▇▄▁▆▅▅▄█▅▄
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇████
val_acc,▁▁▁▁

0,1
epoch,4.0
global_step,2816.0
test_acc,0.1
test_loss,2.3032
train_acc_epoch,0.09982
train_acc_step,0.10938
train_loss_epoch,2.30324
train_loss_step,2.29974
trainer/global_step,2816.0
val_acc,0.1016


[34m[1mwandb[0m: Agent Starting Run: gmzheatd with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons_FC1: 256
[34m[1mwandb[0m: 	neurons_FC2: 128
[34m[1mwandb[0m: 	optimizer: sgd


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112871422220552, max=1.0…

Files already downloaded and verified
Files already downloaded and verified


/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 9.2 K  | train
2  | conv3     | Conv2d             | 18.5 K | train
3  | conv4     | Conv2d             | 36.9 K | train
4  | pool1     | MaxPool2d          | 0      | train
5  | pool2     | MaxPool2d          | 0      | train
6  | fc1       | Linear             | 409 K  | train
7  | fc2       | Linear             | 32.9 K | train
8  | fc3       | Linear             | 1.3 K  | train
9  | train_acc | MulticlassAccuracy | 0      | train
10 | val_acc   | MulticlassAccuracy | 0      | train
11 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
509 K     Trainable params
0         Non-trainable params
509 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./DeepLearning/gmzheatd/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./DeepLearning/gmzheatd/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='19.957 MB of 19.957 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▇▇▇▇▇▇▇█
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▂▅▇█
train_acc_step,▂▂▂▃▂▂▂▁▂▄▃▃▅▄▅▅▄▅▆▅▅▅▆▅▆▅▅▆▅▄▆▆▄█▅▇▆▆▆▇
train_loss_epoch,██▄▂▁
train_loss_step,██████████████▆▆▅▅▅▆▄▃▄▆▂▃▄▃▄▃▃▂▃▂▂▂▃▁▃▂
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇███
val_acc,▁▄▆▇█

0,1
epoch,5.0
global_step,7035.0
test_acc,0.4534
test_loss,1.47668
train_acc_epoch,0.43047
train_acc_step,0.53125
train_loss_epoch,1.54622
train_loss_step,1.47443
trainer/global_step,7035.0
val_acc,0.444


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: yrlg9gdx with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	neurons_FC1: 512
[34m[1mwandb[0m: 	neurons_FC2: 128
[34m[1mwandb[0m: 	optimizer: sgd


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 9.2 K  | train
2  | conv3     | Conv2d             | 18.5 K | train
3  | conv4     | Conv2d             | 36.9 K | train
4  | pool1     | MaxPool2d          | 0      | train
5  | pool2     | MaxPool2d          | 0      | train
6  | fc1       | Linear             | 819 K  | train
7  | fc2       | Linear             | 65.7 K | train
8  | fc3       | Linear             | 1.3 K  | train
9  | train_acc | MulticlassAccuracy | 0      | train
10 | val_acc   | MulticlassAccuracy | 0      | train
11 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
952 K     Trainable params
0         Non-trainable params
952 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./DeepLearning/yrlg9gdx/checkpoints/epoch=3-step=5628.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./DeepLearning/yrlg9gdx/checkpoints/epoch=3-step=5628.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='29.496 MB of 29.496 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆▆▆▆▆███████████
global_step,▁▃▅▆█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▁▁▁
train_acc_step,▅▆█▆▄▃▂▄▆▆▂▃▃▄▆▅▃▄▃▁▅▃▂▆█▃▅▄▆▃▇▇▄▄▃▃▃▅▇▃
train_loss_epoch,█▆▃▁
train_loss_step,▃▅▄▄▂▃▄▅▇▆▄▅▅▇▂▄▅▂▃▃█▅▅▃▅▅▃▅▇▄▄▅▅▅▄▅▄▁▄▅
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇████
val_acc,▁▁▁▁

0,1
epoch,4.0
global_step,5628.0
test_acc,0.1
test_loss,2.30318
train_acc_epoch,0.1002
train_acc_step,0.09375
train_loss_epoch,2.30319
train_loss_step,2.30314
trainer/global_step,5628.0
val_acc,0.0982


[34m[1mwandb[0m: Agent Starting Run: 1ce1d44e with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons_FC1: 512
[34m[1mwandb[0m: 	neurons_FC2: 256
[34m[1mwandb[0m: 	optimizer: sgd


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 9.2 K  | train
2  | conv3     | Conv2d             | 18.5 K | train
3  | conv4     | Conv2d             | 36.9 K | train
4  | pool1     | MaxPool2d          | 0      | train
5  | pool2     | MaxPool2d          | 0      | train
6  | fc1       | Linear             | 819 K  | train
7  | fc2       | Linear             | 131 K  | train
8  | fc3       | Linear             | 2.6 K  | train
9  | train_acc | MulticlassAccuracy | 0      | train
10 | val_acc   | MulticlassAccuracy | 0      | train
11 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./DeepLearning/1ce1d44e/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./DeepLearning/1ce1d44e/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='39.395 MB of 39.395 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▃▃▃▃▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆██████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▄▆▇█
train_acc_step,▂▁▂▂▃▁▂▂▃▂▂▃▂▄▂▅▅▆▄▅▄▆▅▅▅▆▅▆▆▆▅▆█▅▄▇▆▅▅▆
train_loss_epoch,█▆▃▂▁
train_loss_step,█████████▇▅▆▇▅▆▅▃▃▅▃▄▃▃▂▄▄▅▃▃▃▁▁▃▂▄▂▂▃▂▂
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇█████
val_acc,▁▄▆▇█

0,1
epoch,5.0
global_step,7035.0
test_acc,0.4887
test_loss,1.40328
train_acc_epoch,0.4652
train_acc_step,0.5
train_loss_epoch,1.47253
train_loss_step,1.19771
trainer/global_step,7035.0
val_acc,0.4988


[34m[1mwandb[0m: Agent Starting Run: twafmxi5 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons_FC1: 512
[34m[1mwandb[0m: 	neurons_FC2: 256
[34m[1mwandb[0m: 	optimizer: sgd


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 9.2 K  | train
2  | conv3     | Conv2d             | 18.5 K | train
3  | conv4     | Conv2d             | 36.9 K | train
4  | pool1     | MaxPool2d          | 0      | train
5  | pool2     | MaxPool2d          | 0      | train
6  | fc1       | Linear             | 819 K  | train
7  | fc2       | Linear             | 131 K  | train
8  | fc3       | Linear             | 2.6 K  | train
9  | train_acc | MulticlassAccuracy | 0      | train
10 | val_acc   | MulticlassAccuracy | 0      | train
11 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./DeepLearning/twafmxi5/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./DeepLearning/twafmxi5/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='39.395 MB of 39.395 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▃▃▃▃▃▃▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆███████████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▂▄▆█
train_acc_step,▂▁▁▁▂▃▁▂▂▂▃▅▄▄▂▃▅▁▆▅▆▃▄▅▅▅▅▅▅█▇▇▆▆▄█▇▆▆█
train_loss_epoch,██▆▃▁
train_loss_step,█████████████████▆▅▅▆▅▄▅▄▅▃▅▅▄▂▃▄▄▃▆▄▄▄▁
trainer/global_step,▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇█
val_acc,▁▂▅▇█

0,1
epoch,5.0
global_step,7035.0
test_acc,0.4434
test_loss,1.53342
train_acc_epoch,0.4084
train_acc_step,0.53125
train_loss_epoch,1.6297
train_loss_step,1.37416
trainer/global_step,7035.0
val_acc,0.4312


[34m[1mwandb[0m: Agent Starting Run: 4mo0jlbk with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons_FC1: 512
[34m[1mwandb[0m: 	neurons_FC2: 256
[34m[1mwandb[0m: 	optimizer: sgd


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 9.2 K  | train
2  | conv3     | Conv2d             | 18.5 K | train
3  | conv4     | Conv2d             | 36.9 K | train
4  | pool1     | MaxPool2d          | 0      | train
5  | pool2     | MaxPool2d          | 0      | train
6  | fc1       | Linear             | 819 K  | train
7  | fc2       | Linear             | 131 K  | train
8  | fc3       | Linear             | 2.6 K  | train
9  | train_acc | MulticlassAccuracy | 0      | train
10 | val_acc   | MulticlassAccuracy | 0      | train
11 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./DeepLearning/4mo0jlbk/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./DeepLearning/4mo0jlbk/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='39.395 MB of 39.395 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▃▃▃▃▃▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆██████████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▂▄▇█
train_acc_step,▂▂▂▂▂▂▂▂▁▂▃▃▃▂▄▅▅▄▄▃▆▂▅▄▇▄▄▆▄▇▆█▅▅▇▇█▇▆▆
train_loss_epoch,██▅▂▁
train_loss_step,████████████████████▇▇▆▆▅▄▅▆▇▄▂▄▃▁▃▁▃▃▃▃
trainer/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇█████
val_acc,▁▃▆▇█

0,1
epoch,5.0
global_step,7035.0
test_acc,0.4474
test_loss,1.49413
train_acc_epoch,0.43298
train_acc_step,0.375
train_loss_epoch,1.54357
train_loss_step,1.58432
trainer/global_step,7035.0
val_acc,0.4512


[34m[1mwandb[0m: Agent Starting Run: jpblxbaf with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons_FC1: 512
[34m[1mwandb[0m: 	neurons_FC2: 256
[34m[1mwandb[0m: 	optimizer: sgd


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 9.2 K  | train
2  | conv3     | Conv2d             | 18.5 K | train
3  | conv4     | Conv2d             | 36.9 K | train
4  | pool1     | MaxPool2d          | 0      | train
5  | pool2     | MaxPool2d          | 0      | train
6  | fc1       | Linear             | 819 K  | train
7  | fc2       | Linear             | 131 K  | train
8  | fc3       | Linear             | 2.6 K  | train
9  | train_acc | MulticlassAccuracy | 0      | train
10 | val_acc   | MulticlassAccuracy | 0      | train
11 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./DeepLearning/jpblxbaf/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./DeepLearning/jpblxbaf/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='39.395 MB of 39.395 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▃▃▃▃▃▃▃▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆███████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▃▆▇█
train_acc_step,▂▃▃▃▁▂▂▄▄▃▄▄▅▃▆▄▄▅▄▄▄▆▅▄▅▆▅▅▃▅▆▆▄▇▅█▅▅▅▆
train_loss_epoch,█▇▄▂▁
train_loss_step,██████████████▇▅▅▅▄▅▅▄▅▅▅▆▄▄▄▄▄▄▃▅▂▃▁▃▆▃
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██
val_acc,▁▄▇▇█

0,1
epoch,5.0
global_step,7035.0
test_acc,0.4426
test_loss,1.49721
train_acc_epoch,0.43813
train_acc_step,0.4375
train_loss_epoch,1.53591
train_loss_step,1.39724
trainer/global_step,7035.0
val_acc,0.4496


[34m[1mwandb[0m: Agent Starting Run: 908nyd3i with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons_FC1: 512
[34m[1mwandb[0m: 	neurons_FC2: 256
[34m[1mwandb[0m: 	optimizer: sgd


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 9.2 K  | train
2  | conv3     | Conv2d             | 18.5 K | train
3  | conv4     | Conv2d             | 36.9 K | train
4  | pool1     | MaxPool2d          | 0      | train
5  | pool2     | MaxPool2d          | 0      | train
6  | fc1       | Linear             | 819 K  | train
7  | fc2       | Linear             | 131 K  | train
8  | fc3       | Linear             | 2.6 K  | train
9  | train_acc | MulticlassAccuracy | 0      | train
10 | val_acc   | MulticlassAccuracy | 0      | train
11 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./DeepLearning/908nyd3i/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./DeepLearning/908nyd3i/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='39.395 MB of 39.395 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▃▃▃▃▃▃▃▃▃▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆██████████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▃▅▇█
train_acc_step,▁▂▂▂▁▃▁▁▂▂▂▃▁▅▃▃▄▃▁▄▄▄▃▅▃▅▄▅▃▇▃▆▅▆▆▅▅▆█▄
train_loss_epoch,█▇▄▂▁
train_loss_step,█████████▇▅▇█▇▆▆▅▆▆▅▅▇▄▄▄▃▄▅▂▁▃▃▃▃▄▃▂▁▁▄
trainer/global_step,▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
val_acc,▁▃▅▇█

0,1
epoch,5.0
global_step,7035.0
test_acc,0.4799
test_loss,1.42514
train_acc_epoch,0.45431
train_acc_step,0.3125
train_loss_epoch,1.50118
train_loss_step,1.59683
trainer/global_step,7035.0
val_acc,0.4744


[34m[1mwandb[0m: Agent Starting Run: 9u4m35mk with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons_FC1: 512
[34m[1mwandb[0m: 	neurons_FC2: 256
[34m[1mwandb[0m: 	optimizer: sgd


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 9.2 K  | train
2  | conv3     | Conv2d             | 18.5 K | train
3  | conv4     | Conv2d             | 36.9 K | train
4  | pool1     | MaxPool2d          | 0      | train
5  | pool2     | MaxPool2d          | 0      | train
6  | fc1       | Linear             | 819 K  | train
7  | fc2       | Linear             | 131 K  | train
8  | fc3       | Linear             | 2.6 K  | train
9  | train_acc | MulticlassAccuracy | 0      | train
10 | val_acc   | MulticlassAccuracy | 0      | train
11 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./DeepLearning/9u4m35mk/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./DeepLearning/9u4m35mk/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='39.395 MB of 39.395 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▇▇▇▇▇▇▇▇█
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▂▅▇█
train_acc_step,▂▂▁▂▃▁▁▁▁▃▃▂▂▃▁▃▂▂▅▄▃▃▅▃▆▄▆▄▅▅▆▆▅▆▅▆▆▇█▅
train_loss_epoch,██▅▂▁
train_loss_step,███████████████▇▇▇▇▆▅▆▅▆▆▃▇▆▅▄▅▆▄▄▂▃▃▃▁▄
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
val_acc,▁▃▆▇█

0,1
epoch,5.0
global_step,7035.0
test_acc,0.4475
test_loss,1.48671
train_acc_epoch,0.42756
train_acc_step,0.40625
train_loss_epoch,1.55254
train_loss_step,1.66735
trainer/global_step,7035.0
val_acc,0.4502


[34m[1mwandb[0m: Agent Starting Run: atp7piqs with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons_FC1: 512
[34m[1mwandb[0m: 	neurons_FC2: 256
[34m[1mwandb[0m: 	optimizer: sgd


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 9.2 K  | train
2  | conv3     | Conv2d             | 18.5 K | train
3  | conv4     | Conv2d             | 36.9 K | train
4  | pool1     | MaxPool2d          | 0      | train
5  | pool2     | MaxPool2d          | 0      | train
6  | fc1       | Linear             | 819 K  | train
7  | fc2       | Linear             | 131 K  | train
8  | fc3       | Linear             | 2.6 K  | train
9  | train_acc | MulticlassAccuracy | 0      | train
10 | val_acc   | MulticlassAccuracy | 0      | train
11 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./DeepLearning/atp7piqs/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./DeepLearning/atp7piqs/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='39.395 MB of 39.395 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆█████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▂▅▇█
train_acc_step,▃▁▂▂▁▂▁▂▃▂▂▃▂▃▃▁▃▃▇▆▇▅▆▆▃█▆▇█▇▅▇▆▆▆▆▆▆▆▇
train_loss_epoch,██▅▂▁
train_loss_step,██████████████████▇▇▅▆▆▅▃▄▆▃▂▃▃▆▂▃▅▄▂▃▁▅
trainer/global_step,▁▁▂▂▂▂▂▂▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇███
val_acc,▁▃▆▇█

0,1
epoch,5.0
global_step,7035.0
test_acc,0.455
test_loss,1.47932
train_acc_epoch,0.43027
train_acc_step,0.4375
train_loss_epoch,1.54904
train_loss_step,1.35648
trainer/global_step,7035.0
val_acc,0.4638


In [10]:
# Start training the model
train_model()

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112658955557384, max=1.0…

Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


/usr/local/lib/python3.10/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory ./DeepLearning/atp7piqs/checkpoints exists and is not empty.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 9.2 K  | train
2  | conv3     | Conv2d             | 18.5 K | train
3  | conv4     | Conv2d             | 36.9 K | train
4  | pool1     | MaxPool2d          | 0      | train
5  | pool2     | MaxPool2d          | 0      | train
6  | fc1       | Linear             | 819 K  | train
7  | fc2       | Linear             | 131 K  | train
8  | fc3       | Linear             | 2.6 K  | train
9  | train_acc | MulticlassAccuracy | 0      | train
10 | val_acc   | MulticlassAccuracy | 0      | train

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./DeepLearning/atp7piqs/checkpoints/epoch=4-step=7035-v1.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./DeepLearning/atp7piqs/checkpoints/epoch=4-step=7035-v1.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='39.395 MB of 39.395 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▃▃▃▃▃▃▅▅▅▅▅▅▆▆▆▆▆▆▆▆██████████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▃▅▇█
train_acc_step,▁▁▁▁▃▁▂▂▄▃▂▃▂▁▂▅▅▃▄▅▆▁▆▅▅▃▅▅▆▅▆█▆▇▄▆█▇▇▇
train_loss_epoch,█▇▅▃▁
train_loss_step,██████████████▇█▇▅▇▆▆▇▆▄▆▆▅▄▅▄▄▄▄▄▃▄▄▁▂▄
trainer/global_step,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▇▇▇▇▇▇▇███
val_acc,▁▄▅▇█

0,1
epoch,5.0
global_step,7035.0
test_acc,0.465
test_loss,1.48013
train_acc_epoch,0.42964
train_acc_step,0.4375
train_loss_epoch,1.56399
train_loss_step,1.70069
trainer/global_step,7035.0
val_acc,0.4584


In [11]:
### TASK OF THE STUDENT

# extend WandB.ai integration in the code with sweeps
# (e.g. add variables like learning rate, optimizer, neurons_FC1, neurons_FC2)
# help: https://docs.wandb.ai/guides/sweeps and
#       https://github.com/wandb/wandb/issues/5003
# store the hyperparameters and val_acc to wandb
# run at least 10 trainings
# in wandb.ai, export the result of the runs as a .csv file,
# in wandb.ai, create a report from the sweep results and share it by submitting
# the link in Moodle.