In [1]:
import torch
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import lightning as L
import numpy as np
from torchsummary import summary
from lightning.pytorch.loggers.tensorboard import TensorBoardLogger

from nn import NN
from utils import Parser as P

  warn(


In [2]:
# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

In [9]:
config = P.read_config(config_file= "scripts/config/base.yml")

In [10]:
# Initialize VAE model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Initialize NN model
nn_config_file = config['model']['config']
classifier = NN(config_file= nn_config_file).to(device)

In [11]:
# Summarize the structure of the model
summary(classifier,(3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1                 [-1, 3072]               0
            Linear-2                   [-1, 10]          30,730
Total params: 30,730
Trainable params: 30,730
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.02
Params size (MB): 0.12
Estimated Total Size (MB): 0.15
----------------------------------------------------------------


In [12]:
# Initialize optimizer
optimizer_type = config['trainer']['optimizer']['type']
optimizer_args = config['trainer']['optimizer']['args']
classifier.optimizer = getattr(torch.optim, optimizer_type)(classifier.parameters(), **optimizer_args)

In [13]:
# Initialize TensorBoard logger
logger = TensorBoardLogger("logs", name="nn_experiment")

In [14]:
# Define dataset and dataloader
train_transform = P.transforms(config['data']['train_transform'])

# Define dataset and dataloader
test_transform = P.transforms(config['data']['test_transform'])

In [17]:
# Load dataset and apply transforms
dataset_name = config['data']['dataset']
val_split = config['data']['val_split']
train_batch_size = config['data']['train_batch_size']
val_batch_size = config['data']['val_batch_size']
test_batch_size = config['data']['test_batch_size']
num_workers = config['data']['num_workers']

In [18]:
# Load CIFAR10 dataset
train_dataset = datasets.__dict__[dataset_name](root= 'data', train=True, download=True, transform=train_transform)
test_dataset = datasets.__dict__[dataset_name](root= 'data', train=False, download=True, transform=test_transform)
val_size = int(val_split * len(train_dataset))
train_size = len(train_dataset) - val_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])
train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=num_workers)
val_loader = DataLoader(val_dataset, batch_size=val_batch_size, shuffle=False, num_workers=num_workers)
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False, num_workers=num_workers)

Files already downloaded and verified
Files already downloaded and verified


In [19]:
# Initialize Lightning Trainer
trainer = L.Trainer(max_epochs=config['trainer']['max_epochs'],
                    logger=logger,
                    callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=3, verbose=True)],
                    default_root_dir="./checkpoints")

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [20]:
# Train the model
trainer.fit(classifier, train_loader, val_loader)
#trainer.fit(vae, train_loader, val_loader, ckpt_path="./logs/vae_experiment/version_0/checkpoints/vae_model(2).pth")

Missing logger folder: logs/nn_experiment

  | Name       | Type               | Params
--------------------------------------------------
0 | classifier | Sequential         | 30.7 K
1 | train_acc  | MulticlassAccuracy | 0     
2 | val_acc    | MulticlassAccuracy | 0     
3 | test_acc   | MulticlassAccuracy | 0     
--------------------------------------------------
30.7 K    Trainable params
0         Non-trainable params
30.7 K    Total params
0.123     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 2.140


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.092 >= min_delta = 0.0. New best score: 2.048


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 2.048. Signaling Trainer to stop.
`Trainer.fit` stopped: `max_epochs=5` reached.


In [21]:
trainer.test(classifier, dataloaders=test_loader,ckpt_path="best")

Restoring states from the checkpoint path at logs/nn_experiment/version_0/checkpoints/epoch=4-step=1565.ckpt
Loaded model weights from the checkpoint at logs/nn_experiment/version_0/checkpoints/epoch=4-step=1565.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 2.045199394226074, 'test_acc': 0.34529998898506165}]

In [22]:
%load_ext tensorboard

In [23]:
%tensorboard --logdir logs

Launching TensorBoard...