<a href="https://colab.research.google.com/github/linshaochieh2019/test/blob/main/rsna_efficientnet3d_debug_val-loss-0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Debugging: use dumb_dataset to make sure the code is bug-free

In [2]:
#!pip install torchio
#!pip install pytorch-lightning
#!pip install git+https://github.com/shijianjian/EfficientNet-PyTorch-3D
#!pip install 'neptune-client[pytorch-lightning]'

In [3]:
# Attach Google Drive to the Colab filesystem so files stored there can be
# read through ordinary paths under the mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [4]:
# Import packages
import sys
import random
import pytorch_lightning as pl
from pytorch_lightning.metrics.classification import AUROC

import torch
from torch import nn
import torch.nn.functional as F
from torchmetrics import AUROC, Accuracy

from efficientnet_pytorch_3d import EfficientNet3D

# Seed Python's and PyTorch's random number generators with one fixed value
# so that repeated runs start from the same RNG state.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)

In [70]:
class Efficientnet3DModel(pl.LightningModule):
  """LightningModule that trains a 3D network as a binary classifier.

  Each batch is expected to be a mapping with the keys 'FLAIR', 'T1w',
  'T1wCE' and 'T2w' (each holding a tensor under 'data') plus 'MGMT_value'
  for the label. The four volumes are concatenated along dim 4 and fed to
  ``net``; the network output is squashed with a sigmoid before the loss.

  Args:
    net: Module called as ``net(x)`` on the concatenated input.
    criterion: Loss called as ``criterion(probabilities, labels)``. The
      sigmoid is applied inside this class, so the criterion must accept
      probabilities (e.g. ``nn.BCELoss``) rather than raw logits.
    learning_rate: Learning rate passed to the optimizer.
    optimizer_class: Optimizer constructor called as
      ``optimizer_class(params, lr=learning_rate)``. ``None`` falls back to
      ``torch.optim.Adam``.
  """

  def __init__(self, net, criterion, learning_rate, optimizer_class):
    super().__init__()
    self.lr = learning_rate
    self.net = net
    self.criterion = criterion
    self.optimizer_class = optimizer_class

  def configure_optimizers(self):
    """Build the optimizer from the class supplied at construction time.

    Previously this always created ``torch.optim.Adam`` and silently ignored
    ``optimizer_class``; the configured class is now honoured.
    """
    optimizer_class = self.optimizer_class
    if optimizer_class is None:
      # Keep the historical default when no optimizer was specified.
      optimizer_class = torch.optim.Adam
    return optimizer_class(self.parameters(), lr=self.lr)

  def prepare_batch(self, batch):
    """Turn a raw batch mapping into an ``(input, label)`` pair.

    Returns:
      input_cat: the four modality tensors concatenated along dim 4.
      label: ``batch['MGMT_value']`` reshaped to ``(-1, 1)`` and cast to float.
    """
    # Fixed modality order: FLAIR, T1w, T1wCE, T2w.
    modality_keys = ('FLAIR', 'T1w', 'T1wCE', 'T2w')
    ls_input = [batch[key]['data'] for key in modality_keys]

    # Concatenate along dim 4, so the inputs must be at least 5-D.
    # NOTE(review): presumably (batch, channel, D, H, W), i.e. the modalities
    # are stacked along the last spatial axis rather than the channel axis.
    input_cat = torch.cat(ls_input, dim=4)

    # Column vector of float targets, matching a single-output network.
    label = batch['MGMT_value'].view(-1, 1).float()
    return input_cat, label

  def infer_batch(self, batch):
    """Run the network on a batch and return ``(raw_output, label)``."""
    x, y = self.prepare_batch(batch)
    y_hat = self.net(x)
    return y_hat, y

  def _shared_step(self, batch, stage):
    """Compute and log loss and accuracy for one batch.

    Args:
      batch: raw batch mapping as accepted by ``prepare_batch``.
      stage: metric-name prefix, ``'train'`` or ``'val'``.

    Returns:
      The loss tensor produced by ``self.criterion``.
    """
    y_hat, y = self.infer_batch(batch)
    # These are probabilities in [0, 1], not logits: the sigmoid is applied here.
    probs = torch.sigmoid(y_hat)
    loss = self.criterion(probs, y)

    # Threshold at 0.5 and compare with the labels; accuracy is the number of
    # matches divided by the number of labels in the batch.
    preds = (probs >= 0.5).to(y.dtype)
    acc = torch.sum(preds == y).item() / len(y)

    values = {f'{stage}_loss': loss,
              f'{stage}_acc': acc
              }
    self.log_dict(values, prog_bar=True)
    return loss

  def training_step(self, batch, batch_idx):
    """Log 'train_loss' / 'train_acc' and return the loss for backprop."""
    return self._shared_step(batch, 'train')

  def validation_step(self, batch, batch_idx):
    """Log 'val_loss' / 'val_acc' and return the validation loss."""
    return self._shared_step(batch, 'val')

In [68]:
# Load the debug train/validation loaders that were saved to Google Drive.
# NOTE: torch.load unpickles the file contents, which can execute arbitrary
# code -- only load files from a trusted source.
loader_dir = '/content/drive/MyDrive/Colab Notebooks/RSNA/loaders/'
train_loader_path = f'{loader_dir}debug_train_loader.pth'
val_loader_path = f'{loader_dir}debug_val_loader.pth'
debug_train_loader, debug_val_loader = (
    torch.load(path) for path in (train_loader_path, val_loader_path)
)

In [72]:
from neptune.new.integrations.pytorch_lightning import NeptuneLogger

# SECURITY: the Neptune API token must never be committed to the notebook.
# With api_key=None the client reads the token from the NEPTUNE_API_TOKEN
# environment variable; set it before running this cell (for example with
# `%env NEPTUNE_API_TOKEN=...` in a cell that is not saved, or through the
# Colab secrets panel). Any token that was previously committed here should
# be revoked in the Neptune UI.
neptune_logger = NeptuneLogger(
            api_key=None,  # resolved from the NEPTUNE_API_TOKEN env variable
            project="linshaochieh2019/test", 
            name='lightning-run',  # Optional
        )

In [73]:
# Backbone: EfficientNet-B0 (3D) with a single output unit and one input channel.
net = EfficientNet3D.from_name(
    "efficientnet-b0",
    override_params={'num_classes': 1},
    in_channels=1,
)

# Lightning wrapper around the backbone, with its loss and optimizer settings.
model_kwargs = dict(
    net=net,
    criterion=torch.nn.BCELoss(),
    learning_rate=1e-3,
    optimizer_class=torch.optim.AdamW,
)
model = Efficientnet3DModel(**model_kwargs)

# Single-GPU trainer that reports metrics through the Neptune logger.
trainer_kwargs = dict(
    gpus=1,
    #overfit_batches=0.1,
    max_epochs=200,
    logger=neptune_logger,
)
trainer = pl.Trainer(**trainer_kwargs)

# Train on the debug loaders, validating with the debug validation loader.
trainer.fit(
    model,
    train_dataloaders=debug_train_loader,
    val_dataloaders=debug_val_loader,
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


https://app.neptune.ai/linshaochieh2019/test/e/TES-8
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.



  | Name      | Type           | Params
---------------------------------------------
0 | net       | EfficientNet3D | 4.7 M 
1 | criterion | BCELoss        | 0     
---------------------------------------------
4.7 M     Trainable params
0         Non-trainable params
4.7 M     Total params
18.759    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  f"Your {mode}_dataloader has `shuffle=True`, it is best practice to turn"
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"


Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


# Experiment note


1.   Lowered batch size from 8 to 1 - did not converge.
2.   Lowered input size to (8,1,64,64,64) - did not converge.
3.   Lowered both batch size and input size - did not converge.
4.   Toy data with values in range(0,1), and more ones to make it easier.
     >Managed to reduce both train_loss and val_loss to zero!

