In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
%%capture
!pip install libauc==1.2.0
!pip install medmnist
!pip install torchio

In [3]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms

import medmnist
from medmnist import INFO, Evaluator

from libauc.models import resnet18
from libauc.sampler import DualSampler
from libauc.metrics import auc_prc_score

import torchvision.transforms as transforms
from torch.utils.data import Dataset
from PIL import Image

from torchio import Image



In [4]:
print(f"MedMNIST v{medmnist.__version__} @ {medmnist.HOMEPAGE}")

MedMNIST v2.2.1 @ https://github.com/MedMNIST/MedMNIST/


In [5]:
#data_flag = 'breastmnist'
#data_flag = 'pneumoniamnist'
#data_flag = 'chestmnist'
data_flag = 'nodulemnist3d'
#data_flag = 'adrenalmnist3d'
#data_flag = 'vesselmnist3d'

download = True

#NUM_EPOCHS = 3
BATCH_SIZE = 32
#lr = 0.001

info = INFO[data_flag]
task = info['task']
n_channels = info['n_channels']
n_classes = len(info['label'])

DataClass = getattr(medmnist, info['python_class'])

print(info)
print(n_channels)
print(n_classes)
print(DataClass)

{'python_class': 'NoduleMNIST3D', 'description': 'The NoduleMNIST3D is based on the LIDC-IDRI, a large public lung nodule dataset, containing images from thoracic CT scans. The dataset is designed for both lung nodule segmentation and 5-level malignancy classification task. To perform binary classification, we categorize cases with malignancy level 1/2 into negative class and 4/5 into positive class, ignoring the cases with malignancy level 3. We split the source dataset with a ratio of 7:1:2 into training, validation and test set, and center-crop the spatially normalized images (with a spacing of 1mm×1mm×1mm) into 28×28×28.', 'url': 'https://zenodo.org/record/6496656/files/nodulemnist3d.npz?download=1', 'MD5': '8755a7e9e05a4d9ce80a24c3e7a256f3', 'task': 'binary-class', 'label': {'0': 'benign', '1': 'malignant'}, 'n_channels': 1, 'n_samples': {'train': 1158, 'val': 165, 'test': 310}, 'license': 'CC BY 4.0'}
1
2
<class 'medmnist.dataset.NoduleMNIST3D'>


In [6]:
from torchio.transforms import RandomAffine, RandomFlip, RandomNoise, RandomGamma


class ImageDataset(Dataset):
    def __init__(self, images, targets, image_size=28, crop_size=24, mode='train'):
        self.images = images
        self.targets = targets
        self.mode = mode
        
        self.transform_train = transforms.Compose([RandomAffine(scales=(0.9, 1.2),
                                                                degrees=15,
                                                                p=0.5),
#                                                    RandomAffine(),
                                                   RandomGamma(log_gamma=(-0.3, 0.3), p=0.5),
                                                   RandomFlip(p = 0.5),
                                                   RandomNoise(p=0.2),
                                                  ])
        self.transform_test = transforms.Compose([RandomAffine(p=0),
#                                                    RandomAffine(),
                                                   RandomFlip(p=0),
                                                   RandomNoise(p=0),
                                                  ])
        
#         increased_dataset = torch.utils.data.ConcatDataset([transformed_dataset,original])
        
#         self.transform_train = transforms.Compose([                                                
#                               transforms.ToTensor(),
# #                               transforms.RandomCrop((crop_size, crop_size, crop_size), padding=None),
#                               transforms.RandomHorizontalFlip(),
# #                               transforms.Resize((image_size, image_size, image_size)),
#                               ])
#         self.transform_test = transforms.Compose([
#                              transforms.ToTensor(),
# #                              transforms.Resize((image_size, image_size, image_size)),
# #                              transforms.Normalize(mean=[.5], std=[.5])
#                               ])
        
        
        # for loss function
        self.pos_indices = np.flatnonzero(targets==1)
        self.pos_index_map = {}
        for i, idx in enumerate(self.pos_indices):
            self.pos_index_map[idx] = i

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        target = self.targets[idx]
#         image = Image.fromarray(image.astype('uint8'))
        if self.mode == 'train':
           idx = self.pos_index_map[idx] if idx in self.pos_indices else -1
           image = self.transform_train(image)
        else:
           image = self.transform_test(image)
        return idx, image, target 

In [7]:
train_dataset = DataClass(split='train',  download=download)

# encapsulate data into dataloader form
#train_loader = data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)

val_dataset = DataClass(split='val',  download=download)
#train_loader_at_eval = data.DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=True)

test_dataset = DataClass(split='test',  download=download)
#test_loader = data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=True)

Downloading https://zenodo.org/record/6496656/files/nodulemnist3d.npz?download=1 to /root/.medmnist/nodulemnist3d.npz


100%|██████████| 29299364/29299364 [00:01<00:00, 15053979.84it/s]


Using downloaded and verified file: /root/.medmnist/nodulemnist3d.npz
Using downloaded and verified file: /root/.medmnist/nodulemnist3d.npz


In [8]:
train_images = np.array([np.asarray(image) for (image, target) in train_dataset])
train_labels = np.array([target for (image, target) in train_dataset])
print(type(train_images[0]))

val_images = np.array([np.asarray(image) for (image, target) in val_dataset])
val_labels = np.array([target for (image, target) in val_dataset])
print(type(val_images[0]))

test_images = np.array([np.asarray(image) for (image, target) in test_dataset])
test_labels = [target for (image, target) in test_dataset]
print(type(test_images[0]))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [9]:
batch_size = 32
sampling_rate = 0.5

trainSet = ImageDataset(train_images, train_labels)
trainSet_eval = ImageDataset(val_images, val_labels, mode='test')
testSet = ImageDataset(test_images, test_labels, mode='test')

sampler = DualSampler(dataset=trainSet, batch_size=batch_size, sampling_rate=sampling_rate)
train_loader = torch.utils.data.DataLoader(trainSet, batch_size=batch_size, sampler=sampler, num_workers=2)
train_loader_at_eval = torch.utils.data.DataLoader(trainSet_eval, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(testSet, batch_size=batch_size, shuffle=True, num_workers=2)

In [10]:
print(train_loader)

<torch.utils.data.dataloader.DataLoader object at 0x7c98e5c71f90>


In [11]:
x, y = train_dataset[0]

print(x.shape, y.shape)

(1, 28, 28, 28) (1,)


In [12]:
# frames = train_dataset.montage(length=20)
# frames[10]

In [13]:
lables = train_dataset.labels
cnt1 = 0
cnt0 = 0
for i in range(len(lables)):
  if lables[i][0] == 0:
    cnt0+=1
  else:
    cnt1+=1
print(cnt0)
print(cnt1)
print(cnt0*100/(cnt0+cnt1))

863
295
74.52504317789293


In [14]:
lables = test_dataset.labels
cnt1 = 0
cnt0 = 0
for i in range(len(lables)):
  if lables[i][0] == 0:
    cnt0+=1
  else:
    cnt1+=1
print(cnt0)
print(cnt1)
print(cnt0*100/(cnt0+cnt1))

246
64
79.35483870967742


In [15]:
from libauc.losses import AUCMLoss, CrossEntropyLoss
from libauc.optimizers import PESG, Adam
from libauc.models import densenet121 as DenseNet121


from PIL import Image
from torch.utils.data import Dataset
from sklearn.metrics import roc_auc_score

import torch
import torch.nn as nn
import torchvision.models as models
# from torch.optim import Adam

In [16]:
def set_all_seeds(SEED):
    # REPRODUCIBILITY
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [17]:
device = torch.device(0 if torch.cuda.is_available() else 'cpu')

In [18]:
# m2 = models.video.r3d_18(pretrained=True)
# m2

In [19]:
# paramaters
SEED = 123
BATCH_SIZE = 32
lr = 3e-4
weight_decay = 1e-5
num_classes=1

# model
model = models.video.r3d_18(pretrained=True)
input_channel = model.fc.in_features
#for param in model.parameters():
    #param.requires_grad = False
model.fc = nn.Sequential(nn.Linear(input_channel, 128),
                         nn.ReLU(),
#                          nn.Dropout(p=0.2),
                         nn.Linear(128, 32),
                         nn.ReLU(),
#                          nn.Dropout(p=0.1),
                         nn.Linear(32, num_classes),
                         nn.Sigmoid()
                         )


model = model.to(device)

# create a binary cross-entropy loss function
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# training
best_val_auc = 0 
for epoch in range(10):
    print("epoch: ", epoch+1)
    for idx, (index, data, labels) in enumerate(train_loader):
      train_data, train_labels = data, labels
      train_data=train_data.repeat(1,3,1,1,1)
      train_labels=train_labels.float()

      train_data, train_labels  = train_data.to(device), train_labels.to(device)
      # break
      y_pred = model(train_data.float())
      loss = criterion(y_pred, train_labels)
      #print("Training Loss= ", loss.item())
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      
    print(f'Validating epoch: {epoch+1} Loss: {loss.item()}')  
      # validation  
      #if idx % 100 == 0:
      #if (1):
    model.eval()
    with torch.no_grad():    
         test_pred = []
         test_true = [] 
         for jdx, (index, data, labels) in enumerate(train_loader_at_eval):
             test_data, test_labels = data, labels
             test_data=test_data.repeat(1,3,1,1,1)
             test_labels=test_labels.float()
             test_data = test_data.to(device)
             y_pred = model(test_data.float())
             test_pred.append(y_pred.cpu().detach().numpy())
             test_true.append(test_labels.numpy())
     
         test_true = np.concatenate(test_true)
         test_pred = np.concatenate(test_pred)
         val_auc_mean =  roc_auc_score(test_true, test_pred) 
         #model.train
         if best_val_auc < val_auc_mean:
            best_val_auc = val_auc_mean
            torch.save(model.state_dict(), 'ce_pretrained_model.pth')
         print ('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f'%(epoch, idx, val_auc_mean, best_val_auc) )

Downloading: "https://download.pytorch.org/models/r3d_18-b3b3357e.pth" to /root/.cache/torch/hub/checkpoints/r3d_18-b3b3357e.pth
100%|██████████| 127M/127M [00:00<00:00, 221MB/s]  


epoch:  1
Validating epoch: 1 Loss: 0.5094549655914307
Epoch=0, BatchID=52, Val_AUC=0.8161, Best_Val_AUC=0.8161
epoch:  2
Validating epoch: 2 Loss: 0.39538413286209106
Epoch=1, BatchID=52, Val_AUC=0.8879, Best_Val_AUC=0.8879
epoch:  3
Validating epoch: 3 Loss: 0.4484274983406067
Epoch=2, BatchID=52, Val_AUC=0.9021, Best_Val_AUC=0.9021
epoch:  4
Validating epoch: 4 Loss: 0.2285785675048828
Epoch=3, BatchID=52, Val_AUC=0.9094, Best_Val_AUC=0.9094
epoch:  5
Validating epoch: 5 Loss: 0.2461239993572235
Epoch=4, BatchID=52, Val_AUC=0.8910, Best_Val_AUC=0.9094
epoch:  6
Validating epoch: 6 Loss: 0.4676474332809448
Epoch=5, BatchID=52, Val_AUC=0.9148, Best_Val_AUC=0.9148
epoch:  7
Validating epoch: 7 Loss: 0.64267897605896
Epoch=6, BatchID=52, Val_AUC=0.8945, Best_Val_AUC=0.9148
epoch:  8
Validating epoch: 8 Loss: 0.26917675137519836
Epoch=7, BatchID=52, Val_AUC=0.8908, Best_Val_AUC=0.9148
epoch:  9
Validating epoch: 9 Loss: 0.19812816381454468
Epoch=8, BatchID=52, Val_AUC=0.9131, Best_Val_AU

In [20]:
    print(f'Testing ...')  
    PATH = 'ce_pretrained_model.pth' 
    model_state_dict = torch.load(PATH)
    #model = MyModel()  # Create an instance of your model
    model.load_state_dict(model_state_dict)  # Load the saved parameters into the model
    best_val_auc=0.0
    with torch.no_grad():    
         test_pred = []
         test_true = [] 
         for jdx, (index, data, labels) in enumerate(test_loader):
             test_data, test_labels = data, labels
             test_data=test_data.repeat(1,3,1,1,1)
             test_labels=test_labels.float()
             test_data = test_data.to(device)
             y_pred = model(test_data.float())
             test_pred.append(y_pred.cpu().detach().numpy())
             test_true.append(test_labels.numpy())
     
         test_true = np.concatenate(test_true)
         test_pred = np.concatenate(test_pred)
         val_auc_mean =  roc_auc_score(test_true, test_pred) 
        # model.train
        # if best_val_auc < val_auc_mean:
        #    best_val_auc = val_auc_mean
        #    #torch.save(model.state_dict(), 'ce_pretrained_model.pth')
         print('Val_AUC=%.4f'%( val_auc_mean))

Testing ...
Val_AUC=0.9367


In [21]:
from torch.optim.lr_scheduler import StepLR, CosineAnnealingLR, ReduceLROnPlateau, CosineAnnealingWarmRestarts
# from torch.optim import Adam

import warnings
warnings.filterwarnings('ignore')
# parameters
class_id = 1

# paramaters
SEED = 123
# BATCH_SIZE = 32 #[16, 32, 64, 128]
# #imratio = train_dataset.imratio
# lr = 0.05 # using smaller learning rate is better [0.001, 0.01, 0.05, 0.1]

# lrs = [0.001, 0.01, 0.05, 0.1]
lrs = [0.1]
#BATCH_SIZEs = [16, 32, 64, 128]
BATCH_SIZEs = [32]

epoch_decay = 2e-3
weight_decay = 1e-5
margin = 1.0

for lr in lrs:
    print("------------------lr: {}--------------------".format(lr))
    for BATCH_SIZE in BATCH_SIZEs:
        print("================ Batch Size: {} ===================".format(BATCH_SIZE))
        # model
        set_all_seeds(SEED)
        # model
        model = models.video.r3d_18(pretrained=False)
        input_channel = model.fc.in_features
        #for param in model.parameters():
            #param.requires_grad = False
        model.fc = nn.Sequential(nn.Linear(input_channel, 128),
                                 nn.ReLU(),
#                                  nn.Dropout(p=0.2),
                                 nn.Linear(128, 32),
                                 nn.ReLU(),
#                                  nn.Dropout(p=0.1),
                                 nn.Linear(32, num_classes),
                                 nn.Sigmoid()
                                 )


        model = model.to(device)

        #model = models.resnet18(pretrained=False)
        #
        #model.fc = nn.Sequential(
        #    nn.Linear(512, 1),
        #    nn.Sigmoid()
        #)
        #model = model.cuda()


        # load pretrained model
        if True:
          PATH = 'ce_pretrained_model.pth' 
          state_dict = torch.load(PATH)
          state_dict.pop('classifier.weight', None)
          state_dict.pop('classifier.bias', None) 
          model.load_state_dict(state_dict, strict=False)


        # define loss & optimizer
        loss_fn = AUCMLoss()
        optimizer = PESG(model, 
                         loss_fn=loss_fn, 
                         lr=lr, 
                         margin=margin, 
                         epoch_decay=epoch_decay, 
                         weight_decay=weight_decay)
        
#         optimizer = Adam(model.parameters(), lr = lr, weight_decay= weight_decay)
        lr_scheduler_opt = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1, verbose = True)
#         scheduler_1 = CosineAnnealingLR(optimizer, 20, eta_min = 1e-08, verbose = True)
        scheduler_2 = ReduceLROnPlateau(optimizer, patience=2,  verbose=True, factor=0.5, 
                              threshold=1e-04, min_lr=1e-08, mode = 'max')

        best_val_auc = 0
        for epoch in range(20):
          for idx, (index, data, labels) in enumerate(train_loader):
              train_data, train_labels = data, labels
              train_data=train_data.repeat(1,3,1,1,1)
              train_labels=train_labels.float()

              train_data, train_labels = train_data.to(device), train_labels.to(device)
              y_pred = model(train_data.float())
#               y_pred = torch.sigmoid(y_pred)
              loss = loss_fn(y_pred, train_labels)
              optimizer.zero_grad()
              loss.backward()
              optimizer.step()

              # validation
          #if idx % 400 == 0:
          print(f'Validating epoch: {epoch+1} Loss: {loss.item()}')
          model.eval()
          val_auc = 0
          with torch.no_grad():    
                test_pred = []
                test_true = [] 
                for jdx, (index, data, labels) in enumerate(train_loader_at_eval):
                    test_data, test_label = data, labels
                    test_data=test_data.repeat(1,3,1,1,1)
                    test_labels=test_labels.float()
                    test_data = test_data.to(device)
                    y_pred = model(test_data.float())
                    test_pred.append(y_pred.cpu().detach().numpy())
                    test_true.append(test_label.numpy())

                test_true = np.concatenate(test_true)
                test_pred = np.concatenate(test_pred)
                val_auc =  roc_auc_score(test_true, test_pred) 
                #model.train(
                if best_val_auc < val_auc:
                   best_val_auc = val_auc
                   torch.save(model.state_dict(), 'adrenal3d_resnet3d_pesg_0.1_SLR_RLRP.pth'.format(lr, BATCH_SIZE))

          print ('Epoch={}, BatchID={}, Val_AUC={} lr={}'.format(epoch, idx, val_auc, optimizer.lr))
          scheduler_2.step(val_auc)
          lr_scheduler_opt.step()
#           scheduler_1.step()
        print ('Best Val_AUC is %.4f'%best_val_auc)



------------------lr: 0.1--------------------
Adjusting learning rate of group 0 to 1.0000e-01.
Validating epoch: 1 Loss: 0.08442236483097076
Epoch=0, BatchID=52, Val_AUC=0.8981804103755323 lr=0.1
Adjusting learning rate of group 0 to 1.0000e-01.
Validating epoch: 2 Loss: 0.018581364303827286
Epoch=1, BatchID=52, Val_AUC=0.9012775842044135 lr=0.1
Adjusting learning rate of group 0 to 1.0000e-01.
Validating epoch: 3 Loss: 0.07650500535964966
Epoch=2, BatchID=52, Val_AUC=0.9132791327913279 lr=0.1
Adjusting learning rate of group 0 to 1.0000e-01.
Validating epoch: 4 Loss: 0.015698812901973724
Epoch=3, BatchID=52, Val_AUC=0.9076655052264808 lr=0.1
Adjusting learning rate of group 0 to 1.0000e-01.
Validating epoch: 5 Loss: 0.06704504042863846
Epoch=4, BatchID=52, Val_AUC=0.9063104916763454 lr=0.1
Adjusting learning rate of group 0 to 1.0000e-01.
Validating epoch: 6 Loss: 0.06935092061758041
Epoch=5, BatchID=52, Val_AUC=0.9134727061556329 lr=0.1
Adjusting learning rate of group 0 to 1.0000e-

In [23]:
print(f'Testing ...')  
# PATH = 'aucm_trained_model_0.0003_32_scheduler_20epochs.pth' 
PATH = '/kaggle/working/adrenal3d_resnet3d_pesg_0.1_SLR_RLRP.pth'
model_state_dict = torch.load(PATH)
#model = MyModel()  # Create an instance of your model
model.load_state_dict(model_state_dict)  # Load the saved parameters into the model
best_val_auc=0.0
with torch.no_grad():    
     test_pred = []
     test_true = [] 
     for jdx, (index, data, labels) in enumerate(test_loader):
         test_data, test_labels = data, labels
         test_data=test_data.repeat(1,3,1,1,1)
         test_labels=test_labels.float()
         test_data = test_data.to(device)
         y_pred = model(test_data.float())
         test_pred.append(y_pred.cpu().detach().numpy())
         test_true.append(test_labels.numpy())
 
     test_true = np.concatenate(test_true)
     test_pred = np.concatenate(test_pred)
     val_auc_mean =  roc_auc_score(test_true, test_pred) 
    # model.train
    # if best_val_auc < val_auc_mean:
    #    best_val_auc = val_auc_mean
    #    #torch.save(model.state_dict(), 'ce_pretrained_model.pth')
     print('Val_AUC=%.4f'%( val_auc_mean))

Testing ...
Val_AUC=0.9303
