In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

import warnings
warnings.filterwarnings('ignore')

In [2]:
%%capture
!pip install libauc==1.2.0
!pip install medmnist
!pip install torchio

In [3]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms

import medmnist
from medmnist import INFO, Evaluator

from libauc.models import resnet18
from libauc.sampler import DualSampler
from libauc.metrics import auc_prc_score

import torchvision.transforms as transforms
from torch.utils.data import Dataset
from PIL import Image, ImageFilter
from torchio import Image

In [4]:
print(f"MedMNIST v{medmnist.__version__} @ {medmnist.HOMEPAGE}")

MedMNIST v2.2.1 @ https://github.com/MedMNIST/MedMNIST/


**CHESTTMNIST DATA**

In [5]:
#data_flag = 'breastmnist'
# data_flag = 'pneumoniamnist'
data_flag = 'chestmnist'

download = True

#NUM_EPOCHS = 3
# BATCH_SIZE = 128
#lr = 0.001

info = INFO[data_flag]
task = info['task']
n_channels = info['n_channels']
n_classes = len(info['label'])

DataClass = getattr(medmnist, info['python_class'])

In [6]:
print(info)
print(n_channels)
print(n_classes)
print(DataClass)

{'python_class': 'ChestMNIST', 'description': 'The ChestMNIST is based on the NIH-ChestXray14 dataset, a dataset comprising 112,120 frontal-view X-Ray images of 30,805 unique patients with the text-mined 14 disease labels, which could be formulized as a multi-label binary-class classification task. We use the official data split, and resize the source images of 1×1024×1024 into 1×28×28.', 'url': 'https://zenodo.org/record/6496656/files/chestmnist.npz?download=1', 'MD5': '02c8a6516a18b556561a56cbdd36c4a8', 'task': 'multi-label, binary-class', 'label': {'0': 'atelectasis', '1': 'cardiomegaly', '2': 'effusion', '3': 'infiltration', '4': 'mass', '5': 'nodule', '6': 'pneumonia', '7': 'pneumothorax', '8': 'consolidation', '9': 'edema', '10': 'emphysema', '11': 'fibrosis', '12': 'pleural', '13': 'hernia'}, 'n_channels': 1, 'n_samples': {'train': 78468, 'val': 11219, 'test': 22433}, 'license': 'CC BY 4.0'}
1
14
<class 'medmnist.dataset.ChestMNIST'>


In [7]:
from torchio.transforms import RandomAffine, RandomFlip, RandomNoise, RandomGamma
class ImageDataset(Dataset):
    def __init__(self, images, targets, image_size=28, crop_size=24, mode='train', kernel_size=3):
        self.images = images.astype(np.uint8)
        self.targets = targets
        self.mode = mode

        self.transform_train = transforms.Compose([
                                                    transforms.ToTensor(),
                                                    transforms.Resize((image_size, image_size)),
                                                    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.2), shear=0.2),
                                                    transforms.RandomApply([transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1)], p=0.5),
                                                    transforms.RandomHorizontalFlip(p=0.5),
#                                                     transforms.RandomVerticalFlip(p=0.5),
                                                    transforms.RandomApply([transforms.GaussianBlur(kernel_size=3)], p=0.2),
                                                ])

        self.transform_test = transforms.Compose([
                             transforms.ToTensor(),
                             #transforms.GaussianBlur(kernel_size=(kernel_size, kernel_size), sigma=(0.1, 2.0)),
                             #transforms.BilateralFilter(diameter=5, sigma_color=0.1, sigma_space=15),
#                              transforms.Resize((image_size, image_size)),
                              ])
        
        
        # for loss function
        self.pos_indices = np.flatnonzero(targets==1)
        self.pos_index_map = {}
        for i, idx in enumerate(self.pos_indices):
            self.pos_index_map[idx] = i

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        target = self.targets[idx]
        image = Image.fromarray(image.astype('uint8'))
        #image = torch.from_numpy(image).unsqueeze(0) 
        if self.mode == 'train':
           idx = self.pos_index_map[idx] if idx in self.pos_indices else -1
           #image = image.filter(ImageFilter.GaussianBlur(radius=3))
           #image = image.filter(ImageFilter.BLUR)
           #image = image.filter(ImageFilter.UnsharpMask(radius=3, percent=150, threshold=3))

           image = self.transform_train(image)
        else:
           #image = image.filter(ImageFilter.GaussianBlur(radius=3))
           #image = image.filter(ImageFilter.BLUR)
           #image = image.filter(ImageFilter.UnsharpMask(radius=3, percent=150, threshold=3))
           image = self.transform_test(image)
        return idx, image, target 

In [8]:
train_dataset = DataClass(split='train', download=download)
print(train_dataset[0])
val_dataset = DataClass(split='val', download=download)
print(train_dataset[0])
test_dataset = DataClass(split='test', download=download)
print(test_dataset[0])

Downloading https://zenodo.org/record/6496656/files/chestmnist.npz?download=1 to /root/.medmnist/chestmnist.npz


100%|██████████| 82802576/82802576 [00:03<00:00, 20786097.96it/s]


(<PIL.Image.Image image mode=L size=28x28 at 0x7D9BC388AA10>, array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
Using downloaded and verified file: /root/.medmnist/chestmnist.npz
(<PIL.Image.Image image mode=L size=28x28 at 0x7D9BC388AA10>, array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
Using downloaded and verified file: /root/.medmnist/chestmnist.npz
(<PIL.Image.Image image mode=L size=28x28 at 0x7D9BC388AA10>, array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))


In [9]:
train_images = np.array([np.asarray(image) for (image, target) in train_dataset])
train_labels = np.array([target for (image, target) in train_dataset])
print(type(train_images[0]))

<class 'numpy.ndarray'>


In [10]:
val_images = np.array([np.asarray(image) for (image, target) in val_dataset])
val_labels = [target for (image, target) in val_dataset]
print(type(val_images[0]))

<class 'numpy.ndarray'>


In [11]:
test_images = np.array([np.asarray(image) for (image, target) in test_dataset])
test_labels = [target for (image, target) in test_dataset]
print(type(test_images[0]))

<class 'numpy.ndarray'>


In [12]:
batch_size = 512
sampling_rate = 0.5

trainSet = ImageDataset(train_images, train_labels)
trainSet_eval = ImageDataset(val_images, val_labels, mode='test')
testSet = ImageDataset(test_images, test_labels, mode='test')

sampler = DualSampler(trainSet, batch_size, sampling_rate=sampling_rate)
train_loader = torch.utils.data.DataLoader(trainSet, batch_size=batch_size, sampler=sampler, num_workers=2)
train_loader_at_eval = torch.utils.data.DataLoader(trainSet_eval, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(testSet, batch_size=batch_size, shuffle=True, num_workers=2)

In [13]:
print(train_dataset)
print("===================")
print(test_dataset)

Dataset ChestMNIST (chestmnist)
    Number of datapoints: 78468
    Root location: /root/.medmnist
    Split: train
    Task: multi-label, binary-class
    Number of channels: 1
    Meaning of labels: {'0': 'atelectasis', '1': 'cardiomegaly', '2': 'effusion', '3': 'infiltration', '4': 'mass', '5': 'nodule', '6': 'pneumonia', '7': 'pneumothorax', '8': 'consolidation', '9': 'edema', '10': 'emphysema', '11': 'fibrosis', '12': 'pleural', '13': 'hernia'}
    Number of samples: {'train': 78468, 'val': 11219, 'test': 22433}
    Description: The ChestMNIST is based on the NIH-ChestXray14 dataset, a dataset comprising 112,120 frontal-view X-Ray images of 30,805 unique patients with the text-mined 14 disease labels, which could be formulized as a multi-label binary-class classification task. We use the official data split, and resize the source images of 1×1024×1024 into 1×28×28.
    License: CC BY 4.0
Dataset ChestMNIST (chestmnist)
    Number of datapoints: 22433
    Root location: /root/.medm

In [14]:
#print(train_dataset[0][0].shape)

# # montage
# train_dataset.montage(length=20)

**Checking Imbalance in Dataset**

In [26]:
from libauc.losses import AUCMLoss, CrossEntropyLoss, CompositionalAUCLoss
from libauc.optimizers import PESG, Adam
# from libauc.models import densenet121 as DenseNet121


from PIL import Image
from torch.utils.data import Dataset
from sklearn.metrics import roc_auc_score

import torch
import torch.nn as nn
import torchvision.models as models
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR

In [15]:
#configure GPU
device = torch.device(0 if torch.cuda.is_available() else 'cpu')

In [16]:
import warnings
warnings.filterwarnings("ignore")

In [17]:
import os, random
def set_all_seeds(SEED):
    # REPRODUCIBILITY
    torch.manual_seed(SEED)
    #torch.cuda.manual_seed(SEED)
    np.random.seed(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    #os.environ["PYTHONHASHSEED"]=str(SEED)
    #random.seed(SEED)
    #torch.use_deterministic_algorithms(True)

In [18]:
m = models.resnet18()
m

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [23]:
# paramaters
SEED = 123
BATCH_SIZE = 512
lr = 3e-4
weight_decay = 1e-5
num_classes=14

# model
model = models.resnet18(pretrained=True)
# model = models.resnet18(pretrained=False)

input_channel = model.fc.in_features

model.fc = nn.Sequential(nn.Linear(input_channel, 256),
                         nn.ReLU(),
#                          nn.Dropout(p=0.2),
                         nn.Linear(256, 128),
                         nn.ReLU(),
#                          nn.Dropout(p=0.1),
                         nn.Linear(128, num_classes),
#                          nn.LogSoftmax()
                        )


model = model.to(device)

# create a binary cross-entropy loss function
CELoss = CrossEntropyLoss()
# NLLLoss = nn.NLLLoss()
optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
scheduler = ReduceLROnPlateau(optimizer, patience=3,  verbose=True, factor=0.5, 
                              threshold=0.0001, min_lr=1e-8, mode = 'max')

# training
best_val_auc = 0 
for epoch in range(20):
    print("epoch=",epoch)
    for idx, (index, data, labels) in enumerate(train_loader):
      #print("idx=",idx) 
      train_data, train_labels = data.to(device), labels.to(device)
      train_data=train_data.repeat(1,3,1,1)
      y_pred = model(train_data)
#       print(y_pred)
#       print(train_labels.float())
      loss = CELoss(y_pred, train_labels.float())
      #print("Training Loss= ", loss.item())
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      
    print(f'Validating epoch: {epoch+1} Loss: {loss.item()}')  
    model.eval()
    val_auc_mean = 0

    with torch.no_grad():    
         val_pred = []
         val_true = [] 
         for jdx, (index, data, targets) in enumerate(train_loader_at_eval):
             val_data, val_labels = data.to(device), targets.to(device)
             val_data=val_data.repeat(1,3,1,1)

             y_pred = model(val_data)
             val_pred.append(y_pred.cpu().detach().numpy())
             val_true.append(val_labels.cpu().detach().numpy())
     
         val_true = np.concatenate(val_true)
         val_pred = np.concatenate(val_pred)
         #val_pred = [1 if x > 0.5 else 0 for x in val_pred]
         val_auc_mean =  roc_auc_score(val_true, val_pred) 
         
         if best_val_auc < val_auc_mean:
            best_val_auc = val_auc_mean
            torch.save(model.state_dict(), 'ce_bmnist_pretrained_model.pth')
         print ('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f'%(epoch, idx, val_auc_mean, best_val_auc) )
    scheduler.step(val_auc_mean)

epoch= 0
Validating epoch: 1 Loss: 0.22213214635849
Epoch=0, BatchID=274, Val_AUC=0.7004, Best_Val_AUC=0.7004
epoch= 1
Validating epoch: 2 Loss: 0.21465912461280823
Epoch=1, BatchID=274, Val_AUC=0.7074, Best_Val_AUC=0.7074
epoch= 2
Validating epoch: 3 Loss: 0.2101481705904007
Epoch=2, BatchID=274, Val_AUC=0.7205, Best_Val_AUC=0.7205
epoch= 3
Validating epoch: 4 Loss: 0.2197813093662262
Epoch=3, BatchID=274, Val_AUC=0.7272, Best_Val_AUC=0.7272
epoch= 4
Validating epoch: 5 Loss: 0.20240944623947144
Epoch=4, BatchID=274, Val_AUC=0.7352, Best_Val_AUC=0.7352
epoch= 5
Validating epoch: 6 Loss: 0.2080138474702835
Epoch=5, BatchID=274, Val_AUC=0.7322, Best_Val_AUC=0.7352
epoch= 6
Validating epoch: 7 Loss: 0.20323942601680756
Epoch=6, BatchID=274, Val_AUC=0.7383, Best_Val_AUC=0.7383
epoch= 7
Validating epoch: 8 Loss: 0.19559107720851898
Epoch=7, BatchID=274, Val_AUC=0.7293, Best_Val_AUC=0.7383
epoch= 8
Validating epoch: 9 Loss: 0.20457823574543
Epoch=8, BatchID=274, Val_AUC=0.7384, Best_Val_AUC

Pretraining2

In [25]:
# # paramaters
# SEED = 123
# BATCH_SIZE = 32
# lr = 1e-4
# weight_decay = 1e-5
# num_classes=14

# model = models.resnet18(pretrained=True)
# input_channel = model.fc.in_features
# #for param in model.parameters():
#     #param.requires_grad = False
# model.fc = nn.Sequential(nn.Linear(input_channel, 256),
#                          nn.ReLU(),
#                          nn.Dropout(p=0.2),
#                          nn.Linear(256, 128),
#                          nn.ReLU(),
#                          nn.Dropout(p=0.1),
#                          nn.Linear(128, num_classes)
# #                          nn.LogSoftmax()
#                         )


# model = model.to(device)

# #model = models.resnet18(pretrained=False)
# #
# #model.fc = nn.Sequential(
# #    nn.Linear(512, 1),
# #    nn.Sigmoid()
# #)
# #model = model.cuda()


# # load pretrained model
# if True:
#   PATH = '/kaggle/input/chest-model/ce_cmnist_pretrained_model.pth' 
#   state_dict = torch.load(PATH)
#   #state_dict.pop('classifier.weight', None)
#   #state_dict.pop('classifier.bias', None) 
#   model.load_state_dict(state_dict, strict=False)


# # create a binary cross-entropy loss function
# CELoss = CrossEntropyLoss()
# optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
# scheduler = ReduceLROnPlateau(optimizer, patience=3,  verbose=True, factor=0.5, 
#                               threshold=0.0001, min_lr=1e-8, mode = 'max')

# # training
# best_val_auc = 0 
# for epoch in range(20):
#     print("epoch=",epoch)
#     for idx, (index, data, labels) in enumerate(train_loader):
#       #print("idx=",idx) 
#       train_data, train_labels = data.to(device), labels.to(device)
#       train_data=train_data.repeat(1,3,1,1)
#       y_pred = model(train_data)
#       loss = CELoss(y_pred, train_labels.float())
#       #print("Training Loss= ", loss.item())
#       optimizer.zero_grad()
#       loss.backward()
#       optimizer.step()

      
#     print(f'Validating epoch: {epoch+1} Loss: {loss.item()}')  
#     model.eval()
#     val_auc_mean = 0

#     with torch.no_grad():    
#          val_pred = []
#          val_true = [] 
#          for jdx, (index, data, targets) in enumerate(train_loader_at_eval):
#              val_data, val_labels = data.to(device), targets.to(device)
#              val_data=val_data.repeat(1,3,1,1)

#              y_pred = model(val_data)
#              val_pred.append(y_pred.cpu().detach().numpy())
#              val_true.append(val_labels.cpu().detach().numpy())
     
#          val_true = np.concatenate(val_true)
#          val_pred = np.concatenate(val_pred)
#          #val_pred = [1 if x > 0.5 else 0 for x in val_pred]
#          val_auc_mean =  roc_auc_score(val_true, val_pred) 
         
#          if best_val_auc < val_auc_mean:
#             best_val_auc = val_auc_mean
#             torch.save(model.state_dict(), 'ce_cmnist_pretrained_model_2.pth')
#          print ('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f'%(epoch, idx, val_auc_mean, best_val_auc) )
#     scheduler.step(val_auc_mean)

epoch= 0
Validating epoch: 1 Loss: 0.21057210862636566


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Epoch=0, BatchID=274, Val_AUC=0.7514, Best_Val_AUC=0.7514
epoch= 1
Validating epoch: 2 Loss: 0.20706136524677277


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/op

Epoch=1, BatchID=274, Val_AUC=0.7663, Best_Val_AUC=0.7663
epoch= 2
Validating epoch: 3 Loss: 0.18850626051425934
Epoch=2, BatchID=274, Val_AUC=0.7653, Best_Val_AUC=0.7663
epoch= 3
Validating epoch: 4 Loss: 0.18569399416446686
Epoch=3, BatchID=274, Val_AUC=0.7722, Best_Val_AUC=0.7722
epoch= 4
Validating epoch: 5 Loss: 0.18701043725013733
Epoch=4, BatchID=274, Val_AUC=0.7742, Best_Val_AUC=0.7742
epoch= 5
Validating epoch: 6 Loss: 0.18936726450920105
Epoch=5, BatchID=274, Val_AUC=0.7822, Best_Val_AUC=0.7822
epoch= 6
Validating epoch: 7 Loss: 0.17394167184829712


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Epoch=6, BatchID=274, Val_AUC=0.7851, Best_Val_AUC=0.7851
epoch= 7
Validating epoch: 8 Loss: 0.18493129312992096
Epoch=7, BatchID=274, Val_AUC=0.7842, Best_Val_AUC=0.7851
epoch= 8
Validating epoch: 9 Loss: 0.17420141398906708


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/op

Epoch=8, BatchID=274, Val_AUC=0.7951, Best_Val_AUC=0.7951
epoch= 9
Validating epoch: 10 Loss: 0.17208394408226013
Epoch=9, BatchID=274, Val_AUC=0.8000, Best_Val_AUC=0.8000
epoch= 10
Validating epoch: 11 Loss: 0.1713232696056366


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/op

Epoch=10, BatchID=274, Val_AUC=0.7990, Best_Val_AUC=0.8000
epoch= 11
Validating epoch: 12 Loss: 0.16280314326286316


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/op

Epoch=11, BatchID=274, Val_AUC=0.8047, Best_Val_AUC=0.8047
epoch= 12
Validating epoch: 13 Loss: 0.15564578771591187


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Epoch=12, BatchID=274, Val_AUC=0.8046, Best_Val_AUC=0.8047
epoch= 13
Validating epoch: 14 Loss: 0.15669678151607513
Epoch=13, BatchID=274, Val_AUC=0.8110, Best_Val_AUC=0.8110
epoch= 14
Validating epoch: 15 Loss: 0.1546756774187088
Epoch=14, BatchID=274, Val_AUC=0.8141, Best_Val_AUC=0.8141
epoch= 15
Validating epoch: 16 Loss: 0.14047561585903168
Epoch=15, BatchID=274, Val_AUC=0.8257, Best_Val_AUC=0.8257
epoch= 16
Validating epoch: 17 Loss: 0.13880035281181335


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/op

Epoch=16, BatchID=274, Val_AUC=0.8252, Best_Val_AUC=0.8257
epoch= 17
Validating epoch: 18 Loss: 0.13368859887123108
Epoch=17, BatchID=274, Val_AUC=0.8280, Best_Val_AUC=0.8280
epoch= 18
Validating epoch: 19 Loss: 0.12919074296951294


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/op

Epoch=18, BatchID=274, Val_AUC=0.8284, Best_Val_AUC=0.8284
epoch= 19
Validating epoch: 20 Loss: 0.1380927413702011


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7caf430ce3b0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/op

Epoch=19, BatchID=274, Val_AUC=0.8335, Best_Val_AUC=0.8335


In [26]:
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [24]:
    print(f'Testing ...')  
    PATH = '/kaggle/working/ce_bmnist_pretrained_model.pth' 
    model_state_dict = torch.load(PATH)
    #model = MyModel()  # Create an instance of your model
    model.load_state_dict(model_state_dict)  # Load the saved parameters into the model
    with torch.no_grad():    
         test_pred = []
         test_true = [] 
         for jdx, (index, data, targets) in enumerate(test_loader):
             test_data, test_labels = data.to(device), targets.to(device)
             test_data=test_data.repeat(1,3,1,1)
#              test_labels=test_labels.float()
#              test_data = test_data.to(device)
             y_pred = model(test_data)
             test_pred.append(y_pred.cpu().detach().numpy())
             test_true.append(test_labels.cpu().detach().numpy())
     
         test_true = np.concatenate(test_true)
         test_pred = np.concatenate(test_pred)
         #test_pred = [1 if x > 0.5 else 0 for x in test_pred]
         test_auc_mean =  roc_auc_score(test_true, test_pred) 
        # model.train
        # if best_val_auc < val_auc_mean:
        #    best_val_auc = val_auc_mean
        #    #torch.save(model.state_dict(), 'ce_pretrained_model.pth')
         print('Test_AUC=%.4f'%( test_auc_mean))

Testing ...
Test_AUC=0.7342


In [27]:
from ignite.engine import *
from ignite.handlers import *
from ignite.metrics import *
from ignite.utils import *
from ignite.contrib.metrics.regression import *
from ignite.contrib.metrics import *
from torch.optim.lr_scheduler import ExponentialLR


# parameters
class_id = 1
num_classes=14
# paramaters
SEED = 123
BATCH_SIZE = 512 #[16, 32, 64, 128]
#imratio = train_dataset.imratio
lr =0.05#0.05# 0.04#0.05 # using smaller learning rate is better [0.001, 0.01, 0.05, 0.1]
epoch_decay = 2e-3#1e-4
weight_decay = 1e-5#1e-5
margin = 1.0

# model
set_all_seeds(SEED)
# model
model = models.resnet18(pretrained=False)
input_channel = model.fc.in_features
#for param in model.parameters():
    #param.requires_grad = False
model.fc = nn.Sequential(nn.Linear(input_channel, 256),
                         nn.ReLU(),
#                          nn.Dropout(p=0.2),
                         nn.Linear(256, 128),
                         nn.ReLU(),
#                          nn.Dropout(p=0.1),
                         nn.Linear(128, num_classes),
#                          nn.LogSoftmax()
                        )

model = model.to(device)

#model = models.resnet18(pretrained=False)
#
#model.fc = nn.Sequential(
#    nn.Linear(512, 1),
#    nn.Sigmoid()
#)
#model = model.cuda()


# load pretrained model
if True:
  PATH = '/kaggle/working/ce_bmnist_pretrained_model.pth' 
  state_dict = torch.load(PATH)
  state_dict.pop('classifier.weight', None)
  state_dict.pop('classifier.bias', None) 
  model.load_state_dict(state_dict, strict=False)


# define loss & optimizer
# loss_fn = AUCMLoss()
loss_fn = CompositionalAUCLoss()
optimizer = PESG(model,
                 loss_fn=loss_fn, 
                 lr=lr, 
                 margin=margin, 
                 epoch_decay=epoch_decay, 
                 weight_decay=weight_decay)
#optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
#lr_scheduler_opt = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
#optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
scheduler = ReduceLROnPlateau(optimizer, patience=3,  verbose=True, factor=0.5, 
                              threshold=0.0001, min_lr=1e-8, mode = 'max')

best_val_auc = 0
for epoch in range(20):
  print("epoch=",epoch)
  if epoch%10 == 0 and epoch > 0:
      optimizer.update_regularizer(decay_factor=10)
  #lr_scheduler_opt.step()
  
  for idx, (index, data, labels) in enumerate(train_loader):
      train_data, train_labels = data.to(device), labels.to(device)
      train_data=train_data.repeat(1,3,1,1)
#       train_labels=train_labels.float()

#       train_data, train_labels = train_data.to(device), train_labels.to(device)
      y_pred = model(train_data)
      #y_pred = torch.sigmoid(y_pred)
      #y_pred=torch.where(y_pred > 0.5, torch.tensor([1.], device=device), torch.tensor([0.], device=device))
      #y_pred = [1 if x > 0.5 else 0 for x in y_pred]
      #print(y_pred)
      loss = loss_fn(y_pred, train_labels.float())
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      # validation
  #if idx % 400 == 0:
  print(f'Validating epoch: {epoch+1} Loss: {loss.item()}')
  model.eval()
  val_auc = 0
  with torch.no_grad():    
        test_pred = []
        test_true = [] 
        for jdx, (index, data, targets) in enumerate(train_loader_at_eval):
            test_data, test_label = data.to(device), targets.to(device)
            test_data=test_data.repeat(1,3,1,1)
#             test_labels=test_labels.float()
#             test_data = test_data.to(device)
            y_pred = model(test_data)
            test_pred.append(y_pred.cpu().detach().numpy())
            test_true.append(test_label.cpu().detach().numpy())
        
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        #test_pred = [1 if x > 0.5 else 0 for x in test_pred]
        val_auc =  roc_auc_score(test_true, test_pred) 
        #model.train(
        if best_val_auc < val_auc:
           best_val_auc = val_auc
        #if epoch==99:
           torch.save(model, 'aucm_trained_model_chestmnist.pth')
  scheduler.step(val_auc)
  lr_scheduler.step()
  #scheduler.step()
  print ('Epoch=%s, BatchID=%s, Val_AUC=%.7f, lr=%.15f'%(epoch, idx, val_auc,  optimizer.lr))
  #for param_group in optimizer.param_groups:
   #     print('Epoch {}, LR: {}'.format(epoch, param_group['lr']))
print ('Best Val_AUC is %.4f'%best_val_auc)

        

epoch= 0
Validating epoch: 1 Loss: 0.24280396103858948
Epoch=0, BatchID=274, Val_AUC=0.7183754, lr=0.050000000000000
epoch= 1
Validating epoch: 2 Loss: -0.23712235689163208
Epoch=1, BatchID=274, Val_AUC=0.7300261, lr=0.050000000000000
epoch= 2
Validating epoch: 3 Loss: 0.2758781313896179
Epoch=2, BatchID=274, Val_AUC=0.7330958, lr=0.050000000000000
epoch= 3
Validating epoch: 4 Loss: -0.11821025609970093
Epoch=3, BatchID=274, Val_AUC=0.7329857, lr=0.050000000000000
epoch= 4
Validating epoch: 5 Loss: 0.29243895411491394
Epoch=4, BatchID=274, Val_AUC=0.7318068, lr=0.050000000000000
epoch= 5
Validating epoch: 6 Loss: 0.5657498836517334
Epoch=5, BatchID=274, Val_AUC=0.7307633, lr=0.050000000000000
epoch= 6
Validating epoch: 7 Loss: 0.37884604930877686
Epoch 00007: reducing learning rate of group 0 to 2.5000e-02.
Epoch=6, BatchID=274, Val_AUC=0.7256421, lr=0.050000000000000
epoch= 7
Validating epoch: 8 Loss: 2.535165309906006
Epoch=7, BatchID=274, Val_AUC=0.7252152, lr=0.025000000000000
epoc

In [28]:
print(f'Testing ...')  
PATH = '/kaggle/working/aucm_trained_model_chestmnist.pth' 
model = torch.load(PATH)
#model = MyModel()  # Create an instance of your model
##model=torch.load(model_dumped)  # Load the saved parameters into the model
best_val_auc=0.0
with torch.no_grad():    
     test_pred = []
     test_true = [] 
     for jdx, (index, data, targets) in enumerate(test_loader):
         test_data, test_labels = data.to(device), targets.to(device)
         test_data=test_data.repeat(1,3,1,1)
#          test_labels=test_labels.float()
#          test_data = test_data.to(device)
         y_pred = model(test_data)
         test_pred.append(y_pred.cpu().detach().numpy())
         test_true.append(test_labels.cpu().detach().numpy())
 
     test_true = np.concatenate(test_true)
     test_pred = np.concatenate(test_pred)
     #test_pred = [1 if x > 0.5 else 0 for x in test_pred]
     #print(test_pred)
     #print(test_true)
     val_auc_mean =  roc_auc_score((test_true), test_pred) 
    # model.train
    # if best_val_auc < val_auc_mean:
    #    best_val_auc = val_auc_mean
    #    #torch.save(model.state_dict(), 'ce_pretrained_model.pth')
     print('Test_AUC=%.7f'%( val_auc_mean))

Testing ...
Test_AUC=0.7274890
