<a href="https://colab.research.google.com/github/mveerara/Teleradiology/blob/main/modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os, sys
import pickle
from collections import defaultdict
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
from sklearn.metrics import roc_auc_score
import torch.optim as optim
import matplotlib.pyplot as plt

In [None]:
from google.colab import drive

In [None]:
# Restrict CUDA to device index 0 for this process.
# NOTE(review): presumably this has to run before the first CUDA call to take effect -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [None]:
# Mount Google Drive at /MyDrive; the dataset paths used in this notebook live under /MyDrive/MyDrive/.
drive.mount('/MyDrive')

Mounted at /MyDrive


In [None]:
def compute_AUCs(gt, pred, n_classes=None):
    """Compute one ROC AUC per class column.

    Args:
        gt: tensor of ground-truth labels; column ``i`` holds the targets of class ``i``.
        pred: tensor of predicted scores, indexed the same way as ``gt``.
        n_classes: number of leading columns to score. When omitted, the
            module-level ``N_CLASSES`` is used, so existing two-argument
            calls behave exactly as before.

    Returns:
        A list with the value returned by ``roc_auc_score`` for each class.

    Raises:
        Whatever ``roc_auc_score`` raises for a column it cannot score
        (scikit-learn documents a ``ValueError`` when only one class is present).
    """
    if n_classes is None:
        n_classes = N_CLASSES
    # roc_auc_score works on CPU numpy arrays, so move both tensors off the GPU once.
    gt_np = gt.cpu().numpy()
    pred_np = pred.cpu().numpy()
    return [roc_auc_score(gt_np[:, i], pred_np[:, i]) for i in range(n_classes)]

In [None]:
# List the dataset output directory on the mounted Drive.
!ls /MyDrive/MyDrive/Teleradiology/Example_Program/example1/Dataset/output

 bounding_box.txt
'Copy of valid_X_small.npy'
 DenseNet121_aug4_pretrain_noWeight_10_0.5324519643951093.pkl
 DenseNet121_aug4_pretrain_noWeight_3_0.5509544913986415.pkl
 heatmap.npy
 label_encoder.pkl
 train_X_small.npy
 train_y_onehot.pkl
 valid_heatmap
 valid_X_small.npy
 valid_y_onehot.pkl


In [None]:
# ====== prepare dataset ======
class ChestXrayDataSet(Dataset):
    """Chest X-ray arrays and multi-hot labels loaded from ``.npy`` / ``.pkl`` files.

    Each item is ``(image, label, weight)``. For every class, ``weight`` is the
    fraction of samples in the split that carry the *opposite* label value, so
    the rarer outcome of a class receives the larger weight.
    """

    # Directory used when the caller does not pass ``data_path``.
    DEFAULT_DATA_PATH = '/MyDrive/MyDrive/Teleradiology/Example_Program/example1/Dataset/output/'

    def __init__(self, train_or_valid = "train", transform=None, data_path=None):
        """Load one split into memory.

        Args:
            train_or_valid: ``"train"`` loads the training files and drops
                samples without any positive label; any other value loads
                the validation files unfiltered.
            transform: optional callable applied to the 3x-tiled image array
                in ``__getitem__``.
            data_path: directory containing ``<split>_X_small.npy`` and
                ``<split>_y_onehot.pkl``; defaults to ``DEFAULT_DATA_PATH``.
        """
        if data_path is None:
            data_path = self.DEFAULT_DATA_PATH
        self.train_or_valid = train_or_valid
        split = "train" if train_or_valid == "train" else "valid"

        # NOTE(review): the *255*255 rescale followed by a uint8 cast only stays in
        # range if the stored values are at most 1/255 -- confirm the on-disk scale.
        self.X = np.uint8(np.load(os.path.join(data_path, split + "_X_small.npy"))*255*255)
        # SECURITY: pickle.load can execute arbitrary code; only load trusted files.
        with open(os.path.join(data_path, split + "_y_onehot.pkl"), "rb") as f:
            self.y = pickle.load(f)

        if split == "train":
            # Keep only training samples that have at least one positive label.
            sub_bool = (self.y.sum(axis=1)!=0)
            self.y = self.y[sub_bool,:]
            self.X = self.X[sub_bool,:]

        n_samples = len(self.y)
        positives_per_class = self.y.sum(axis=0)
        # Weight applied where the label is 1: share of negatives for that class.
        self.label_weight_pos = (n_samples - positives_per_class) / n_samples
        # Weight applied where the label is 0: share of positives for that class.
        self.label_weight_neg = positives_per_class / n_samples
        self.transform = transform

    def __getitem__(self, index):
        """
        Args:
            index: the index of item
        Returns:
            image, its labels and the per-class weights. The image is the
            output of ``transform`` when one was given, otherwise the tiled
            numpy array itself.
        """
        # Repeat the stored array 3 times along its last axis.
        # NOTE(review): assumes a trailing channel axis of size 1 so this yields 3 channels -- confirm.
        current_X = np.tile(self.X[index], 3)
        label = self.y[index]
        label_inverse = 1 - label
        weight = np.add((label_inverse * self.label_weight_neg), (label * self.label_weight_pos))
        # Previously `image` was unbound when no transform was supplied.
        image = current_X if self.transform is None else self.transform(current_X)
        return image, torch.from_numpy(label).type(torch.FloatTensor), torch.from_numpy(weight).type(torch.FloatTensor)

    def __len__(self):
        """Number of samples in the loaded split."""
        return len(self.y)

In [None]:
# construct model
class DenseNet121(nn.Module):
    """DenseNet-121 backbone with a sigmoid multi-label head.

    The architecture is the standard torchvision DenseNet121 except for the
    classifier, which is replaced by ``Linear -> Sigmoid`` so that every
    output is an independent probability in (0, 1), as ``nn.BCELoss`` expects.

    The head used to apply ReLU and Dropout(0.6) to the class outputs before
    the sigmoid. ReLU made every output >= 0.5, and dropout randomly zeroed
    class scores, so those layers are removed. The Linear layer keeps index 0
    in the Sequential, so the parameter names of the head are unchanged.
    """
    def __init__(self, out_size):
        """
        Args:
            out_size: number of output classes.
        """
        super(DenseNet121, self).__init__()
        self.densenet121 = torchvision.models.densenet121(pretrained=True)
        num_ftrs = self.densenet121.classifier.in_features
        self.densenet121.classifier = nn.Sequential(
            nn.Linear(num_ftrs, out_size),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return the per-class sigmoid outputs for the image batch ``x``."""
        return self.densenet121(x)


In [None]:
if __name__ == '__main__':
    # Training split: every access draws a fresh random 224x224 crop and a
    # random horizontal flip before tensor conversion and normalisation.
    print('preparing training data......')
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    train_dataset = ChestXrayDataSet(train_or_valid="train", transform=train_transform)

    # Hold 8 randomly augmented passes over the training split in memory.
    augment_img, augment_label, augment_weight = [], [], []
    n_train = len(train_dataset)
    for _pass in range(8):
        for position in range(n_train):
            img, lab, wgt = train_dataset[position]
            augment_img.append(img)
            augment_label.append(lab)
            augment_weight.append(wgt)
            # Progress marker every 1000 samples.
            if not position % 1000:
                print(position)
#train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=False, num_workers=2)#{DataLoader(dataset=train_dataset, batch_size=64, shuffle=True,num_workers=2) for x in range(len(train_dataset))}
#print(train_loader)
#train_images, train_labels = next(iter(enumerate(train_loader)))
#print(train_labels)
#rand_idx = np.random.randint(len(train_dataset))
#class_names = train_dataset.classes
#print("label: {}".format(labels[rand_idx].item()))

#dataset_sizes = {x: len(train_dataset[x]) for x in }
   
# shuffle data
print('shuffling data......')
# One shared permutation keeps images, labels and weights aligned while the
# per-sample lists are stacked into single tensors.
perm_index = torch.randperm(len(augment_label))
augment_img = torch.stack(augment_img).index_select(0, perm_index)
augment_label = torch.stack(augment_label).index_select(0, perm_index)
augment_weight = torch.stack(augment_weight).index_select(0, perm_index)

 # prepare validation set
print('preparing test data ......')
# Validation images use a fixed centre crop instead of random augmentation.
valid_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
valid_dataset = ChestXrayDataSet(train_or_valid="valid", transform=valid_transform)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False, num_workers=2)
#valid_images, valid_labels = next(iter(enumerate(valid_loader)))
#print(valid_labels)
#rand_idx1 = np.random.randint(len(valid_dataset))
#class_names1 = valid_dataset.classes
#print("label: {}".format(labels[rand_idx1].item()))

 # ====== start training =======
print('start training .....')

# Run configuration.
N_CLASSES, BATCH_SIZE = 8, 64
cudnn.benchmark = True

# Build the network on the GPU and wrap it for data-parallel execution.
print('initialize and load model ....')
model = torch.nn.DataParallel(DenseNet121(N_CLASSES).cuda()).cuda()

optimizer = optim.Adam(model.parameters(), betas=(0.9, 0.999), lr=0.0002)
total_length = len(augment_img)
 # Run thru epochs
# Binary cross-entropy over the per-class sigmoid outputs; built once, not per batch.
criterion = nn.BCELoss()

for epoch in range(10):  # loop over the dataset multiple times
    print("Epoch:", epoch)
    model.train()
    running_loss = 0.0       # sum of the per-batch mean losses
    running_corrects = 0.0   # number of label entries predicted correctly
    n_batches = 0
    n_label_entries = 0

    # Reshuffle the three tensors with one shared permutation.
    perm_index = torch.randperm(len(augment_label))
    augment_img = augment_img[perm_index]
    augment_label = augment_label[perm_index]
    augment_weight = augment_weight[perm_index]

    for index in range(0, total_length, BATCH_SIZE):
        # Drop the trailing partial batch.
        if index + BATCH_SIZE > total_length:
            break

        # The whole optimisation step belongs inside the batch loop; it used to
        # run only once per epoch because it was indented at the epoch level.
        optimizer.zero_grad()
        inputs_sub = augment_img[index:index + BATCH_SIZE].cuda()
        labels_sub = augment_label[index:index + BATCH_SIZE].cuda()

        # forward + backward + optimize
        outputs = model(inputs_sub)
        loss = criterion(outputs, labels_sub)  # labels_sub is a tensor, not a callable
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        n_batches += 1

        # Multi-label accuracy: threshold each class output at 0.5 and compare
        # element-wise (an argmax over classes does not match multi-hot targets).
        preds = (outputs.detach() > 0.5).float()
        running_corrects += (preds == labels_sub).sum().item()
        n_label_entries += labels_sub.numel()

    epoch_loss = running_loss / max(n_batches, 1)
    epoch_acc = running_corrects / max(n_label_entries, 1)
    print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
 # switch to evaluate mode
# NOTE(review): this replaces the weights trained above with a previously saved
# checkpoint before evaluating -- confirm that is intended.
# load_state_dict() updates the module in place; its return value is a key
# report, not the model, so it must not be assigned back to `model`.
model.load_state_dict(torch.load("/MyDrive/MyDrive/Teleradiology/Example_Program/example1/Dataset/output/DenseNet121_aug4_pretrain_noWeight_10_0.5324519643951093.pkl"))
model.eval()

CLASS_NAMES = ['Atelectasis','Cardiomegaly','Effusion','Infiltration','Mass','Nodule','Pneumonia','Pneumothorax']

# Collect ground truth and predictions for the whole validation set first.
gt_batches = []
pred_batches = []
with torch.no_grad():  # replaces the removed `volatile=True` flag
    for inp, target, weight in valid_loader:
        gt_batches.append(target.cuda())
        pred_batches.append(model(inp.view(-1, 3, 224, 224).cuda()))
gt = torch.cat(gt_batches, 0)
pred = torch.cat(pred_batches, 0)

# AUROC is computed once over the full set; scoring after every batch ran on
# partial data and re-saved the checkpoint each time.
AUROCs = compute_AUCs(gt, pred)
AUROC_avg = np.array(AUROCs).mean()
print('The average AUROC is {AUROC_avg:.3f}'.format(AUROC_avg=AUROC_avg))
for class_name, class_auroc in zip(CLASS_NAMES, AUROCs):
    print('The AUROC of {} is {}'.format(class_name, class_auroc))

model.train()
# print statistics
# NOTE(review): 715 is presumably a batches-per-epoch count from another setup -- confirm.
print('[%d] loss: %.3f' % (epoch + 1, running_loss / 715 ))
torch.save(model.state_dict(),'/MyDrive/MyDrive/Teleradiology/Example_Program/example1/Dataset/output/DenseNet121_aug4_pretrain_noWeight_'+str(epoch+1)+'_'+str(AUROC_avg)+'.pkl')

print('Finished Training')













    
	


preparing training data......
0
0
0
0
0
0
0
0
shuffling data......
preparing test data ......
start training .....
initialize and load model ....
Epoch: 0


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


TypeError: ignored

In [None]:
if __name__ == '__main__':
    # Training split with random-crop / random-flip augmentation applied on access.
    print('preparing training data......')
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    train_dataset = ChestXrayDataSet(train_or_valid="train", transform=train_transform)
 #print(train_dataset)
 #train_dataset_array = np.array(train_dataset)

preparing training data......


In [None]:
# Hold 8 randomly augmented passes over the training split in memory.
augment_img, augment_label, augment_weight = [], [], []
n_train = len(train_dataset)
for _pass in range(8):
    for position in range(n_train):
        img, lab, wgt = train_dataset[position]
        augment_img.append(img)
        augment_label.append(lab)
        augment_weight.append(wgt)
        # Progress marker every 1000 samples.
        if not position % 1000:
            print(position)



0
0
0
0
0
0
0
0


In [None]:
 # shuffle data
# Stack each per-sample list into one tensor and reorder all three with the
# same permutation so rows stay aligned.
perm_index = torch.randperm(len(augment_label))
augment_img = torch.index_select(torch.stack(augment_img), 0, perm_index)
augment_label = torch.index_select(torch.stack(augment_label), 0, perm_index)
augment_weight = torch.index_select(torch.stack(augment_weight), 0, perm_index)


In [None]:
import torch, gc

# Run a garbage-collection pass, then ask PyTorch to release cached GPU memory it is not using.
gc.collect()
torch.cuda.empty_cache()

In [None]:

# Validation split: fixed centre crop, no random augmentation.
valid_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
valid_dataset = ChestXrayDataSet(train_or_valid="valid", transform=valid_transform)



In [None]:
# Batch the validation split in its stored order (no shuffling), 128 samples at a time.
valid_loader = DataLoader(dataset=valid_dataset, batch_size=128, shuffle=False)


In [None]:
# ====== start training =======

# Enable cuDNN's benchmark mode.
cudnn.benchmark = True
# Number of outputs the model is built with.
N_CLASSES = 8
# Number of samples per training step.
BATCH_SIZE = 128



In [None]:
import torch, gc

# Collect garbage and release unused cached GPU memory before building the model.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Build the network on the GPU and wrap it for data-parallel execution.
model = torch.nn.DataParallel(DenseNet121(N_CLASSES).cuda()).cuda()

# Adam over every model parameter; the loop bound is the number of augmented samples.
optimizer = optim.Adam(model.parameters(), betas=(0.9, 0.999), lr=0.0002)
total_length = len(augment_img)


In [None]:
import torch, gc

# Collect garbage and release unused cached GPU memory before the training loop.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Binary cross-entropy over the per-class sigmoid outputs; built once, not per batch.
criterion = nn.BCELoss()

for epoch in range(10):  # loop over the dataset multiple times
    print("Epoch:", epoch)
    # Make sure dropout / batch-norm are in training mode, including when this
    # cell is re-run after an evaluation pass.
    model.train()
    running_loss = 0.0   # sum of the per-batch mean losses, kept as a Python float
    n_batches = 0

    # Reshuffle the three tensors with one shared permutation.
    perm_index = torch.randperm(len(augment_label))
    augment_img = augment_img[perm_index]
    augment_label = augment_label[perm_index]
    augment_weight = augment_weight[perm_index]

    for index in range(0, total_length, BATCH_SIZE):
        # Drop the trailing partial batch.
        if index + BATCH_SIZE > total_length:
            break

        # zero the parameter gradients
        optimizer.zero_grad()
        inputs_sub = augment_img[index:index + BATCH_SIZE].cuda()
        labels_sub = augment_label[index:index + BATCH_SIZE].cuda()

        # forward + backward + optimize
        outputs = model(inputs_sub)
        loss = criterion(outputs, labels_sub)
        loss.backward()
        optimizer.step()

        # .item() copies the scalar to the host so no GPU tensor is accumulated.
        running_loss += loss.item()
        n_batches += 1

    epoch_loss = running_loss / max(n_batches, 1)
    print('Loss: {:.4f}'.format(epoch_loss))
    

 

Epoch: 0


RuntimeError: ignored

In [None]:
import torch, gc

# Collect garbage and release unused cached GPU memory before evaluation.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# switch to evaluate mode
#model.load_state_dict(torch.load("/MyDrive/MyDrive/Teleradiology/Example_Program/example1/Dataset/output/DenseNet121_aug4_pretrain_noWeight_10_0.5324519643951093.pkl"))
model.eval()

CLASS_NAMES = ['Atelectasis','Cardiomegaly','Effusion','Infiltration','Mass','Nodule','Pneumonia','Pneumothorax']

# Collect ground truth and predictions for the whole validation set first.
# no_grad() stops autograd from recording the forward passes.
gt_batches = []
pred_batches = []
with torch.no_grad():
    for inp, target, weight in valid_loader:
        gt_batches.append(target.cuda())
        pred_batches.append(model(inp.view(-1, 3, 224, 224).cuda()))
gt = torch.cat(gt_batches, 0)
pred = torch.cat(pred_batches, 0)

# AUROC is computed once over the full set; scoring after every batch ran on
# partial data and re-saved the checkpoint each time.
AUROCs = compute_AUCs(gt, pred)
AUROC_avg = np.array(AUROCs).mean()
print('The average AUROC is {AUROC_avg:.3f}'.format(AUROC_avg=AUROC_avg))
for class_name, class_auroc in zip(CLASS_NAMES, AUROCs):
    print('The AUROC of {} is {}'.format(class_name, class_auroc))

model.train()
# print statistics
# NOTE(review): 715 is presumably a batches-per-epoch count from another setup -- confirm.
print('[%d] loss: %.3f' % (epoch + 1, running_loss / 715 ))
torch.save(model.state_dict(),'/MyDrive/MyDrive/Teleradiology/Example_Program/example1/Dataset/output/DenseNet121_aug4_pretrain_noWeight_'+str(epoch+1)+'_'+str(AUROC_avg)+'.pkl')

print('Finished Training')

RuntimeError: ignored