In [1]:
import pandas as pd
import numpy as np
import torch
import torchvision 
from torch import nn
from torch import optim
from torchvision import transforms, models
import matplotlib.pyplot as plt
from utils import Utility
from PIL import Image, ImageOps
import random
from sklearn import metrics
import tensorflow as tf
from typing import Callable, Dict, List, Tuple, Union

In [2]:
train_images, train_data = Utility.read_dataset("train","2020")
test_images, test_data = Utility.read_dataset("val","2020")

train data size= 4500
val data size= 500


In [3]:
train_data['question_parsed'], train_data['answer_parsed'] = Utility.clean(train_data)
test_data['question_parsed'], test_data['answer_parsed'] = Utility.clean(test_data)

  df = df.apply(lambda x: x.str.replace("("," "))
  df = df.apply(lambda x: x.str.replace(")"," "))
  df = df.apply(lambda x: x.str.replace('\\'," "))
  df = df.apply(lambda x: x.str.replace('?',''))


In [None]:
train_images[0]

In [4]:
train_data['image_name'] = train_images
test_data['image_name'] = test_images

train_data.head()

Unnamed: 0,image_name,question,answer,question_parsed,answer_parsed
0,Data/train_2020/train_images/synpic25788.jpg,is this a normal gastrointestinal image?,yes,is this a normal gastrointestinal image,yes
1,Data/train_2020/train_images/synpic36500.jpg,is the x-ray normal?,yes,is the x ray normal,yes
2,Data/train_2020/train_images/synpic24022.jpg,is this image normal?,yes,is this image normal,yes
3,Data/train_2020/train_images/synpic25624.jpg,is there an abnormality in the x-ray?,no,is there an abnormality in the x ray,no
4,Data/train_2020/train_images/synpic35678.jpg,is there evidence of any abnormalities?,no,is there evidence of any abnormalities,no


In [None]:
test_data.head()

#### Dividing train set into train and validation

In [5]:
from sklearn.model_selection import train_test_split
train_data, val_data = train_test_split(train_data, test_size=500, random_state=42)

In [6]:
class_names = sorted(set(train_data['answer_parsed']))
print(len(class_names))

332


In [7]:
len(train_data)

4000

In [8]:
class_dict = { i : class_names[i] for i in range(0, len(class_names) ) }
class_dict = {y:x for x,y in class_dict.items()}

In [9]:
class_names_val = sorted(set(val_data['answer_parsed']))
print(len(class_names_val))

240


In [10]:
len(val_data)

500

In [11]:
class_names_test = sorted(set(test_data['answer_parsed']))
print(len(class_names_test))

236


In [12]:
len(test_data)

500

In [13]:
train = set(class_names)
val = set(class_names_val)
test = set(class_names_test)

In [14]:
#all class names in val are present in train
len(train.intersection(val))

240

In [15]:
#all class names in test are present in train
len(train.intersection(test))

236

In [16]:
all(c in train for c in val)

True

In [17]:
all(c in train for c in test)

True

In [18]:
class MedVQA(torch.utils.data.Dataset):
    def __init__(self, mode, data, class_dict, class_names_val,class_names_test,transform = None):
        #these class_dict and class_names_val may differ, there may be unseen classes in class_names_val
        def get_images(data, class_name):
                temp_df = data[data['answer_parsed'] == class_name]
                images = [x for x in temp_df['image_name']]
                #print(f'Found {len(images)} {class_name} examples')
                return images
            
        self.df = data
        self.class_names = list(class_dict.keys())
        self.class_names_val=class_names_val
        self.class_names_test = class_names_test
        self.mode= mode
        self.class_dict=class_dict
        #print(self.class_names)
        self.images = {} #this is the dictionary where key is answer and value is a list of all images for that answer

        if self.mode == 'train':
            
              for class_name in self.class_names:
                self.images[class_name] = get_images(data, class_name)
        elif self.mode == 'val':
              for class_name in self.class_names_val:
                self.images[class_name] = get_images(data, class_name)
        else:
            for class_name in self.class_names_test:
                self.images[class_name] = get_images(data, class_name)

        self.transform = transform    

    def __getitem__(self, index):
        
        if self.mode == 'train':
            class_name = random.choice(self.class_names)
        elif self.mode == 'val':
            class_name = random.choice(self.class_names_val)
        else:
            class_name = random.choice(self.class_names_test)

        index = index % len(self.images[class_name])
        image_path = self.images[class_name][index]
        image = Image.open(image_path).convert('RGB')
        question = self.df[self.df['image_name']==image_path]['question_parsed'].values
        answer = self.df[self.df['image_name']==image_path]['answer_parsed'].values
        #label = self.class_names.index(class_name)
        label = self.class_dict[class_name]
        if self.transform:
            image = self.transform(image)

        return image, question[0], answer[0], label

    def __len__(self):
        return len(self.df)

In [19]:
class XRayTransform:
    """XRayTransform base class."""

    def __repr__(self):
        return "XRayTransform: {}".format(self.__class__.__name__)

class HistogramNormalize(XRayTransform):
    """
    Apply histogram normalization.
    Args:
        number_bins: Number of bins to use in histogram.
    """

    def __init__(self, number_bins: int = 256):
        self.number_bins = number_bins

    def __call__(self, sample: Dict) -> Dict:
        image = sample.numpy()

        # get image histogram, bins is the list of bin edges, density=True gives the value of pds, such that integral is 1
        image_histogram, bins = np.histogram(
            image.flatten(), self.number_bins, density=True
        )
        cdf = image_histogram.cumsum()  # cumulative distribution function
        cdf = 255 * cdf / cdf[-1]  # normalize
        #cdf[-1] here is the total sum
        # use linear interpolation of cdf to find new pixel values
        image_equalized = np.interp(image.flatten(), bins[:-1], cdf)
        image_equalized.reshape(image.shape)

        sample = torch.tensor(image_equalized.reshape(image.shape)).to(
            sample
        )

        return sample

class TensorToRGB(XRayTransform):
    """
    Convert Tensor to RGB by replicating channels.
    Args:
        num_output_channels: Number of output channels (3 for RGB).
    """

    def __init__(self, num_output_channels: int = 3):
        self.num_output_channels = num_output_channels

    def __call__(self, sample: Dict) -> Dict:
        expands = list()
        for i in range(sample.ndim):
            if i == 0:
                expands.append(self.num_output_channels)
            else:
                expands.append(-1)
        sample = sample.expand(*expands)

        return sample

In [20]:
data_transforms = {
    'train': transforms.Compose([transforms.Resize(size=(224,224)),
                                 transforms.CenterCrop(size=(224,224)),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.RandomVerticalFlip(),
                                 transforms.RandomRotation(10),
                                 transforms.ToTensor(),
                                  transforms.Normalize(mean = np.array([0.2577, 0.2578, 0.2578]),
                                  std=np.array([0.2206, 0.2206, 0.2205])),
                                 #HistogramNormalize(),
                                 #TensorToRGB()

    ]),
    'test': transforms.Compose([transforms.Resize(size=(224,224)),
                                #transforms.CenterCrop(size=(224,224)),
                                transforms.ToTensor(),
                                transforms.Normalize(mean = [0.2577, 0.2578, 0.2578],
                                std=[0.2206, 0.2206, 0.2205]),
#                                 HistogramNormalize(),
#                                 TensorToRGB()    
    ])

}
train_dataset = MedVQA('train', train_data, class_dict, class_names_val,class_names_test, data_transforms['train'])
val_dataset = MedVQA('val', val_data, class_dict, class_names_val,class_names_test, data_transforms['test'])
test_dataset = MedVQA('test', test_data, class_dict,  class_names_val,class_names_test,data_transforms['test'])

In [21]:
batch_size = 32

dl_train = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dl_val = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
dl_test = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

print('Number of training batches', len(dl_train))
print('Number of training batches', len(dl_val))
print('Number of test batches', len(dl_test))

Number of training batches 125
Number of training batches 16
Number of test batches 16


In [22]:
class_names = list(class_dict.keys())

def show_images(images,questions, labels, preds):
    plt.figure(figsize=(15,12))
    for i, image in enumerate(images):
        plt.subplot(2, 3, i + 1, xticks=[], yticks=[])
        image = (image.numpy()*255).transpose((1, 2, 0))
        #mean = np.array([0.485, 0.456, 0.406])
        #std = np.array([0.229, 0.224, 0.225])
        #image = image * std + mean
        image = np.clip(image, 0., 1.)
        plt.imshow(image)
        
        col = 'green' if preds[i] == labels[i] else 'red'
        
        plt.title(f'{questions[i]}', fontsize = 10)
        plt.xlabel(f'{labels[i]}', fontsize = 10)
        plt.ylabel(f'{preds[i]}', color = col, fontsize = 10)
        
    plt.tight_layout()
    plt.show()

#### Loading pretrained weights
moco_model is the name of our model, you can substitute it with your own

In [23]:
pretrained_dict = torch.load('moco_model.ckpt')['state_dict']

In [24]:
state_dict = {}
for k, v in pretrained_dict.items():
    if k.startswith("model.encoder_q."):
        k = k.replace("model.encoder_q.", "")
        state_dict[k] = v

In [26]:
del state_dict['classifier.0.weight']

In [27]:
del state_dict['classifier.0.bias']

In [28]:
state_dict = {"classifier.weight" if key == 'classifier.2.weight' else key:value for key, value in state_dict.items()}


In [29]:
state_dict = {"classifier.bias" if key == 'classifier.2.bias' else key:value for key, value in state_dict.items()}


In [30]:
if "model.encoder_q.classifier.2.weight" in pretrained_dict.keys():
    feature_dim = pretrained_dict[
        "model.encoder_q.classifier.2.weight"
    ].shape[0]
    in_features = pretrained_dict[
        "model.encoder_q.classifier.2.weight"
    ].shape[1]

    model = torchvision.models.__dict__['densenet121'](num_classes = feature_dim)
    model.load_state_dict(state_dict)
    del model.classifier
    model.add_module(
         "classifier", nn.Linear(in_features, 512)
     )
    model.classifier = nn.Sequential(model.classifier,
                                     nn.BatchNorm1d(512),
                                     nn.ReLU(inplace=True),
                                     nn.Linear(512,332))
else:
    raise RuntimeError("Unrecognized classifier.")

In [31]:
print(model)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [200]:
#freezing everything except final classifier layer
c = 0
for child in model.children():
    if c < 1:
        for param in child.parameters():
            param.requires_grad = False
    c += 1

In [32]:
for name, param in model.named_parameters():
    print(name, ':', param.requires_grad)

features.conv0.weight : True
features.norm0.weight : True
features.norm0.bias : True
features.denseblock1.denselayer1.norm1.weight : True
features.denseblock1.denselayer1.norm1.bias : True
features.denseblock1.denselayer1.conv1.weight : True
features.denseblock1.denselayer1.norm2.weight : True
features.denseblock1.denselayer1.norm2.bias : True
features.denseblock1.denselayer1.conv2.weight : True
features.denseblock1.denselayer2.norm1.weight : True
features.denseblock1.denselayer2.norm1.bias : True
features.denseblock1.denselayer2.conv1.weight : True
features.denseblock1.denselayer2.norm2.weight : True
features.denseblock1.denselayer2.norm2.bias : True
features.denseblock1.denselayer2.conv2.weight : True
features.denseblock1.denselayer3.norm1.weight : True
features.denseblock1.denselayer3.norm1.bias : True
features.denseblock1.denselayer3.conv1.weight : True
features.denseblock1.denselayer3.norm2.weight : True
features.denseblock1.denselayer3.norm2.bias : True
features.denseblock1.dense

In [33]:
if torch.cuda.is_available():
    device = torch.device('cuda')
print(device)
model = model.to(device)
loss_fn = torch.nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)
optimizer = torch.optim.Adam(model.classifier.parameters(), lr=0.01)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 5)

cuda


In [215]:
def show_preds():
    model.eval()
    images,questions,_ ,labels = next(iter(dl_test))
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)

    
    images = images.to('cpu')
    labels = labels.to('cpu')
    _, preds = torch.max(outputs, 1)
    show_images(images[:6], questions, labels, preds)

In [34]:
best_model = model

def train(epochs):
    print('Starting training..')
    best_accuracy = 0
    accuracy_list = []
    train_loss_list = []
    val_loss_list = []
    for e in range(0, epochs):
        print('='*16)
        print(f'Starting epoch {e + 1}/{epochs}')
        print('='*16)

        train_loss = 0.
        val_loss = 0.
        
        accuracy_avg = []
        val_loss_avg = []
        model.train() # set model to training phase

        for train_step, (images, questions,_, labels) in enumerate(dl_train):


            images = images.to(device)
            labels = labels.to(device)
          
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            #2scheduler.step()
            
            train_loss += loss.item()
            if train_step % 16 == 0:
                print('Evaluating at step', train_step)

                accuracy = 0
                with torch.no_grad():
                    

                    model.eval() # set model to eval phase

                    for val_step, (images, question, _,  labels) in enumerate(dl_val):

                        images = images.to(device)
                        labels = labels.to(device)

                        outputs = model(images)
                        loss = loss_fn(outputs, labels)
                        val_loss += loss.item()

                        _, preds = torch.max(outputs, 1)
                        
                        accuracy += sum((preds == labels))

                    val_loss /= (val_step + 1)
                    accuracy = accuracy/len(val_dataset)
                    accuracy_avg.append(round(accuracy.item(),4))
                    val_loss_avg.append(round(val_loss,4))
                    if (accuracy > best_accuracy):
                        best_model=model
                        best_accuracy = accuracy
                    print(f'Validation Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}')


                model.train()

                if accuracy >= 0.95:
                    print('Performance condition satisfied, stopping..')
                    return

        train_loss /= (train_step + 1)
        accuracy_list.append(np.mean(accuracy_avg))
        train_loss_list.append(round(train_loss,4))
        val_loss_list.append(np.mean(val_loss_avg))
        print(f'Training Loss: {train_loss:.4f}')
    print('Training complete..')
    return accuracy_list, val_loss_list, train_loss_list

In [35]:
%%time

accuracy_list, val_loss_list, train_loss_list = train(epochs=15)

Starting training..
Starting epoch 1/15
Evaluating at step 0
Validation Loss: 5.9964, Accuracy: 0.0040
Evaluating at step 16
Validation Loss: 7.5351, Accuracy: 0.0020
Evaluating at step 32
Validation Loss: 7.0550, Accuracy: 0.0120
Evaluating at step 48
Validation Loss: 6.5140, Accuracy: 0.0180
Evaluating at step 64
Validation Loss: 6.0393, Accuracy: 0.0320
Evaluating at step 80
Validation Loss: 5.5493, Accuracy: 0.0820
Evaluating at step 96
Validation Loss: 5.1012, Accuracy: 0.1440
Evaluating at step 112
Validation Loss: 5.2353, Accuracy: 0.0920
Training Loss: 5.0981
Starting epoch 2/15
Evaluating at step 0
Validation Loss: 4.7445, Accuracy: 0.1420
Evaluating at step 16
Validation Loss: 4.9285, Accuracy: 0.1460
Evaluating at step 32
Validation Loss: 5.1843, Accuracy: 0.0860
Evaluating at step 48
Validation Loss: 5.1870, Accuracy: 0.1520
Evaluating at step 64
Validation Loss: 5.0939, Accuracy: 0.1840
Evaluating at step 80
Validation Loss: 4.8962, Accuracy: 0.1940
Evaluating at step 96
V

Validation Loss: 6.2138, Accuracy: 0.3220
Training Loss: 0.6379
Starting epoch 15/15
Evaluating at step 0
Validation Loss: 5.3428, Accuracy: 0.3040
Evaluating at step 16
Validation Loss: 6.2661, Accuracy: 0.3000
Evaluating at step 32
Validation Loss: 6.2808, Accuracy: 0.3100
Evaluating at step 48
Validation Loss: 5.9231, Accuracy: 0.3600
Evaluating at step 64
Validation Loss: 5.5888, Accuracy: 0.3180
Evaluating at step 80
Validation Loss: 5.4860, Accuracy: 0.3660
Evaluating at step 96
Validation Loss: 5.4858, Accuracy: 0.3840
Evaluating at step 112
Validation Loss: 5.7943, Accuracy: 0.3620
Training Loss: 0.5941
Training complete..
CPU times: user 8h 12min 49s, sys: 11min 28s, total: 8h 24min 18s
Wall time: 31min 31s


In [36]:
model = best_model
model = model.to('cpu')
model.eval() # set model to eval phase
y_true=[]
y_pred=[]
for test_step, (images, question, _,  labels) in enumerate(dl_test):


    outputs = model(images)

    y_true.append(labels)
    y_pred.append(outputs)

In [37]:
num_classes=332
Y_true=torch.cat((torch.flatten(torch.stack(y_true[:-1])),y_true[-1]),dim=0)
Y_pred=torch.stack(y_pred[:-1])
Y_pred=torch.reshape(torch.stack(y_pred[:-1]),(Y_pred.shape[0]*Y_pred.shape[1],num_classes))
Y_pred=torch.cat((Y_pred,y_pred[-1]),dim=0)
Y_prob=Y_pred
_, Y_pred = torch.max(Y_pred, 1)
Y_true=Y_true.to('cpu')
Y_pred=Y_pred.to('cpu')
Y_prob=Y_prob.to('cpu')

Y_true=Y_true.numpy()
Y_pred=Y_pred.numpy()
Y_prob=Y_prob.detach().numpy()
Y_prob=tf.nn.softmax(Y_prob)
Y_true_oh=tf.keras.utils.to_categorical(Y_true, num_classes=332)
print('Y_true:', Y_true.shape)
print(Y_pred.shape)
print(Y_prob.shape)
print(Y_true_oh.shape)

Y_true: (500,)
(500,)
(500, 332)
(500, 332)


In [None]:
from sklearn.preprocessing import LabelBinarizer

def multiclass_roc_auc_score(y_test, y_pred, average="macro"):
    lb = LabelBinarizer()
    lb.fit(y_test)
    y_test = lb.transform(y_test)
    y_pred = lb.transform(y_pred)
    return metrics.roc_auc_score(y_test, y_pred, average=average)

In [221]:
multiclass_roc_auc_score(Y_true, Y_pred)

0.6956222601297944

In [222]:
acc=np.mean(Y_true==Y_pred)
F1=metrics.f1_score(Y_true, Y_pred, average='weighted')

print('F1 Score:', F1)
print('Accuracy: ', acc)

F1 Score: 0.37043311133311135
Accuracy:  0.408


In [97]:
torch.save(model.state_dict(), 'DenseNet_UnF.pth')