In [None]:
# Import stuff
import random
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.optim
import pandas as pd
from torch.utils.data import Dataset
from glob import glob
import random
import torch.distributed as dist
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
from torchvision.transforms import ToTensor
import torchvision
import cv2
from sklearn.metrics import roc_curve, auc
import os

In [None]:
class Dataset():
    """Map-style dataset that loads grayscale images from a list of file paths.

    Each item is a dict holding the preprocessed image tensor under ``'Image'``
    and the file's base name (without extension) under ``'Code'``.

    NOTE(review): this class reuses the name ``Dataset`` that the import cell
    also binds to ``torch.utils.data.Dataset``; after this cell runs, the name
    refers to this class.
    """

    def __init__(self, file_list, mode):
        """Store the file list and a split tag.

        Args:
            file_list: indexable sequence of image file paths.
            mode: tag stored on the instance; this class does not read it.
        """
        self.file_list = file_list
        self.mode = mode

    def __getitem__(self, idx):
        """Return ``{'Image': tensor, 'Code': str}`` for the file at ``idx``."""
        image_filepath = self.file_list[idx]
        image, code = self.loadimage(image_filepath)
        return {'Image': image, 'Code': code}

    def __len__(self):
        """Number of files in the dataset."""
        return len(self.file_list)

    @staticmethod
    def _code_from_path(image_filepath):
        """Return the file name without directory and extension.

        Independent of how deep the file sits in the directory tree and of the
        extension length, unlike indexing into ``split('/')``.
        """
        return os.path.splitext(os.path.basename(image_filepath))[0]

    def loadimage(self, image_filepath):
        """Read one image and return ``(image_tensor, code)``.

        The image is read as single-channel, zero-padded with 262 rows above
        and below, resized to 382x382, converted with ``ToTensor`` and rescaled
        so that its maximum maps to 1 and zero maps to -1.

        Raises:
            FileNotFoundError: if OpenCV cannot read ``image_filepath``.
        """
        img = cv2.imread(image_filepath, 0)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('could not read image: {}'.format(image_filepath))

        # NOTE(review): the fixed 262-row padding presumably targets one
        # specific source resolution -- confirm against the data.
        img = np.pad(img, [(262, 262), (0, 0)])
        img = cv2.resize(img, dsize=(382, 382))

        image = ToTensor()(img)
        peak = torch.max(image)
        if peak > 0:
            image = 2 * (image / peak) - 1
        else:
            # All-black image: dividing by a zero maximum would fill the tensor
            # with NaN, so map it to the lower end of the range instead.
            image = torch.full_like(image, -1.0)

        code = self._code_from_path(image_filepath)
        return image, code

In [None]:
# Ground-truth table and image directories (Kaggle input layout).
gt_df = pd.read_csv("../input/ranzcr-clip-catheter-line-classification/train.csv")
train_dir = '../input/ranzcr-clip-catheter-line-classification/train'
test_dir = '../input/ranzcr-clip-catheter-line-classification/test'

# Deterministic split: files are sorted by name, the first 20% become the
# validation set and the remainder the training set.
total_files = sorted(glob(train_dir + '/*.jpg'))
val_length = int(len(total_files) * 0.2)
train_files = total_files[val_length:]
val_files = total_files[:val_length]
test_files = sorted(glob(test_dir + '/*.jpg'))

Train_Dataset = Dataset(train_files, "Train")
Train_dataloader = DataLoader(Train_Dataset, shuffle=True, num_workers=2, batch_size=200, pin_memory=True)

Val_Dataset = Dataset(val_files, "Val")
Val_dataloader = DataLoader(Val_Dataset, shuffle=False, num_workers=2, batch_size=200, pin_memory=True)

Test_Dataset = Dataset(test_files, "Test")
# The test loader must iterate the test images (not the training set), one at
# a time and in file order so the prediction rows are reproducible.
Test_dataloader = DataLoader(Test_Dataset, shuffle=False, num_workers=2, batch_size=1, pin_memory=True)

In [None]:
class resconv_block_2D(nn.Module):
    """Residual block: two 3x3 conv -> BatchNorm -> ReLU stages plus a 1x1-conv shortcut.

    Args:
        ch_in: number of input channels.
        ch_out: number of output channels of both stages and of the shortcut.
    """

    def __init__(self, ch_in, ch_out):
        super().__init__()

        # Stage 1 maps ch_in -> ch_out, stage 2 keeps ch_out; padding=1 with a
        # 3x3 kernel and stride 1 leaves the spatial size unchanged.
        stages = []
        for stage_in in (ch_in, ch_out):
            stages += [
                nn.Conv2d(stage_in, ch_out, kernel_size=3, stride=1, padding=1, bias=True),
                nn.BatchNorm2d(ch_out),
                nn.ReLU(inplace=True),
            ]
        self.conv = nn.Sequential(*stages)

        # 1x1 projection so the shortcut has ch_out channels and can be added.
        self.Conv_1x1 = nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return ``Conv_1x1(x) + conv(x)``."""
        shortcut = self.Conv_1x1(x)
        return shortcut + self.conv(x)

class ResNet(nn.Module):
    """Small residual CNN producing one sigmoid probability per output channel.

    Five ``resconv_block_2D`` stages (8, 16, 32, 64, 128 channels) separated by
    four 2x2 max-pooling steps, followed by a fully connected classifier.

    Args:
        img_ch: number of channels of the input image.
        output_ch: number of outputs (one probability each).
        img_size: spatial size of the input, either an int (square input) or a
            ``(height, width)`` pair. It is only used to size the first linear
            layer; the default 382 gives the 67712 features used previously.
    """

    def __init__(
        self,
        img_ch = 1,
        output_ch = 11,
        img_size = 382,
        ):
        super(ResNet,self).__init__()

        self.Maxpool = nn.MaxPool2d(kernel_size=2,stride=2)
        # Not used by forward(); kept so the attribute remains available.
        self.Softmax = nn.Softmax(dim=1)
        self.Sigmoid = nn.Sigmoid()

        self.Conv1 = resconv_block_2D(ch_in=img_ch,ch_out=8)
        self.Conv2 = resconv_block_2D(ch_in=8,ch_out=16)
        self.Conv3 = resconv_block_2D(ch_in=16,ch_out=32)
        self.Conv4 = resconv_block_2D(ch_in=32,ch_out=64)
        self.Conv5 = resconv_block_2D(ch_in=64,ch_out=128)

        # forward() applies Maxpool four times; each 2x2/stride-2 pooling
        # halves the spatial size, rounding down. The conv blocks keep it.
        if isinstance(img_size, int):
            height = width = img_size
        else:
            height, width = img_size
        for _ in range(4):
            height //= 2
            width //= 2
        feature_dimension = 128 * height * width
        if feature_dimension <= 0:
            raise ValueError('img_size {!r} is too small for four 2x2 poolings'.format(img_size))

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(feature_dimension, 1024),
            nn.ReLU(True),

            nn.Dropout(),
            nn.Linear(1024, 128),
            nn.Sigmoid(),

            nn.Linear(128, output_ch),
         )

    def forward(self,x):
        """Map a batch of images to per-output probabilities in (0, 1)."""
        x1 = self.Conv1(x)
        x2 = self.Maxpool(x1)
        x2 = self.Conv2(x2)
        x3 = self.Maxpool(x2)
        x3 = self.Conv3(x3)
        x4 = self.Maxpool(x3)
        x4 = self.Conv4(x4)
        x5 = self.Maxpool(x4)
        x = self.Conv5(x5)
        # Flatten everything except the batch dimension for the linear head.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        x = self.Sigmoid(x)
        return x

In [None]:
# Build the network with its default arguments and move it to the GPU.
model = ResNet().cuda()

In [None]:
# Binary cross-entropy applied to the probabilities the model outputs.
criterion = nn.BCELoss().cuda()

# Adam over every model parameter with a fixed learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
def find_gt(code, df):
    """Return the label values of the first row whose ``StudyInstanceUID`` equals ``code``.

    Args:
        code: identifier to look up in the ``'StudyInstanceUID'`` column.
        df: DataFrame containing a ``'StudyInstanceUID'`` column.

    Returns:
        1-D NumPy array with the matching row's values, excluding the first
        and the last column.

    Raises:
        IndexError: if no row matches ``code``.
    """
    # Select with the boolean mask itself: translating the mask to index
    # labels and passing those to ``iloc`` is only right for a default index.
    matches = df.loc[df['StudyInstanceUID'] == code]
    if len(matches) == 0:
        raise IndexError('no row with StudyInstanceUID == {!r}'.format(code))
    row = matches.to_numpy()[0]
    return row[1:-1]

def _labels_for_batch(codes, df, num_labels=11):
    """Look up the ground truth of every code and return a float tensor on the GPU.

    The result has one row per entry of ``codes`` and ``num_labels`` columns.
    """
    labels = np.zeros([len(codes), num_labels])
    for row, code in enumerate(codes):
        labels[row, :] = np.array(find_gt(code, df), dtype='float')
    return torch.tensor(labels).to(torch.float).cuda(non_blocking=True)


def _summed_label_loss(output, target, loss_fn):
    """Evaluate ``loss_fn`` separately on each label column and sum the results."""
    return sum(
        loss_fn(output[:, j].unsqueeze(1), target[:, j].unsqueeze(1))
        for j in range(target.shape[1])
    )


# Per-epoch history: mean training loss, mean validation loss, mean validation AUC.
train_global_losses = []
val_global_losses = []
val_global_auc = []

checkpoint_dir = './'

for epoch in range(0, 20):

    # Losses and validation outputs collected within this epoch.
    train_epoch_losses = []
    val_epoch_losses = []
    val_targets = []
    val_outputs = []

    model.train()

    for data in Train_dataloader:
        input_img = data['Image'].cuda(non_blocking=True)
        codes_formatted = _labels_for_batch(data['Code'], gt_df)

        output = model(input_img)
        total_loss = _summed_label_loss(output, codes_formatted, criterion)
        train_epoch_losses.append(total_loss.item())

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

    # Transition to val mode
    model.eval()

    # Avoid computing gradients during validation to save memory
    with torch.no_grad():
        for data in Val_dataloader:
            input_img = data['Image'].cuda(non_blocking=True)
            codes_formatted = _labels_for_batch(data['Code'], gt_df)

            output = model(input_img)
            total_loss = _summed_label_loss(output, codes_formatted, criterion)
            val_epoch_losses.append(total_loss.item())

            # Keep targets and predictions to compute the AUC over the whole set.
            val_targets.append(codes_formatted.cpu().numpy())
            val_outputs.append(output.cpu().numpy())

    train_net_loss = sum(train_epoch_losses) / len(train_epoch_losses)
    val_net_loss = sum(val_epoch_losses) / len(val_epoch_losses)
    train_global_losses.append(train_net_loss)
    val_global_losses.append(val_net_loss)

    # One ROC AUC per label column, then the unweighted mean over labels.
    all_targets = np.concatenate(val_targets)
    all_outputs = np.concatenate(val_outputs)
    val_auc = []
    for j in range(all_targets.shape[1]):
        fpr, tpr, _ = roc_curve(all_targets[:, j], all_outputs[:, j])
        val_auc.append(auc(fpr, tpr))

    average_epoch_auc = sum(val_auc) / len(val_auc)
    val_global_auc.append(average_epoch_auc)

    print('Epoch: {} | Train Loss: {} | Val Loss: {} | Avg Val AUC: {} |'.format(epoch, train_net_loss, val_net_loss, average_epoch_auc))

    # Save the model whenever it reaches a new minimum validation loss. This is
    # done for every epoch (including the early ones) so that the epoch with
    # the lowest validation loss always has a checkpoint file on disk.
    if val_global_losses[-1] == min(val_global_losses):
        print('saving model at the end of epoch ' + str(epoch))
        best_epoch = epoch
        file_name = os.path.join(checkpoint_dir, 'model_epoch_{}.pth'.format(epoch))
        torch.save({
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optim_dict': optimizer.state_dict(),
            },
            file_name)

In [None]:
# Pick the epoch with the lowest mean validation loss. A checkpoint named
# './model_epoch_<epoch>.pth' must exist for it, otherwise torch.load fails.
best_epoch = int(np.argmin(np.array(val_global_losses)))

model = ResNet()
model = model.cuda()

load_dir = './model_epoch_' + str(best_epoch) + '.pth'
checkpoint = torch.load(load_dir)
model.load_state_dict(checkpoint['state_dict'])
# A freshly built module is in training mode. Switch to eval so Dropout is
# disabled and BatchNorm uses its running statistics during prediction.
model.eval()

output_columns = ['StudyInstanceUID', 'ETT - Abnormal', 'ETT - Borderline',
       'ETT - Normal', 'NGT - Abnormal', 'NGT - Borderline',
       'NGT - Incompletely Imaged', 'NGT - Normal', 'CVC - Abnormal',
       'CVC - Borderline', 'CVC - Normal', 'Swan Ganz Catheter Present']

# Collect one record per image and build the frame once at the end, instead
# of growing the DataFrame row by row.
prediction_rows = []

with torch.no_grad():
    for data in Test_dataloader:
        codes = data['Code']
        input_img = data['Image'].cuda(non_blocking=True)
        output = model(input_img).cpu().numpy()

        # Works for any batch size: one row per (code, prediction vector) pair.
        for code, probabilities in zip(codes, output):
            prediction_rows.append([code] + probabilities.tolist())

output_df = pd.DataFrame(prediction_rows, columns=output_columns)

In [None]:
# Write only the declared columns; without index=False pandas would add the
# row index as an extra unnamed first column.
output_df.to_csv('output.csv', index=False)