In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import pydicom
import cv2
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import torch
from skimage import io, transform
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn as nn            # for torch.nn.Module, the parent object for PyTorch models
import torch.nn.functional as F
import os
from tqdm import tqdm

# gpus = tf.config.experimental.list_physical_devices('GPU')
# for gpu in gpus:
#     tf.config.experimental.set_memory_growth(gpu, True)

# Globals

In [None]:
# Notebook-wide constants. SIZE and START are not referenced by any other
# code visible in this notebook.
SIZE, START = 100, 0

# Root data directory and the train / test DICOM folders beneath it.
INPUT_PATH = "../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/"
train_images_dir = f"{INPUT_PATH}stage_2_train/"
test_images_dir = f"{INPUT_PATH}stage_2_test/"

# Loading Data
We load the label CSV files with pandas and reshape them to one row per image.

In [None]:
def load_dataframe(csv_file):
    """Load a label CSV and reshape it to one row per image.

    The CSV must have an ``ID`` column whose values look like
    ``<prefix>_<image>_<sub_type>`` and a ``Label`` column holding the
    (non-negative) label for that image / sub-type pair.

    Args:
        csv_file: Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns:
        pd.DataFrame: One row per image ID (sorted by ID) with the columns
        ``ID, any, epidural, intraparenchymal, intraventricular,
        subarachnoid, subdural``.
    """
    sub_types = ['any', 'epidural', 'intraparenchymal',
                 'intraventricular', 'subarachnoid', 'subdural']

    info = pd.read_csv(csv_file)

    # Split the composite ID once and reuse the pieces instead of re-running
    # the same string split for every derived column.
    id_parts = info['ID'].str.split("_", n=3, expand=True)
    info['Sub_type'] = id_parts[2]
    info['ID'] = id_parts[0] + '_' + id_parts[1]
    info = info.sort_values(by=['ID', 'Sub_type'], ignore_index=True)

    # One column per sub-type: the row's label where the sub-type matches, else 0.
    for sub_type in sub_types:
        info[sub_type] = np.where(info['Sub_type'] == sub_type, info['Label'], 0)

    # Collapse to one row per image. Each column is non-zero in at most one
    # distinct (ID, sub-type) row, so ``max`` recovers the label. Unlike
    # ``sum`` it does not inflate the label (e.g. 1 -> 2) when the CSV
    # contains a duplicated row.
    df = info.drop(columns=['Label', 'Sub_type']).groupby('ID').max().reset_index()

    return df

In [None]:
# Training labels first, then the sample-submission file, which goes through
# the same loader so it ends up with the same one-row-per-image layout.
df, test_df = (
    load_dataframe(INPUT_PATH + csv_name)
    for csv_name in ("stage_2_train.csv", "stage_2_sample_submission.csv")
)
print("Loaded!!!")

In [None]:
# Fixed seed so the negative subsample and the train/test split are the same
# on every Restart & Run All.
SEED = 42

dfw = df.copy()
# dfw = dfw[['ID','any']]
# Images without any hemorrhage (candidates for down-sampling below).
zeros = df[df['any'] == 0].copy()

# Number of positive images per label, printed in one consistent format.
for sub_type in ['any', 'epidural', 'intraparenchymal',
                 'intraventricular', 'subarachnoid', 'subdural']:
    print(f"{sub_type}:", len(dfw[dfw[sub_type] == 1]))

# 6/7 of the negative images, chosen reproducibly. Only takes effect if the
# commented-out drop below is re-enabled.
rng = np.random.RandomState(SEED)
zeros_to_be_dropped = rng.choice(zeros.index, 6*len(zeros)//7, replace=False)
# dfw = dfw.drop(index=zeros_to_be_dropped)
# # dfw = dfw.sample(frac=.1)
train, test = train_test_split(dfw, test_size=0.2, shuffle=True, random_state=SEED)

In [None]:
# Give both splits a fresh 0..n-1 index; the shuffled original index is discarded.
train, test = (split.reset_index(drop=True) for split in (train, test))

In [None]:
# test

# Windowing Function
It takes a DICOM image and converts it into an RGB-like three-channel image, with one channel per intensity window.

In [None]:
def final_windowing(img_path, window_sizes=((40, 80), (75, 215), (600, 2800))):
    """Turn one DICOM slice into a multi-channel uint8 image, one channel per window.

    The file is read once, rescaled with its ``RescaleSlope`` and
    ``RescaleIntercept``, and then each ``(level, width)`` window is clipped
    and stretched to the 0-255 range.

    Args:
        img_path: Path of the ``.dcm`` file.
        window_sizes: Iterable of ``(window_level, window_width)`` pairs.
            The default reproduces the three windows this notebook uses.

    Returns:
        np.ndarray: ``uint8`` array of shape ``(H, W, len(window_sizes))``.
        If the file cannot be read or converted, a warning is issued and an
        all-zero ``(512, 512, len(window_sizes))`` ``uint8`` array is
        returned, so a bad file does not abort a long run.
    """
    import warnings

    window_sizes = list(window_sizes)

    def apply_window(hu_img, window_level, window_width):
        # Clip to [level - width//2, level + width//2] and rescale to 0..255.
        window_min = window_level - (window_width // 2)
        window_max = window_level + (window_width // 2)
        img = np.clip(hu_img, window_min, window_max)
        img = 255 * ((img - window_min) / window_width)
        return img.astype(np.uint8)

    try:
        # Read and rescale once; every window reuses the same rescaled pixels.
        r = pydicom.dcmread(img_path)
        hu_img = (r.pixel_array * r.RescaleSlope) + r.RescaleIntercept
        channels = [apply_window(hu_img, level, width) for level, width in window_sizes]
    except Exception as err:
        # Best-effort fallback, but not a silent one: a blank image paired
        # with a real label is worth knowing about.
        warnings.warn(f"final_windowing: could not process {img_path!r} ({err!r}); "
                      "returning a blank image")
        # Same dtype as the normal path so callers see a consistent array type.
        channels = [np.zeros((512, 512), dtype=np.uint8) for _ in window_sizes]

    return np.dstack(channels)

In [None]:
fig = plt.figure(figsize=(20, 10))
columns = 4; rows = 2

# Same sample images as before (positions 1..columns of the training split),
# selected by position so this does not depend on the frame's index labels.
sample_ids = [train['ID'].iloc[i] for i in range(1, columns + 1)]

# Top row: raw pixel arrays as stored in the DICOM files.
for col, image_id in enumerate(sample_ids, start=1):
    ds = pydicom.dcmread(train_images_dir + image_id + '.dcm').pixel_array
    ax = fig.add_subplot(rows, columns, col)
    ax.imshow(ds, cmap=plt.cm.bone)
    ax.set_title(f"{image_id} (raw)")

# Bottom row: the same images after three-window conversion. The result is a
# three-channel image, so no colormap is applied.
for col, image_id in enumerate(sample_ids, start=1):
    ds = final_windowing(train_images_dir + image_id + '.dcm')
    ax = fig.add_subplot(rows, columns, col + columns)
    ax.imshow(ds)
    ax.set_title(f"{image_id} (windowed)")

# Custom Data Loader

In [None]:
class HemorrhageImageDataset(Dataset):
    """CT scan dataset yielding ``(image, labels)`` tensor pairs."""

    def __init__(self, dataframe, root_dir, windowing=final_windowing, transform=transforms.ToTensor(), image_size=(224, 224)):
        """
        Args:
            dataframe (pd.DataFrame): Label table. Column 0 is the image ID
                and the following six columns are the labels.
            root_dir (string): Directory with all the images. It is
                concatenated directly with ``<ID>.dcm``, so it must end
                with a path separator.
            windowing (callable): Maps a DICOM path to an ``H x W x C``
                array.
            transform (callable, optional): Stored on the instance for
                callers, but not applied by ``__getitem__``.
            image_size (tuple): ``(height, width)`` every image is
                resampled to.
        """
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform
        self.windowing = windowing
        self.image_size = image_size

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, labels)``: a ``C x H x W`` float tensor and a length-6 float tensor."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = self.dataframe.iloc[idx, 0]
        image_arr = self.windowing(self.root_dir + img_name + '.dcm')
        # Resample to the target size. ndarray.resize() only truncates or
        # pads the flat pixel buffer, which scrambles the picture.
        # cv2.resize interpolates and takes dsize as (width, height).
        height, width = self.image_size
        image_arr = cv2.resize(image_arr, (width, height), interpolation=cv2.INTER_AREA)
        image = image_arr/256 # normalization

        labels = self.dataframe.iloc[idx, 1:].to_numpy(dtype='float').reshape(6,)

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        image = image.transpose((2, 0, 1))
        image = torch.from_numpy(image)
        labels = torch.from_numpy(labels)

        return image, labels

In [None]:
# Both splits come from the labelled training data, so both datasets read
# their DICOM files from the training image directory.
_loader_kwargs = dict(batch_size=1, shuffle=True, num_workers=2)

trainset = HemorrhageImageDataset(train, train_images_dir)
train_loader = DataLoader(trainset, **_loader_kwargs)

testset = HemorrhageImageDataset(test, train_images_dir)
test_loader = DataLoader(testset, **_loader_kwargs)

In [None]:
# for batch, (X, y) in enumerate(dataloader):
    

# Model

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class NeuralNetwork(nn.Module):
    """ResNeXt feature extractor followed by a bidirectional LSTM and a linear head.

    The 2048-d backbone features are fed through a one-layer bidirectional
    LSTM (2 x 256 outputs). The LSTM outputs are concatenated with the
    original features and mapped to 6 logits.
    """

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        resnext = torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x8d_wsl')
#         inception = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)
        # Keep every child module except the last one (presumably the
        # backbone's own classification layer - TODO confirm).
        self.base = torch.nn.Sequential(*list(resnext.children())[:-1])
        self.rnn = nn.LSTM(2048, 256, 1, bidirectional=True)
        self.fc = torch.nn.Linear(512+2048, 6)

    def forward(self, input):
        """Return 6 logits per input image.

        Args:
            input: Image batch accepted by the backbone.

        Returns:
            Tensor of shape ``(N, 6)``.
        """
        #----------------RESNET OUTPUTS----------------
        res_out = self.base(input).reshape(-1, 2048)
        #-----------------BLSTM OUTPUTS----------------
        # res_out is 2-D, which nn.LSTM treats as one unbatched sequence: the
        # images of the batch are consumed as consecutive time steps.
        # No (h0, c0) is passed, so the LSTM starts from its default zero
        # state on the input's device. A random initial state made the
        # output differ between identical calls, even in eval mode.
        rnn_out, _ = self.rnn(res_out)
        #----------------CONCATENATION-----------------
        concatenated = torch.cat((rnn_out, res_out), 1)
        #----------------FINAL FEATURES-----------------
        logits = self.fc(concatenated)
        #----------------------------------------------
        return logits


    
class ResNeXtModel(torch.nn.Module):
    """ResNeXt backbone (final child module removed) with a fresh 6-way linear head."""

    def __init__(self):
        super().__init__()
        backbone = torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x8d_wsl')
        # Everything except the backbone's last child becomes the feature extractor.
        feature_layers = list(backbone.children())[:-1]
        self.base = torch.nn.Sequential(*feature_layers)
        self.fc = torch.nn.Linear(2048, 6)

    def forward(self, input):
        """Flatten the backbone output to ``(N, 2048)`` and return ``(N, 6)`` logits."""
        return self.fc(self.base(input).reshape(-1, 2048))


    
class Net(nn.Module):
    """Small CNN baseline: two conv/pool stages followed by three linear layers.

    The first linear layer expects 8427 inputs, i.e. 3 x 53 x 53, which is
    what the two unpadded 5x5 conv + 2x2 pool stages produce from a
    3 x 224 x 224 image.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=5)
        self.flatten_out_shape = 0
        self.fc1 = nn.Linear(in_features=8427, out_features=1048)
        self.fc2 = nn.Linear(in_features=1048, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=6)

    def forward(self, x):
        """Return ``(N, 6)`` logits for an ``(N, 3, 224, 224)`` batch."""
        # Convolution -> ReLU -> shared max-pool, twice.
        for conv in (self.conv1, self.conv2):
            x = self.pool(torch.relu(conv(x)))
        # Flatten everything except the batch dimension.
        x = x.flatten(start_dim=1)
        for hidden in (self.fc1, self.fc2):
            x = torch.relu(hidden(x))
        return self.fc3(x)

# The plain ResNeXt classifier is the model that gets trained; move it onto
# the selected device.
model = ResNeXtModel()
model = model.to(device)


In [None]:
# Print the module tree of the selected model for inspection.
print(model)

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Progress is printed every 100 batches. Accuracy is exact-match: a sample
    counts as correct only when all of its labels are predicted correctly.
    The model is assumed to output raw logits, as required by the
    ``BCEWithLogitsLoss`` this notebook trains with.

    Args:
        dataloader: Iterable of ``(X, y)`` batches with a ``.dataset`` attribute.
        model: ``nn.Module`` to optimise; put into training mode here.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer over ``model``'s parameters.

    Returns:
        tuple: ``(mean loss per batch, exact-match accuracy in [0, 1])``.
        ``(0.0, 0.0)`` if the loader is empty.
    """
    size = len(dataloader.dataset)
    # Follow the model's own device instead of depending on a notebook global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    train_loss, correct, seen, num_batches = 0.0, 0, 0, 0

    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        X = X.float().to(target_device)
        y = y.float().to(target_device)
        pred = model(X)
        loss = loss_fn(pred, y)

        train_loss += loss.item()
        # pred holds logits, so threshold the probability (sigmoid) at 0.5,
        # not the raw logit.
        pred_bin = (torch.sigmoid(pred.detach()) >= 0.5).float()
        correct += int(pred_bin.eq(y).all(dim=1).sum())
        seen += len(X)
        num_batches += 1

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            current = batch * len(X)
            print(f"loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")
            # Accuracy over the samples actually seen so far, whatever the batch size.
            print(f"train_loss: {train_loss:>7f}   Accuracy: {correct / seen * 100}")

    if num_batches == 0:
        return 0.0, 0.0
    return train_loss / num_batches, correct / seen


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X = X.float().to(device)
            y = y.float().to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            pred = torch.where(pred < .5, 0., 1.)
            correct += int(torch.sum(torch.prod(pred.eq(y), 1)))

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

## Loss Fn

In [None]:
# def custom_loss(y, y_t):
#     y = nn.Sigmoid()(y)
#     ones = torch.ones(*y.size()).to(device)
#     loss = y_t*torch.log(y) + (ones-y_t)*torch.log(ones-y)
#     print("y_t:", y_t)
#     print("y:", y)
#     print("ones:", ones)
#     print("losses:", loss)
#     print(torch.sum(loss))
#     return torch.sum(loss)

# Final Training

In [None]:
# Release cached GPU memory before the long run starts.
torch.cuda.empty_cache()

# Hyper-parameters.
learning_rate = 5e-4
epochs = 10

# Multi-label objective on raw logits, optimised with plain SGD.
loss_fn = nn.BCEWithLogitsLoss()
# loss_fn = custom_loss
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# One training pass followed by one evaluation pass per epoch.
for t in range(epochs):
    print("Epoch {}\n-------------------------------".format(t + 1))
    train_loop(train_loader, model, loss_fn, optimizer)
    test_loop(test_loader, model, loss_fn)
print("Done!")