In [1]:
import binascii
import os
import pydicom
import random
import sys
import torch

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torchvision.transforms as transforms

from fastai.vision.all import *
from PIL import Image
from pydicom.pixel_data_handlers.util import apply_voi_lut
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch import  optim
from torch.nn import BCEWithLogitsLoss, Conv2d, Flatten, Linear, MaxPool2d, Module, Sequential, Sigmoid
from torch.nn.functional import log_softmax
from torch.nn.init import xavier_uniform_
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

In [2]:
df = pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv', dtype=object)
df = df[~df.BraTS21ID.isin(["00109", "00123", "00709"])]

In [3]:
#https://stackoverflow.com/a/4836734/8245487
def natural_sort(l): 
    convert = lambda text: int(text) if text.isdigit() else text.lower()
    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
    return sorted(l, key=alphanum_key)

In [4]:
INPUT = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'

if not os.path.exists('./train'):
    os.makedirs('./train')
    
if not os.path.exists('./test'):
    os.makedirs('./test')

In [5]:
def process_dicom(path, outpath):
    dicom = pydicom.read_file(path)
    data = apply_voi_lut(dicom.pixel_array, dicom)
    if dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    data = data - np.min(data)
    data = data / np.max(data)
    data = (data * 255).astype(np.uint8)
    
    height = len(data)
    width = len(data[0])
    
    pixels_out = []
    for row in data:
        pixels_out.extend(row)
    assert(len(pixels_out) == height * width)
    
    image_out = Image.new('L', (width, height))
    image_out.putdata(pixels_out)
    image_out.save(outpath)

In [6]:
def get_dicom_files(input_dir, dataset='train'):
    for subdir, dirs, files in os.walk(f"{input_dir}/{dataset}"):
        if len(files) == 0:
            continue
        filename = natural_sort(files)[len(files)//2] #take middle most image -- FLAIR DCM file per training item.
        filepath = os.path.join(subdir, filename)
        
        if filepath.endswith(".dcm") and "FLAIR" in filepath:
            cur_id = subdir.split('/')[-2]
            outpath = os.path.join(f'./{dataset}',f'{cur_id}.png')
            
            process_dicom(filepath, outpath)

In [7]:
get_dicom_files(INPUT, 'train')
get_dicom_files(INPUT, 'test')

  import sys


In [8]:
def create_dataframe(df, csv_path, data_type = 'train', directory_name = 'FLAIR'):
#     path = './' + data_type + '/' + directory_name + '/'
    path = './' + data_type + '/'
    dataframe = pd.DataFrame()
    dataframe['BraTS21ID'] = df['BraTS21ID']
    dataframe['image_id'] = df['BraTS21ID']
    if data_type == 'test':
        dataframe['image'] = [path + '0' * (5 - len(str(idx))) + str(idx) +'.png' for idx in df['BraTS21ID']]
        dataframe['target'] = [0.5 for idx in df['BraTS21ID']]
    else:
        dataframe['MGMT_value'] = df['MGMT_value']
        dataframe['image'] = [path + str(idx) +'.png' for idx in df['BraTS21ID']]
        dataframe['target'] = df['MGMT_value']
    dataframe.to_csv(csv_path, index = False)

In [9]:
csv_path = './train.csv'
create_dataframe(df, csv_path)

In [10]:
class BrainTumorDataset(Dataset):
    def __init__(self, csv_path):
        self.dataframe = pd.read_csv(csv_path)
        self.transform = transforms.Compose([transforms.ToTensor()])
    def __len__(self):
        return self.dataframe.shape[0]
    def __getitem__(self, idx):
        image = Image.open(self.dataframe.iloc[idx]['image'])
        image = image.resize((224, 224))
        image = self.transform(image)
        label = torch.FloatTensor([self.dataframe.iloc[idx]['target']])
        return image, label

In [11]:
class Model(Module):
    def __init__(self):
        super().__init__()
        self.conv_layers = Sequential(Conv2d(1, 4, kernel_size=5, stride=1),
                                      MaxPool2d(kernel_size=2, stride=2),
                                      Conv2d(4, 16, kernel_size=5, stride=1),
                                      MaxPool2d(kernel_size=2, stride=2),
                                      Conv2d(16, 32, kernel_size=5, stride=1),
                                      MaxPool2d(kernel_size=2, stride=2),
                                      Flatten())
        
        self.linear_layers = Sequential(Linear(18432, 1024),
                                        Linear(1024, 64),
                                        Linear(64, 1))
        
    def forward(self, x):
        x = self.conv_layers(x)
        output = self.linear_layers(x)
        return output

In [12]:
batch_size = 64
threshold = 0.5
learning_rate = 1e-3
flag_continue = False
train_csv_path = './train.csv'
checkpoint_load = './Checkpointv1.pt'
checkpoint_path = './Checkpointv1.pt'
start_epoch = 1
end_epoch = 20

In [13]:
model = Model()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
criterion = BCEWithLogitsLoss()

In [14]:
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print("Parameters:", params)
print("--------------------------------------------")

Parameters: 18955609
--------------------------------------------


In [15]:
if torch.cuda.is_available():
    print("GPU")
    device = torch.device("cuda")
    model = model.cuda()
    criterion = criterion.cuda()
else:
    print("CPU")
    device = torch.device("cpu")

GPU


In [16]:
dataset = BrainTumorDataset(train_csv_path)
params = {"batch_size": batch_size, "shuffle": True, "num_workers": 0}
data_gen = DataLoader(dataset, **params)

In [17]:
if flag_continue == True:
    print("Model loaded for further training!")
    checkpoint = torch.load(checkpoint_load)
    model.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    epoch = checkpoint["epoch"]
    loss = checkpoint["loss"]
else:
    print("Model training!")
    for p in model.parameters():
        if p.dim() > 1:
            xavier_uniform_(p)

Model training!


In [18]:
model.train()

Model(
  (conv_layers): Sequential(
    (0): Conv2d(1, 4, kernel_size=(5, 5), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(4, 16, kernel_size=(5, 5), stride=(1, 1))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Flatten(start_dim=1, end_dim=-1)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=18432, out_features=1024, bias=True)
    (1): Linear(in_features=1024, out_features=64, bias=True)
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [19]:
print('---------------------------------------------------')
for epoch in range(start_epoch, end_epoch + 1):
    epoch_loss = 0.0
    epoch_accuracy = []
    for batch_idx, generator_values in enumerate(data_gen):
        x = generator_values[0]
        target = generator_values[1]
        x = x.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        y_hat = model(x)
        loss = criterion(y_hat, target)
        loss.backward()
        optimizer.step()
        y_hat = Sigmoid()(y_hat)
        y_pred = (y_hat > threshold).float()
        y_pred = y_pred.detach().cpu().numpy()
        target = target.detach().cpu().numpy()
        batch_accuracy = accuracy_score(target, y_pred)
        batch_loss = loss.item()
        epoch_loss = epoch_loss + batch_loss
        batch_accuracy = round(batch_accuracy, 3)
        batch_loss = round(batch_loss, 3)
        epoch_accuracy.append(batch_accuracy)
        torch.cuda.empty_cache()
        del x, target
        del y_hat, y_pred
        if batch_idx % 2 == 0:
            print("Epoch: {}, Batch: {}, Batch Loss: {} Batch Accuracy: {}".format(epoch, batch_idx, batch_loss, batch_accuracy))
    epoch_accuracy = sum(epoch_accuracy) / len(epoch_accuracy)
    epoch_accuracy = round(epoch_accuracy, 3)
    epoch_loss = round(epoch_loss, 3)
    print("Epoch: {}, Epoch Loss: {} Epoch Accuracy: {}".format(epoch, epoch_loss, epoch_accuracy))
    torch.save({"epoch": epoch,"model_state_dict": model.state_dict(),
              "optimizer_state_dict": optimizer.state_dict(),
              "loss": epoch_loss,
          },
          checkpoint_path)
    print('---------------------------------------------------\n')

---------------------------------------------------
Epoch: 1, Batch: 0, Batch Loss: 0.71 Batch Accuracy: 0.344
Epoch: 1, Batch: 2, Batch Loss: 17.653 Batch Accuracy: 0.469
Epoch: 1, Batch: 4, Batch Loss: 5.728 Batch Accuracy: 0.5
Epoch: 1, Batch: 6, Batch Loss: 5.063 Batch Accuracy: 0.484
Epoch: 1, Batch: 8, Batch Loss: 0.972 Batch Accuracy: 0.484
Epoch: 1, Epoch Loss: 46.048 Epoch Accuracy: 0.514
---------------------------------------------------

Epoch: 2, Batch: 0, Batch Loss: 4.284 Batch Accuracy: 0.641
Epoch: 2, Batch: 2, Batch Loss: 0.873 Batch Accuracy: 0.453
Epoch: 2, Batch: 4, Batch Loss: 3.574 Batch Accuracy: 0.469
Epoch: 2, Batch: 6, Batch Loss: 0.671 Batch Accuracy: 0.641
Epoch: 2, Batch: 8, Batch Loss: 1.556 Batch Accuracy: 0.391
Epoch: 2, Epoch Loss: 21.736 Epoch Accuracy: 0.526
---------------------------------------------------

Epoch: 3, Batch: 0, Batch Loss: 0.915 Batch Accuracy: 0.562
Epoch: 3, Batch: 2, Batch Loss: 0.734 Batch Accuracy: 0.5
Epoch: 3, Batch: 4, Batc

In [20]:
class BrainTumorPredictionDataset(Dataset):
    def __init__(self, csv_path):
        self.dataframe = pd.read_csv(csv_path)
        self.transform = transforms.Compose([transforms.ToTensor()])
    def __len__(self):
        return self.dataframe.shape[0]
    def __getitem__(self, idx):
        file_id = self.dataframe.iloc[idx]['BraTS21ID']
        path = self.dataframe.iloc[idx]['image']
        image = Image.open(path)
        image = image.resize((224, 224))
        image = self.transform(image)
        label = torch.FloatTensor([self.dataframe.iloc[idx]['target']])
        return file_id, image, label

In [21]:
test_csv_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv'
df = pd.read_csv(test_csv_path)
csv_path = './test.csv'
create_dataframe(df, csv_path, data_type = 'test')
test_csv_path = csv_path

In [22]:
dataset = BrainTumorPredictionDataset(test_csv_path)
params = {"batch_size": 1, "shuffle": False, "num_workers": 0}
data_gen = DataLoader(dataset, **params)

In [23]:
model = Model()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

In [24]:
if torch.cuda.is_available():
    print("GPU")
    device = torch.device("cuda")
    model = model.cuda()
else:
    print("CPU")
    device = torch.device("cpu")

GPU


In [25]:
checkpoint = torch.load(checkpoint_load)
model.load_state_dict(checkpoint["model_state_dict"])
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
epoch = checkpoint["epoch"]
loss = checkpoint["loss"]
model.eval()

Model(
  (conv_layers): Sequential(
    (0): Conv2d(1, 4, kernel_size=(5, 5), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(4, 16, kernel_size=(5, 5), stride=(1, 1))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Flatten(start_dim=1, end_dim=-1)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=18432, out_features=1024, bias=True)
    (1): Linear(in_features=1024, out_features=64, bias=True)
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [26]:
image_id_list = []
predictions = []
for batch_idx, generator_values in tqdm(enumerate(data_gen)):
    image_id = generator_values[0].item()
    image_id = '0' * (5 - len(str(image_id))) + str(image_id) 
    x = generator_values[1]
    x = x.to(device)
    y_hat = model(x)
    y_hat = Sigmoid()(y_hat)
    y_pred = (y_hat > threshold).float()
    y_pred = y_pred.detach().cpu().numpy()
    y_pred = int(y_pred[0][0])
    image_id_list.append(image_id)
    predictions.append(y_pred)
    del x, image_id
    del y_hat, y_pred

87it [00:00, 229.12it/s]


In [27]:
! zip -r './FLAIR_train.zip' './train/FLAIR' > /dev/null
! zip -r './FLAIR_test.zip' './test/FLAIR' > /dev/null

In [28]:
submission = pd.DataFrame()
submission['BraTS21ID'] = image_id_list
submission['MGMT_value'] = predictions

In [29]:
submission.to_csv('./submission.csv', index = False)