In [1]:
import pandas as pd
import numpy as np
import nilearn as nl
import nibabel as nib
import os
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 300
from time import sleep
import shutil

In [2]:
datadir = "./data_1.4.2_has_tumor"

# copy the files to the traning dir
try:
    shutil.rmtree(datadir)
except Exception as e:
    print(e)
# create directories
if not os.path.exists(datadir):
    os.makedirs(datadir)
train = os.path.join(datadir, "train")
if not os.path.exists(train):
    os.makedirs(train)
train_1 = os.path.join(train, "1")
if not os.path.exists(train_1):
    os.makedirs(train_1)
train_0 = os.path.join(train, "0")
if not os.path.exists(train_0):
    os.makedirs(train_0)
# varidation directories
val = os.path.join(datadir, "val")
if not os.path.exists(val):
    os.makedirs(val)
val_1 = os.path.join(val, "1")
if not os.path.exists(val_1):
    os.makedirs(val_1)
val_0 = os.path.join(val, "0")
if not os.path.exists(val_0):
    os.makedirs(val_0)

In [3]:
def imshow(inp, title=None):
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated

def colornormalisation(img):
    if np.max(img) != 0:
        # values to greyscale
        img = img - np.min(img)
        img = img / np.max(img)    # 0~1
        img = (img * 255).astype(np.uint8)    # 0~255 
        return img
    else:
        return img.astype(np.uint8)
    
def imagenormalisation(image, mincutsize=0.1):
    # the minimal pixel value 
    cutoff = 5
    sizecut = 1 # default sizecut is maximum image
    if np.max(image) > cutoff:
        
        # apply binary thresholding to the gray image
        ret, thresh = cv2.threshold(image, cutoff, 255, cv2.THRESH_BINARY)
        # detect the contours on the binary image using cv2.CHAIN_APPROX_NONE
        cnts = cv2.findContours(image=thresh, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_SIMPLE)
        
        # if not found any contour return the original image
        if len(cnts[0]) == 0: 
            cv2.imwrite('test.png', image)
            cv2.imwrite('test_gray.png', image)
            image_copy1 = image.copy()
            return colorAndShape(image)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

        # Find bounding box and extract ROI
        x = 10000
        y = 10000
        w = 0
        h = 0
        for c in cnts:
            xc,yc,wc,hc = cv2.boundingRect(c)
            if xc < x:
                x = xc
            if yc < y:
                y = yc
            if xc+wc > w:
                w = xc+wc
            if yc+hc > h:
                h = yc+hc
        # crop the image
        sizecut = (h-y)*(w-x)/(image.shape[0]*image.shape[1])
        # check the image has got enough pixel to worth to consider
        if sizecut < mincutsize:
            image = None
        else:
            # cut the image
            image = image[y:h,x:w]
    else:
        # the image haw not meaningfull result
        image = None
    if image is not None:
        # resize the image
        image = cv2.resize(image, (targetsize, targetsize))
    return image, sizecut

In [4]:
TRAIN_DATASET_PATH = './data_has_tumor/BraTS2020_TrainingData/MICCAI_BraTS2020_TrainingData/'
scan_types = ['flair', 't1', 't1ce', 't2'] 
targetsize = 224

# get all of the patient id
dirs = [x[0] for x in os.walk(TRAIN_DATASET_PATH)][1:]
dirs = [ d.replace('./data_has_tumor/BraTS2020_TrainingData/MICCAI_BraTS2020_TrainingData/BraTS20_Training_', '') 
        for d in dirs ]
datalength = len(dirs)
progress = 0

# drop 355 as its segmentation not sure
dirs.remove("355")

# ittterate over the dirs
for didx in dirs:
    progress += 1
    print(f"Process: {np.round(100*progress/datalength, 2)}%", end="\r")
    images = {
    "flair": nib.load(
        os.path.join(
            TRAIN_DATASET_PATH, 
            f"BraTS20_Training_{didx}",  
            f"BraTS20_Training_{didx}_flair.nii" )).get_fdata(),
    "t1": nib.load(
        os.path.join(
            TRAIN_DATASET_PATH, 
            f"BraTS20_Training_{didx}",  
            f"BraTS20_Training_{didx}_t1.nii" )).get_fdata(),
    "t1ce": nib.load(
        os.path.join(
            TRAIN_DATASET_PATH, 
            f"BraTS20_Training_{didx}",  
            f"BraTS20_Training_{didx}_t1ce.nii" )).get_fdata(),
    "t2": nib.load(
        os.path.join(
            TRAIN_DATASET_PATH, 
            f"BraTS20_Training_{didx}",  
            f"BraTS20_Training_{didx}_t2.nii" )).get_fdata(),
    "mask": nib.load(
        os.path.join(
            TRAIN_DATASET_PATH, 
            f"BraTS20_Training_{didx}",  
            f"BraTS20_Training_{didx}_seg.nii" )).get_fdata()
    }
    
    # get is there tumor or not on the slice
    labels = []
    for l in range(images["mask"].shape[2]):
        # just get the images where necrotic and non-enhancing tumor core
        if 1.0 in set(images["mask"][:,:,l].flatten()):
            labels.append("1")
        else:
            labels.append("0")
            
    # randomly asign to train and val group
    val = np.random.binomial(1, 0.2, len(labels))
    val = [ 'val' if v == 1 else 'train' for v in val ]
    
    # generate jpg traning data
    for lidx in range(len(labels)):
        for scan_type in scan_types:
            filename = os.path.join(datadir, val[lidx], labels[lidx], f"{didx}_{scan_type}_{lidx}.jpg")
            data = images[scan_type][:,:,lidx]
            data = data.reshape(data.shape[0], data.shape[1])
            data = colornormalisation(data)
            data, sizes = imagenormalisation(data)
            # if it is a valid image
            if data is not None:
                cv2.imwrite(filename, data.T)
            


    

Process: 99.73%