In [None]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
import seaborn
import cv2 as cv
import nibabel as nib
import pickle
import imgaug as ia
import imgaug.augmenters as iaa
import tqdm
import gc
import warnings
import tensorflow as tf

## Data Preparation

In [None]:
# Dataset index table. Later cells read its 'ct_scan', 'lung_mask' and
# 'infection_mask' columns and pass the values to readImg as file paths.
metadata = pd.read_csv('../input/covid19-ct-scans/metadata.csv')

def readImg(imgPath):
    """Load an image volume with nibabel and return its data rotated by 90 degrees.

    Parameters
    ----------
    imgPath : str or path-like
        Location of a file that ``nib.load`` can open.

    Returns
    -------
    numpy.ndarray
        Floating-point voxel data (from ``get_fdata``), copied into a fresh
        array and rotated once with ``np.rot90`` (default: first two axes).
    """
    voxels = nib.load(imgPath).get_fdata()
    return np.rot90(np.array(voxels))

def applyClahe(img):
    """Equalise local contrast with OpenCV's CLAHE (clip limit 3.0).

    Parameters
    ----------
    img : numpy.ndarray
        Image that is multiplied by 255 and cast to uint8 before equalisation.
        Callers in this notebook pass min-max normalised slices, so values are
        expected to lie in [0, 1].

    Returns
    -------
    numpy.ndarray
        The uint8 image returned by ``clahe.apply``.
    """
    as_bytes = np.uint8(img * 255)
    return cv.createCLAHE(clipLimit=3.0).apply(as_bytes)

def contourOK(cc, size):
    """Decide whether a contour is a plausible region of interest.

    Parameters
    ----------
    cc : contour
        A single contour as produced by ``cv.findContours``.
    size : number
        Reference area; ``getContours`` passes the image's pixel count.

    Returns
    -------
    bool
        False for sliver-shaped bounding boxes (one side under 50 px while the
        other exceeds 150 px); otherwise True only when the contour area is
        strictly between 200 and half of ``size``.
    """
    _, _, width, height = cv.boundingRect(cc)

    tall_sliver = width < 50 and height > 150
    flat_sliver = width > 150 and height < 50
    if tall_sliver or flat_sliver:
        return False

    return 200 < cv.contourArea(cc) < size * 0.5

def getContours(img):
    """Find the contours of a normalised slice that pass ``contourOK``.

    Parameters
    ----------
    img : numpy.ndarray
        Image that is multiplied by 255 and cast to uint8. Callers in this
        notebook pass min-max normalised 2-D slices (values in [0, 1]).

    Returns
    -------
    list
        Contours found by ``cv.findContours`` on the thresholded image, keeping
        only those accepted by ``contourOK``.
    """
    img = np.uint8(img*255)

    # Smooth with a 3x3 averaging (box) kernel before thresholding;
    # ddepth=-1 keeps the output depth equal to the input depth.
    kernel = np.ones((3,3),np.float32)/9
    img = cv.filter2D(img, -1, kernel)

    # Binary threshold (not edge detection): pixels above 50 become 255.
    _, thresh = cv.threshold(img, 50, 255, cv.THRESH_BINARY)

    # RETR_CCOMP / CHAIN_APPROX_NONE are the named forms of the former literals 2 and 1.
    # OpenCV 3.x returns (image, contours, hierarchy) whereas 2.x and 4.x return
    # (contours, hierarchy); the contour list is always second from the end.
    contours = cv.findContours(thresh, cv.RETR_CCOMP, cv.CHAIN_APPROX_NONE)[-2]

    # Drop contours that are too large, too small or sliver-shaped.
    size = img.shape[0] * img.shape[1]
    return [cc for cc in contours if contourOK(cc, size)]


def getBoundaries(img):
    """Return the box enclosing every contour accepted by ``getContours``.

    Parameters
    ----------
    img : numpy.ndarray
        2-D image (its shape is unpacked as ``(height, width)``).

    Returns
    -------
    tuple
        ``(minx, miny, maxx, maxy)`` -- the union of the bounding rectangles of
        all accepted contours. When no contour is accepted the result is
        ``(width, height, 0, 0)``, which slices to an empty crop in
        ``cropImage``.
    """
    contours = getContours(img)
    height, width = img.shape

    boxes = [cv.boundingRect(cc) for cc in contours]

    # Seed each extreme with the "nothing found" value so an empty list of
    # boxes reproduces the inverted bounds described above.
    minx = min([width] + [x for x, _, _, _ in boxes])
    miny = min([height] + [y for _, y, _, _ in boxes])
    maxx = max([0] + [x + w for x, _, w, _ in boxes])
    maxy = max([0] + [y + h for _, y, _, h in boxes])

    return (minx, miny, maxx, maxy)

def cropImage(img, bounds):
    """Slice ``img`` down to the rectangle described by ``bounds``.

    Parameters
    ----------
    img : array-like supporting 2-D slicing
        Image indexed as ``img[rows, columns]``.
    bounds : sequence of four values
        ``(minx, miny, maxx, maxy)`` as returned by ``getBoundaries``.

    Returns
    -------
    The result of ``img[miny:maxy, minx:maxx]``; empty when the bounds are
    inverted (e.g. ``maxy <= miny``).
    """
    left, top, right, bottom = bounds
    rows = slice(top, bottom)
    cols = slice(left, right)
    return img[rows, cols]

# Sample Images

In [None]:
# Preview of the preprocessing on a few slices of the first study:
# resize -> min-max normalise -> locate bounds -> CLAHE -> crop to bounds.
img_size = 512
resize_opts = dict(dsize=(img_size, img_size), interpolation=cv.INTER_AREA)

cts = readImg(metadata.loc[0, 'ct_scan'])
lungs = readImg(metadata.loc[0, 'lung_mask'])
infec = readImg(metadata.loc[0, 'infection_mask'])
slices = cts.shape[2]


def _sliceStack(volume):
    """Pick slices 100, 120, ..., 180 along axis 2 and return them as (slice, rows, cols, 1)."""
    picked = volume[:, :, range(100,200,20)]
    return np.reshape(
        np.rollaxis(picked, 2),
        (picked.shape[2], picked.shape[0], picked.shape[1], 1),
    )


def _showPanel(ax, image, cmap, title):
    """Draw one image on ``ax`` with a title and no tick marks."""
    ax.imshow(image, cmap=cmap)
    ax.set_title(title)
    ax.set_xticks([])
    ax.set_yticks([])


arr_cts = _sliceStack(cts)
arr_lungs = _sliceStack(lungs)
arr_infec = _sliceStack(infec)

# Rows: original CT, processed CT, lung mask, infection mask. Columns: one per slice.
fig, axes = plt.subplots(4, 5, figsize=(24,18))

for ii in range(arr_cts.shape[0]):
    # CT scan: resize, then rescale intensities to [0, 1].
    imgCt = cv.resize(arr_cts[ii], **resize_opts)
    xmax, xmin = imgCt.max(), imgCt.min()
    imgCt = (imgCt - xmin)/(xmax - xmin)

    # Bounds are computed on the normalised CT and reused for both masks
    # so that image and labels stay aligned.
    bounds = getBoundaries(imgCt)
    claheImgCt = applyClahe(imgCt)
    newImgCt = cropImage(claheImgCt, bounds)

    _showPanel(axes[0,ii], imgCt, 'bone', 'Original CT')
    _showPanel(axes[1,ii], newImgCt, 'bone', 'CT: clahe + cropped')

    # Lung mask
    imgLungs = cv.resize(arr_lungs[ii], **resize_opts)
    newImgLungs = cropImage(imgLungs, bounds)
    _showPanel(axes[2,ii], newImgLungs, 'Greens', 'Lungs segmentation: cropped')

    # Infection mask
    imgInfec = cv.resize(arr_infec[ii], **resize_opts)
    newImgInfec = cropImage(imgInfec, bounds)
    _showPanel(axes[3,ii], newImgInfec, 'Reds', 'Infection segmentation: cropped')

## Training Data

In [None]:
def convertDataArray(cts_all, infects_all, img_size=256):
    """Resize every CT slice and infection mask to a square, single-channel array.

    Both lists are modified in place: each element is replaced by its resized
    ``(img_size, img_size, 1)`` version, and index positions where resizing or
    reshaping fails (for example an empty crop) are removed from BOTH lists so
    they stay aligned.

    Parameters
    ----------
    cts_all : list of numpy.ndarray
        Preprocessed CT slices.
    infects_all : list of numpy.ndarray
        Infection masks, index-aligned with ``cts_all``.
    img_size : int, optional
        Output edge length in pixels (default 256, the previously hard-coded value).

    Returns
    -------
    tuple
        ``(cts_all, infects_all)`` -- the same list objects that were passed in.
    """
    target = (img_size, img_size)
    out_shape = (img_size, img_size, 1)

    failed = []  # indices whose slice or mask could not be converted

    for ii in tqdm.tqdm(range(len(cts_all))):
        try:
            ct = cv.resize(cts_all[ii], dsize=target, interpolation=cv.INTER_AREA)
            ct = np.reshape(ct, out_shape)

            # NOTE(review): INTER_AREA averages pixels, so resized masks may hold
            # fractional values at region borders -- confirm this is intended.
            infect = cv.resize(infects_all[ii], dsize=target, interpolation=cv.INTER_AREA)
            infect = np.reshape(infect, out_shape)
        except Exception:
            # Best-effort: skip unusable slices, but (unlike a bare ``except:``)
            # let KeyboardInterrupt / SystemExit propagate.
            failed.append(ii)
            continue

        cts_all[ii] = ct
        infects_all[ii] = infect

    # Delete from the end so earlier indices remain valid.
    for idx in reversed(failed):
        del cts_all[idx]
        del infects_all[idx]

    return (cts_all, infects_all)

In [None]:
def generateData(metaData, low, high):
    """Build preprocessed CT slices and matching infection masks for a range of studies.

    Per slice: resize to 512x512 -> min-max normalise -> compute bounds on the
    normalised CT -> apply CLAHE -> crop CT and infection mask to those bounds.
    The collected slices are then passed to ``convertDataArray``. Lung masks are
    not loaded or returned by this function.

    Parameters
    ----------
    metaData : pandas.DataFrame
        Table providing ``'ct_scan'`` and ``'infection_mask'`` file paths,
        looked up with ``metaData.loc[row, column]``.
    low, high : int
        Row labels ``low`` (inclusive) to ``high`` (exclusive) are processed.

    Returns
    -------
    tuple
        ``(cts_all, infects_all)`` as returned by ``convertDataArray``.
    """
    img_size = 512

    cts_all = []
    infects_all = []

    for fileNum in tqdm.tqdm(range(low, high, 1)):
        # Read from the table that was passed in rather than a notebook-level global.
        cts = readImg(metaData.loc[fileNum, 'ct_scan'])
        infec = readImg(metaData.loc[fileNum, 'infection_mask'])

        # Move the slice axis to the front: (rows, cols, slices) -> (slices, rows, cols, 1).
        arr_cts = np.reshape(np.rollaxis(cts, 2), (cts.shape[2], cts.shape[0], cts.shape[1], 1))
        arr_infec = np.reshape(np.rollaxis(infec, 2), (infec.shape[2], infec.shape[0], infec.shape[1], 1))

        for ii in range(arr_cts.shape[0]):
            # CT SCAN
            imgCt = cv.resize(arr_cts[ii], dsize=(img_size, img_size), interpolation=cv.INTER_AREA)
            xmax, xmin = imgCt.max(), imgCt.min()
            span = xmax - xmin
            if span > 0:
                imgCt = (imgCt - xmin) / span
            else:
                # Constant slice: avoid 0/0 (NaN plus warnings). An all-zero image
                # has nothing above the contour threshold in getContours.
                imgCt = np.zeros_like(imgCt)

            bounds = getBoundaries(imgCt)
            claheImgCt = applyClahe(imgCt)
            cts_all.append(cropImage(claheImgCt, bounds))

            # INFECTION SEGMENTATION -- cropped with the CT's bounds to stay aligned.
            imgInfec = cv.resize(arr_infec[ii], dsize=(img_size, img_size), interpolation=cv.INTER_AREA)
            infects_all.append(cropImage(imgInfec, bounds))

    return convertDataArray(cts_all, infects_all)

In [None]:
# Build the training set from metadata rows 0-19 (generateData iterates range(0, 20)).
cts_all, infects_all = generateData(metadata, 0, 20)

In [None]:
# Read the count and per-slice shape straight from the list instead of
# materialising the whole dataset as an array twice.
print(f'Training Total Slices: {len(cts_all)} with image size: {cts_all[0].shape}')

## Images with no infection

In [None]:
# Flag each mask: 0.0 when it holds a single distinct value (treated here as
# "no infection"), 1.0 otherwise. Shape is (1, number_of_masks).
infectBool = np.array(
    [[0.0 if np.unique(mask).size == 1 else 1.0 for mask in infects_all]]
)

infected_total = infectBool.sum()
print(f"Number of CTs with no infection: {len(infects_all)-infected_total}")
print(f"Number of CTs with infection: {infected_total}")

## Data Augmentation

In [None]:
# # Code taken directly
# ia.seed(1)

# seq = iaa.Sequential([
#     iaa.Fliplr(0.5), # horizontal flips
#     iaa.Flipud(0.5), # vertical flips
    
#     # Apply affine transformations to each image.
#     # Scale/zoom them, translate/move them, rotate them and shear them.
#     iaa.Affine(
#         scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}, # scale images
#         translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
#         rotate=(-15, 15),
#         shear=(-15, 15)
#     )
# ], random_order=True) # apply augmenters in random order

In [None]:
# # Random sampling
# num_augs = round(len(cts_all)/2.)
# rand_idx = np.random.randint(0, len(cts_all), size=num_augs)

# sample_cts = [cts_all[ii] for ii in rand_idx]
# sample_lungs = [lungs_all[ii] for ii in rand_idx]
# sample_infects = [infects_all[ii] for ii in rand_idx]

In [None]:
# seq_det = seq.to_deterministic()
# cts_aug = seq_det.augment_images(sample_cts)
# lungs_aug = seq_det.augment_images(sample_lungs)
# infects_aug = seq_det.augment_images(sample_infects)

In [None]:
# cts = np.concatenate([cts_all, cts_aug], axis=0)
# lungs = np.concatenate([lungs_all, lungs_aug], axis=0)
# infects = np.concatenate([infects_all, infects_aug], axis=0)

## Saving Data

In [None]:
# Write the preprocessed CT slices and infection masks as .npy files in the
# current working directory; the file names record the CLAHE clip limit (3.0)
# used by applyClahe.
np.save('CtScans(Clahe_ClipLimit_3).npy', np.array(cts_all))
np.save('InfectionSegmentation(Clahe_ClipLimit_3).npy', np.array(infects_all))