# Processing NIfTI Medical Volumes

In [None]:
import numpy as np
import os
import shutil

In [None]:
import cv2
from PIL import Image
from natsort import natsorted
import nibabel as nib
import matplotlib.pyplot as plt

from convert_nii2png import nii2png

## Dataset 
The data to process was ingested from the sources below:

1. COVID-19 Lung CT Lesion Segmentation Challenge - 2020, 199 patients https://covid-segmentation.grand-challenge.org/COVID-19-20/

2. COVID-19 CT Lung and Infection Segmentation Dataset, 20 patients (only infection masks were used) https://zenodo.org/record/3757476#.YTdEx55Kg1h

3. Medseg AI - SIRM dataset, 100 scans from 48 patients https://medicalsegmentation.com/covid19/

4. MosMedData Dataset COVID19_1110, 50 patients https://www.kaggle.com/datasets/maedemaftouni/covid19-ct-scan-lesion-segmentation-dataset

 ## 1. Visualize a voxel slice

In [None]:
current_dir = os.getcwd()

In [None]:
# Names of all entries in the Grand Challenge training folder
# (despite its name, gc_dir holds a list of names, not a path).
gc_dir= os.listdir(os.path.join(current_dir, 'segmentation_datasets/grandchallenge/COVID-19-20_v2/Train'))

In [None]:
len(gc_dir)

In [None]:
# Preview a single slice of one Grand Challenge CT volume.
ct_volume_file = os.path.join(
    current_dir,
    'segmentation_datasets/grandchallenge/COVID-19-20_v2/Train/volume-covid19-A-0599_ct.nii.gz',
)
ct_image = nib.load(ct_volume_file)
voxels = ct_image.get_fdata()
voxel = voxels  # second name for the same array (not a copy)
print(voxels.shape)

# Index of the slice taken along the third array axis.
sl = 12
vox_slice = voxels[:, :, sl]
plt.imshow(vox_slice, cmap='gray')

In [None]:
# Preview the same slice index of the segmentation mask of that case.
seg_image = nib.load('./segmentation_datasets/grandchallenge/COVID-19-20_v2/Train/volume-covid19-A-0599_seg.nii.gz')
voxels = seg_image.get_fdata()
print(voxels.shape)

# Index of the slice taken along the third array axis.
sl = 12
vox_slice = voxels[:, :, sl]
plt.imshow(vox_slice, cmap='gray')

## 2. Create parent and children directories

In [None]:
!find . -name "*.DS_Store" -type f -delete

In [None]:
# Create the parent directory that will hold the merged CT/GT volumes.

parent_dir = os.path.join(os.getcwd(), 'merged_dataset')
try:
    os.makedirs(parent_dir, exist_ok=False)
except FileExistsError:
    # The path is already present (e.g. the notebook was run before).
    print('Directory exist')
except OSError as error:
    # Any other failure (permissions, read-only disk, ...) must not be
    # reported as "exists"; show the actual reason instead.
    print(f'Directory can not be created: {error}')
else:
    print('Directory created successfully')

In [None]:
# Create the children directories: CT for the image volumes and GT for the
# ground-truth masks.

child_dirs = ['CT', 'GT']

for child in child_dirs:
    child_path = os.path.join(parent_dir, child)
    try:
        os.makedirs(child_path, exist_ok=False)
    except FileExistsError:
        # An existing directory is not a failure; report it as such.
        print('Directory exist')
    except OSError as error:
        # Real creation failure: include the reason in the message.
        print(f'Directory can not be created: {error}')
    else:
        print('Directory created successfully')
    

In [None]:
def move_file(src_dir, dest_dir):
    """
    Moves a file (or directory) to another location.
    Thin wrapper around shutil.move.

    inputs: src_dir, path of the item to move, and dest_dir, the target
    path (an existing directory or the new path of the item)
    output: the value returned by shutil.move (the destination path)

    """
    moved_to = shutil.move(src_dir, dest_dir)
    return moved_to

<h3> 2.1 Move Grand Challenge volumes onto the parent directory</h3>

In [None]:
# Sorted copy of the Grand Challenge entry names.
grand_challenge = sorted(gc_dir)

In [None]:
len(grand_challenge)

In [None]:
# Folder the Grand Challenge volumes are moved from.
gc_src = os.path.join(os.getcwd(), 'segmentation_datasets/grandchallenge/COVID-19-20_v2/Train')

In [None]:
# Route every Grand Challenge volume to the merged CT or GT folder according
# to the marker in its file name; entries without a marker stay in place.
for vol in grand_challenge:
    source = os.path.join(gc_src, vol)
    if '_ct' in vol:
        dest = './merged_dataset/CT'
    elif '_seg' in vol:
        dest = './merged_dataset/GT'
    else:
        continue
    move_file(source, dest)

In [None]:
# Report how many entries each merged folder currently holds.
merged_root = os.path.join(current_dir, 'merged_dataset')
gt = os.listdir(os.path.join(merged_root, 'GT'))
ct = os.listdir(os.path.join(merged_root, 'CT'))
print(f'Ground truth volumes: {len(gt)}')
print(f'3D Image volumes: {len(ct)}')

<h3> 2.2 Move Zenodo volumes onto the parent directory</h3>

In [None]:
# Root folder of the Zenodo dataset and the sorted names of its entries.
zenodo_dir = os.path.join(os.getcwd(), 'segmentation_datasets/zenodo_ma_jun_segmentation')
zenodo_list = os.listdir(zenodo_dir)
zenodo_list.sort()

In [None]:
print('List of Files')
print('Zenodo dir', zenodo_dir, 'Zenodo list', zenodo_list, sep='\n')

In [None]:
# CT folder and infection-mask folder of the Zenodo dataset.
zen_ct = os.path.join(zenodo_dir, 'COVID-19-CT-Seg_20cases')
zen_gt = os.path.join(zenodo_dir, 'Infection_Mask')

# Sorted entry names of each folder.
zen_ct_list = os.listdir(zen_ct)
zen_ct_list.sort()
zen_gt_list = os.listdir(zen_gt)
zen_gt_list.sort()

In [None]:
# Drop the macOS metadata entry when it is listed. A plain list.remove()
# raises ValueError if '.DS_Store' is absent, e.g. once the `find ... -delete`
# cleanup cell has already removed those files.
zen_gt_list = [name for name in zen_gt_list if name != '.DS_Store']

In [None]:
# Move every Zenodo CT volume into the merged CT folder.
# Iterate over the names directly: enumerate() yields (index, name) tuples,
# which os.path.join cannot join with the folder path.
for vol in zen_ct_list:
    source = os.path.join(zen_ct, vol)
    dest = './merged_dataset/CT'
    move_file(source, dest)

In [None]:
# Move every Zenodo infection mask into the merged GT folder.
for vol in zen_gt_list:
    source = os.path.join(zen_gt, vol)
    dest = './merged_dataset/GT'
    move_file(source, dest)

<h3> 2.3 Move Medical Segmentation (MedSeg) volumes onto the parent directory</h3>

In [None]:
# Target folders for the SIRM mask volume and image volume.
dest_msk = os.path.join(current_dir, 'merged_dataset/GT')
dest_img = os.path.join(current_dir, 'merged_dataset/CT')

# The SIRM data consists of one mask file and one image file.
simr_mask = './segmentation_datasets/SIRM/tr_mask.nii.gz'
simr_img = './segmentation_datasets/SIRM/tr_im.nii.gz'

move_file(simr_mask, dest_msk)
move_file(simr_img, dest_img)


In [None]:
!find . -name "*.DS_Store" -type f -delete

In [None]:
# Root folder of the merged dataset; its CT and GT sub-folders are listed below.
vol_path = os.path.join(os.getcwd(),'merged_dataset')

In [None]:
# Sorted entry names of the merged CT and GT folders
# (sorted() already returns a new list).
CT_list = sorted(os.listdir(os.path.join(vol_path, 'CT')))
GT_list = sorted(os.listdir(os.path.join(vol_path, 'GT')))

In [None]:
# Expected totals per folder: 199 Grand challenge + 20 Zenodo + 1 SIRM volume
print('CT volumes:{}, Mask volumes:{}'.format(len(CT_list), len(GT_list)))
      

<h2> 3. Visualizing datasets in axes z, x and y </h2>

In [None]:
# Folders with the merged CT volumes and the ground-truth (GT) masks.
ct_dir= os.path.join(vol_path,'CT')
gt_dir= os.path.join(vol_path,'GT')

In [None]:
# Three sample CT volumes (.nii.gz) picked from the merged collection
# for visual inspection.
ct_vols = ['coronacases_005.nii.gz','radiopaedia_14_85914_0.nii.gz','volume-covid19-A-0657_ct.nii.gz']

In [None]:
!find . -name "*.DS_Store" -type f -delete

In [None]:
# Mask files for the same three sample cases (the Grand Challenge mask uses the _seg suffix).
gt_vols =['coronacases_005.nii.gz','radiopaedia_14_85914_0.nii.gz','volume-covid19-A-0657_seg.nii.gz']

In [None]:
def volume_vis(volumes, im_dir):

    """
    Helps to visualize the orientation of the volumes.

    Loads every volume, takes the middle slice along its last axis,
    resizes it to 512x512 when it has another shape and shows all the
    slices side by side in one figure.

    inputs: a list of volume file names from the different datasets
    and the directory that contains them
    output: list with the shape of every displayed 2D slice
    (empty list, and no figure, when no volume is given)

    """

    size = (512, 512)
    mid_vox = []
    shapes = []

    for v in volumes:
        vox = nib.load(os.path.join(im_dir, v)).get_fdata()
        slc = vox.shape[-1] // 2
        sliced = vox[:, :, slc]
        if sliced.shape != size:
            # Nearest-neighbour interpolation keeps the original pixel values.
            sliced = cv2.resize(sliced, size, interpolation=cv2.INTER_NEAREST)
        mid_vox.append(sliced)
        shapes.append(sliced.shape)

    # plt.subplots cannot build a grid with zero columns.
    if not mid_vox:
        return shapes

    # Visualize slice
    # squeeze=False always returns a 2D array of axes, so flatten() also
    # works when a single volume is shown.
    fig, ax = plt.subplots(1, len(mid_vox), sharex=False, sharey=False, squeeze=False)
    ax = ax.flatten()
    for ii, sli in enumerate(mid_vox):
        ax[ii].imshow(sli, cmap=plt.cm.gray)
    plt.subplots_adjust(hspace=0, wspace=1)

    return shapes
    

In [None]:
volume_vis(gt_vols, gt_dir)

In [None]:
volume_vis(ct_vols, ct_dir)

In [None]:
def axesvolume_vis(volumes, im_dir, vol_slice, sl_axis=None):

    """
    Helps to visualize the volumes sliced along the x, y or z axis.

    For every volume the slice with index vol_slice is taken along the
    first ('x'), second ('y') or third ('z') array axis, resized to
    512x512 when it has another shape, and all slices are shown side by
    side in one figure.

    inputs: list of volume file names, directory that contains them,
    slice index and slice axis ('x', 'y' or 'z'; the default None is
    rejected, so the axis must always be given)
    output: list with the shape of every displayed 2D slice
    (empty list, and no figure, when no volume is given)

    """
    # Kept as an assert so callers see the same exception type as before.
    assert sl_axis in ['x', 'y', 'z'], "sl_axis must be 'x', 'y' or 'z'"
    size = (512, 512)
    mid_vox = []
    shapes = []

    for v in volumes:
        vox = nib.load(os.path.join(im_dir, v)).get_fdata()

        if sl_axis == 'x':
            sliced = vox[vol_slice, :, :]
        elif sl_axis == 'y':
            sliced = vox[:, vol_slice, :]
        else:
            sliced = vox[:, :, vol_slice]

        if sliced.shape != size:
            # Nearest-neighbour interpolation keeps the original pixel values.
            sliced = cv2.resize(sliced, size, interpolation=cv2.INTER_NEAREST)

        mid_vox.append(sliced)
        shapes.append(sliced.shape)

    # plt.subplots cannot build a grid with zero columns.
    if not mid_vox:
        return shapes

    # Visualize slice
    # squeeze=False always returns a 2D array of axes, so flatten() also
    # works when a single volume is shown.
    fig, ax = plt.subplots(1, len(mid_vox), sharex=False, sharey=False, squeeze=False)
    ax = ax.flatten()
    for ii, sli in enumerate(mid_vox):
        ax[ii].imshow(sli, cmap=plt.cm.gray)
    plt.subplots_adjust(hspace=0, wspace=1)

    return shapes

In [None]:
# Example: slice 200 along the x axis (first array axis) of each sample CT volume
axesvolume_vis(ct_vols, ct_dir, 200, 'x')

In [None]:
axesvolume_vis(ct_vols, ct_dir, 300, 'y')

## 4. Slicing the dataset

In [None]:
# Create parent directory and children directory for the slice dataset
child_dirs = ['CT', 'GT']
slice_dir = os.path.join(os.getcwd(), 'merged_dataset/merged_dataset')

for child in child_dirs:
    child_dir = os.path.join(slice_dir, child)
    try:
        # makedirs also creates slice_dir itself when it is missing.
        os.makedirs(child_dir, exist_ok=False)
    except FileExistsError:
        print('Directory exist')
    except OSError as error:
        # Real creation failure (permissions, ...): do not report "exist".
        print(f'Directory can not be created: {error}')
    else:
        print('Directory created successfully')

In [None]:
# Paths handed to nii2png below (presumably the volumes are read from
# input_path and the slices written to output_path - confirm in convert_nii2png).
input_path = vol_path
output_path = slice_dir

In [None]:
# Convert the merged volumes; 'z' presumably selects the slicing axis - confirm in convert_nii2png.
nii2png(input_path, output_path,'z')   

## 5. Moving Morozov CT slices onto the sliced directory

The Morozov dataset images were sourced from Kaggle. The original link to the dataset,
https://mosmed.ai/datasets/covid19_1110/, is no longer available.

In [None]:
# Kaggle folder with the 2D slices: 'frames' holds the images, 'masks' the masks.
mo_dir = os.path.join(os.getcwd(), 'segmentation_datasets/maede_maftouni-segmentation')
frames_dir = os.path.join(mo_dir, 'frames')
masks_dir = os.path.join(mo_dir, 'masks')

# Natural sort of the entry names of each folder.
mo_ct = natsorted(os.listdir(frames_dir))
mo_gt = natsorted(os.listdir(masks_dir))

In [None]:
# Keep only the files whose name contains the Morozov marker.
morozov_tag = 'Morozov'
mo_gt = [fname for fname in mo_gt if morozov_tag in fname]
mo_ct = [fname for fname in mo_ct if morozov_tag in fname]

In [None]:
# Check for the same number of images and masks
print(len(mo_gt), len(mo_ct))
# Shown as the cell output: True only when both lists hold the same
# file names in the same order.
mo_gt == mo_ct

In [None]:
# Destination folders do not depend on the file, so build them once.
dest_img = os.path.join(output_path, 'CT')
dest_msk = os.path.join(output_path, 'GT')

for g in mo_gt:
    # Images and masks are saved with the same file name, so the mask name
    # locates both files.
    sr_img = os.path.join(mo_dir, 'frames', g)
    sr_msk = os.path.join(mo_dir, 'masks', g)
    # Move files
    move_file(sr_img, dest_img)
    move_file(sr_msk, dest_msk)

## 6. Clean dataset

Remove images and their masks from the dataset if the infection is smaller than 110 pixels.

In [None]:
# Naturally sorted names of the mask slices.
gt_slice_dir = os.path.join(slice_dir, 'GT')
msks = natsorted(os.listdir(gt_slice_dir))

In [None]:
# Naturally sorted names of the CT image slices.
ct_slice_dir = os.path.join(slice_dir, 'CT')
imgs = natsorted(os.listdir(ct_slice_dir))

In [None]:
# Remove every image/mask pair whose mask has fewer than 110 pixels with
# value 255 after conversion to 8-bit grayscale.
if len(msks) != len(imgs):
    # Pairs are matched by position only; with lists of different length the
    # pairs are misaligned and the wrong files would be deleted.
    raise ValueError(
        f'Number of masks ({len(msks)}) and images ({len(imgs)}) differ'
    )

c = 0
for img_name, msk_name in zip(imgs, msks):
    pathg = os.path.join(output_path, 'GT', msk_name)
    pathi = os.path.join(output_path, 'CT', img_name)
    # Only the mask is needed for the decision; the context manager closes
    # the file handle before the file may be removed.
    with Image.open(pathg) as msk_file:
        mask = np.array(msk_file.convert('L'))
    if np.count_nonzero(mask == 255) < 110:
        os.remove(pathg)
        os.remove(pathi)
        c += 1
print(f'Total removed {c}')

In [None]:
# Count the files on disk: `msks` and `imgs` were listed before the cleaning
# step, so their lengths would still include the removed pairs.
remaining_msks = os.listdir(os.path.join(slice_dir, 'GT'))
remaining_imgs = os.listdir(os.path.join(slice_dir, 'CT'))
print(f'Total masks in dataset: {len(remaining_msks)}')
print(f'Total images in dataset: {len(remaining_imgs)}')