# **MR PREPROCESSING PIPELINE**

In [2]:
import os
import json
import ants
import numpy as np
import nibabel as nib
import matplotlib.pyplot as plt

from pprint import pprint
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor


from preprocessing.utils import *
from preprocessing.functions import *
from preprocessing.pipeline import preprocess_brain_images
from preprocessing.cropping import *
from preprocessing.intensity_matching import *
from intensity_normalization.plot.histogram import HistogramPlotter

2023-08-15 19:27:18.141721: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# !nvidia-smi
# !nvcc --version

In [4]:
# !pip install antspynet
# !pip install intensity-normalization

## **0. TRANSFORM DICOM to NIFTI**
DICOM series are converted to NIfTI with the external `dcm2niix` tool; this step is run outside the notebook and is not included here.

---------------------

In [5]:
# DATA_DIR = '/mnt/c/Users/sergio/Desktop/Sergio/pydicom2niix/data'

In [6]:
# Dataset roots. NOTE(review): these are absolute, machine-specific paths;
# every directory constant below is derived from them, so they must be edited
# before the notebook can run elsewhere.
GM_ROOT = '/mnt/c/Users/sergio/Desktop/TFG/GM'
UCSF_ROOT = '/mnt/c/Users/sergio/Desktop/TFG/TCIA'

In [7]:
# One working directory per preprocessing stage of the GM dataset. The stage
# functions in this notebook rewrite files in place, so each stage operates on
# the folder passed to it.
GM_DIR_SpN = f'{GM_ROOT}/GM-SpN'  # Spatial Normalization
GM_DIR_SkS = f'{GM_ROOT}/GM-SkS'  # Skull Stripped
GM_DIR_BFC_DN = f'{GM_ROOT}/GM-BFC+DN'  # Bias Field Correction + Denoise
GM_DIR_HM = f'{GM_ROOT}/GM-HM'  # Histogram matched
GM_DIR_HM_NORM = f'{GM_ROOT}/GM-HM+NORM'  # Histogram matched + Normalized
GM_DIR_NORM = f'{GM_ROOT}/GM-NORM'  # Normalized

In [8]:
# GM folders produced by the BraTS-style pipeline.
# NOTE(review): GM_BRATS_NORM is rebound to a different folder further down
# (section 5), so its value depends on which cells have been executed.
GM_BRATS = f'{GM_ROOT}/GM-BRATS'  # BraTS Pre-Processing Pipeline
GM_BRATS_NORM = f'{GM_ROOT}/GM-BRATS+NORM'  # BraTS Pre-Processing Pipeline + Normalization

In [9]:
# UCSF (TCIA) folders: the reference set used for histogram matching and the
# copy used for intensity normalization (per the commented-out calls below).
UCSF_BASE = f'{UCSF_ROOT}/UCSF-CLEANED'
UCSF_NORM = f'{UCSF_ROOT}/UCSF-NORM'

In [10]:
def load_ucsf_mask(root, patient_no, mask_type='tumor'):
    """Read one of the derivative masks stored for a UCSF patient.

    Args:
        root: Dataset root that contains the ``derivatives`` folder.
        patient_no: Integer patient number; formatted zero-padded to 4 digits.
        mask_type: ``'tumor'`` (default) for the tumor segmentation or
            ``'brain'`` for the brain mask.

    Returns:
        The object returned by ``ants.image_read`` for the resolved file.

    Raises:
        ValueError: If ``mask_type`` is not ``'brain'`` or ``'tumor'``.
    """
    suffix_by_type = {
        'brain': 'desc-brain_mask.nii.gz',
        'tumor': 'desc-tumor_dseg.nii.gz',
    }
    # Fail early: without this check an unknown type left `path` pointing at
    # the patient directory and the error surfaced inside the image reader.
    if mask_type not in suffix_by_type:
        raise ValueError(
            f'Unknown mask_type: {mask_type!r} (expected one of {sorted(suffix_by_type)})'
        )

    subject = f'sub-{patient_no:04}'
    path = f'{root}/derivatives/{subject}/{subject}_{suffix_by_type[mask_type]}'
    return ants.image_read(path)

## **DATA STRUCTURE**
1. **UCSF Data:** Public dataset from TCIA [The Cancer Imaging Archive](https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=119705830)
    - 501 patients
    - T1w, T1w + contrast, T2w, FLAIR, DWI, SWI
2. **GM Data:** Private dataset from Hospital Gregorio Marañón of Madrid.
    - 41 patients
    - T1w, T1w + contrast, T2w, FLAIR

In [11]:
# gen_metadata(GM_DIR_SpN, f'{GM_ROOT}/GM_spatial_norm.json')

In [12]:
# gen_metadata(GM_BRATS, f'{GM_ROOT}/GM_after_BraTS.json')

In [13]:
# gen_metadata(GM_BRATS_NORM, f'{GM_ROOT}/GM_BraTS_with_normalization.json')

In [14]:
# gen_metadata(GM_DIR_HM, f'{GM_ROOT}/GM_histogram_match.json')

In [15]:
# gen_metadata(GM_DIR_HM_NORM, f'{GM_ROOT}/GM_histogram_match_normalization.json')

In [16]:
# gen_metadata(UCSF_NORM, f'{UCSF_ROOT}/UCSF_normalized.json')

In [17]:
def show_series_info(scanner, json_path, property='Max', verbose=False):
    """Summarise the per-image statistics stored for one scanner/modality key.

    The JSON loaded from ``json_path`` is indexed by ``scanner``; every value
    under that key must provide 'Mean', 'Var', 'Min', 'Max' and 'Sum'.

    Args:
        scanner: Top-level key to read from the metadata JSON.
        json_path: Path handed to ``load_json``.
        property: Accepted for interface compatibility; not used by the body.
        verbose: When true, print the aggregated statistics and return None.

    Returns:
        ``None`` when ``verbose`` is true, otherwise the list
        ``[min of Min, min of Max, mean of Max, max of Max]``.
    """
    entries = list(load_json(json_path)[scanner].values())

    means = [entry['Mean'] for entry in entries]
    vars_ = [entry['Var'] for entry in entries]
    mins = [entry['Min'] for entry in entries]
    maxs = [entry['Max'] for entry in entries]
    sums = [entry['Sum'] for entry in entries]

    if not verbose:
        return [np.min(mins), np.min(maxs), np.mean(maxs), np.max(maxs)]  # np.max(mins)

    # (label, reducer, values) rows are evaluated lazily, one per printed line.
    report_rows = (
        ('Mean MEAN', np.mean, means),
        ('Mean VAR', np.mean, vars_),
        ('Min MIN', np.min, mins),
        ('Max MIN', np.max, mins),
        ('Min MAX', np.min, maxs),
        ('Mean MAX', np.mean, maxs),
        ('Max MAX', np.max, maxs),
        ('Min SUM', np.min, sums),
        ('Mean SUM', np.mean, sums),
        ('Max SUM', np.max, sums),
    )
    for label, reducer, values in report_rows:
        print(f'{label}: {reducer(values):.2f}')
    return

## **1. SPATIAL NORMALIZATION**
1. **Resample the image:** Reshape the image to the same dimensions as the TCIA database.
 
   1.1 **Interpolation method &rarr; Windowed Sinc Interpolation:** It is more appropriate as it tends to retain sharp edges better than other methods like B-Spline or Gaussian, which could be important in capturing the details of the tumor regions. This might be especially relevant in this case, as IDH status and 1p/19q codeletion are known to be associated with certain morphological characteristics on MRI.
2. **Set spacing:** Resample spacing to 1 mm isotropic resolution.
3. **Reorient:** Reorient the image to RAI (Right, Anterior, Inferior)
4. **Set origin:** Set the origin of the image to the same as the TCIA database.
5. **Set direction:** Set the direction matrix of the image to match the TCIA database.

### UCSF DATA (Reference)
- **Orientation:** RAI
- **Dimensions:** (240, 240, 155)
- **Spacing:** (1.0, 1.0, 1.0)
- **Origin:** (-0.0, -239.0, 0.0)
- **Direction:** [1. 0. 0. 0. 1. 0. 0. 0. 1.]

In [18]:
def add_padding_axis(image, target_size):
    """Pad every axis of ``image`` that is shorter than ``target_size``.

    Args:
        image: Image-like object exposing ``shape`` (e.g. an ANTs image).
        target_size: Sequence with the minimum length wanted along each axis.

    Returns:
        The input image when no axis needs padding, otherwise the result of
        the successive ``add_padding`` calls.
    """
    # The axis count comes from `shape`, which the loop body already relies
    # on, instead of a library-specific dimension attribute.
    for axis in range(len(image.shape)):
        # `image` may have been replaced by a previous iteration, so the
        # shape is read again each time.
        if image.shape[axis] < target_size[axis]:
            image = add_padding(image, target_size[axis], axis=axis)
    return image

In [19]:
def spatial_normalize_dir(root_directory, target_size, target_spacing, target_origin, target_direction, interp_type=3, verbose=False):
    """Spatially normalise every NIfTI file under ``root_directory`` in place.

    Each ``.nii`` / ``.nii.gz`` file is read, padded up to ``target_size``
    where needed, passed through ``spatial_normalization`` and written back to
    the same path, replacing the original file.

    Args:
        root_directory: Folder walked recursively.
        target_size, target_spacing, target_origin, target_direction:
            Forwarded unchanged to ``spatial_normalization``.
        interp_type: Interpolation selector forwarded to
            ``spatial_normalization``.
        verbose: Print the name of each file before it is processed.
    """
    nifti_suffixes = ('.nii.gz', '.nii')
    nifti_files = (
        (folder, name)
        for folder, _subdirs, names in os.walk(root_directory)
        for name in names
        if name.endswith(nifti_suffixes)
    )

    for folder, name in nifti_files:
        if verbose:
            print(f'Performing spatial normalization: {name}')

        nifti_path = os.path.join(folder, name)
        # Padding first: originally added for the GM T2w series.
        padded = add_padding_axis(ants.image_read(nifti_path), target_size)
        normalized = spatial_normalization(
            padded, target_size, target_spacing, target_origin, target_direction, interp_type
        )
        ants.image_write(normalized, nifti_path)  # overwrites the source file

In [20]:
# Reference geometry every GM image is resampled to; the values mirror the
# "UCSF DATA (Reference)" list in the markdown above.
target_size = (240, 240, 155)
target_spacing = (1.0, 1.0, 1.0)
target_origin = (-0.0, -239.0, 0.0)
target_direction = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]

In [21]:
# spatial_normalize_dir(
#     GM_DIR_SpN,
#     target_size=target_size,
#     target_spacing=target_spacing,
#     target_origin=target_origin,
#     target_direction=target_direction,
#     verbose=True
# )

In [22]:
# Folder handed to preprocess_brain_images_dir in the (commented-out) call below.
# NOTE(review): the trailing "Normalized" comment looks copied from GM_DIR_NORM - confirm.
GM_DIR_PROCESSED = f'{GM_ROOT}/GM-PROCESSED'  # Normalized

In [23]:
def get_scans_by_patient(root_directory):
    """Map each patient folder name to the result of ``get_nifti_paths``.

    The tree is walked top-down. A folder is skipped when it directly contains
    an ``anat`` sub-folder or when its own path contains ``derivatives``;
    sub-folders named ``derivatives`` are never used as keys.

    Args:
        root_directory: Dataset root to scan.

    Returns:
        dict: ``{folder_name: get_nifti_paths(folder_path)}``.
    """
    scans = {}
    for folder, subfolders, _files in os.walk(root_directory):
        if 'anat' in subfolders or 'derivatives' in folder:
            continue
        scans.update({
            name: get_nifti_paths(os.path.join(folder, name))
            for name in subfolders
            if name != 'derivatives'
        })
    return scans

In [24]:
def preprocess_brain_images_dir(
    root_directory,
    truncate_intensity=(0.01, 0.99),
    template_transform_type='Affine',
    do_brain_extraction=True,
    do_bias_correction=True,
    do_denoising=True,
    intensity_matching_type=None,
    reference_image=None,
    intensity_normalization_type=None,
    antsxnet_cache_directory=None,
    segment=True,
    verbose=1,
    **kwargs
):
    """Run ``preprocess_brain_images`` for every patient under ``root_directory``.

    For each patient returned by ``get_scans_by_patient`` the first path of
    every modality is read, the four images are preprocessed together with the
    contrast-enhanced T1w image as template, and the results are written back:
    the preprocessed image replaces the source file and the brain mask goes to
    ``<root>/derivatives/<patient>/<patient>_desc-<modality>_brain_mask.nii.gz``.

    Args:
        root_directory: Dataset root.
        segment: When true, the preprocessed image is passed through
            ``segment_image`` with its brain mask before being saved.
        verbose: Forwarded to the pipeline; also enables the "Saving" message.
        **kwargs: Forwarded to ``preprocess_brain_images``.
        (All remaining arguments are forwarded unchanged to
        ``preprocess_brain_images``.)

    Note:
        The four images are unpacked positionally as T1w, T1w_ce, T2w, FLAIR,
        so the mapping returned per patient is assumed to iterate in exactly
        that order - TODO confirm against ``get_nifti_paths``.
    """
    for patient, scanner_paths in get_scans_by_patient(root_directory).items():
        first_paths = [modality_paths[0] for modality_paths in scanner_paths.values()]
        image_t1w, image_t1w_ce, image_t2w, image_flair = [
            ants.image_read(first_path) for first_path in first_paths
        ]

        results = preprocess_brain_images(
            image_t1w=image_t1w,
            image_t1w_ce=image_t1w_ce,
            image_t2w=image_t2w,
            image_flair=image_flair,
            reference_modality='T1w_ce',
            truncate_intensity=truncate_intensity,
            do_brain_extraction=do_brain_extraction,
            template_transform_type=template_transform_type, # Rigid / Affine / SyN
            template=image_t1w_ce, # tocheck
            do_bias_correction=do_bias_correction,
            return_bias_field=False,
            do_denoising=do_denoising,
            intensity_matching_type=intensity_matching_type,
            reference_image=reference_image,
            intensity_normalization_type=intensity_normalization_type,
            antsxnet_cache_directory=antsxnet_cache_directory,
            verbose=verbose,
            **kwargs
        )
        if verbose:
            print(f'\nSaving {patient} MR images...\n')

        # The mask folder is the same for every modality of a patient.
        derivatives_dir = f'{root_directory}/derivatives/{patient}'
        Path(derivatives_dir).mkdir(parents=True, exist_ok=True)

        for modality, modality_paths in scanner_paths.items():
            modality_result = results[modality]
            output_image = modality_result['preprocessed_image']
            brain_mask = modality_result['brain_mask']
            if segment:
                output_image = segment_image(output_image, brain_mask)
            ants.image_write(output_image, modality_paths[0])  # replaces the source scan
            ants.image_write(brain_mask, f'{derivatives_dir}/{patient}_desc-{modality}_brain_mask.nii.gz')

In [25]:
# preprocess_brain_images_dir(
#     GM_DIR_PROCESSED,
#     truncate_intensity=(0.01, 0.99),
#     template_transform_type='Affine',  # SyN
#     do_brain_extraction=True,
#     do_bias_correction=True,
#     do_denoising=True,
#     segment=True
# )

## **2. SKULL STRIPPING**

In [26]:
def skull_stripping_dir(root_directory, quantile_values=None, segment=True, verbose=False):
    """Compute a brain mask for every scan under ``root_directory``.

    Folders whose path contains ``derivatives`` are skipped, matching the
    other directory walkers in this notebook, so masks already stored there
    are not treated as input scans.

    Args:
        root_directory: Dataset root walked recursively.
        quantile_values: Optional quantiles forwarded to
            ``truncate_intensities`` before brain extraction.
        segment: When true, the scan is masked with ``segment_image`` and the
            result overwrites the source file.
        verbose: Print progress messages.

    Raises:
        ValueError: If a NIfTI file name does not end with a known modality
            suffix (``T1w``, ``T2w`` or ``FLAIR``) before its extension.
    """
    # File-name suffix (before the extension) -> modality passed to extract_brain.
    modality_by_suffix = (
        ('T1w', 't1'),      # T1w / T1w + contrast
        ('T2w', 't2'),      # T2w
        ('FLAIR', 'flair'), # FLAIR
    )

    for dirpath, _dirs, files in os.walk(root_directory):
        if 'derivatives' in dirpath:
            continue
        for filename in files:
            # Strip the extension explicitly so '.nii' and '.nii.gz' files are
            # classified the same way.
            if filename.endswith('.nii.gz'):
                stem = filename[:-len('.nii.gz')]
            elif filename.endswith('.nii'):
                stem = filename[:-len('.nii')]
            else:
                continue

            for suffix, modality in modality_by_suffix:
                if stem.endswith(suffix):
                    brain_extraction_modality = modality
                    break
            else:
                raise ValueError(f'Unknown modality: {filename}')

            file_path = os.path.join(dirpath, filename)
            mask_path = get_mask_path(root_directory, filename)
            image = ants.image_read(file_path)

            if quantile_values is not None:
                image = truncate_intensities(image, quantile_values=quantile_values)

            brain_mask = extract_brain(image, brain_extraction_modality=brain_extraction_modality)

            if segment:
                if verbose:
                    print(f'Removing skull from: {filename} ({brain_extraction_modality.upper()})')
                skull_stripped_img = segment_image(image, brain_mask)
                ants.image_write(skull_stripped_img, file_path)  # overwrites the source scan

            if verbose:
                print(f'Saving brain mask: {mask_path}')

            ants.image_write(brain_mask, mask_path)

In [27]:
# skull_stripping_dir(GM_DIR_SkS, quantile_values=(0.01, 0.99), segment=False, verbose=True)

## **3. BIAS FIELD CORRECTION AND DENOISING**

In [28]:
def bias_correction_and_denoising_dir(root_directory, verbose=False):
    """Bias-correct, denoise and re-mask every scan under ``root_directory``.

    Folders whose path contains ``derivatives`` are skipped. For each NIfTI
    file the mask located by ``get_mask_path`` is read, then the image goes
    through ``bias_field_correction``, ``denoise_image`` and ``segment_image``
    (all with that mask) and is written back over the source file.

    Args:
        root_directory: Dataset root walked recursively.
        verbose: Print the file and mask names before processing.
    """
    scans = (
        (folder, name)
        for folder, _subdirs, names in os.walk(root_directory)
        if 'derivatives' not in folder
        for name in names
        if name.endswith(('.nii.gz', '.nii'))
    )

    for folder, name in scans:
        scan_path = os.path.join(folder, name)
        mask_path = get_mask_path(root_directory, name)
        scan = ants.image_read(scan_path)
        mask = ants.image_read(mask_path)

        if verbose:
            print(f'Performing bias field correction and denoising: {name} ({mask_path.split("/")[-1]})')

        corrected = bias_field_correction(scan, mask=mask)
        denoised = denoise_image(corrected, mask=mask)
        masked = segment_image(denoised, mask=mask)
        ants.image_write(masked, scan_path)  # overwrites the source scan

In [29]:
# bias_correction_and_denoising_dir(GM_DIR_BFC_DN, verbose=True)

## **4. INTENSITY MATCHING**

### **4.1. SAVE HISTOGRAMS**

In [30]:
def calculate_histogram(img_path, bins=100):
    """Return the raw (un-normalised) intensity histogram of a NIfTI image.

    Args:
        img_path: Path loaded with ``nib.load``.
        bins: Forwarded to ``np.histogram``.

    Returns:
        The bin counts from ``np.histogram``; the bin edges are discarded.
    """
    voxels = nib.load(img_path).get_fdata()
    counts = np.histogram(voxels, bins=bins)[0]
    return counts  # / np.sum(counts)

def save_histogram(img_path):
    """Cache the histogram of ``img_path`` as a ``.npy`` file next to it.

    The target name is the image path with its ``.nii.gz`` or ``.nii``
    extension replaced by ``.npy``. Nothing is done when that file already
    exists.

    Args:
        img_path: Path to a NIfTI image.

    Returns:
        None.
    """
    # Remove the actual extension rather than a fixed 7 characters, which only
    # fits '.nii.gz' and eats part of the file name for '.nii' inputs.
    for extension in ('.nii.gz', '.nii'):
        if img_path.endswith(extension):
            hist_path = img_path[:-len(extension)] + '.npy'
            break
    else:
        hist_path = os.path.splitext(img_path)[0] + '.npy'

    if os.path.exists(hist_path):
        return

    try:
        hist = calculate_histogram(img_path)
    except ValueError:
        # Deliberate best-effort: images whose histogram cannot be computed
        # are skipped so a directory-wide run keeps going.
        return

    np.save(hist_path, hist)

In [31]:
def save_histograms(root_directory, verbose=False):
    """Call ``save_histogram`` on every NIfTI file under ``root_directory``.

    Folders whose path contains ``derivatives`` are skipped.

    Args:
        root_directory: Dataset root walked recursively.
        verbose: Print each file name before its histogram is saved.
    """
    nifti_files = (
        (folder, name)
        for folder, _subdirs, names in os.walk(root_directory)
        if 'derivatives' not in folder
        for name in names
        if name.endswith(('.nii.gz', '.nii'))
    )

    for folder, name in nifti_files:
        nifti_path = os.path.join(folder, name)

        if verbose:
            print(f'Saving histogram for: {name}')

        save_histogram(nifti_path)

In [32]:
# save_histograms(GM_DIR_BFC_DN, verbose=False)

In [33]:
# save_histograms(UCSF_BASE, verbose=False)

### **4.2. HISTOGRAM MATCHING**

In [34]:
def match_histograms(root_directory, ref_directory, num_clusters=3, verbose=False):
    """Histogram-match every image in ``root_directory`` against a reference set.

    Reference histograms are computed per modality from the NIfTI files in
    ``ref_directory`` and clustered with ``cluster_histograms``; each image
    under ``root_directory`` is then matched with ``match_histogram`` and
    written back over its source file.

    Args:
        root_directory: Dataset whose images are modified in place.
        ref_directory: Dataset providing the reference images.
        num_clusters: Number of clusters passed to ``cluster_histograms``.
        verbose: Print progress and forward the flag to ``match_histogram``.
    """
    modalities = ('T1w', 'T1w_ce', 'T2w', 'FLAIR')

    img_paths = get_nifti_paths(root_directory)
    # One scan of the reference tree feeds both the histograms and the
    # matching step. The earlier `load_histograms(ref_directory)` call is gone
    # because its result was overwritten before ever being read.
    ref_img_paths = get_nifti_paths(ref_directory)

    ref_histograms = {
        modality: [calculate_histogram(ref_path) for ref_path in ref_img_paths[modality]]
        for modality in modalities
    }
    kmeans = {
        modality: cluster_histograms(ref_histograms[modality], num_clusters)
        for modality in modalities
    }

    for modality, img_mod_paths in img_paths.items():
        if verbose:
            print(f'Matching {modality} images')
        for img_path in img_mod_paths:
            matched_img = match_histogram(
                img_path,
                ref_histograms[modality],
                kmeans[modality],
                ref_img_paths[modality],
                verbose=verbose,
            )
            ants.image_write(matched_img, img_path)  # overwrites the source image

In [35]:
# GM_DIR_HM = f'{GM_ROOT}/GM-BRATS+HM'

# match_histograms(GM_DIR_HM, UCSF_BASE, verbose=True)

## **5. INTENSITY NORMALIZATION**

In [36]:
def normalize_intensities_dir(root_directory, use_masks=True, modality_in_mask_name=False, verbose=False, **kwargs):
    """Normalise the intensities of every recognised scan under ``root_directory``.

    A file is processed only when its name ends with one of the modality
    suffixes below; anything else (including files under ``derivatives``) is
    skipped. The normalised image overwrites the source file.

    Args:
        root_directory: Dataset root walked recursively.
        use_masks: When true, the mask located by ``get_mask_path`` is loaded
            with nibabel and passed to ``normalize_intensity``.
        modality_in_mask_name: Forwarded to ``get_mask_path`` as
            ``modality_in_name``.
        verbose: Print each file name before it is normalised.
        **kwargs: Normalisation type per modality, keyed by 'T1w', 'T1w_ce',
            'T2w' and 'FLAIR'; a missing key yields ``None``.
    """
    # Order matters: the contrast-enhanced suffix also ends with 'T1w.nii.gz'.
    modality_by_suffix = (
        ('ce-GADOLINIUM_T1w.nii.gz', 'T1w_ce'),
        ('T1w.nii.gz', 'T1w'),
        ('T2w.nii.gz', 'T2w'),
        ('FLAIR.nii.gz', 'FLAIR'),
    )

    for folder, _subdirs, names in os.walk(root_directory):
        for name in names:
            if not name.endswith(('.nii.gz', '.nii')):
                continue

            modality = next(
                (label for suffix, label in modality_by_suffix if name.endswith(suffix)),
                None,
            )
            if modality is None:
                continue
            normalization_type = kwargs.get(modality)

            if verbose:
                print(f'Normalizing intensity for: {name}')

            scan_path = os.path.join(folder, name)
            scan = ants.image_read(scan_path)

            mask = None
            if use_masks:
                mask_path = get_mask_path(root_directory, name, modality_in_name=modality_in_mask_name)
                mask = nib.load(mask_path).get_fdata()

            normalized = normalize_intensity(scan, mask=mask, modality=modality, normalization_type=normalization_type)
            ants.image_write(normalized, scan_path)

In [37]:
# NOTE(review): this rebinds GM_BRATS_NORM, which an earlier cell set to
# '<GM_ROOT>/GM-BRATS+NORM'. Judging by the folder name this is the BraTS
# pipeline + histogram matching + normalization output; the previous
# "Bias Field Correction + Denoise" comment did not match it.
GM_BRATS_NORM = f'{GM_ROOT}/GM-BRATS+HM+NORM'  # BraTS + Histogram matched + Normalized (per folder name)

In [38]:
# normalize_intensities_dir(
#     GM_BRATS_NORM,
#     T1w='WhiteStripe',
#     T1w_ce='WhiteStripe',
#     T2w='WhiteStripe',
#     FLAIR='WhiteStripe',
#     verbose=True
# )

In [39]:
# normalize_intensities_dir(
#     UCSF_NORM,
#     T1w='WhiteStripe',
#     T1w_ce='WhiteStripe',
#     T2w='WhiteStripe',
#     FLAIR='WhiteStripe',
#     verbose=False,
#     modality_in_mask_name=False
# )

## **6. TUMOR CROPPING**

In [40]:
# GM folder whose images are cropped around the tumor by crop_tumor_dir below.
GM_CROPPED = f'{GM_ROOT}/GM-BRATS+HM+NORM+CROPPED'

In [41]:
def adjust_interval(start, end, desired_length, lower_limit=0, upper_limit=239):
    """Resize ``[start, end]`` symmetrically to ``desired_length`` within limits.

    The interval is grown (or shrunk) equally on both sides. If it then falls
    below ``lower_limit`` it is shifted up; if it exceeds ``upper_limit`` it is
    shifted down. Both bounds are truncated to ``int`` on return.

    Args:
        start, end: Current interval bounds.
        desired_length: Target value of ``end - start``.
        lower_limit: Smallest allowed start.
        upper_limit: Largest allowed end.

    Returns:
        tuple[int, int]: The adjusted ``(start, end)``.
    """
    # Half of the missing (or excess) length is applied to each side.
    margin = (desired_length - (end - start)) / 2
    lo, hi = start - margin, end + margin

    # Shift up when the interval sticks out below the lower limit.
    underflow = lower_limit - lo
    if underflow > 0:
        lo, hi = lower_limit, hi + underflow

    # Shift down when the interval sticks out above the upper limit.
    overflow = hi - upper_limit
    if overflow > 0:
        lo, hi = lo - overflow, upper_limit

    return int(lo), int(hi)

def divide_interval(start, end, N, lower_limit=0, upper_limit=154):
    """Pick ``N`` integer positions covering ``[start, end)``, padding if needed.

    ``N`` evenly spaced positions are taken inside the interval (the end point
    is skipped). When the interval is too short to give ``N`` distinct
    integers, extra positions are added alternately before ``start`` and after
    ``end``, one ``step`` at a time, never leaving
    ``[lower_limit, upper_limit]``.

    Args:
        start, end: Interval bounds.
        N: Number of positions to return.
        lower_limit: Smallest position that may be added on the left.
        upper_limit: Largest position that may be added on the right.

    Returns:
        list[int]: ``N`` positions.

    Raises:
        ValueError: If ``N`` positions cannot be produced inside the limits.
    """
    # Evenly spaced dividers inside the interval; duplicates collapse when the
    # spacing is below one.
    interval_length = (end - start) / N
    dividers = sorted({int(start + i * interval_length) for i in range(N)})

    step = int(np.ceil(interval_length))
    low, high = start, end
    while len(dividers) < N:
        extended = False

        # Grow to the left only while the new position stays in bounds. The
        # check is on the moving cursor, not the fixed `start`, so no negative
        # (wrap-around) index can be produced.
        if low - step >= lower_limit:
            low -= step
            dividers.insert(0, low)
            extended = True
            if len(dividers) == N:
                break

        # Grow to the right under the same rule. Positions start at
        # `end + step`; `end` itself is not emitted, as before.
        if high + step <= upper_limit:
            high += step
            dividers.append(high)
            extended = True

        # Neither side can grow: fail loudly instead of looping forever.
        if not extended:
            raise ValueError(
                f'Cannot place {N} positions for interval ({start}, {end}) '
                f'within [{lower_limit}, {upper_limit}]'
            )

    assert len(dividers) == N, (len(dividers), (start, end))
    return dividers

def extract_slices_from_bbox(image, bbox, resize_to=None, n_slices=16):
    """Extract ``n_slices`` axial slices around a bounding box.

    The z range of the box is clamped to the image and turned into slice
    indices with ``divide_interval``. When ``resize_to`` is given, the x and y
    ranges are resized to that in-plane size with ``adjust_interval`` and every
    slice is cropped accordingly; otherwise full slices are returned.

    The clamping limits come from ``image.shape`` (last valid index per axis),
    which for a 240x240x155 volume gives the previously hard-coded 239/239/154.

    Args:
        image: 3-D image supporting ``shape`` and ``image[x, y, z]`` indexing.
        bbox: ``((xmin, xmax), (ymin, ymax), (zmin, zmax))``.
        resize_to: Optional ``(size_x, size_y)`` for the in-plane crop.
        n_slices: Number of slices to extract.

    Returns:
        list: One 2-D slice per selected z index.
    """
    (xmin, xmax), (ymin, ymax), (zmin, zmax) = bbox
    x_last, y_last, z_last = (extent - 1 for extent in image.shape[:3])

    # Clamp the z range to the valid slice indices of this image.
    zmin = max(zmin, 0)
    zmax = min(zmax, z_last)

    if resize_to:
        final_x, final_y = resize_to
        xmin, xmax = adjust_interval(xmin, xmax, final_x, upper_limit=x_last)
        ymin, ymax = adjust_interval(ymin, ymax, final_y, upper_limit=y_last)

    z_values = divide_interval(zmin, zmax, n_slices, upper_limit=z_last)

    if resize_to:
        return [image[xmin:xmax, ymin:ymax, n] for n in z_values]
    return [image[:, :, n] for n in z_values]


def merge_slices_to_volume(numpy_slices):
    """Stack 2-D slices along a new last axis and wrap the result for ANTs.

    Args:
        numpy_slices: Sequence of equally shaped arrays.

    Returns:
        The object returned by ``ants.from_numpy`` for the stacked volume.
    """
    return ants.from_numpy(np.stack(numpy_slices, axis=-1))

def crop_tumor(image, segmentation, modality, n_slices=16, n_pixels=16, scale_factor=None, resize_to=None):
    """Crop ``image`` around the tumor described by ``segmentation``.

    The bounding cube of the segmentation is computed, scaled with
    ``scale_bounding_cube``, sliced with ``extract_slices_from_bbox`` and the
    slices are merged back into a single volume.

    Args:
        image: Image to crop.
        segmentation: Tumor segmentation passed to ``compute_bounding_cube``.
        modality: Accepted for interface compatibility; not used by the body.
        n_slices: Number of axial slices to keep.
        n_pixels, scale_factor: Forwarded to ``scale_bounding_cube``.
        resize_to: Optional in-plane size forwarded to
            ``extract_slices_from_bbox``.

    Returns:
        The merged volume produced by ``merge_slices_to_volume``.
    """
    tumor_bbox = scale_bounding_cube(
        compute_bounding_cube(segmentation),
        n_pixels=n_pixels,
        factor=scale_factor,
    )
    tumor_slices = extract_slices_from_bbox(image, tumor_bbox, n_slices=n_slices, resize_to=resize_to)
    return merge_slices_to_volume(tumor_slices)

In [42]:
# t = divide_interval(99, 148, 64)
# print(t, len(t), len(set(t)))

In [43]:
# xs, ys, zs = test(paths)

### **GM Dataset**
 * Min: (42, 37, 35)
 * Max: (115, 136, 126)

### **TCIA Dataset**
 * Min: (18, 18, 16)
 * Max: (179, 198, 150)

In [45]:
# print(sorted(xs))
# print(max(xs))
# print(min(xs))

In [46]:
# # print(sorted(ys))
# print(max(ys))
# print(min(ys))

In [47]:
# # print(sorted(zs))
# print(max(zs))
# print(min(zs))

In [None]:
# UCSF_CROPPED = f'{UCSF_ROOT}/UCSF-NORM-LIGHT'
# for dirpath, dirs, files in os.walk(UCSF_CROPPED):
#     for filename in files:
#         if not filename.endswith('.nii.gz') and not filename.endswith('.nii'):
#             continue
        
#         if filename.endswith('ce-GADOLINIUM_T1w.nii.gz'):
#             continue
#         elif filename.endswith('T1w.nii.gz'):
#             file_path = os.path.join(dirpath, filename)
#             # print(file_path)
#             os.remove(file_path)
#         elif filename.endswith('T2w.nii.gz'):
#             file_path = os.path.join(dirpath, filename)
#             # print(file_path)
#             os.remove(file_path)
#         elif filename.endswith('FLAIR.nii.gz'):
#             continue
#         else:
#             continue

In [54]:
# Folders cropped in place by the crop_tumor_dir calls below.
GM_CROPPED = f'{GM_ROOT}/GM-BRATS+HM+NORM+CROPPED'  # same value as the earlier GM_CROPPED cell
# NOTE(review): the first UCSF_CROPPED assignment is immediately overridden by
# the second, so only the '-LIGHT-CROPPED' folder is actually used.
UCSF_CROPPED = f'{UCSF_ROOT}/UCSF-NORM+CROPPED'
UCSF_CROPPED = f'{UCSF_ROOT}/UCSF-NORM-LIGHT-CROPPED'

In [51]:
def crop_tumor_dir(root_directory, resize_to=(128, 128), n_slices=64, verbose=True):
    """Crop every scan under ``root_directory`` around its tumor, in place.

    Paths come from ``get_nifti_paths`` (derivatives omitted). For each file
    the patient's mask is loaded with ``load_mask``, the scan is cropped with
    ``crop_tumor`` and the result overwrites the source file.

    Args:
        root_directory: Dataset root.
        resize_to: In-plane size of the crop.
        n_slices: Number of axial slices kept.
        verbose: Print each file name before cropping.

    Raises:
        AssertionError: If a cropped volume is not ``(*resize_to, n_slices)``.
    """
    paths_by_modality = get_nifti_paths(root_directory, ommit_derivatives=True)
    scans = (
        (modality, scan_path)
        for modality, scan_paths in paths_by_modality.items()
        for scan_path in scan_paths
    )

    for modality, scan_path in scans:
        patient_no = get_patient_no(scan_path)

        scan = ants.image_read(scan_path)
        tumor_mask = load_mask(root_directory, patient_no)

        if verbose:
            print(f'Cropping tumor from: {os.path.basename(scan_path)}')

        cropped = crop_tumor(
            image=scan,
            segmentation=tumor_mask,
            modality=modality,
            resize_to=resize_to,
            n_slices=n_slices,
        )
        assert cropped.shape == (*resize_to, n_slices)
        ants.image_write(cropped, scan_path)  # replaces the full-size scan

In [52]:
# WARNING: crop_tumor_dir writes each cropped volume back to its source path,
# so this cell is destructive and should run once on a copy of the data.
crop_tumor_dir(GM_CROPPED, resize_to=(128, 128), n_slices=64, verbose=False)

In [55]:
# WARNING: destructive, same as the GM call: every scan under UCSF_CROPPED is
# overwritten by its cropped version.
crop_tumor_dir(UCSF_CROPPED, resize_to=(128, 128), n_slices=64, verbose=True)

Cropping tumor from: sub-0004_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0005_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0007_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0008_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0009_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0010_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0011_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0012_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0013_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0014_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0015_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0016_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0017_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0018_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0019_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0020_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0021_ce-GADOLINIUM_T1w.nii.gz
Cropping tumor from: sub-0022_ce-GADOLINIUM_T1w.nii.gz
Cropping t

## **7. CONVERT DATA TO .NPY**

In [59]:
# UCSF_NORM_NP = f'{UCSF_ROOT}/UCSF-NORM-NPZ'
# Folder whose NIfTI files are converted to NumPy archives by the call below.
GM_NORM_NP = f'{GM_ROOT}/GM-BRATS+HM+NORM-NPZ'

# Dataset selected for conversion; switch to the UCSF folder above if needed.
NORM_DIR = GM_NORM_NP

In [60]:
def nifti_dir_to_numpy(root_directory, overwrite=True, ommit_derivatives=False, verbose=True, format_='.npy'):
    """Convert every NIfTI file under ``root_directory`` to a NumPy file.

    Each ``.nii`` / ``.nii.gz`` file is loaded with ``load_nifti`` and saved
    next to the source with its NIfTI extension replaced by ``format_``.

    Args:
        root_directory: Folder walked recursively.
        overwrite: When true, the source NIfTI file is deleted after its
            NumPy counterpart has been written.
        ommit_derivatives: Skip folders whose path contains ``derivatives``.
        verbose: Print each output path.
        format_: ``'.npy'`` (``np.save``) or ``'.npz'`` (``np.savez``, array
            stored under NumPy's default key for positional arguments).

    Raises:
        ValueError: If ``format_`` is not ``'.npy'`` or ``'.npz'``.
    """
    writers = {'.npy': np.save, '.npz': np.savez}
    # Validate before touching any file: previously an unknown format saved
    # nothing and, with overwrite=True, still deleted the source images.
    if format_ not in writers:
        raise ValueError(f"Unsupported format_: {format_!r} (expected '.npy' or '.npz')")
    write_array = writers[format_]

    nifti_suffixes = ('.nii.gz', '.nii')
    paths = []
    for dirpath, _dirs, files in os.walk(root_directory):
        if ommit_derivatives and 'derivatives' in dirpath:
            continue
        paths.extend(
            os.path.join(dirpath, filename)
            for filename in files
            if filename.endswith(nifti_suffixes)
        )

    for file_path in paths:
        # Remove only the NIfTI extension. Splitting on the first '.' cut the
        # path at any earlier dot, e.g. in a directory or subject name.
        suffix = next(s for s in nifti_suffixes if file_path.endswith(s))
        np_file = file_path[:-len(suffix)] + format_
        nifti = load_nifti(file_path, read_with='nibabel')

        if verbose:
            print(f'Saving {np_file}')
        write_array(np_file, nifti)
        if overwrite:
            os.remove(file_path)

In [61]:
# WARNING: with overwrite=True each source NIfTI file is deleted after being
# saved as .npz (derivative masks included, since ommit_derivatives is False).
nifti_dir_to_numpy(NORM_DIR, overwrite=True, verbose=True, format_='.npz')

Saving /mnt/c/Users/sergio/Desktop/TFG/GM/GM-BRATS+HM+NORM-NPZ/derivatives/sub-0000/sub-0000_desc-brain_mask.npz
Saving /mnt/c/Users/sergio/Desktop/TFG/GM/GM-BRATS+HM+NORM-NPZ/derivatives/sub-0000/sub-0000_desc-tumor_dseg.npz
Saving /mnt/c/Users/sergio/Desktop/TFG/GM/GM-BRATS+HM+NORM-NPZ/derivatives/sub-0001/sub-0001_desc-brain_mask.npz
Saving /mnt/c/Users/sergio/Desktop/TFG/GM/GM-BRATS+HM+NORM-NPZ/derivatives/sub-0001/sub-0001_desc-tumor_dseg.npz
Saving /mnt/c/Users/sergio/Desktop/TFG/GM/GM-BRATS+HM+NORM-NPZ/derivatives/sub-0002/sub-0002_desc-brain_mask.npz
Saving /mnt/c/Users/sergio/Desktop/TFG/GM/GM-BRATS+HM+NORM-NPZ/derivatives/sub-0002/sub-0002_desc-tumor_dseg.npz
Saving /mnt/c/Users/sergio/Desktop/TFG/GM/GM-BRATS+HM+NORM-NPZ/derivatives/sub-0003/sub-0003_desc-brain_mask.npz
Saving /mnt/c/Users/sergio/Desktop/TFG/GM/GM-BRATS+HM+NORM-NPZ/derivatives/sub-0003/sub-0003_desc-tumor_dseg.npz
Saving /mnt/c/Users/sergio/Desktop/TFG/GM/GM-BRATS+HM+NORM-NPZ/derivatives/sub-0004/sub-0004_des