In [54]:
import numpy as np
import pandas as pd
import sys, os
import glob
from pprint import pprint
import csv

In [55]:
import nibabel as nib
from nilearn import image, plotting

In [56]:
from pathlib import Path

In [27]:
# Notebook working directory, captured with the IPython %pwd magic and
# wrapped in a Path so it can be joined with the `/` operator further down.
NB_DIR = %pwd
NB_DIR = Path(NB_DIR)
# Name of the sub-folder under the interim data root to process; it is also
# used as the suffix of every CSV this notebook writes.
# NOTE(review): the final cells read both *_HGG.csv and *_LGG.csv, so
# presumably the notebook is run once per subtype -- confirm.
subtype = 'LGG' #change this if error
# Modality tag used in the file-name patterns (*{image}.nii.gz).
# NOTE: this rebinds the name `image` imported from nilearn earlier in the
# notebook, so the nilearn module is no longer reachable under that name.
image = 't2'

In [28]:
# Interim BraTS 2018 data folder for the selected subtype.
DATA = Path('/data-nas/brains/MICCAI_BraTS2018/interim/') / subtype

In [29]:
# Sorted list of every entry directly under DATA,
# e.g. .../HGG/Brats18_2013_10_1
studies_path = sorted(glob.glob(f'{DATA}/*'))

In [30]:
# Number of entries found directly under DATA.
len(studies_path)

75

In [31]:
# Study IDs: the last path component of each entry, e.g. Brats18_2013_10_1
names_studies = [os.path.basename(study_dir) for study_dir in studies_path]

In [32]:
# Load the survival-evaluation table and preview its first rows.
# NOTE(review): the variable is named "train ... image_paths", but the file
# read here is val_survival_evaluation.csv and the preview shows the columns
# BraTS18ID / Age / ResectionStatus -- confirm the intended file and name.
df_train_image_paths = pd.read_csv('/data-nas/brains/MICCAI_BraTS2018/interim/csv/val_survival_evaluation.csv')
df_train_image_paths.head()

Unnamed: 0,BraTS18ID,Age,ResectionStatus
0,Brats18_MDA_1012_1,49.0,GTR
1,Brats18_MDA_1015_1,58.0,GTR
2,Brats18_MDA_1081_1,27.0,GTR
3,Brats18_MDA_907_1,64.0,GTR
4,Brats18_MDA_922_1,57.0,GTR


In [33]:
def rename_files(fns, new_name):
    """
    Rename every file in `fns` to `new_name`, keeping it in its own folder.

    Input:
        fns: iterable of paths to the files to rename
        new_name: new file name (not a path) given to each file
    Output: None. The files are renamed on disk.

    The target folder is taken from each file's own path rather than from a
    separately ordered list of study folders, so the result does not depend
    on the order in which the paths were collected.
    """
    for file_path in fns:
        new_file = os.path.join(os.path.dirname(file_path), new_name)
        # Already carries the target name (e.g. on a re-run): nothing to do.
        if file_path != new_file:
            os.rename(file_path, new_file)

In [34]:
# Rename data: every file matching <DATA>/*/*{image}.nii.gz is renamed to
# orig{image}.nii.gz.
# Note: after a first run the same pattern also matches the already renamed
# orig{image}.nii.gz files.
fns = glob.glob(str(DATA) + f'/*/*{image}.nii.gz')
rename_files(fns, f'orig{image}.nii.gz')

In [35]:
# Rename labels: every file matching <DATA>/*/*seg.nii.gz is renamed to
# brain.nii.gz. `fns` is reused here and now holds the label paths.
fns = glob.glob(str(DATA) + '/*/*seg.nii.gz')
rename_files(fns, "brain.nii.gz")

In [36]:
# Collect all paths that match <DATA>/*/brain.nii.gz and preview the first two.
brains_fns = glob.glob(f'{DATA}/*/brain.nii.gz')
brains_fns[:2]

['/data-nas/brains/MICCAI_BraTS2018/interim/LGG/Brats18_2013_0_1/brain.nii.gz',
 '/data-nas/brains/MICCAI_BraTS2018/interim/LGG/Brats18_2013_15_1/brain.nii.gz']

In [37]:
# Number of brain.nii.gz files found.
len(brains_fns)

75

In [38]:
# Join the last two path components (folder and file name) with an underscore.
brains = ["_".join(path.rsplit('/', 2)[-2:]) for path in brains_fns]
brains[:5]  # e.g. 'Brats18_2013_0_1_brain.nii.gz'

['Brats18_2013_0_1_brain.nii.gz',
 'Brats18_2013_15_1_brain.nii.gz',
 'Brats18_2013_16_1_brain.nii.gz',
 'Brats18_2013_1_1_brain.nii.gz',
 'Brats18_2013_24_1_brain.nii.gz']

In [39]:
from utils import write_nifti, load_nifti

In [40]:
def binarize_mask(imgfn, store=True, verbose=False):
    """
    Binarize a nifti image: voxels > 0 become 1, all other voxels become 0.

    Input:
        imgfn: path to a nifti image
        store: if True, write the result to the same folder as the input,
               named 'binary_<input file name>'; if False, nothing is written
        verbose: if True, print the number of unique values before and after
                 and the output location
    Output: the binary data as a uint8 numpy array
    """
    # os.path keeps a bare file name relative instead of turning it into
    # '/binary_<name>' at the filesystem root.
    outdir, infn = os.path.split(imgfn)
    outfn = os.path.join(outdir, 'binary_' + infn)

    # Only the voxel data and the affine are needed from load_nifti's result.
    _, _, _, _, data, affine = load_nifti(imgfn)

    if verbose: print(f'Number of unique regions in input img: {len(np.unique(data))}')

    new_data = np.where(data>0, 1, 0).astype('uint8')

    if verbose: print(f'Number of unique regions in output img: {len(np.unique(new_data))}')

    if store:
        write_nifti(new_data, affine, outfn)
        if verbose: print(f'Result saved in {outfn}')

    return new_data

In [41]:
from concurrent.futures import ThreadPoolExecutor

In [42]:
# Binarize all label files in parallel. Executor.map only re-raises a worker's
# exception when that result is retrieved, so the results are iterated (and
# dropped) to surface failures instead of ignoring them silently.
with ThreadPoolExecutor(14) as e:  # TODO: why is the pool size set to 14?
    for _mask in e.map(binarize_mask, brains_fns):
        pass

  ext_def = np.fromstring(ext_def, dtype=np.int32)


# Normalize images

In [43]:
def standardize_img(in_img):
    '''
    Input: in_img  - numpy ndarray
    Output: out_img  - numpy ndarray with zero mean and unit variance.
            A constant input (standard deviation 0) is returned as all
            zeros instead of NaN.
    '''
    centered = in_img - np.mean(in_img)
    std = np.std(in_img)
    if std == 0:
        # Dividing by a zero standard deviation would fill the image with NaN.
        return np.zeros_like(centered)
    return centered / std

In [44]:
# Standardize every volume matching <DATA>/*/*{image}.nii.gz and save the
# result next to the input as <input name>_std.nii.gz.
images = glob.glob(str(DATA) + f'/*/*{image}.nii.gz')

for img in images:
    img_dir = os.path.dirname(img)
    # File name without its last two extensions (".nii.gz").
    img_fn = ".".join(os.path.basename(img).split(".")[:-2])

    out_fn = img_fn + "_std.nii.gz"
    save_fn = os.path.join(img_dir, out_fn)

    if Path(save_fn).is_file():
        # Skip only this volume. glob returns paths in arbitrary order, so one
        # finished file says nothing about the remaining ones (for example
        # after an interrupted run).
        print(f"{save_fn} already processed, skipping")
        continue

    rfunc, rnx, rny, rnz, data, affine = load_nifti(img)

    out_data = standardize_img(data)

    write_nifti(out_data, affine, save_fn)

  ext_def = np.fromstring(ext_def, dtype=np.int32)


In [45]:
# Number of image files matched by the standardization cell's glob.
len(images)

75

# Create CSV 

In [46]:
# Output folder for the generated CSV files, relative to the notebook folder.
CSV_PATH = NB_DIR/'../../csv/brats/interim'
# parents=True also creates missing intermediate folders (csv/, csv/brats/);
# without it mkdir raises FileNotFoundError on a fresh checkout.
CSV_PATH.mkdir(parents=True, exist_ok=True)

In [47]:
# Mask file names (looked up inside each study folder) to list in the label
# CSVs; used as the default `masks` argument of create_niftynet_csv.
masks = ['binary_brain.nii.gz']

In [48]:
def create_niftynet_csv(dataset, masks=masks, outputdir=CSV_PATH):
    """
    Write two-column "ID, file path" CSVs (no header row) for a set of studies.

    Input:
        dataset: list of paths to study folders; the ID written for a study
                 is the name of its folder
        masks: a list of mask file names to use
        outputdir: folder the CSV files are written to
    Output (files written to outputdir; nothing is returned):
        - One label CSV per mask: <mask stem>_labels_<subtype>.csv
        - brain_<subtype>.csv, listing <study>/brain.nii.gz
        - orig<image>_std_<subtype>.csv, listing <study>/orig<image>_std.nii.gz
    """
    # Take the IDs from the dataset that was passed in, so IDs and paths
    # always refer to the same studies.
    studies = list(dataset)
    study_ids = [os.path.basename(os.path.normpath(study)) for study in studies]

    def _write_id_path_csv(csvfn, filename):
        # Write one "ID, <study folder>/<filename>" row per study to csvfn.
        # newline='' is what the csv module requires for files it writes to.
        with open(csvfn, 'w', newline='') as out:
            csv_out = csv.writer(out)
            for study_id, study in zip(study_ids, studies):
                csv_out.writerow((study_id, f"{study}/{filename}"))
        print(f'{csvfn} created')

    # Create label CSVs
    for m in masks:
        _write_id_path_csv(f'{outputdir}/{m.split(".")[0]}_labels_{subtype}.csv', m)

    # Create data CSVs
    for fn in ['brain.nii.gz', f'orig{image}_std.nii.gz']:
        _write_id_path_csv(f'{outputdir}/{fn.split(".")[0]}_{subtype}.csv', fn)

In [49]:
# Write the label and data CSVs for the current subtype, using the default
# masks and output folder.
create_niftynet_csv(studies_path)

/home/sathiesh/niftynet_brain/nbs/brats/../../csv/brats/interim/binary_brain_labels_LGG.csv created
/home/sathiesh/niftynet_brain/nbs/brats/../../csv/brats/interim/brain_LGG.csv created
/home/sathiesh/niftynet_brain/nbs/brats/../../csv/brats/interim/origt2_std_LGG.csv created


# Concatenate HGG.csv and LGG.csv

In [50]:
# TODO: check that the interim CSV folder contains both the HGG and the LGG files before concatenating

In [51]:
def concate_df(subtypes, new_filename):
    """
    Stack several header-less CSV files from CSV_PATH into one CSV.

    Input:
        subtypes: file names (inside CSV_PATH) of the CSVs to combine, in order
        new_filename: name of the combined CSV, written one level above CSV_PATH
    Output: None. The combined file has no header row and no index column.
    """
    frames = [pd.read_csv(f'{CSV_PATH}/{csv_name}', header=None) for csv_name in subtypes]
    combined = pd.concat(frames, ignore_index=True)
    combined.to_csv(f'{CSV_PATH}/../{new_filename}', index=False, header=None)

In [52]:
# Combine the per-subtype standardized-image CSVs into orig_{image}.csv,
# written one level above CSV_PATH.
# NOTE(review): both input files must already exist in CSV_PATH; a run of this
# notebook writes only the one for the current `subtype`, so presumably the
# other comes from a run with the other subtype -- confirm.
subtypes = [f'orig{image}_std_HGG.csv',f'orig{image}_std_LGG.csv']
new_filename = f'orig_{image}.csv'
concate_df(subtypes, new_filename)

In [59]:
# Combine the per-subtype brain.nii.gz CSVs into labels.csv, written one level
# above CSV_PATH.
# NOTE(review): brain.nii.gz is the renamed *seg.nii.gz file; the binarized
# masks are listed in binary_brain_labels_<subtype>.csv instead -- confirm
# which of the two is meant to be used as labels.
subtypes = ['brain_HGG.csv','brain_LGG.csv']
new_filename = 'labels.csv'
concate_df(subtypes, new_filename)