In [1]:
import functools
import glob
import nibabel as nib
import numpy as np
import os
import sys

In [4]:
def calculateStats(directory):
    """Compute voxel-intensity statistics over every ``volume*.nii`` in a split.

    ``directory`` is scanned one level deep: each non-hidden sub-directory is
    searched for files whose name starts with ``volume`` and ends with
    ``.nii``. Hidden entries (leading ".") and non-directories are skipped.

    Parameters
    ----------
    directory : str
        Path of the split to scan (one sub-directory per case).

    Returns
    -------
    tuple
        ``(mean, stddev, minVal, maxVal, minFile, maxFile)``. ``mean`` and
        ``stddev`` are the population statistics (divide by N) over all voxels
        of all volumes; ``minVal`` / ``maxVal`` are the global extremes and
        ``minFile`` / ``maxFile`` the paths of the first volume (in sorted
        traversal order) in which each extreme occurs.

    Raises
    ------
    ValueError
        If no voxels were found, so the statistics are undefined.
    """
    voxelSum = 0.0
    voxelSumSq = 0.0
    numVoxels = 0

    maxVal = float('-inf')
    maxFile = None
    minVal = float('inf')
    minFile = None

    # Sorted traversal makes the reported min/max files deterministic when
    # several volumes share the same extreme value.
    for subdirname in sorted(os.listdir(directory)):
        full_subdir_path = os.path.join(directory, subdirname)
        if subdirname.startswith(".") or not os.path.isdir(full_subdir_path):
            continue

        for filename in sorted(os.listdir(full_subdir_path)):
            if not (filename.startswith("volume") and filename.endswith(".nii")):
                continue

            full_file_path = os.path.join(full_subdir_path, filename)
            # np.asanyarray(img.dataobj) is the documented replacement for the
            # deprecated Nifti1Image.get_data().
            img = np.asanyarray(nib.load(full_file_path).dataobj)
            if img.size == 0:
                continue

            # Accumulate in float64 regardless of the on-disk dtype: squaring
            # in the native dtype overflows for integer volumes and loses
            # precision for float32 ones.
            voxelSum += float(np.sum(img, dtype=np.float64))
            voxelSumSq += float(np.sum(np.square(img, dtype=np.float64)))
            numVoxels += img.size

            ma = np.max(img)
            if ma > maxVal:
                maxVal = ma
                maxFile = full_file_path
            mi = np.min(img)
            if mi < minVal:
                minVal = mi
                minFile = full_file_path

    if numVoxels == 0:
        raise ValueError(
            "no volume*.nii voxels found under {!r}".format(directory)
        )

    mean = voxelSum / numVoxels
    # Var[x] = E[x^2] - E[x]^2; rounding can push it marginally below zero,
    # which would otherwise produce a complex/NaN standard deviation.
    variance = max(voxelSumSq / numVoxels - mean ** 2, 0.0)
    stddev = variance ** 0.5

    return mean, stddev, minVal, maxVal, minFile, maxFile

In [None]:
# Raw sagittal training split used as the source of the statistics.
train_dir = 'data/raw/sag/train'

# Intensity statistics of the training volumes, plus where the extremes live.
(
    mean, stddev,
    minVal, maxVal,
    minFile, maxFile,
) = calculateStats(train_dir)

print(
    mean, stddev,      # mean / standard deviation over all training voxels
    minVal, maxVal,    # global extreme voxel values
    minFile, maxFile,  # volumes in which those extremes occur
)

In [10]:
# Inspect three raw volumes: print a small preview patch, the shape, basic
# intensity statistics and the index of an extreme voxel.
# Each entry: (path, preview patch, function locating the extreme voxel).
raw_samples = [
    # Sample axial image
    ('data/raw/axial/train/1206816567/volume-1206816567.nii',
     np.s_[:10, 375:385, 56], np.argmin),
    # Sample SAG images
    ('data/raw/sag/train/1543518185/volume-1543518185.nii',
     np.s_[99, 340:350, 260:270], np.argmax),
    ('data/raw/sag/train/1323628206/volume-1323628206.nii',
     np.s_[32, 340:350, 260:270], np.argmax),
]

for nii_path, patch, locate_extreme in raw_samples:
    # np.asanyarray(img.dataobj) is the documented replacement for the
    # deprecated Nifti1Image.get_data().
    img = np.asanyarray(nib.load(nii_path).dataobj)
    print(img[patch])
    print(img.shape)
    print(np.min(img))
    print(np.max(img))
    print(np.mean(img))
    print(np.std(img))
    print(np.unravel_index(locate_extreme(img), img.shape))

[[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [13.  9.  9. 22. 31. 26. 18. 34. 11. 16.]
 [14. 18. 12. 15. 18. 23. 24. 22. 21. 14.]
 [28. 28. 16. 11. 19. 16. 32. 12. 21. 21.]
 [40. 31. 16. 14. 16. 14. 15. 13. 18. 23.]
 [22. 24. 20. 14. 17. 16. 17. 13. 16. 14.]
 [19. 19. 31. 18. 14. 17. 22. 21. 22. 11.]
 [28. 18. 27. 25. 20. 17. 33. 19. 21. 10.]
 [ 9. 12.  9. 18. 29. 15. 35. 21. 14. 10.]
 [27. 33. 26. 28. 21. 19. 22. 14. 14. 10.]]
(512, 512, 160)
0.0
447.0
29.106436
30.600554
(0, 0, 0)
[[  627.   827.   880.   518.   224.   441.   266.   327.   822.  1266.]
 [  794.   369.   284.   479.   368.   527.  1023.  1013.   805.   635.]
 [  786.   484.   801.  1266.  1803.  2070.  1983.  1281.   951.  1678.]
 [ 2303.  2519.  3516.  3208.  3942.  3328.  4596.  4638.  4281.  2245.]
 [ 6679.  6387.  8002.  7442.  8956.  7934.  6654.  5068. 16136. 10461.]
 [ 1383.  2965.  1697.  3525.  3796. 10806. 17958. 22591. 11114. 18744.]
 [ 4768.  5208.  7682.  6986. 18056. 13778. 20852. 16528. 17547.  6923.]
 [

In [5]:
def rebuildNii(directory, folder_name, mean, stddev):
    """Normalise one case's volume, merge its masks and save both as NIfTI.

    Reads the ``volume*.nii`` file and every ``SEG*.nii`` file found directly
    in ``directory``. All masks are combined with a bitwise OR (an all-zero
    mask is used when there are none), the volume is normalised as
    ``(img - mean) / stddev``, and the results are written to
    ``folder_name/img.nii`` and ``folder_name/seg.nii`` with an identity
    affine.

    Parameters
    ----------
    directory : str
        Folder holding the raw ``volume*.nii`` / ``SEG*.nii`` files of a case.
    folder_name : str
        Output folder; created if missing, reused if it already exists.
    mean, stddev : float
        Normalisation constants applied to the volume.

    Raises
    ------
    ValueError
        If ``directory`` contains no ``volume*.nii`` file.
    """
    img = None
    segs = []

    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".nii"):
            continue
        nii_path = os.path.join(directory, filename)

        # np.asanyarray(img.dataobj) is the documented replacement for the
        # deprecated Nifti1Image.get_data().
        if filename.startswith("volume"):
            img = np.asanyarray(nib.load(nii_path).dataobj)
        elif filename.startswith("SEG"):
            seg = np.asanyarray(nib.load(nii_path).dataobj)
            # Masks stored with a trailing 4th axis keep only its first entry;
            # masks that are already 3-D are used as they are.
            if seg.ndim == 4:
                seg = seg[:, :, :, 0]
            segs.append(seg)

    if img is None:
        raise ValueError(
            "no volume*.nii file found in {!r}".format(directory)
        )

    if segs:
        # reduce() returns the single mask unchanged when there is only one.
        final_seg = functools.reduce(np.bitwise_or, segs)
    else:
        final_seg = np.zeros(img.shape)

    D, H, _ = img.shape

    # Hack to move depth to the 1st dim: a volume whose first two axes are
    # equal is treated as (H, W, depth) and reordered to (depth, H, W).
    # NOTE: volumes whose first two axes differ are left untouched.
    if D == H:
        img = img.transpose(2, 0, 1)
        final_seg = final_seg.transpose(2, 0, 1)

    # normalize image
    img = (img - mean) / stddev

    final_img = nib.Nifti1Image(img, affine=np.eye(4))
    final_seg_img = nib.Nifti1Image(final_seg, affine=np.eye(4))

    # exist_ok keeps the notebook re-runnable without deleting old output.
    os.makedirs(folder_name, exist_ok=True)
    nib.save(final_seg_img, os.path.join(folder_name, "seg.nii"))
    nib.save(final_img, os.path.join(folder_name, "img.nii"))
    

In [6]:
# Roots of the raw input tree and of the mirrored preprocessed output tree.
RAW_ROOT = 'data/raw'
PREPROCESSED_ROOT = 'data/preprocessed'

directory = 'data/raw/sag'

#Calculate training data mean and stddev
mean, stddev, minVal, maxVal, minFile, maxFile = calculateStats(directory + '/train')
print(mean, stddev, minVal, maxVal, minFile, maxFile)

#Calculated values
DATASET_GLOBAL_MEAN = 321.56370587244527
DATASET_GLOBAL_STDDEV = 517.4083720223107

DATASET_SAG_MEAN = 319.38926782103283
DATASET_SAG_STDDEV = 447.42789129337154

#Preprocess all data
# Layout walked here: <directory>/<split>/<case>/; every case folder is
# normalised with the training-split statistics computed above.
for subdirname in sorted(os.listdir(directory)):
    path1 = os.path.join(directory, subdirname)
    if subdirname.startswith(".") or not os.path.isdir(path1):
        continue
    for subsubdirname in sorted(os.listdir(path1)):
        path2 = os.path.join(path1, subsubdirname)
        if subsubdirname.startswith(".") or not os.path.isdir(path2):
            continue
        # Mirror the case's location under PREPROCESSED_ROOT. Building the
        # path explicitly (instead of str.replace on "raw") cannot corrupt a
        # split or case name that happens to contain "raw".
        newPath = os.path.join(PREPROCESSED_ROOT, os.path.relpath(path2, RAW_ROOT))
        rebuildNii(path2, newPath, mean, stddev)

319.38926782103283 447.42789129337154 0.0 22591.0 data/raw/sag/train/8669051505/volume-8669051505.nii data/raw/sag/train/1543518185/volume-1543518185.nii


In [6]:
# The image object exposes its shape directly, so the voxel data does not
# have to be read (and the deprecated get_data() is not needed).
nib.load('data/raw/val/1078523304/volume-1078523304.nii').shape

(552, 512, 111)

In [3]:
#Sample preprocessed SAG images
# For each volume: print a small preview patch, the shape, basic intensity
# statistics and the index of the brightest voxel.
# Each entry: (path, preview patch).
preprocessed_samples = [
    ('data/preprocessed/sag/train/1543518185/img.nii', np.s_[99, 340:350, 260:270]),
    ('data/preprocessed/sag/train/1323628206/img.nii', np.s_[32, 340:350, 260:270]),
]

for nii_path, patch in preprocessed_samples:
    # np.asanyarray(img.dataobj) is the documented replacement for the
    # deprecated Nifti1Image.get_data().
    img = np.asanyarray(nib.load(nii_path).dataobj)
    print(img[patch])
    print(img.shape)
    print(np.min(img))
    print(np.max(img))
    print(np.mean(img))
    print(np.std(img))
    print(np.unravel_index(np.argmax(img), img.shape))

[[ 6.87509060e-01  1.13450849e+00  1.25296330e+00  4.43894356e-01
  -2.13194758e-01  2.71799594e-01 -1.19324885e-01  1.70099325e-02
   1.12333345e+00  2.11567211e+00]
 [ 1.06075358e+00  1.10879809e-01 -7.90949389e-02  3.56729478e-01
   1.08644813e-01  4.64009345e-01  1.57256782e+00  1.55021787e+00
   1.08533847e+00  7.05389023e-01]
 [ 1.04287362e+00  3.67904454e-01  1.07639849e+00  2.11567211e+00
   3.31586552e+00  3.91260982e+00  3.71816492e+00  2.14919710e+00
   1.41164804e+00  3.03649092e+00]
 [ 4.43336391e+00  4.91612387e+00  7.14441586e+00  6.45603657e+00
   8.09652424e+00  6.72423649e+00  9.55821228e+00  9.65208244e+00
   8.85418892e+00  4.30373430e+00]
 [ 1.42137117e+01  1.35610924e+01  1.71706123e+01  1.59190140e+01
   1.93027992e+01  1.70186329e+01  1.41578360e+01  1.06131315e+01
   3.53500786e+01  2.26664696e+01]
 [ 2.37716675e+00  5.91293240e+00  3.07895589e+00  7.16453075e+00
   7.77021503e+00  2.34375439e+01  3.94222450e+01  4.97769852e+01
   2.41259212e+01  4.11789513e+01

In [13]:
import nibabel as nib
import os

train_dir = 'data/preprocessed/sag/train'
val_dir = 'data/preprocessed/sag/val'
test_dir = 'data/preprocessed/sag/test'

# Print the shape of every preprocessed validation volume (img*.nii).
# Sorted traversal gives a stable order; non-directory entries are skipped.
for subdirname in sorted(os.listdir(val_dir)):
    subdir = os.path.join(val_dir, subdirname)
    if not os.path.isdir(subdir):
        continue
    for f in sorted(os.listdir(subdir)):
        if f.endswith(".nii") and f.startswith("img"):
            # The image object exposes its shape directly, so the voxel data
            # does not have to be read just to report it.
            print(nib.load(os.path.join(subdir, f)).shape)

(158, 512, 512)
(152, 512, 512)
(160, 512, 512)
(112, 512, 512)
(64, 512, 512)
(172, 512, 512)
(152, 512, 512)
(152, 512, 512)
