In [5]:
import functools
import glob
import nibabel as nib
import numpy as np
import os
import sys

In [15]:
def _iterVolumeFiles(directory):
    """Yield the path of every ``volume*.nii`` file one level below ``directory``.

    Hidden entries (names starting with ".") and entries that are not
    directories are skipped. Listings are sorted so the traversal order is
    the same on every run.
    """
    for subdirname in sorted(os.listdir(directory)):
        full_subdir_path = os.path.join(directory, subdirname)
        if subdirname.startswith(".") or not os.path.isdir(full_subdir_path):
            continue
        for filename in sorted(os.listdir(full_subdir_path)):
            if filename.startswith("volume") and filename.endswith(".nii"):
                yield os.path.join(full_subdir_path, filename)


def calculateStats(directory):
    """Compute global voxel statistics over all volumes under ``directory``.

    Every ``<directory>/<subdir>/volume*.nii`` file is loaded once and folded
    into running sums, so only one volume is held in memory at a time.

    Args:
        directory: Folder whose immediate sub-folders contain the volumes.

    Returns:
        Tuple ``(mean, stddev, minVal, maxVal, minFile, maxFile)`` where
        ``mean``/``stddev`` are taken over all voxels of all volumes
        (population standard deviation), ``minVal``/``maxVal`` are the extreme
        voxel values, and ``minFile``/``maxFile`` are the paths of the files
        in which those extremes were found.

    Raises:
        ValueError: If no voxels were found under ``directory``.
    """
    voxelSum = 0.0
    voxelSumSq = 0.0
    numVoxels = 0

    maxVal = float('-inf')
    maxFile = None
    minVal = float('inf')
    minFile = None

    for full_file_path in _iterVolumeFiles(directory):
        # get_fdata() replaces the removed get_data() and returns float64,
        # which keeps the sums below from being accumulated in float32.
        img = nib.load(full_file_path).get_fdata()
        voxelSum += float(np.sum(img))
        voxelSumSq += float(np.sum(np.square(img)))
        # img.size counts every voxel regardless of how many axes there are.
        numVoxels += img.size

        ma = float(np.max(img))
        if ma > maxVal:
            maxVal = ma
            maxFile = full_file_path
        mi = float(np.min(img))
        if mi < minVal:
            minVal = mi
            minFile = full_file_path

    if numVoxels == 0:
        raise ValueError("no volume*.nii voxels found under %r" % (directory,))

    mean = voxelSum / numVoxels
    # E[x^2] - E[x]^2 can dip just below zero through rounding; clamp it so
    # the square root stays a real number.
    variance = max(voxelSumSq / numVoxels - mean ** 2, 0.0)
    stddev = variance ** 0.5

    return mean, stddev, minVal, maxVal, minFile, maxFile

# Folder with one sub-folder per training case.
train_dir = 'data/raw/train'

# Intensity statistics over the training volumes (mean/stddev plus the
# extreme values and the files they were found in).
(mean, stddev,
 minVal, maxVal,
 minFile, maxFile) = calculateStats(train_dir)
print(mean, stddev, minVal, maxVal, minFile, maxFile)

321.56370587244527 517.4083720223107 -2.0 22591.0 data/raw/train/4030249091/volume_4030249091.nii data/raw/train/1543518185/volume-1543518185.nii


In [31]:

# Inspect the two raw training volumes that calculateStats reported as
# holding the global minimum and the global maximum voxel value.
# Each entry: (file, window of voxels to preview, locator of the extreme).
raw_checks = [
    ('data/raw/train/4030249091/volume_4030249091.nii',
     np.s_[:10, 375:385, 56], np.argmin),
    ('data/raw/train/1543518185/volume-1543518185.nii',
     np.s_[99, 340:350, 260:270], np.argmax),
]

for volume_path, preview, locate in raw_checks:
    # np.asanyarray(image.dataobj) is nibabel's documented replacement for
    # the removed get_data() and returns the same array.
    img = np.asanyarray(nib.load(volume_path).dataobj)
    print(img[preview])
    print(img.shape)
    print(np.min(img))
    print(np.max(img))
    print(np.mean(img))
    print(np.std(img))
    # Index of the extreme voxel, expressed as coordinates.
    print(np.unravel_index(locate(img), img.shape))

[[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0. -2. -1.  1.  0.  0.]
 [ 4.  7. 10. 13. 11. 11. 17. 22. 21. 19.]
 [ 8. 12. 20. 28. 32. 33. 32. 37. 42. 43.]
 [10. 11. 19. 27. 34. 34. 25. 28. 41. 45.]
 [ 6. 11. 18. 21. 24. 27. 17. 19. 41. 55.]
 [ 7. 16. 20. 14. 12. 18. 14. 15. 42. 65.]
 [15. 19. 20. 14. 12. 15. 12. 15. 38. 60.]
 [18. 16. 15. 13. 16. 18. 18. 21. 32. 45.]]
(512, 512, 68)
-2.0
275.0
21.918333
27.773787
(2, 380, 56)
[[  627.   827.   880.   518.   224.   441.   266.   327.   822.  1266.]
 [  794.   369.   284.   479.   368.   527.  1023.  1013.   805.   635.]
 [  786.   484.   801.  1266.  1803.  2070.  1983.  1281.   951.  1678.]
 [ 2303.  2519.  3516.  3208.  3942.  3328.  4596.  4638.  4281.  2245.]
 [ 6679.  6387.  8002.  7442.  8956.  7934.  6654.  5068. 16136. 10461.]
 [ 1383.  2965.  1697.  3525.  3796. 10806. 17958. 22591. 11114. 18744.]
 [ 4768.  5208.  7682.  6986. 18056. 13778. 20852. 16528. 17547.  6923.]

In [24]:
def rebuildNii(directory, folder_name, mean, stddev):
    """Write a normalized image and a merged segmentation for one case.

    Reads the ``volume*.nii`` image and every ``SEG*.nii`` mask found in
    ``directory``, merges the masks with a bitwise OR, moves the last axis to
    the front when it looks like the depth axis, normalizes the image as
    ``(img - mean) / stddev`` and saves ``img.nii`` and ``seg.nii`` into
    ``folder_name``. Both outputs are written with an identity affine.

    Args:
        directory: Folder holding the raw ``.nii`` files of one case.
        folder_name: Output folder; created if missing, reused if present.
        mean: Value subtracted from every voxel of the image.
        stddev: Value the centered image is divided by.

    Raises:
        FileNotFoundError: If ``directory`` contains no ``volume*.nii`` file.
    """
    img = None
    segs = []

    # Sorted so that the result does not depend on directory listing order.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".nii"):
            continue
        full_path = os.path.join(directory, filename)
        if filename.startswith("volume"):
            # np.asanyarray(dataobj) is the documented stand-in for the
            # removed get_data(); it keeps the on-disk dtype.
            img = np.asanyarray(nib.load(full_path).dataobj)
        elif filename.startswith("SEG"):
            seg = np.asanyarray(nib.load(full_path).dataobj)
            # Keep only index 0 of the 4th axis
            # (assumes masks are stored 4-D -- TODO confirm).
            segs.append(seg[:, :, :, 0])

    if img is None:
        raise FileNotFoundError("no volume*.nii file found in %r" % (directory,))

    if segs:
        # With a single mask reduce() returns it untouched.
        final_seg = functools.reduce(np.bitwise_or, segs)
    else:
        # No annotation for this case: use an all-zero mask.
        final_seg = np.zeros(img.shape)

    D, H, W = img.shape

    # Move depth to the 1st dim. The original rule (first two axes equal) is
    # kept, and a volume whose last axis is strictly the smallest is now
    # handled too, e.g. a (552, 512, 111) volume that D == H alone missed.
    # NOTE(review): assumes raw volumes are stored depth-last -- confirm.
    if D == H or W < min(D, H):
        img = img.transpose(2, 0, 1)
        final_seg = final_seg.transpose(2, 0, 1)

    #normalize image
    img = (img - mean) / stddev

    final_img = nib.Nifti1Image(img, affine=np.eye(4))
    final_seg_img = nib.Nifti1Image(final_seg, affine=np.eye(4))

    # exist_ok lets the preprocessing cell be re-run over existing output.
    os.makedirs(folder_name, exist_ok=True)
    nib.save(final_seg_img, os.path.join(folder_name, "seg.nii"))
    nib.save(final_img, os.path.join(folder_name, "img.nii"))
    

In [25]:
train_dir = 'data/raw/train'

#Calculate training data mean and stddev
mean, stddev, minVal, maxVal, minFile, maxFile = calculateStats(train_dir)
print(mean, stddev, minVal, maxVal, minFile, maxFile)

#Preprocess all data
# Layout walked here: data/raw/<split>/<case>/ -> data/preprocessed/<split>/<case>/
directory = 'data/raw'
output_root = 'data/preprocessed'
for split_name in sorted(os.listdir(directory)):
    split_path = os.path.join(directory, split_name)
    # Skip hidden entries and stray files sitting next to the split folders.
    if split_name.startswith(".") or not os.path.isdir(split_path):
        continue
    for case_name in sorted(os.listdir(split_path)):
        case_path = os.path.join(split_path, case_name)
        if case_name.startswith(".") or not os.path.isdir(case_path):
            continue
        # Build the output path from its parts instead of replacing the text
        # "raw", which would also rewrite a folder name that contains "raw".
        newPath = os.path.join(output_root, split_name, case_name)
        rebuildNii(case_path, newPath, mean, stddev)

321.56370587244527 517.4083720223107 -2.0 22591.0 data/raw/train/4030249091/volume_4030249091.nii data/raw/train/1543518185/volume-1543518185.nii


In [6]:
# The image object exposes its shape directly, so the voxel data does not
# have to be loaded (and the removed get_data() call is avoided).
nib.load('data/raw/val/1078523304/volume-1078523304.nii').shape

(552, 512, 111)

In [32]:
# Inspect the preprocessed versions of the two volumes examined in the raw
# inspection cell, using the same index windows and extreme-value locators.
# NOTE(review): rebuildNii moves the last raw axis to the front when the
# first two raw axes are equal (the first raw volume printed (512, 512, 68)),
# so these windows presumably address different voxels than in the raw
# cell -- confirm whether e.g. img[56, :10, 375:385] was intended.
preprocessed_checks = [
    ('data/preprocessed/train/4030249091/img.nii',
     np.s_[:10, 375:385, 56], np.argmin),
    ('data/preprocessed/train/1543518185/img.nii',
     np.s_[99, 340:350, 260:270], np.argmax),
]

for volume_path, preview, locate in preprocessed_checks:
    # np.asanyarray(image.dataobj) is nibabel's documented replacement for
    # the removed get_data() and returns the same array.
    img = np.asanyarray(nib.load(volume_path).dataobj)
    print(img[preview])
    print(img.shape)
    print(np.min(img))
    print(np.max(img))
    print(np.mean(img))
    print(np.std(img))
    # Index of the extreme voxel, expressed as coordinates.
    print(np.unravel_index(locate(img), img.shape))

[[-0.6079602  -0.60989296 -0.60989296 -0.61569107 -0.61182564 -0.61182564
  -0.60989296 -0.6137584  -0.60989296 -0.60602754]
 [-0.61569107 -0.61569107 -0.6137584  -0.6137584  -0.6137584  -0.6137584
  -0.61182564 -0.6079602  -0.60989296 -0.60989296]
 [-0.61569107 -0.6137584  -0.61182564 -0.61182564 -0.6137584  -0.6079602
  -0.6079602  -0.60989296 -0.60989296 -0.6137584 ]
 [-0.61182564 -0.61182564 -0.6137584  -0.6137584  -0.61182564 -0.60989296
  -0.6079602  -0.60989296 -0.6079602  -0.60989296]
 [-0.6021621  -0.60989296 -0.61182564 -0.61182564 -0.60989296 -0.60989296
  -0.61182564 -0.60602754 -0.60602754 -0.6079602 ]
 [-0.6021621  -0.6079602  -0.6079602  -0.61182564 -0.61182564 -0.61182564
  -0.60989296 -0.61182564 -0.61182564 -0.6079602 ]
 [-0.6079602  -0.6040948  -0.6040948  -0.61182564 -0.6079602  -0.6040948
  -0.60602754 -0.6079602  -0.6079602  -0.60602754]
 [-0.6040948  -0.6040948  -0.6079602  -0.60989296 -0.61182564 -0.60989296
  -0.60989296 -0.60989296 -0.60989296 -0.6079602 ]
 [-