In [None]:
"""
Jason Qin
CS230

Augment positive aneurysm data
"""

In [1]:
import nrrd
import numpy as np
import numpy.random
import matplotlib.image as mpimg
import os, os.path
import imageio as imgio
import glob
import shutil
import preprocess_data

In [29]:
# identify which training PNGs have positive aneurysm data
# gather all training example NRRDs
trainingMaskFiles = glob.glob('/data2/yeom/ky_aneur/segmentation/final_segmentation/train/*.nrrd')
trainingDir = '/data2/yeom/ky_aneur/resnet_data/train/'
destDir = trainingDir + 'augmentation3/'

allSegScans = []
allSegMasks = []
for nrrdFile in trainingMaskFiles:
    sampleName = os.path.basename(nrrdFile)[:-5]

    mask = nrrd.read(nrrdFile)
    mask = mask[0].astype(np.uint8)

    maskSums = np.sum(mask, axis=0)
    maskSums = np.sum(maskSums, axis=0)

    maskLocs = np.squeeze(np.where(maskSums > 0))
    for i in maskLocs:
        trainingFiles = glob.glob('{}/{}/IM-[0-9][0-9][0-9][0-9]-{:04d}*'.format(trainingDir, sampleName, i))
        for f in trainingFiles:
            if 'mask' in f:
                allSegMasks.append(f)
                #dest = destDir + 'patient{:02d}-{:04d}_mask.png'.format(counter, i)
            else:
                allSegScans.append(f)
                #dest = destDir + 'patient{:02d}-{:04d}.png'.format(counter, i)
            # shutil.copyfile(f, dest)
            
counter = 0
resample = numpy.random.randint(len(allSegScans), size=len(allSegScans) * 10)
for i in resample:
    scanToCopy = allSegScans[i]
    maskToCopy = allSegMasks[i]
    scanDest = destDir + 'resample{:04d}.png'.format(counter)
    maskDest = destDir + 'resample{:04d}_mask.png'.format(counter)
    shutil.copyfile(scanToCopy, scanDest)
    shutil.copyfile(maskToCopy, maskDest)
    counter = counter + 1        

In [33]:
# Append new augmented data to existing training data list
destDir = trainingDir + 'augmentation3/'
trainingOdgt = '/home/ky_aneur/cs230/code/cta-scripts/train_list.odgt'
res = preprocess_data.build_NN_inputs(destDir, trainingOdgt)

In [9]:
# Check file name prefixes for each patient
prefixes = []
trainFolder = '/data2/yeom/ky_aneur/resnet_data/train'
for folder in os.listdir(trainFolder):
    files = os.listdir('{}/{}'.format(trainFolder, folder))
    f = os.path.basename('{}/{}/{}'.format(trainFolder, folder, files[0]))
    prefixes.append(f[0:7])
    
print(prefixes)
print(np.sort(prefixes))

['IM-0052', 'IM-0008', 'IM-0062', 'IM-0018', 'IM-0061', 'IM-0003', 'IM-0055', 'IM-0042', 'IM-0013', 'IM-0008', 'IM-0035', 'IM-0005', 'IM-0008', 'IM-0046', 'IM-0040', 'IM-0005', 'IM-0020', 'IM-0016', 'IM-0025', 'IM-0032', 'IM-0008', 'IM-0045', 'IM-0002', 'IM-0029', 'IM-0048', 'IM-0023', 'IM-0008', 'IM-0063', 'IM-0039', 'IM-0044', 'IM-0078', 'IM-0049', 'IM-0077', 'IM-0027', 'IM-0061', 'IM-0044', 'IM-0006', 'IM-0050', 'IM-0047', 'IM-0041', 'IM-0049', 'IM-0038', 'IM-0008', 'IM-0012', 'IM-0018', 'IM-0010', 'IM-0056', 'IM-0053', 'IM-0048']
['IM-0002' 'IM-0003' 'IM-0005' 'IM-0005' 'IM-0006' 'IM-0008' 'IM-0008'
 'IM-0008' 'IM-0008' 'IM-0008' 'IM-0008' 'IM-0010' 'IM-0012' 'IM-0013'
 'IM-0016' 'IM-0018' 'IM-0018' 'IM-0020' 'IM-0023' 'IM-0025' 'IM-0027'
 'IM-0029' 'IM-0032' 'IM-0035' 'IM-0038' 'IM-0039' 'IM-0040' 'IM-0041'
 'IM-0042' 'IM-0044' 'IM-0044' 'IM-0045' 'IM-0046' 'IM-0047' 'IM-0048'
 'IM-0048' 'IM-0049' 'IM-0049' 'IM-0050' 'IM-0052' 'IM-0053' 'IM-0055'
 'IM-0056' 'IM-0061' 'IM-0061' 'IM

In [3]:
# Find how many pixels are aneurysm and how many are background
allPixelSum = 0
aneurysmPixelSum = 0
numAneurysmSlices = 0

fileDir = '/data2/yeom/ky_aneur/segmentation/segmentations_181128/'
for f in os.listdir(fileDir):
    mask = nrrd.read(fileDir + '/' + f)
    maskData = mask[0].astype(np.uint8)
    
    maskAneurysms = np.sum(maskData, axis=0)
    maskAneurysmSums = np.sum(maskAneurysms, axis=0)

    numAneurysmSlices += np.sum(maskAneurysmSums > 0)
    aneurysmPixelSum += np.sum(maskAneurysmSums)
    allPixelSum += 512*512*maskData.shape[2]
    
print(allPixelSum)
print(aneurysmPixelSum)
print(aneurysmPixelSum / allPixelSum)
print(numAneurysmSlices)

7088898048
166287.0
42630.50056829458
541


In [28]:
# Calculate proportion of pixels after upsampling the positive data
multFactor = 80
augmentedAneurysmPixelSum = aneurysmPixelSum * multFactor
augmentedAllPixelSum = allPixelSum + numAneurysmSlices*multFactor*512*512

print(augmentedAneurysmPixelSum)
print(augmentedAllPixelSum)

print(aneurysmPixelSum / allPixelSum)
print(augmentedAneurysmPixelSum / augmentedAllPixelSum)

6651480.0
12761694208
2.3457383485281574e-05
0.0005212066588956776
