In [2]:
from __future__ import print_function
import SimpleITK as sitk
import numpy as np
import csv
import pandas as pd
from ipywidgets import interact
import matplotlib.pyplot as plt
#from PIL import Image
import os

from configparser import ConfigParser
import h5py

from matplotlib.patches import Circle
from matplotlib.pylab import subplots
import matplotlib.patches as patches 
from scipy.misc import imsave

%matplotlib inline

In [45]:
parser = ConfigParser()
parser.read('config.ini')
#print (parser.get('local', 'dim') )

60


In [51]:
# for name, value in parser.items('local'):
#     print ('  %s = %s' % (name, value) )

In [52]:
#### ---- Global Vars ---- ####
PATCH_DIM = parser.get('local', 'dim')
DATA_DIR = parser.get('local', 'data')
SUBSET = parser.get('local', 'subset')
CSV_PATH = parser.get('local', 'csv')
SAVE_IMG = parser.get('local', 'img')
TENSOR = parser.get('local', 'tensor')
TENSOR_DIM = parser.get('local', 'slices')

In [53]:
img_filename = '1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295249119452142211437.mhd'
itk_img = sitk.ReadImage(img_filename) # Dimensions are ordered as X, Y, Z (height, width, depth)

In [54]:
print('The pixel dimensions are {} mm'.format(itk_img.GetSpacing()))
print('There are {} slices in the CT volume.'.format(itk_img.GetDepth()))
print('So the depth is {} mm (i.e. total slice * 2.5)'.format(itk_img.GetDepth()*itk_img.GetSpacing()[2]))
print('That should be about the distance from the neck to the navel.')
print('Slice  #1 is closer to the feet. Slice #{} is closer to the head'.format(itk_img.GetDepth()))

The pixel dimensions are (0.78125, 0.78125, 2.5) mm
There are 117 slices in the CT volume.
So the depth is 292.5 mm (i.e. total slice * 2.5)
That should be about the distance from the neck to the navel.
Slice  #1 is closer to the feet. Slice #117 is closer to the head


In [55]:
# SimpleITK keeps the origin and spacing information for the 3D image volume
img_np_array = sitk.GetArrayFromImage(itk_img) # indices are z,y,x (note the ordering of dimensions)
img_np_array.shape

(117, 512, 512)

### Normalizing the image to sets the Voxel size to 1x1x1 mm (as the spacing in Inferrior, Superior, Traverse i.e. x,y,z is of different size)

In [56]:
pixel_spacing = [1.0, 1.0, 1.0] # New Voxel spacing in mm (feel free to change this)

def normalize_img(img):
    
    new_x_size = img.GetSpacing()[0]*img.GetWidth()  # Number of Voxels you want for x dimension
    new_y_size = img.GetSpacing()[1]*img.GetHeight() # Number of Voxels you want for y dimension
    new_z_size = img.GetSpacing()[2]*img.GetDepth()  # Number of Voxels you want for z dimesion
    new_size = [new_x_size, new_y_size, new_z_size]

    new_spacing = pixel_spacing  # mm per voxel (x,y,z) (h, w, d)
    
    new_size = np.rint(np.array(new_size) / np.array(new_spacing)).astype(np.uint32).tolist()

    interpolator_type = sitk.sitkBSpline  #interpolator_type = sitk.sitkLinear
    img_norm = sitk.Resample(img, new_size, sitk.Transform(), interpolator_type, img.GetOrigin(),\
                             new_spacing, img.GetDirection(), 0.0, img.GetPixelIDValue())   
    
    # correcting origin to the new scaling factor
    img_norm.SetOrigin(np.array(img.GetOrigin()) / np.array(new_spacing))

    return img_norm
itk_img_norm = normalize_img(itk_img)

#### Converting Normalized Image to numpy 3D array. The values are in HU descrbing the radiodensity 

In [57]:
img_np_array_norm = sitk.GetArrayFromImage(itk_img_norm)

In [58]:
print('After resizing the voxels to 1x1x1 mm each the matrix dimensions are now: {}'.format(img_np_array_norm.shape))
print('But the depth is still {} mm'.format(itk_img_norm.GetDepth()*itk_img_norm.GetSpacing()[2]))
print('That should be about the distance from the neck to the navel.')

After resizing the voxels to 1x1x1 mm each the matrix dimensions are now: (292, 400, 400)
But the depth is still 292.0 mm
That should be about the distance from the neck to the navel.


#### Reading the annotation file for marked nodules (Region of Interest, ROI) in x,y, z coordinates storing it in a list. Note the coordinates are given in world coordinates (which will changed to Voxel coordinates later, as we have normalized the images).

In [59]:
def readCSV(filename):
    lines = []
    with open(filename, "r") as f:
        csvreader = csv.reader(f)
        for line in csvreader:
            lines.append(line)
    return lines
annotations_file = "annotations.csv"
cands = readCSV(annotations_file)
cands_df = pd.DataFrame(cands)
print ('Total number of marked nodule coordinates are {} '.format(len(cands) - 1) )
cands_df

Total number of marked nodule coordinates are 8 


Unnamed: 0,0,1,2,3,4
0,seriesuid,coordX,coordY,coordZ,diameter_mm
1,1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295...,-65.87013627,50.73044009,-90.80330387,5.915845183
2,1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295...,-105.9171192,-17.6565882,-105.0917524,5.48467872
3,1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295...,42.29613347,56.15173475,-84.66166226,6.47796611
4,1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295...,-137.5005674,-21.65360391,-152.0366379,4.362886498
5,1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295...,-26.86268253,38.52993743,-167.5018886,5.885443068
6,1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295...,71.39121262,10.55131442,-85.27847902,5.734744196
7,1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295...,59.89735907,41.71051844,-96.24057315,4.3556589
8,1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295...,99.37468119,23.94678445,-138.5658318,9.104624209


##### Extracting the Origin and Pixel spacing from the original image

In [60]:
numpyOrigin = np.array(list(reversed(itk_img.GetOrigin())))
numpySpacing = np.array(list(reversed(itk_img.GetSpacing())))

print ("Image Origin in Z, Y, X order : ", numpyOrigin)
print ("Image Spacing in Z, Y, X order : ", numpySpacing)

numpySpacing_Normalized = np.array([1.0, 1.0, 1.0])
print ("\nImage Normalized Spacing in Z, Y, X order : ", numpySpacing_Normalized)

Image Origin in Z, Y, X order :  [-311.25 -200.   -207.5 ]
Image Spacing in Z, Y, X order :  [ 2.5      0.78125  0.78125]

Image Normalized Spacing in Z, Y, X order :  [ 1.  1.  1.]


### Procedure for changing the world coordinates to Voxel coordinates
Since the coordinates of nodules are given in World Coordinates, we transform from world coordinates to voxel coordinates. 
As their is no rotation component in LUNA16 data set so we can use the following simple formula for conversion. 


In [61]:
def worldToVoxelCoord(worldCoord, origin, spacing):
    stretchedVoxelCoord = np.absolute(worldCoord - origin)
    voxelCoord = np.array(stretchedVoxelCoord / spacing).astype(int)
    return voxelCoord

### Reading the annotation list, converting each element from World to Voxel coordinate, which represents the core/center of a nodule. Then showing only the slices having nodules.

In [62]:
# nodule_slices_lst = []
# for cand in cands[1:]:
#     worldCoord = np.asarray([float(cand[3]),float(cand[2]),float(cand[1])])
#     voxelCoord = worldToVoxelCoord(worldCoord, numpyOrigin, numpySpacing_Normalized)
#     nodule_slices_lst.append(voxelCoord[0])

#     fig,ax = plt.subplots(figsize=(10,10))
#     print ('Voxel coord in Z, Y, X (with normalized spacing): {}'.format(voxelCoord));
# #    print ('Value at Voxel coord is : {}'.format(img_np_array_norm[voxelCoord[0],voxelCoord[1], voxelCoord[2]] ));

#     plt.imshow(img_np_array_norm[voxelCoord[0],:,:], cmap='bone');    

#     circ = Circle((voxelCoord[2],voxelCoord[1]),15, lw=3.,fill=False, edgecolor='r') 
#     ax.add_patch(circ)

#     plt.title('Slice #{}'.format(voxelCoord[0]));
#     plt.show() 
    
# print ('Total number of Nodules are {}'.format(len (nodule_slices_lst) ));    
# print ('Nodules found on Slice numbers {}'.format(sorted (nodule_slices_lst) ));

### Segment/patch around the nodule (Voxel coordinates)
This will be used for Deep learning analysis in a separate CNN hidden layers. It also ensures that the nodules are invariant of the location in lung (for our Fractal Geometry analysis and feature extractions).

Currently the patch size is 60x60 mm, we can upgrade to make the patch size as user driven
    - Extract patch for each nodule from the annotation list
    - Visualize each patch and saving the patch in separate file

In [None]:
HDF5 = h5py.File('Test_1.hdf5')
#dset = f.create_dataset('mydataset', data=numpy.ones((2,2),"=i4"))
#new_dset_value=numpy.zeros((3,3),"=i4")

In [None]:
dset = HDF5.create_dataset('patch_dataset', (2,2), maxshape=(None,3))

In [104]:
def save_patch(seriesId, seq, worldCoord, patch):
    worldCoord = np.absolute((np.round(worldCoord)))
    print ('SeriesId {} , Seq : {}, XYZ coordinates {}, Patch Dimension {}'\
           .format(seriesId, seq, worldCoord, patch.shape ))

In [105]:
patchCoord_lst = []
seq = 0
for cand in cands[1:]:
#     fig,ax = plt.subplots(figsize=(8,8))
    seq += 1
    worldCoord = np.asarray([float(cand[3]),float(cand[2]),float(cand[1])])
    voxelCoord = worldToVoxelCoord(worldCoord, numpyOrigin, numpySpacing_Normalized)
    
    patchWidth = 60 # as the maximum size of a Nodule can be upto 30 mm
#     patch = img_np_array_norm[voxelCoord[0], \
    
    patch = img_np_array_norm[voxelCoord[0] - 30 : voxelCoord[0] + 30, \
                       voxelCoord[1] - 30 : voxelCoord[1] + 30, \
                       voxelCoord[2] - 30 : voxelCoord[2] + 30]    
#    patch = normalizePlanes(patch) ....:need to think on normalizing HU for each Patch 
    save_patch(img_filename, seq, worldCoord, patch)
    patchCoord_lst.append((voxelCoord,patch))
    
    print ('\nNodule center scoordinate is :  {}'.format(voxelCoord) );
    print ('Voxel value at nodule center is :  {} \n'.\
           format(img_np_array_norm[voxelCoord[0], voxelCoord[1],voxelCoord[2]]) );
    
#     plt.imshow(patch, cmap='bone')
#     plt.show()    
    
#     outputDir = 'patches/'
#     fig.savefig(os.path.join(outputDir, 'patch_' + str(voxelCoord[0]) + '_' + str(voxelCoord[1]) +\
#                              '_' + str(voxelCoord[2])) )


SeriesId 1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295249119452142211437.mhd , Seq : 1, XYZ coordinates [ 91.  51.  66.], Patch Dimension (60, 60, 60)

Nodule center scoordinate is :  [220 250 141]
Voxel value at nodule center is :  -85 

SeriesId 1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295249119452142211437.mhd , Seq : 2, XYZ coordinates [ 105.   18.  106.], Patch Dimension (60, 60, 60)

Nodule center scoordinate is :  [206 182 101]
Voxel value at nodule center is :  -39 

SeriesId 1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295249119452142211437.mhd , Seq : 3, XYZ coordinates [ 85.  56.  42.], Patch Dimension (60, 60, 60)

Nodule center scoordinate is :  [226 256 249]
Voxel value at nodule center is :  37 

SeriesId 1.3.6.1.4.1.14519.5.2.1.6279.6001.112767175295249119452142211437.mhd , Seq : 4, XYZ coordinates [ 152.   22.  138.], Patch Dimension (60, 60, 60)

Nodule center scoordinate is :  [159 178  69]
Voxel value at nodule center is :  -307 

SeriesId 1.3.6.1.4.1.14519.5.2.1.6

In [71]:
#imsave("tstPatch.png",patch)

In [39]:
p1 = patch.flatten()

In [3]:
f = h5py.File('myfile_1.hdf5')
#dset = f.create_dataset('mydataset', data=numpy.ones((2,2),"=i4"))
#new_dset_value=numpy.zeros((3,3),"=i4")

In [4]:
dset = f.create_dataset('mydataset', (2,2), maxshape=(None,3))

In [6]:
dset.resize((3,3))
dset[:,:] = numpy.zeros((3,3),"=i4")

In [14]:
type(dset)

h5py._hl.dataset.Dataset

In [16]:
dset.resize((5,3))

In [17]:
dset

<HDF5 dataset "mydataset": shape (5, 3), type "<f4">

In [None]:
#AL -Reference

In [62]:
y = np.random.rand(3,3,2)
print(y[:])
print('------')
y1 = y.ravel().reshape(1,-1)
print(y1)
print(y1.shape)
print('------')
y2 = y1.reshape(3,3,2)
print(y2)

[[[ 0.8258521   0.4916576 ]
  [ 0.96592831  0.86182914]
  [ 0.11719222  0.1864989 ]]

 [[ 0.28308992  0.91171064]
  [ 0.6007023   0.61216908]
  [ 0.4467723   0.90813744]]

 [[ 0.33569915  0.29754201]
  [ 0.13368492  0.72305467]
  [ 0.84094043  0.56057669]]]
------
[[ 0.8258521   0.4916576   0.96592831  0.86182914  0.11719222  0.1864989
   0.28308992  0.91171064  0.6007023   0.61216908  0.4467723   0.90813744
   0.33569915  0.29754201  0.13368492  0.72305467  0.84094043  0.56057669]]
(1, 18)
------
[[[ 0.8258521   0.4916576 ]
  [ 0.96592831  0.86182914]
  [ 0.11719222  0.1864989 ]]

 [[ 0.28308992  0.91171064]
  [ 0.6007023   0.61216908]
  [ 0.4467723   0.90813744]]

 [[ 0.33569915  0.29754201]
  [ 0.13368492  0.72305467]
  [ 0.84094043  0.56057669]]]


In [60]:
y = np.random.rand(1,3,2)
print(y[:])
print('------')
y1 = y.ravel().reshape(1,-1)
print(y1)

[[[ 0.90524765  0.10401848]
  [ 0.26253789  0.90064091]
  [ 0.22436896  0.01085188]]]
------
[[ 0.90524765  0.10401848  0.26253789  0.90064091  0.22436896  0.01085188]]


In [28]:
x = np.zeros((3,3,2))
print(x)
print('------')
print(x)

[[[ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]]

 [[ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]]

 [[ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]]]
------
[[[ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]]

 [[ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]]

 [[ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]]]


In [46]:
# test hdf5 file generated

In [67]:
hdf5_file_name = '/Users/keil/datasets/LUNA16/64dim_patches.hdf5'

dataset_name = h5py.File(hdf5_file_name, 'r') # open in read-only mode

In [68]:
dataset_name

<HDF5 file "64dim_patches.hdf5" (mode r)>

In [69]:
print('Dataset meta data and real data:')
for name in [key for key in dataset_name.keys()]:
    print(name)
    print(dataset_name[name])
    print(dataset_name[name][:])
    print(dataset_name[name][:].shape)

Dataset meta data and real data:
classes
<HDF5 dataset "classes": shape (365, 4), type "<f8">
[[   0.           19.26483377   50.52493873 -176.063894  ]
 [   0.           74.34730878    5.43345505 -135.2220227 ]
 [   0.           15.98         22.08        -80.17      ]
 ..., 
 [   0.          -59.26359466   -9.24087582 -147.9494437 ]
 [   0.          -74.56033695   31.46355465 -183.213398  ]
 [   0.          -29.58         28.39        -53.36      ]]
(365, 4)
patches
<HDF5 dataset "patches": shape (365, 12288), type "<f4">
[[ 1.  1.  1. ...,  1.  1.  1.]
 [ 1.  1.  1. ...,  1.  1.  1.]
 [ 1.  1.  1. ...,  1.  1.  1.]
 ..., 
 [ 1.  1.  1. ...,  1.  1.  1.]
 [ 1.  1.  1. ...,  1.  1.  1.]
 [ 1.  1.  1. ...,  1.  1.  1.]]
(365, 12288)
uuid
<HDF5 dataset "uuid": shape (365, 1), type "|O">
[[b'1.3.6.1.4.1.14519.5.2.1.6279.6001.281967919138248195763602360723']
 [b'1.3.6.1.4.1.14519.5.2.1.6279.6001.281967919138248195763602360723']
 [b'1.3.6.1.4.1.14519.5.2.1.6279.6001.28196791913824819576360

In [70]:
len(dataset_name['patches'])

365