In [1]:
# CSV of reference nodule annotations; loaded with pandas.read_csv further down.
ANNOTATIONS_PATH = '/home/rrg0013@auburn.edu/git/nodules/resources/annotations.csv'
# Directory handed to candgen.getFilelist to enumerate the scans of this subset.
BIN0PATH = '/scr/data/nodules/luna/test/subset0/'
# Directory containing one '<seriesuid>.xml' reader-annotation file per scan.
XMLDIR = '/home/rrg0013@auburn.edu/xml/'

In [2]:
import sys
sys.path.append('/home/rrg0013@auburn.edu/git/nodules/src/')

In [3]:
import math
import segment
import candgen as cg
import pandas as pd
import numpy as np
import SimpleITK as sitk
import skimage.measure as measure
import xml.etree.ElementTree as et

Using TensorFlow backend.


# CLASSES

In [146]:
class Scan:
  """Reader markings for one series plus the clusters derived from them.

  Attributes:
    id: series UID of the scan.
    nodules: Nodule objects, one per individual reader marking.
    groups: lists of indices into ``nodules``; each list is one cluster.
    clusters: merged Nodule objects appended by ``combine`` (one per group).
    annotations: iterable of reference (x, y, z) coordinates, or None.
  """

  # Ratings that combine() averages over the members of a group.
  _RATINGS = ('subtlety', 'internalStructure', 'sphericity', 'calcification',
              'margin', 'lobulation', 'spiculation', 'texture', 'malignancy')

  def __init__(self):
    self.id = ''
    self.nodules = []
    self.groups = []
    self.clusters = []
    self.annotations = None

  def combine(self):
    """Merge every group of markings into one averaged Nodule.

    For each entry of ``self.groups`` a new Nodule is appended to
    ``self.clusters``. Its centroid, diameter and ratings are the arithmetic
    mean over the group members, ``roi`` is the concatenation of the members'
    ROI points and ``tagCount`` is the number of members. Each new centroid is
    printed. Empty groups are skipped because they have nothing to average.
    """
    for group in self.groups:
      if not group:
        continue
      members = [self.nodules[index] for index in group]
      size = float(len(members))
      nn = Nodule()
      cx, cy, cz, d = 0., 0., 0., 0.
      for member in members:
        cx += member.centroid[0]
        cy += member.centroid[1]
        cz += member.centroid[2]
        d += member.diameter
        for rating in self._RATINGS:
          setattr(nn, rating, getattr(nn, rating) + getattr(member, rating))
        nn.roi += member.roi
      newCentroid = np.array([cx, cy, cz])
      newCentroid /= size
      nn.centroid = newCentroid
      print(newCentroid)
      nn.tagCount = len(members)
      nn.diameter = d / size
      for rating in self._RATINGS:
        setattr(nn, rating, getattr(nn, rating) / size)
      self.clusters.append(nn)

  def getDistance(self, a, b):
    """Return the Euclidean distance between the 3-D points ``a`` and ``b``."""
    du = (a[0] - b[0])
    dv = (a[1] - b[1])
    dw = (a[2] - b[2])
    return math.sqrt(du*du + dv*dv + dw*dw)

  def validateNodules(self):
    """Flag clusters that lie close to a reference annotation.

    For every coordinate in ``self.annotations`` the clusters are scanned in
    order; the first one whose centroid is closer than its own diameter gets
    ``valid = True`` and the search moves on to the next annotation. The
    coordinates, distances and a final summary line are printed. Nothing is
    validated when ``self.annotations`` is None.
    """
    ct, count = 0, 0
    # Must read this instance's annotations, not a module-level `scan`.
    if self.annotations is not None:
      count = len(self.annotations)
      for coord in self.annotations:
        print(coord)
        for cluster in self.clusters:
          distance = self.getDistance(coord, cluster.centroid)
          print(distance)
          if distance < cluster.diameter:
            print("VERIFIED NODULE")
            cluster.valid = True
            ct += 1
            break
    print("VALIDATED " + str(ct) + " OF " + str(count) + " NODULES")
            

In [126]:
class Nodule():
  """One nodule marking (or an averaged cluster of markings).

  ``diameter`` and ``area`` are not set by the constructor; ``getProperties``
  or the caller assigns them.
  """

  _NAMESPACE = "{http://www.nih.gov}"
  # Child elements read by addCharacteristics, in the order they are parsed.
  _CHARACTERISTICS = ('subtlety', 'internalStructure', 'calcification',
                      'sphericity', 'margin', 'lobulation', 'spiculation',
                      'texture', 'malignancy')
  # Shape of the scratch volume ROI points are rasterised into.
  _GRID_SHAPE = (500, 500, 500)

  def __init__(self):
    self.type = ''
    self.no = -1

    self.subtlety = 0.
    self.internalStructure = 0.
    self.sphericity = 0.
    self.calcification = 0.
    self.margin = 0.
    self.lobulation = 0.
    self.spiculation = 0.
    self.texture = 0.
    self.malignancy = 0.

    #original x, y, z and roi coords
    self.ox = 0.
    self.oy = 0.
    self.oz = 0.
    self.centroid = None
    self.roi = []

    self.group = -1
    self.grouped = False
    self.groupMembers = []

    self.tagCount = -1
    self.valid = False

  def addCharacteristics(self, chars, type=None):
    """Copy the integer ratings found under ``chars`` onto this nodule.

    Args:
      chars: element whose namespaced children hold the ratings.
      type: unused; kept so existing callers keep working.

    Fields are read in a fixed order. If one is missing, "ATTRIBUTE ERROR" is
    printed and the remaining fields keep their previous values.
    """
    ##-MALIGNANCY IS ASSUMING 60 YEAR OLD MALE SMOKER ..
    try:
      for field in self._CHARACTERISTICS:
        setattr(self, field, int(chars.find(self._NAMESPACE + field).text))
    except AttributeError:
      print("ATTRIBUTE ERROR")

  def addGroup(self, groupNo, group):
    """Record membership of group ``groupNo``.

    Every index in ``group`` other than this nodule's own ``no`` is appended
    to ``groupMembers``.
    """
    self.group = groupNo
    self.grouped = True
    for member in group:
      if member != self.no:
        self.groupMembers.append(member)

  def getProperties(self):
    """Derive ``centroid``, ``diameter`` and ``area`` from the ROI points.

    The ROI points are rasterised (absolute value, truncated to int) into a
    scratch volume. The centroid comes from the first region reported by
    ``measure.regionprops`` on the whole volume; diameter and area come from
    the same call on the volume cropped to the ROI's bounding box.
    """
    # Allocate the integer grid directly; zeros(...).astype(int) would build a
    # float volume first and then copy it.
    tmp = np.zeros(self._GRID_SHAPE, dtype=int)
    xmax, xmin, ymax, ymin, zmax, zmin = -1e5, 1e5, -1e5, 1e5, -1e5, 1e5
    for value in self.roi:
      x, y, z = value[0], value[1], value[2]
      xmax = max(abs(int(round(x))), xmax)
      xmin = min(abs(int(x)), xmin)
      ymax = max(abs(int(round(y))), ymax)
      ymin = min(abs(int(y)), ymin)
      zmax = max(abs(int(round(z))), zmax)
      zmin = min(abs(int(z)), zmin)
      tmp[abs(int(x)), abs(int(y)), abs(int(z))] = 1

    props = measure.regionprops(tmp)
    self.centroid = np.array(props[0].centroid)

    # Pad the bounding box by one voxel on the low side, but never below 0:
    # a start of -1 would wrap to the far end of the axis and yield an empty
    # crop whenever a minimum coordinate is 0.
    tmp = tmp[max(xmin-1, 0):xmax+1,
              max(ymin-1, 0):ymax+1,
              max(zmin-1, 0):zmax+1]
    if tmp.shape[0]*tmp.shape[1]*tmp.shape[2] == max(tmp.shape):
      # Crop is a single line of voxels: fall back to a fixed diameter.
      self.diameter = 3.
      self.area = max(tmp.shape)
    else:
      props = measure.regionprops(tmp)
      self.diameter = props[0].equivalent_diameter
      self.area = props[0].area

# EXPLORATION

In [4]:
# Reference annotations table; later cells filter it by its 'seriesuid' column.
annotations = pd.read_csv(ANNOTATIONS_PATH)
# Parallel lists from candgen: filelist entries are compared against
# 'seriesuid', filepathlist entries are opened with SimpleITK.
filelist, filepathlist = cg.getFilelist(BIN0PATH)

In [116]:
# Pick one scan and show the reference annotations recorded for it.
fuid = filelist[2]
currentNodules = annotations[annotations['seriesuid'] == fuid]
print currentNodules
#xml(fuid)

                                            seriesuid     coordX     coordY  \
25  1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524...  46.188539  48.402806   
26  1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524...  36.392044  76.771663   

        coordZ  diameter_mm  
25 -108.578632    13.596471  
26 -123.321911     4.343200  


# FUNCTIONS

In [83]:
def cluster(scan):
  """Group overlapping markings of ``scan`` and record the groups on it.

  Every nodule in ``scan.nodules`` gets its list index as ``no``. Two nodules
  overlap when the distance between their centroids is smaller than the sum
  of their radii (``diameter / 2``). Starting from each still-ungrouped
  nodule, its ungrouped neighbours and the ungrouped neighbours of those
  neighbours are collected. The group is passed to each member's ``addGroup``
  and appended to ``scan.groups``. Nodules without neighbours form groups of
  one.

  Group members are stored in ascending index order so the result does not
  depend on set iteration order. Prints ``scan.id``.
  """
  print(scan.id)
  nodules = []
  gno = 0

  for i, nodule in enumerate(scan.nodules):
    nodule.no = i
    nodules.append(nodule)

  def overlaps(a, b):
    # True when the spheres of nodules a and b intersect.
    distance = scan.getDistance(nodules[a].centroid, nodules[b].centroid)
    return distance < (nodules[a].diameter/2. + nodules[b].diameter/2.)

  for i in range(len(nodules)):
    if nodules[i].grouped:
      continue
    group = [i]
    # First pass: direct neighbours of the seed.
    for j in range(len(nodules)):
      if j != i and not nodules[j].grouped and overlaps(i, j):
        group.append(j)
    # Second pass: neighbours of the direct neighbours only. The slice is a
    # snapshot, so members added here are not expanded further.
    for j in group[1:]:
      for k in range(len(nodules)):
        if k != i and k != j and not nodules[k].grouped and overlaps(j, k):
          group.append(k)
    # The second pass may add duplicates; dedupe and fix the order.
    group = sorted(set(group))
    for member in group:
      nodules[member].addGroup(gno, group)
    scan.groups.append(group)
    gno += 1

In [84]:
def xml(seriesuid, spacing=None, origin=None):
  """Parse the reader-annotation XML of one series into a Scan.

  Args:
    seriesuid: series UID; the file read is XMLDIR + '/' + seriesuid + '.xml'.
    spacing: per-axis scale factors; x and y edge coordinates are multiplied
      by ``spacing[0]`` and ``spacing[1]``.
    origin: sequence whose third entry is subtracted from each
      ``imageZposition``.

  Returns:
    A Scan whose ``nodules`` holds one Nodule per ``unblindedReadNodule``.
    Readings with a ``characteristics`` element get type 'L', the edge points
    of every ROI marked ``inclusion == 'TRUE'`` and properties computed by
    ``getProperties``. All other readings get type 'S', a fixed diameter of
    3 and the first edge point of their first ROI as centroid.

  Raises:
    ValueError: a reading is present but ``spacing`` or ``origin`` is None.
  """
  ns = '{http://www.nih.gov}'
  scan = Scan()
  scan.id = seriesuid
  xmlpath = XMLDIR+'/'+seriesuid+'.xml'
  tree = et.parse(xmlpath)
  for reader in tree.findall('./' + ns + 'readingSession'):
    for reading in reader.findall('./' + ns + 'unblindedReadNodule'):
      # Both branches below need the geometry; fail with a clear message
      # instead of a TypeError from indexing None.
      if spacing is None or origin is None:
        raise ValueError('spacing and origin are required to convert '
                         'nodule coordinates for series ' + str(seriesuid))
      nn = Nodule()
      characteristics = reading.find('./' + ns + 'characteristics')
      # Compare against None: the truth value of an Element is not a
      # presence test.
      if characteristics is not None:
        nn.type = 'L'
        nn.addCharacteristics(characteristics)
        for roi in reading.findall('./' + ns + 'roi'):
          z = float(roi.find('./' + ns + 'imageZposition').text)
          z = z-origin[2]
          include = roi.find('./' + ns + 'inclusion')
          if include.text == 'TRUE':
            for e in roi.findall('./' + ns + 'edgeMap'):
              x = int(e.find('./' + ns + 'xCoord').text)
              y = int(e.find('./' + ns + 'yCoord').text)
              # Scale in-plane coordinates explicitly; z is already offset.
              nn.roi.append(np.array([x*spacing[0], y*spacing[1], z]))
        nn.getProperties()
        scan.nodules.append(nn)
      else:
        nn.type = 'S'
        nn.diameter = 3.
        roi = reading.find('./' + ns + 'roi')
        e = roi.find('./' + ns + 'edgeMap')
        x = float(e.find('./' + ns + 'xCoord').text) * spacing[0]
        y = float(e.find('./' + ns + 'yCoord').text) * spacing[1]
        z = float(roi.find('./' + ns + 'imageZposition').text)
        z = z-origin[2]
        nn.ox = x
        nn.oy = y
        nn.oz = z
        nn.centroid = (x,y,z)
        scan.nodules.append(nn)

  return scan

# MAIN

In [5]:
import lidc

In [6]:
n = 2
def run(no):
  """Build, cluster and validate the Scan for entry ``no`` of ``filelist``.

  Returns None when the annotations table has no row for that series.
  Otherwise the image is read with SimpleITK, the annotation coordinates are
  obtained from ``segment.getCoords`` and printed, the scan is parsed and
  clustered through ``lidc``, and the validated Scan is returned.
  """
  seriesuid = filelist[no]
  currentNodules = annotations[annotations['seriesuid'] == seriesuid]
  if currentNodules.empty:
    return None

  itk = sitk.ReadImage(filepathlist[no])
  origin = np.array(itk.GetOrigin())
  spacing = np.array(itk.GetSpacing())

  coords = segment.getCoords(currentNodules, origin, spacing)
  for index in range(len(coords)):
    print(coords[index])

  scan = lidc.xml(XMLDIR, seriesuid, spacing, origin)
  scan.annotations = coords
  lidc.cluster(scan)
  scan.combine()
  scan.validateNodules()
  return scan
scan = run(n)

[234 157  85]
[224 185  71]
1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524522225658609808059
[ 208.19544132  161.47205593  165.95504386]
[ 233.28684629  156.23046213   84.95076273]
[ 223.44291059  183.99794568   70.24463359]
[ 71.07322675  87.81248093  71.25      ]
[  55.9418412   146.47821619  121.31854839]
[  67.50584471  143.79293752  155.        ]
[ 225.5322534   109.42037248   87.51315789]
[  99.33786905  132.8163774   173.75      ]
[  94.947245    129.52340937  166.25      ]
[234 157  85]
85.0858017501
1.05033377972
VERIFIED NODULE
[224 185  71]
99.0949473068
33.2949786615
1.37296755042
VERIFIED NODULE
VALIDATED 2 OF 2 NODULES


In [138]:
# Dump the merged clusters produced by scan.combine() (default object reprs).
print scan.clusters

[<__main__.Nodule instance at 0x7f02a072b950>, <__main__.Nodule instance at 0x7f02a072b998>, <__main__.Nodule instance at 0x7f02a072b9e0>, <__main__.Nodule instance at 0x7f02a072ba28>, <__main__.Nodule instance at 0x7f02a072bb00>, <__main__.Nodule instance at 0x7f02a072bb48>, <__main__.Nodule instance at 0x7f02a072bc20>, <__main__.Nodule instance at 0x7f02a072bc68>, <__main__.Nodule instance at 0x7f02a072bcf8>]


In [122]:
# Compare the reference annotations with the first merged cluster:
# how many markings were merged into it and where its centroid lies.
print currentNodules
print scan.clusters[0].tagCount
print scan.clusters[0].centroid

                                            seriesuid     coordX     coordY  \
25  1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524...  46.188539  48.402806   
26  1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524...  36.392044  76.771663   

        coordZ  diameter_mm  
25 -108.578632    13.596471  
26 -123.321911     4.343200  
4
[ 208.19544132  161.47205593  165.95504386]
