# CREATING ANNOTATIONS LIST

In [37]:
LUNA_PATH = '/scr/data/nodules/luna/test/'
XMLDIR = '/home/rrg0013@auburn.edu/xmlb/xml/'
OUTFILE = '/home/rrg0013@auburn.edu/git/nodules/Annotations.csv'

In [2]:
import sys
sys.path.append('/home/rrg0013@auburn.edu/git/nodules/src/')

In [24]:
import os
import math
import scan as scn
import nodule as nd
import candgen as cg
import pandas as pd
import numpy as np
import skimage.measure as measure
import SimpleITK as sitk
import xml.etree.ElementTree as et

In [35]:
class Scan:
  def __init__(self):
    self.id = ''
    self.nodules = []
    self.groups = []
    self.clusters = []
    self.annotations = None


  def combine(self):
    for group in self.groups:
      nn = Nodule()
      cx,cy,cz,d = 0.,0.,0.,0.
      for i in range(len(group)):
        cx += self.nodules[group[i]].centroid[0]
        cy += self.nodules[group[i]].centroid[1]
        cz += self.nodules[group[i]].centroid[2]
        d += self.nodules[group[i]].diameter
        if self.nodules[group[i]].type == 'L':
          nn.type = 'L'
        elif self.nodules[group[i]].type == 'S':
          if nn.type != 'L':
            nn.type = 'S'
        elif self.nodules[group[i]].type == 'N':
          if nn.type != 'L' and nn.type != 'S':
            nn.type = 'N'
        nn.subtlety += self.nodules[group[i]].subtlety
        nn.internalStructure += self.nodules[group[i]].internalStructure
        nn.sphericity += self.nodules[group[i]].sphericity
        nn.calcification += self.nodules[group[i]].calcification
        nn.margin += self.nodules[group[i]].margin
        nn.lobulation += self.nodules[group[i]].lobulation
        nn.spiculation += self.nodules[group[i]].spiculation
        nn.texture += self.nodules[group[i]].texture
        nn.malignancy += self.nodules[group[i]].malignancy
        nn.roi += self.nodules[group[i]].roi
        nn.oroi += self.nodules[group[i]].oroi
      newCentroid = np.array([cx,cy,cz])
      newCentroid /= float(len(group))
      nn.centroid = newCentroid
      print newCentroid
      nn.tagCount = len(group)
      nn.diameter = d/float(len(group))
      #print nn.diameter
      nn.subtlety /= float(len(group))
      nn.internalStructure /= float(len(group))
      nn.sphericity /= float(len(group))
      nn.calcification /= float(len(group))
      nn.margin /= float(len(group))
      nn.lobulation /= float(len(group))
      nn.spiculation /= float(len(group))
      nn.texture /= float(len(group))
      nn.malignancy /= float(len(group))
      self.clusters.append(nn)
  def getDistance(self,a,b):
    du = (a[0]-b[0])
    dv = (a[1]-b[1])
    dw = (a[2]-b[2])
    return math.sqrt(du*du+dv*dv+dw*dw)

In [26]:
class Nodule():
  def __init__(self):
    self.type = ''
    self.no = -1

    self.subtlety = 0.
    self.internalStructure = 0.
    self.sphericity = 0.
    self.calcification = 0.
    self.margin = 0.
    self.lobulation = 0.
    self.spiculation = 0.
    self.texture = 0.
    self.malignancy = 0.

    #original x, y, z and roi coords
    self.ox = 0.
    self.oy = 0.
    self.oz = 0.
    self.centroid = None
    self.roi = []
    self.oroi = []

    self.group = -1
    self.grouped = False
    self.groupMembers = []

    self.tagCount = -1
    self.valid = False


  def addCharacteristics(self, chars, type=None):
    try:
      subtlety = chars.find("{http://www.nih.gov}subtlety")
      self.subtlety = int(subtlety.text)
      internal = chars.find("{http://www.nih.gov}internalStructure")
      self.internalStructure = int(internal.text)
      calcification = chars.find("{http://www.nih.gov}calcification")
      self.calcification = int(calcification.text)
      sphericity = chars.find("{http://www.nih.gov}sphericity")
      self.sphericity = int(sphericity.text)
      margin = chars.find("{http://www.nih.gov}margin")
      self.margin = int(margin.text)
      lobulation = chars.find("{http://www.nih.gov}lobulation")
      self.lobulation = int(lobulation.text)
      spiculation = chars.find("{http://www.nih.gov}spiculation")
      self.spiculation = int(spiculation.text)
      texture = chars.find("{http://www.nih.gov}texture")
      self.texture = int(texture.text)
      ##-MALIGNANCY IS ASSUMING 60 YEAR OLD MALE SMOKER ..
      malignancy = chars.find("{http://www.nih.gov}malignancy")
      self.malignancy = int(malignancy.text)
    except AttributeError:
      print "ATTRIBUTE ERROR"

  def addGroup(self,groupNo,group):
    self.group = groupNo
    self.grouped = True
    for i in range(len(group)):
      if group[i] != self.no:
        self.groupMembers.append(group[i])

  def getProperties(self):
    tmp = np.zeros((500,500,500)).astype(int)
    xmax,xmin,ymax,ymin,zmax,zmin=-1e5,1e5,-1e5,1e5,-1e5,1e5
    x,y,z=0,0,0
    for value in self.roi:
      xmax = max(abs(int(round(value[0]))),xmax)
      xmin = min(abs(int(value[0])),xmin)
      ymax = max(abs(int(round(value[1]))),ymax)
      ymin = min(abs(int(value[1])),ymin)
      zmax = max(abs(int(round(value[2]))),zmax)
      zmin = min(abs(int(value[2])),zmin)
      x,y,z = value[0],value[1],value[2]
      tmp[abs(int(x)),abs(int(y)),abs(int(z))]=1

    props = measure.regionprops(tmp)
    centroid = np.array(props[0].centroid)
    self.centroid = centroid

    tmp = tmp[xmin-1:xmax+1,ymin-1:ymax+1,zmin-1:zmax+1]
    tmp[tmp>0] = 1
    if tmp.shape[0]*tmp.shape[1]*tmp.shape[2] == max(tmp.shape):
      self.diameter = 3.
      self.area = max(tmp.shape)
    else:
      props = measure.regionprops(tmp)
      self.diameter = props[0].equivalent_diameter
      self.area = props[0].area


In [30]:
def Cluster(scan):
  print scan.id
  nodules = []
  gno = 0
  for i, nodule in enumerate(scan.nodules):
    if nodule.type == 'L':
      nodule.no = i
      nodules.append(nodule)
    #elif nodule.type == 'S':
    #  nodule.no = i
    #  nodules.append(nodule)
    #elif nodule.type == 'N':
    #  nodule.no = i
    #  nodules.append(nodule)
  for i in range(len(nodules)):
    if not nodules[i].grouped:
      group = []
      group.append(i)
      for j in range(len(nodules)):
        if i != j:
          if not nodules[j].grouped:
            distance = scan.getDistance(nodules[i].centroid,nodules[j].centroid)
            if distance < (nodules[i].diameter/2.+nodules[j].diameter/2.):
              group.append(j)
      for j in range(1,len(group)):
        j = group[j]
        for k in range(len(nodules)):
          if i != k and j != k and i != j:
            if not nodules[k].grouped:
              distance = scan.getDistance(nodules[j].centroid,nodules[k].centroid)
              if distance < (nodules[j].diameter/2.+nodules[k].diameter/2.):
                group.append(k)
      if len(group) >= 1:
        group = set(group)
        group = list(group)
        for n in range(len(group)):
          nodules[group[n]].addGroup(gno,group)
        scan.groups.append(group)
        gno += 1

In [31]:
def xml(XMLDIR, seriesuid, spacing=None, origin=None):
  print XMLDIR
  scan = scn.Scan()
  scan.id = seriesuid
  xmlpath = XMLDIR+'/'+seriesuid+'.xml'
  print xmlpath
  try:
    xml = et.parse(xmlpath)
  except:
    return None
  ct = 0
  for reader in xml.findall('./{http://www.nih.gov}readingSession'):
    for reading in reader.findall('./{http://www.nih.gov}unblindedReadNodule'):
      nn = Nodule()
      if reading.find('./{http://www.nih.gov}characteristics') is not None:
        nn.type = 'L'
        characteristics = reading.find('./{http://www.nih.gov}characteristics')
        nn.addCharacteristics(characteristics)
        for roi in reading.findall('./{http://www.nih.gov}roi'):
          z = roi.find('./{http://www.nih.gov}imageZposition')
          z = float(z.text)
          z = z-origin[2]
          oz = z/spacing[2]
          include = roi.find('./{http://www.nih.gov}inclusion')
          if include.text == 'TRUE':
            for e in roi.findall('./{http://www.nih.gov}edgeMap'):
              x = e.find('./{http://www.nih.gov}xCoord')
              y = e.find('./{http://www.nih.gov}yCoord')
              coord = int(x.text),int(y.text)
              coord = (coord[0],coord[1],0.)
              coord *= spacing
              coord[2] = z
              nn.roi.append(coord)
              nn.oroi.append((int(x.text),int(y.text),oz))
        nn.getProperties()
        scan.nodules.append(nn)
      else:
        nn.type = 'S'
        nn.diameter = 3.
        roi = reading.find('./{http://www.nih.gov}roi')
        e = roi.find('./{http://www.nih.gov}edgeMap')
        x = e.find('./{http://www.nih.gov}xCoord')
        x = float(x.text)
        x *= spacing[0]
        y = e.find('./{http://www.nih.gov}yCoord')
        y = float(y.text)
        y *= spacing[1]
        z = roi.find('./{http://www.nih.gov}imageZposition')
        z = float(z.text)
        z = z-origin[2]
        nn.ox = x
        nn.oy = y
        nn.oz = z
        nn.centroid = (x,y,z)
        scan.nodules.append(nn)
    '''for reading in reader.findall("./{http://www.nih.gov}nonNodule"):
      nn = nd.Nodule()
      nn.type = 'N'
      nn.diameter = 6.
      z = reading.find("./{http://www.nih.gov}imageZposition")
      z = float(z.text)
      z = z-origin[2]
      locuss = reading.findall('./{http://www.nih.gov}locus')
      for locus in locuss:   
        x = locus.find("./{http://www.nih.gov}xCoord")
        y = locus.find("./{http://www.nih.gov}yCoord")
        x = float(x.text)
        y = float(y.text)
        x *= spacing[0]
        y *= spacing[1]
        nn.ox,nn.oy,nn.oz = x,y,z
        nn.centroid = (x,y,z)
        print nn.centroid
        scan.nodules.append(nn)'''
  return scan

In [34]:
def run():
  for parent, subdir, files in os.walk(LUNA_PATH):
    #print "A"
    for fname in files:
      #print "B"
      if ".mhd" in fname.lower():
        fpath = os.path.join(parent,fname)
        itk = sitk.ReadImage(fpath)
        origin = np.array(itk.GetOrigin())
        spacing = np.array(itk.GetSpacing())
        fname = fname[:-4]
        scan = xml(XMLDIR,fname,spacing,origin)
        Cluster(scan)
        scan.combine()
        for cluster in scan.clusters:
          if cluster.type == 'L':
            cx,cy,cz = cluster.centroid
            d = cluster.diameter
            seriesuid = scan.id
            f.write(str(seriesuid)+','+str(cx)+','+str(cy)+','+str(cz)+','+str(d)+'\n')
        #TODO: loop through clusters and generate annotations for segment.py
        #      after thorough debug retrain fpr model

In [38]:
f = open(OUTFILE,'w+')
f.write('seriesuid,coordX,coordY,coordZ,diameter_mm\n')
run()

/home/rrg0013@auburn.edu/xmlb/xml/
/home/rrg0013@auburn.edu/xmlb/xml//1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260.xml
1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260
[ 271.17195988  279.55086875  171.249984  ]
[ 233.88888889  220.44444444  192.96296296]
[  83.02736974  129.49222803   50.        ]
[ 297.83212447  204.1406889   222.499992  ]
/home/rrg0013@auburn.edu/xmlb/xml/
/home/rrg0013@auburn.edu/xmlb/xml//1.3.6.1.4.1.14519.5.2.1.6279.6001.108197895896446896160048741492.xml
1.3.6.1.4.1.14519.5.2.1.6279.6001.108197895896446896160048741492
[  81.36998927  256.81656177   82.08875878]
/home/rrg0013@auburn.edu/xmlb/xml/
/home/rrg0013@auburn.edu/xmlb/xml//1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524522225658609808059.xml
1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524522225658609808059
[ 104.5517351   120.46774727  115.9375    ]
[ 222.03841721  141.72540231  112.60921159]
[ 223.83870968  184.51612903   70.48387097]
[  55.90322581  146.41935484  121.38

1.3.6.1.4.1.14519.5.2.1.6279.6001.213140617640021803112060161074
[  61.17136687  220.29846291  182.48577236]
[ 142.96835275  199.63126335  159.944959  ]
[ 232.37820959  163.98372811  157.70063694]
[ 249.7565407   206.11746003  149.38372093]
[ 175.28125     194.72265625  210.5       ]
[ 211.46519886  220.95419034  172.80681818]
[ 122.28043301  194.0078125   146.19607843]
[  41.3515625  191.9453125   73.       ]
/home/rrg0013@auburn.edu/xmlb/xml/
/home/rrg0013@auburn.edu/xmlb/xml//1.3.6.1.4.1.14519.5.2.1.6279.6001.216882370221919561230873289517.xml
1.3.6.1.4.1.14519.5.2.1.6279.6001.216882370221919561230873289517
[ 155.63774686  162.75652911  274.76350826]
[ 193.67191029  177.96878242  236.24999475]
[ 163.57424855  182.54886138  234.374993  ]
[ 241.43559086   81.78712428  238.749993  ]
[ 262.37309468  166.19143653  279.999993  ]
/home/rrg0013@auburn.edu/xmlb/xml/
/home/rrg0013@auburn.edu/xmlb/xml//1.3.6.1.4.1.14519.5.2.1.6279.6001.219087313261026510628926082729.xml
1.3.6.1.4.1.14519.5.2.1

1.3.6.1.4.1.14519.5.2.1.6279.6001.313334055029671473836954456733
[ 197.20477322  174.70999018  107.78407852]
/home/rrg0013@auburn.edu/xmlb/xml/
/home/rrg0013@auburn.edu/xmlb/xml//1.3.6.1.4.1.14519.5.2.1.6279.6001.313605260055394498989743099991.xml
1.3.6.1.4.1.14519.5.2.1.6279.6001.313605260055394498989743099991
[ 115.3207092   247.08900235  178.02784603]
/home/rrg0013@auburn.edu/xmlb/xml/
/home/rrg0013@auburn.edu/xmlb/xml//1.3.6.1.4.1.14519.5.2.1.6279.6001.313835996725364342034830119490.xml
1.3.6.1.4.1.14519.5.2.1.6279.6001.313835996725364342034830119490
[ 182.92617682  204.25092849  181.90242616]
[  88.59375  225.       232.5    ]
[  72.421875  159.609375  178.75    ]
/home/rrg0013@auburn.edu/xmlb/xml/
/home/rrg0013@auburn.edu/xmlb/xml//1.3.6.1.4.1.14519.5.2.1.6279.6001.317087518531899043292346860596.xml
1.3.6.1.4.1.14519.5.2.1.6279.6001.317087518531899043292346860596
[ 196.85963284  160.76969884  196.84117162]
[ 243.125  111.875  209.375]
/home/rrg0013@auburn.edu/xmlb/xml/
/home/rrg0

/home/rrg0013@auburn.edu/xmlb/xml/
/home/rrg0013@auburn.edu/xmlb/xml//1.3.6.1.4.1.14519.5.2.1.6279.6001.826812708000318290301835871780.xml
1.3.6.1.4.1.14519.5.2.1.6279.6001.826812708000318290301835871780
[ 121.82572236  149.76836674  139.19577474]
[ 112.94285471  151.64179092  131.67715342]
/home/rrg0013@auburn.edu/xmlb/xml/
/home/rrg0013@auburn.edu/xmlb/xml//1.3.6.1.4.1.14519.5.2.1.6279.6001.832260670372728970918746541371.xml
1.3.6.1.4.1.14519.5.2.1.6279.6001.832260670372728970918746541371
[ 154.09963806  249.35140493  223.98986486]
[ 183.90446762  217.20076349  225.53372297]
[  67.2655859   159.14053249  142.5       ]
/home/rrg0013@auburn.edu/xmlb/xml/
/home/rrg0013@auburn.edu/xmlb/xml//1.3.6.1.4.1.14519.5.2.1.6279.6001.868211851413924881662621747734.xml
1.3.6.1.4.1.14519.5.2.1.6279.6001.868211851413924881662621747734
[ 250.09488407  269.8186744   130.42137097]
[ 253.72676768  240.38551768  122.88323232]
[ 253.17889475  250.91435399  112.8577895 ]
[ 199.375   278.4375  206.25  ]
[ 19