In [1]:
import functools
import glob
import os
import csv
import SimpleITK as sitk
from collections import namedtuple
import numpy as np

### Create a list of tuples (with nodules' sizes): 
In order to have a good spread of nodules' sizes in training/validation sets, we create a list of tuples which combines two files:
- candidates.csv
- annotations.csv
The resulted list helps to extract interesting parts of raw data.

In [2]:
Candidate_info = namedtuple('Candidate_info',
                            ['isNodule', 
                             'diameter', 
                             'seriesid', 
                             'center_xyz'])

@functools.lru_cache(1)
def getCandidateInfo(requireOnDisk_bool=True):
    mhdlist = glob.glob('./luna/subset0/*.mhd')

    presentOnDisk_set = {os.path.split(p)[-1][:-4] for p in mhdlist}
#     print(list(presentOnDisk_set))
    # puting annotation info inside a dictionary
    diameter_dict = {}
    with open('./luna/annotations.csv','r') as f:
        for row in list(csv.reader(f))[1:]:
            series_id = row[0]
            Center_xyz = tuple([float(x) for x in row[1:-1]])
            Diameter = float(row[-1])
            diameter_dict.setdefault(series_id, []).append((Center_xyz, Diameter))
    
    # 
    Candidatelist = []
    with open('./luna/candidates.csv','r') as f:
        for row in list(csv.reader(f))[1:]:               
            series_id = row[0]

            if series_id not in presentOnDisk_set and requireOnDisk_bool:
                continue

            isNodule = bool(int(row[-1]))
            Center_xyz = tuple([float(x) for x in row[1:-1]])
            
            candDiameter = 0.0
            for annotation_tuple in diameter_dict.get(series_id,[]):
                annotation_center, diameter = annotation_tuple
                for i in range(3):
                    if abs(annotation_center[i] - Center_xyz[i])>diameter/4:
                        break
                else:
                    candDiameter = diameter
                    break
                    
            Candidatelist.append(Candidate_info(
                isNodule, 
                candDiameter,
                series_id,
                Center_xyz, 
            ))
    Candidatelist.sort(reverse=True)
    return Candidatelist

In [None]:
a = getCandidateInfo()

In [21]:
IrcTuple = namedtuple('IrcTuple', ['index', 'row', 'col'])
XyzTuple = namedtuple('XyzTuple', ['x', 'y', 'z'])
class CTscan():
    def __init__(self, series_id):
        self.series_id = series_id
        mhd_path = glob.glob(f'./luna/subset*/{series_id}.mhd')[0]
        
        ct_mhd = sitk.ReadImage(mhd_path) 
        
        ct = np.array(sitk.GetArrayFromImage(ct_mhd), dtype=np.float32)
        self.hu = np.clip(ct, -1000, 1000)
        
        self.origin_xyz = XyzTuple(*ct_mhd.GetSpacing())
        self.vxsize_xyz = XyzTuple(*ct_mhd.GetSpacing())
        self.direction = np.array(*ct_mhd.GetDirection()).reshape(3,3)
        
    def getRawCandidate(self, center_xyz, width_irc):
        

In [15]:
sitk.GetSpacing?

Object `sitk.GetSpacing` not found.


In [16]:
Dir = glob.glob('./luna/subset0/1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260.mhd')[0]
im = sitk.ReadImage(Dir)

In [17]:
im.GetSpacing()

(0.7617189884185791, 0.7617189884185791, 2.5)

In [26]:
tmp = im.GetOrigin()
print(tmp)
orig = IrcTuple(*tmp)

(-198.100006, -195.0, -335.209991)


In [27]:
orig

IrcTuple(index=-198.100006, row=-195.0, col=-335.209991)