In [1]:
%matplotlib notebook
%pwd

'/ocean/projects/asc170022p/mtragoza/lung-project'

In [2]:
import sys, os, re
import yaml
from pathlib import Path
import numpy as np
import xarray as xr
import hvplot.xarray

In [3]:
!tree --sort=mtime -L 1 data/4DCT

data/4DCT
├── Case1Pack
├── Case2Pack
├── Case3Pack
├── Case4Pack
├── Case5Pack
├── Case7Pack
├── Case8Pack
├── Case9Pack
├── Case10Pack
└── Case6Pack

10 directories, 0 files


In [4]:
!tree --sort=mtime data/4DCT/Case1Pack

data/4DCT/Case1Pack
├── Sampled4D
│   ├── case1_4D-75_T00.txt
│   ├── case1_4D-75_T50.txt
│   ├── case1_4D-75_T10.txt
│   ├── case1_4D-75_T20.txt
│   ├── case1_4D-75_T30.txt
│   └── case1_4D-75_T40.txt
├── ExtremePhases
│   ├── Case1_300_T00_xyz.txt
│   └── Case1_300_T50_xyz.txt
├── Images
│   ├── case1_T00_s.img
│   ├── case1_T10_s.img
│   ├── case1_T20_s.img
│   ├── case1_T30_s.img
│   ├── case1_T40_s.img
│   ├── case1_T50_s.img
│   ├── case1_T60_s.img
│   ├── case1_T70_s.img
│   ├── case1_T80_s.img
│   └── case1_T90_s.img
└── case1.yaml

3 directories, 19 files


In [5]:
def read_xyz_file(xyz_file, dtype=np.int64):
    '''
    Read landmark xyz coordinates from text file.

    Each non-blank line is split on whitespace (spaces or tabs) and parsed
    as one row of numbers. Blank lines, including a trailing one, are skipped.

    Args:
        xyz_file: Path of the landmark text file.
        dtype: dtype of the returned array. The default is a wide integer
            type because coordinates can exceed 255, which an 8-bit type
            cannot represent.
    Returns:
        Array of shape (n_landmarks, n_columns); shape (0, 3) for a file
        that contains no coordinates.
    Raises:
        ValueError: if a token is not a number, the rows have different
            lengths, or an integer dtype is requested but the file holds
            non-integral values.
    '''
    with open(xyz_file) as f:
        rows = [[float(tok) for tok in line.split()] for line in f if line.strip()]

    if not rows:
        return np.empty((0, 3), dtype=dtype)

    coords = np.array(rows, dtype=np.float64)

    # Parsing goes through float so that "12" and "12.0" are both accepted;
    # refuse to silently truncate real fractional coordinates.
    if np.issubdtype(np.dtype(dtype), np.integer) and not np.all(coords == np.round(coords)):
        raise ValueError(
            f'{xyz_file} contains non-integer coordinates; pass a float dtype'
        )
    return coords.astype(dtype)

# Smoke test: read the sampled landmarks of case 1 at phase T00 and show the
# resulting array shape (one row per landmark, one column per coordinate).
xyz = read_xyz_file('data/4DCT/Case1Pack/Sampled4D/case1_4D-75_T00.txt')
xyz.shape

(75, 3)

In [6]:
def load_img_file(img_file, shape, dtype, verbose=True):
    '''
    Read CT image from file in Analyze 7.5 format.

    Only the raw voxel payload (.img) is read; no header is parsed, so the
    caller must supply the array shape and the on-disk dtype. The file is
    interpreted with the first axis varying fastest (column-major order).

    https://stackoverflow.com/questions/27507928/loading-analyze-7-5-format-images-in-python

    Args:
        img_file: Path of the .img file.
        shape: Sequence of axis lengths, e.g. (256, 256, 94). Any number
            of dimensions is accepted.
        dtype: numpy dtype of the stored voxels.
        verbose: If True, print the path being loaded.
    Returns:
        A new C-contiguous array of the given shape and dtype.
    Raises:
        ValueError: if the number of values in the file does not match shape.
    '''
    if verbose:
        print(f'Loading {img_file}')

    shape = tuple(int(n) for n in shape)
    flat = np.fromfile(img_file, dtype)

    n_expected = int(np.prod(shape, dtype=np.int64))
    if flat.size != n_expected:
        raise ValueError(
            f'{img_file} holds {flat.size} values, '
            f'but shape {shape} requires {n_expected}'
        )

    # A Fortran-order reshape is the supported way to view the buffer with
    # the first axis fastest; it replaces assigning ndarray.strides by hand
    # and works for any rank. The copy returns an ordinary C-ordered array.
    return flat.reshape(shape, order='F').copy(order='C')

# Smoke test: load the T00 volume of case 1 as int16. The shape is passed by
# hand here and matches the 'shape' entry of case1.yaml.
image = load_img_file('data/4DCT/Case1Pack/Images/case1_T00_s.img', shape=(256, 256, 94), dtype=np.int16)
image.shape

Loading data/4DCT/Case1Pack/Images/case1_T00_s.img


(256, 256, 94)

In [7]:
def load_yaml_file(yaml_file, verbose=True):
    '''
    Parse a YAML file with yaml.safe_load and return the resulting object.

    Args:
        yaml_file: Path of the YAML file.
        verbose: If True, print the path being loaded (same switch as
            load_img_file).
    Returns:
        Whatever yaml.safe_load produces for the document, e.g. a dict.
    '''
    if verbose:
        print(f'Loading {yaml_file}')
    with open(yaml_file) as f:
        return yaml.safe_load(f)
    
# Smoke test: show the metadata stored for case 1 (keys 'shape' and 'resolution').
load_yaml_file('data/4DCT/Case1Pack/case1.yaml')

Loading data/4DCT/Case1Pack/case1.yaml


{'shape': [256, 256, 94], 'resolution': [0.97, 0.97, 2.5]}

In [144]:
class Lung4DCTPatient(object):
    '''
    One case of the 4DCT data set: file locations, metadata and image phases.

    Typical use: construct, call load_metadata(), then load_images(). The
    phases are then available as the xarray.DataArray `self.array` with
    dims (t, x, y, z).
    '''

    def __init__(self, data_root, case_id, phase):
        '''
        Args:
            data_root: Directory that contains the Case<N>Pack folders.
            case_id: Case number; converted with int().
            phase: Iterable of integer phase numbers, e.g. range(0, 100, 10).
                Stored as a list; used in image file names and as the
                t coordinate.
        '''
        self.data_root = Path(data_root)
        self.case_id = int(case_id)
        self.phase = list(phase)

    @property
    def patient_dir(self):
        '''Directory of this case: <data_root>/Case<case_id>Pack.'''
        return self.data_root / f'Case{self.case_id}Pack'

    @property
    def metadata_file(self):
        '''YAML metadata file of this case: <patient_dir>/case<case_id>.yaml.'''
        return self.patient_dir / f'case{self.case_id}.yaml'

    @property
    def image_dir(self):
        '''Directory holding the per-phase .img files.'''
        return self.patient_dir / 'Images'

    def load_metadata(self):
        '''Read 'shape' and 'resolution' from the metadata file into attributes.'''
        metadata = load_yaml_file(self.metadata_file)
        self.shape = metadata['shape']
        self.resolution = metadata['resolution']

    def load_images(self):
        '''
        Load every phase in self.phase and store them as self.array.

        Metadata is loaded first if load_metadata() has not been called yet.
        The z axis of the stacked volumes is reversed, and the x/y/z
        coordinates are voxel index times the corresponding resolution.

        Raises:
            FileNotFoundError: if no image file matches a requested phase.
        '''
        if not (hasattr(self, 'shape') and hasattr(self, 'resolution')):
            self.load_metadata()

        images = []
        for phase in self.phase:
            pattern = f'case{self.case_id}_T{phase:02d}*.img'
            # Sorting makes the choice deterministic should several files
            # match; exactly one match is the expected situation.
            matches = sorted(self.image_dir.glob(pattern))
            if not matches:
                raise FileNotFoundError(
                    f'No image matching {pattern} in {self.image_dir}'
                )
            image = load_img_file(matches[0], self.shape, dtype=np.int16)
            images.append(image)

        self.array = xr.DataArray(
            data=np.stack(images)[...,::-1], # flip z orientation
            dims=['t', 'x', 'y', 'z'],
            coords={
                't': self.phase,
                'x': np.arange(self.shape[0]) * self.resolution[0],
                'y': np.arange(self.shape[1]) * self.resolution[1],
                'z': np.arange(self.shape[2]) * self.resolution[2]
            },
            name=f'case{self.case_id}'
        )

    def copy(self):
        '''
        Return a new patient object referring to the same data.

        The copy is shallow: shape, resolution and array are shared with the
        original, not duplicated. Attributes that have not been loaded yet
        are simply absent on the copy as well.
        '''
        copy = Lung4DCTPatient(self.data_root, self.case_id, self.phase)
        for attr in ('shape', 'resolution', 'array'):
            if hasattr(self, attr):
                setattr(copy, attr, getattr(self, attr))
        return copy

    def describe(self):
        '''
        Summary statistics of all voxel values, one row for the array.

        The whole array is converted to a DataFrame first, so this needs
        memory proportional to the number of voxels.
        '''
        return self.array.to_dataframe().describe().T

    def select(self, *args, **kwargs):
        '''
        Return a copy whose array is self.array.sel(...) of the arguments.

        Lookup uses method='nearest' unless the caller passes another
        method. Coordinates are in the units of self.array's coords (index
        times resolution for x/y/z), not voxel indices. Only `array` is
        replaced on the copy; `shape` and `phase` still describe the full
        data.
        '''
        kwargs.setdefault('method', 'nearest')
        selection = self.copy()
        selection.array = self.array.sel(*args, **kwargs)
        return selection

    def view(self, *args, **kwargs):
        '''
        Plot self.array with hvplot, forwarding all arguments.

        When both x and y plot axes are given, image defaults are supplied
        (grey colormap, colour limits from 0 to median + 1.5 * IQR, fixed
        frame width, unit data aspect); explicit keyword arguments override
        these defaults.
        '''
        if ('x' in kwargs and 'y' in kwargs): # view image
            # One quantile pass over the data instead of three, converted to
            # plain floats so the colour limits are ordinary numbers.
            q25, median, q75 = self.array.quantile([0.25, 0.5, 0.75]).values.tolist()
            IQR = q75 - q25
            image_kws = {
                'cmap': 'greys_r',
                'clim': (0, median + 1.5 * IQR),
                'frame_width': 500,
                'data_aspect': 1
            }
            image_kws.update(**kwargs)
            kwargs = image_kws

        return self.array.hvplot(*args, **kwargs)

# Build the case-1 patient for phases 0, 10, ..., 90 and load everything.
patient = Lung4DCTPatient(data_root='data/4DCT', case_id=1, phase=range(0, 100, 10))
patient.load_metadata()  # sets shape and resolution, which load_images() reads
patient.load_images()    # one int16 volume per phase, stacked into patient.array
patient.array

Loading data/4DCT/Case1Pack/case1.yaml
Loading data/4DCT/Case1Pack/Images/case1_T00_s.img
Loading data/4DCT/Case1Pack/Images/case1_T10_s.img
Loading data/4DCT/Case1Pack/Images/case1_T20_s.img
Loading data/4DCT/Case1Pack/Images/case1_T30_s.img
Loading data/4DCT/Case1Pack/Images/case1_T40_s.img
Loading data/4DCT/Case1Pack/Images/case1_T50_s.img
Loading data/4DCT/Case1Pack/Images/case1_T60_s.img
Loading data/4DCT/Case1Pack/Images/case1_T70_s.img
Loading data/4DCT/Case1Pack/Images/case1_T80_s.img
Loading data/4DCT/Case1Pack/Images/case1_T90_s.img


In [145]:
# Summary statistics of the voxel values over all phases.
patient.describe()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
case1,61603840.0,520.374532,477.174667,0.0,68.0,272.0,1036.0,14640.0


In [146]:
# Default hvplot rendering of the array; no x/y axes are given, so view()
# adds none of its image options.
patient.view()

In [147]:
# One plot per phase t with fixed axis limits and 20 bin edges from 0 to 15000.
# NOTE(review): presumably hvplot renders a histogram of voxel values here — confirm.
patient.view(groupby=['t'], xlim=(-1000, 15000), ylim=(0, 4e6), bins=np.linspace(0, 15000, 20))

  return pd.unique(values)


In [149]:
# x-z image with widgets over phase t and position y; giving both x and y
# plot axes makes view() apply its image defaults.
patient.view(groupby=['t', 'y'], x='x', y='z')

  return pd.unique(values)


In [150]:
# x-z image at the y coordinate nearest to 120, animated over phase t.
# The 120 is in coordinate units (voxel index * resolution), not a voxel index.
patient.select(y=120).view(groupby='t', x='x', y='z', widget_type='scrubber', widget_location='bottom')

  return pd.unique(values)
