# [PTM] Section 5

## 10. 여러 데이터 소스를 통합 데이터셋으로 합치기
1. 원본 데이터 읽기
2. 전처리

원본 CT 스캔 데이터에 달아놓은 어노테이션 목록으로 훈련 샘플 만들기

### 10.1 원본 CT 데이터 파일
- `.mhd`: 메타데이터 헤더 정보가 포함
- `.raw`: 3차원 배열을 만들 원본 데이터 바이트
- 각 파일 이름은 `시리즈 UID`라고 불리는 CT 스캔 고유 식별자로 시작
  - UID 1.2.3 = 1.2.3.mhd + 1.2.3.raw
- 데이터를 제한하거나 잘라서 모델에 노이즈가 끼지 않게 하는 것도 중요

### 10.2 LUNA 어노테이션 데이터 파싱
- LUNA에서 제공하는 `csv` 파일을 먼저 파싱해, 각 CT 스캔 중 관심 있는 부분을 파악할 필요가 있음

In [1]:
import os
import pandas as pd

In [2]:
# NOTE(review): machine-specific absolute path; the cells below join it with
# "candidates.csv" and "annotations.csv". Adjust it for your own environment.
root_path = r"C:\Users\spec3\OneDrive\바탕 화면\Dev\RhythmStudy\1. PyTorch\data\part2\luna"

In [3]:
# The book inspects this file with bash; pandas is used here instead.
candidates = pd.read_csv(os.path.join(root_path, "candidates.csv"))

candidates.shape

(551065, 5)

In [4]:
candidates.head(3)

Unnamed: 0,seriesuid,coordX,coordY,coordZ,class
0,1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222...,-56.08,-67.85,-311.92,0
1,1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222...,53.21,-244.41,-245.17,0
2,1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222...,103.66,-121.8,-286.62,0


In [5]:
# Class distribution
# 0: not a nodule / 1: nodule
candidates["class"].value_counts()

0    549714
1      1351
Name: class, dtype: int64

In [6]:
# Information about the candidates that were flagged as nodules
annotations = pd.read_csv(os.path.join(root_path, "annotations.csv"))

annotations.shape

(1186, 5)

In [7]:
annotations.head(3)

Unnamed: 0,seriesuid,coordX,coordY,coordZ,diameter_mm
0,1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222...,-128.699421,-175.319272,-298.387506,5.651471
1,1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222...,103.783651,-211.925149,-227.12125,4.224708
2,1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793...,69.639017,-140.944586,876.374496,5.786348


#### 10.2.1 훈련셋과 검증셋
- 모든 표준 지도 학습(supervised learning) 작업은 데이터를 훈련셋(training set)과 검증셋(validation set)으로 나눔
- 크기 순으로 정렬한 후, 매 N번째 샘플을 검증셋으로 구성

#### 10.2.2 어노테이션 데이터와 후보 데이터 합치기


In [9]:
import copy
import csv
import functools
import glob
import os

import collections
from collections import namedtuple

import SimpleITK as sitk
import numpy as np

import torch
import torch.cuda
from torch.utils.data import Dataset

from util.disk import getCache
from util.util import XyzTuple, xyz2irc
from util.logconf import logging

In [10]:
# Lightweight record describing one candidate (our own interface to the
# expert annotations).
# Fields, in order: nodule flag, diameter (mm), series UID, center (x, y, z).
CandidateInfoTuple = namedtuple(
    "CandidateInfoTuple",
    "isNodule_bool, diameter_mm, series_uid, center_xyz"
)

In [11]:
# Candidate information
@functools.lru_cache(1)  # standard-library in-memory cache
def getCandidateInfoList(requireOnDisk_bool=True):
    """Build the list of all candidates, enriched with annotated diameters.

    Parsing the CSV files takes a while, so the result is cached in memory;
    fast in-memory / on-disk caching of the data pipeline translates directly
    into faster training.  Because of the cache every caller receives the SAME
    list object -- copy it before mutating.

    Args:
        requireOnDisk_bool: when true, candidates whose series UID has no
            matching ``.mhd`` file under ``~/Downloads/*/*/`` are skipped.

    Returns:
        A list of ``CandidateInfoTuple`` sorted in descending order
        (nodules first, then by decreasing diameter).
    """
    mhd_list = glob.glob(os.path.join(os.path.expanduser("~"), "Downloads/*/*/*.mhd"))
    # File name without the ".mhd" suffix is the series UID.
    presentOnDisk_set = {os.path.split(p)[-1][:-4] for p in mhd_list}

    # series_uid -> [(center_xyz, diameter_mm), ...] from annotations.csv
    diameter_dict = {}
    with open(os.path.join(root_path, "annotations.csv"), "r") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        for row in reader:
            series_uid = row[0]
            annotationCenter_xyz = tuple(float(x) for x in row[1:4])
            annotationDiameter_mm = float(row[4])

            diameter_dict.setdefault(series_uid, []).append(
                (annotationCenter_xyz, annotationDiameter_mm)
            )

    # Build the full candidate list from candidates.csv
    candidateInfo_list = []
    with open(os.path.join(root_path, "candidates.csv"), "r") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        for row in reader:
            series_uid = row[0]

            if requireOnDisk_bool and series_uid not in presentOnDisk_set:
                continue

            isNodule_bool = bool(int(row[4]))
            candidateCenter_xyz = tuple(float(x) for x in row[1:4])

            candidateDiameter_mm = 0.0
            for annotationCenter_xyz, annotationDiameter_mm in diameter_dict.get(series_uid, []):
                for i in range(3):
                    delta_mm = abs(candidateCenter_xyz[i] - annotationCenter_xyz[i])
                    if delta_mm > annotationDiameter_mm / 4:  # bounding-box check
                        break
                else:
                    # for/else: reached only when NO axis exceeded the
                    # tolerance, i.e. the candidate matches this annotation.
                    candidateDiameter_mm = annotationDiameter_mm
                    break

            # Exactly one entry per candidate row, whether or not an
            # annotation matched (unmatched candidates keep diameter 0.0).
            candidateInfo_list.append(CandidateInfoTuple(
                isNodule_bool,
                candidateDiameter_mm,
                series_uid,
                candidateCenter_xyz,
            ))

    candidateInfo_list.sort(reverse=True)
    return candidateInfo_list

In [12]:
# Rough pandas equivalent of the annotation grouping: walk the DataFrame
# row by row and collect (center_xyz, diameter_mm) pairs per series UID.
diameter_dict_df = {}

for row in annotations.itertuples(index=False, name=None):
    series_uid = row[0]
    annotationCenter_xyz = tuple(float(value) for value in row[1:4])
    annotationDiameter_mm = float(row[-1])

    annotation_pair = (annotationCenter_xyz, annotationDiameter_mm)
    diameter_dict_df.setdefault(series_uid, []).append(annotation_pair)

### 10.3 개별 CT 스캔 로딩
- 읽어온 CT 데이터를 얻어와 파이썬 객체로 변환해서 3차원 결절 밀도 데이터로 사용할 수 있도록 만드는 작업
- 결절 어노테이션 정보는 원본 데이터에서 얻어내고자 하는 영역에 대한 맵

In [13]:
class Ct:
    """First draft of the CT loader: reads one series into a float32 array."""

    def __init__(self, series_uid):
        # Look for "<series_uid>.mhd" two directory levels below ~/Downloads.
        home_dir = os.path.expanduser("~")
        header_matches = glob.glob(os.path.join(home_dir, f"Downloads/*/*/{series_uid}.mhd"))
        mhd_path = header_matches[0]  # IndexError if nothing matched

        image = sitk.ReadImage(mhd_path)
        # Nothing is stored on the instance yet in this draft.
        ct_a = np.array(sitk.GetArrayFromImage(image), dtype=np.float32)

#### 10.3.1 하운스필드 단위

In [14]:
class Ct:
    """One CT series loaded from disk, with HU values clipped to [-1000, 1000]."""

    def __init__(self, series_uid):
        """Load ``<series_uid>.mhd`` found two levels below ``~/Downloads``.

        Raises:
            IndexError: if no matching header file is found.
        """
        mhd_path = glob.glob(os.path.join(os.path.expanduser("~"), f"Downloads/*/*/{series_uid}.mhd"))[0]
        ct_mhd = sitk.ReadImage(mhd_path)
        ct_a = np.array(sitk.GetArrayFromImage(ct_mhd), dtype=np.float32)
        # Clamp HU values in place: keep only the range of interest and
        # discard everything outside it.
        ct_a.clip(-1000, 1000, ct_a)

        self.series_uid = series_uid
        # Misspelled alias kept only so existing references keep working.
        self.serires_uid = series_uid
        self.hu_a = ct_a

### 10.4 환자 좌표계를 사용해 결절 위치 정하기
- 통상적으로 모델은 고정된 크기의 입력을 필요로 함 (입력 뉴런 수가 고정되어 있기 때문)

#### 10.4.1 환자 좌표계
- 밀리미터 기반 `(X, Y, Z)`를 복셀 주소 기반 `(I, R, C)`로 변환
- `X`는 환자의 왼쪽, `Y`는 뒤쪽(후면), `Z`는 머리(상부)
  - 왼쪽-후면-상부(LPS, left-posterior-superior)
- 해부학적으로 관심있는 위치를 지정하기 위해 사용
- CT 배열과 환자 좌표계 사이의 관계를 정의하는 메타데이터는 파일 헤더에 저장

#### 10.4.2. CT 스캔 형태와 복셀 크기
- 메르카토르(Mercator)식 세계 지도와 유사하게, 실제 비율을 보기 위해서는 비율 계수(scale factor)를 적용

#### 10.4.3 밀리미터를 복셀 주소로 변환하기
1. 좌표를 XYZ 체계로 만들기 위해 IRC에서 CRI로 뒤집는다
2. 인덱스를 복셀 크기로 확대축소
3. 파이썬의 `@`를 이용해 방향을 나타내는 행렬과 행렬곱을 수행
4. 기준으로부터 오프셋을 더함

In [15]:
# Voxel address (index, row, col) and patient coordinate (x, y, z) records.
IrcTuple = collections.namedtuple("IrcTuple", ["index", "row", "col"])
XyzTuple = collections.namedtuple("XyzTuple", ["x", "y", "z"])


def irc2xyz(coord_irc, origin_xyz, vxSize_xyz, direction_a):
    """Convert a voxel address (index, row, col) to coordinates (x, y, z).

    Args:
        coord_irc: voxel address in (index, row, col) order.
        origin_xyz: offset added after scaling and rotation.
        vxSize_xyz: per-axis voxel size used to scale the indices.
        direction_a: 3x3 NumPy direction matrix.

    Returns:
        An ``XyzTuple``.
    """
    cri_a = np.array(coord_irc)[::-1]  # to NumPy, reversed: IRC -> CRI
    origin_a = np.array(origin_xyz)
    vxSize_a = np.array(vxSize_xyz)
    coords_xyz = (direction_a @ (cri_a * vxSize_a)) + origin_a
    return XyzTuple(*coords_xyz)


def xyz2irc(coord_xyz, origin_xyz, vxSize_xyz, direction_a):
    """Convert coordinates (x, y, z) to the nearest voxel address.

    This is the inverse of ``irc2xyz``: subtract the origin, undo the
    direction matrix, divide by the voxel size, then round to integers.

    Returns:
        An ``IrcTuple`` of Python ints in (index, row, col) order.
    """
    origin_a = np.array(origin_xyz)
    vxSize_a = np.array(vxSize_xyz)
    coord_a = np.array(coord_xyz)
    # Left-multiply by the inverse so this exactly undoes
    # ``direction_a @ ...`` in irc2xyz, also for non-symmetric matrices.
    cri_a = (np.linalg.inv(direction_a) @ (coord_a - origin_a)) / vxSize_a
    cri_a = np.round(cri_a)
    # CRI -> IRC: reverse the axis order while building the tuple.
    return IrcTuple(int(cri_a[2]), int(cri_a[1]), int(cri_a[0]))

In [16]:
class Ct:
    """One CT series: clipped HU volume plus its coordinate-system metadata."""

    def __init__(self, series_uid):
        """Load ``<series_uid>.mhd`` found two levels below ``~/Downloads``.

        Raises:
            IndexError: if no matching header file is found.
        """
        mhd_path = glob.glob(os.path.join(os.path.expanduser("~"), f"Downloads/*/*/{series_uid}.mhd"))[0]
        ct_mhd = sitk.ReadImage(mhd_path)
        ct_a = np.array(sitk.GetArrayFromImage(ct_mhd), dtype=np.float32)
        # Clamp HU values in place: keep only the range of interest and
        # discard everything outside it.
        ct_a.clip(-1000, 1000, ct_a)

        self.series_uid = series_uid
        # Misspelled alias kept only so existing references keep working.
        self.serires_uid = series_uid
        self.hu_a = ct_a

        # Header metadata needed to convert between (x, y, z) and voxel indices.
        self.origin_xyz = XyzTuple(*ct_mhd.GetOrigin())
        self.vxSize_xyz = XyzTuple(*ct_mhd.GetSpacing())
        self.direction_a = np.array(ct_mhd.GetDirection()).reshape(3, 3)

#### 10.4.4 CT 스캔에서 결절 추출하기
- 각 후보 영역을 추출해 모델이 한 번에 한 영역에 집중할 수 있도록 만듦

In [17]:
class Ct:
    """One CT series: clipped HU volume, metadata, and candidate cropping."""

    def __init__(self, series_uid):
        """Load ``<series_uid>.mhd`` found two levels below ``~/Downloads``.

        Raises:
            IndexError: if no matching header file is found.
        """
        mhd_path = glob.glob(os.path.join(os.path.expanduser("~"), f"Downloads/*/*/{series_uid}.mhd"))[0]
        ct_mhd = sitk.ReadImage(mhd_path)
        ct_a = np.array(sitk.GetArrayFromImage(ct_mhd), dtype=np.float32)
        # Clamp HU values in place: keep only the range of interest and
        # discard everything outside it.
        ct_a.clip(-1000, 1000, ct_a)

        self.series_uid = series_uid
        # Misspelled alias kept only so existing references keep working.
        self.serires_uid = series_uid
        self.hu_a = ct_a

        # Header metadata needed to convert between (x, y, z) and voxel indices.
        self.origin_xyz = XyzTuple(*ct_mhd.GetOrigin())
        self.vxSize_xyz = XyzTuple(*ct_mhd.GetSpacing())
        self.direction_a = np.array(ct_mhd.GetDirection()).reshape(3, 3)

    def getRawCandidate(self, center_xyz, width_irc):
        """Crop a box of ``width_irc`` voxels around ``center_xyz``.

        The crop window is shifted back inside the volume when it would
        extend past an edge, so a negative start index can never wrap
        around to the far side of the array.

        Returns:
            ``(ct_chunk, center_irc)``: the slice of ``self.hu_a`` and the
            voxel address of the requested center.
        """
        center_irc = xyz2irc(
            center_xyz,
            self.origin_xyz,
            self.vxSize_xyz,
            self.direction_a
        )

        slice_list = []
        for axis, center_val in enumerate(center_irc):
            axis_len = self.hu_a.shape[axis]
            start_ndx = int(round(center_val - width_irc[axis] / 2))
            end_ndx = int(start_ndx + width_irc[axis])

            if start_ndx < 0:
                # Window hangs off the low edge: pin it to index 0.
                start_ndx = 0
                end_ndx = int(width_irc[axis])

            if end_ndx > axis_len:
                # Window hangs off the high edge: pin it to the last voxel.
                end_ndx = axis_len
                start_ndx = max(0, int(axis_len - width_irc[axis]))

            slice_list.append(slice(start_ndx, end_ndx))

        ct_chunk = self.hu_a[tuple(slice_list)]

        return ct_chunk, center_irc

### 10.5 간단한 데이터셋 구현


In [22]:
class LunaDataset(Dataset):
    """Dataset yielding one cropped candidate volume per item.

    The candidate list comes from ``getCandidateInfoList()`` and can be
    restricted to one series and/or split into training and validation
    parts by taking (or dropping) every ``val_stride``-th entry.
    """

    def __init__(self,
                 val_stride=0,
                 isValSet_bool=None,
                 series_uid=None,
            ):
        # Work on a shallow copy so the list returned by the cached
        # function is never modified.
        info_list = copy.copy(getCandidateInfoList())

        if series_uid:
            info_list = [info for info in info_list if info.series_uid == series_uid]

        if isValSet_bool:
            # Validation split: keep every val_stride-th entry.
            assert val_stride > 0, val_stride
            info_list = info_list[::val_stride]
            assert info_list
        elif val_stride > 0:
            # Training split: drop every val_stride-th entry.
            info_list = [
                info for position, info in enumerate(info_list)
                if position % val_stride != 0
            ]
            assert info_list

        self.candidateInfo_list = info_list

        split_name = "validation" if isValSet_bool else "training"
        log.info("{!r}: {} {} samples".format(self, len(info_list), split_name))

    def __len__(self):
        return len(self.candidateInfo_list)

    def __getitem__(self, ndx):
        """Return ``(candidate_t, pos_t, series_uid, center_irc_t)`` for item ``ndx``."""
        info = self.candidateInfo_list[ndx]

        chunk_a, center_irc = getCtRawCandidate(
            info.series_uid,
            info.center_xyz,
            (32, 48, 48),
        )

        # float32 tensor with an extra leading dimension added by unsqueeze(0).
        candidate_t = torch.from_numpy(chunk_a).to(torch.float32).unsqueeze(0)

        # Two-element label: [not nodule, nodule].
        is_nodule = info.isNodule_bool
        pos_t = torch.tensor([not is_nodule, is_nodule], dtype=torch.long)

        return candidate_t, pos_t, info.series_uid, torch.tensor(center_irc)

#### 10.5.1 getCtRawCandidate 함수로 후보 배열 캐싱하기

In [21]:
@functools.lru_cache(maxsize=1, typed=True)
def getCt(series_uid):
    """Return the ``Ct`` for ``series_uid``; only the latest result is cached."""
    ct = Ct(series_uid)
    return ct

def getCtRawCandidate(series_uid, center_xyz, width_irc):
    """Fetch the (cached) CT for ``series_uid`` and crop one candidate from it.

    Returns:
        ``(ct_chunk, center_irc)`` as produced by ``Ct.getRawCandidate``.
    """
    chunk_and_center = getCt(series_uid).getRawCandidate(center_xyz, width_irc)
    ct_chunk, center_irc = chunk_and_center
    return ct_chunk, center_irc

---

### dsets.py

In [23]:
import copy
import csv
import functools
import glob
import os

from collections import namedtuple

import SimpleITK as sitk
import numpy as np

import torch
import torch.cuda
from torch.utils.data import Dataset

from util.disk import getCache
from util.util import XyzTuple, xyz2irc
from util.logconf import logging

# Module-level logger; the commented-out lines are alternative verbosity
# levels, DEBUG is the one in effect.
log = logging.getLogger(__name__)
# log.setLevel(logging.WARN)
# log.setLevel(logging.INFO)
log.setLevel(logging.DEBUG)

# Cache handle obtained from util.disk.getCache; its memoize() decorator is
# applied to getCtRawCandidate.  Presumably disk-backed -- confirm in util.disk.
raw_cache = getCache('part2ch10_raw')

# Record describing one candidate.  Fields, in order: nodule flag,
# diameter (mm), series UID, center (x, y, z).  Lists of these tuples are
# sorted field by field, so the field order determines the sort order.
CandidateInfoTuple = namedtuple(
    'CandidateInfoTuple',
    'isNodule_bool, diameter_mm, series_uid, center_xyz',
)

@functools.lru_cache(1)
def getCandidateInfoList(requireOnDisk_bool=True):
    """Return every candidate as a ``CandidateInfoTuple``, sorted descending.

    Each candidate from ``candidates.csv`` gets the diameter of the first
    annotation (from ``annotations.csv``) of the same series whose center is
    within a quarter of that annotation's diameter on every axis; otherwise
    its diameter stays 0.0.  The result is cached, so all callers share one
    list object.

    Args:
        requireOnDisk_bool: when true, skip candidates whose series has no
            ``.mhd`` file on disk.
    """
    # Series UIDs present on disk (file name minus the ".mhd" suffix).  This
    # lets us use the data even if not all subsets have been downloaded yet.
    presentOnDisk_set = {
        os.path.split(mhd_path)[-1][:-4]
        for mhd_path in glob.glob('data-unversioned/part2/luna/subset*/*.mhd')
    }

    # series_uid -> [(center_xyz, diameter_mm), ...]
    annotations_by_uid = {}
    with open('data/part2/luna/annotations.csv', "r") as f:
        annotation_rows = csv.reader(f)
        next(annotation_rows, None)  # skip the header row
        for row in annotation_rows:
            series_uid = row[0]
            center_xyz = tuple(float(value) for value in row[1:4])
            diameter_mm = float(row[4])
            annotations_by_uid.setdefault(series_uid, []).append((center_xyz, diameter_mm))

    candidateInfo_list = []
    with open('data/part2/luna/candidates.csv', "r") as f:
        candidate_rows = csv.reader(f)
        next(candidate_rows, None)  # skip the header row
        for row in candidate_rows:
            series_uid = row[0]

            if requireOnDisk_bool and series_uid not in presentOnDisk_set:
                continue

            isNodule_bool = bool(int(row[4]))
            candidateCenter_xyz = tuple(float(value) for value in row[1:4])

            candidateDiameter_mm = 0.0
            for annotationCenter_xyz, annotationDiameter_mm in annotations_by_uid.get(series_uid, []):
                # Bounding-box test: any axis further away than a quarter of
                # the diameter means this annotation does not match.
                too_far = any(
                    abs(candidate_c - annotation_c) > annotationDiameter_mm / 4
                    for candidate_c, annotation_c in zip(candidateCenter_xyz, annotationCenter_xyz)
                )
                if not too_far:
                    candidateDiameter_mm = annotationDiameter_mm
                    break

            candidateInfo_list.append(CandidateInfoTuple(
                isNodule_bool,
                candidateDiameter_mm,
                series_uid,
                candidateCenter_xyz,
            ))

    candidateInfo_list.sort(reverse=True)
    return candidateInfo_list

class Ct:
    """One CT series: clipped HU volume, coordinate metadata, and cropping."""

    def __init__(self, series_uid):
        """Load the ``.mhd`` file for ``series_uid`` from the LUNA subsets.

        Raises:
            IndexError: if no matching header file is found.
        """
        header_pattern = 'data-unversioned/part2/luna/subset*/{}.mhd'.format(series_uid)
        mhd_path = glob.glob(header_pattern)[0]

        ct_mhd = sitk.ReadImage(mhd_path)
        hu_a = np.array(sitk.GetArrayFromImage(ct_mhd), dtype=np.float32)

        # CT values are expressed on the Hounsfield scale
        # (https://en.wikipedia.org/wiki/Hounsfield_scale): roughly -1000 for
        # air (0 g/cc) and 0 for water (1 g/cc).  Clip in place: the lower
        # bound removes the negative values used to mark out-of-FOV voxels,
        # the upper bound removes odd hotspots and clamps bone.
        hu_a.clip(-1000, 1000, out=hu_a)

        self.series_uid = series_uid
        self.hu_a = hu_a

        # Header metadata needed to convert between (x, y, z) and voxel indices.
        self.origin_xyz = XyzTuple(*ct_mhd.GetOrigin())
        self.vxSize_xyz = XyzTuple(*ct_mhd.GetSpacing())
        self.direction_a = np.array(ct_mhd.GetDirection()).reshape(3, 3)

    def getRawCandidate(self, center_xyz, width_irc):
        """Crop a ``width_irc``-sized box around ``center_xyz``.

        The window is shifted back inside the volume when it would extend
        past either edge of an axis.

        Returns:
            ``(ct_chunk, center_irc)``: the slice of ``self.hu_a`` and the
            voxel address of the center.

        Raises:
            AssertionError: if the center lies outside the volume.
        """
        center_irc = xyz2irc(center_xyz, self.origin_xyz, self.vxSize_xyz, self.direction_a)
        volume_shape = self.hu_a.shape

        crop_slices = []
        for axis, center_val in enumerate(center_irc):
            axis_len = volume_shape[axis]
            axis_width = width_irc[axis]

            start_ndx = int(round(center_val - axis_width / 2))
            end_ndx = int(start_ndx + axis_width)

            assert 0 <= center_val < axis_len, repr([self.series_uid, center_xyz, self.origin_xyz, self.vxSize_xyz, center_irc, axis])

            if start_ndx < 0:
                # Window hangs off the low edge: pin it to index 0.
                start_ndx, end_ndx = 0, int(axis_width)

            if end_ndx > axis_len:
                # Window hangs off the high edge: pin it to the end.
                end_ndx = axis_len
                start_ndx = int(axis_len - axis_width)

            crop_slices.append(slice(start_ndx, end_ndx))

        return self.hu_a[tuple(crop_slices)], center_irc


@functools.lru_cache(maxsize=1, typed=True)
def getCt(series_uid):
    """Return the ``Ct`` for ``series_uid``; only the latest result is cached."""
    ct = Ct(series_uid)
    return ct

@raw_cache.memoize(typed=True)
def getCtRawCandidate(series_uid, center_xyz, width_irc):
    """Crop one candidate from the CT of ``series_uid``, memoized via ``raw_cache``.

    Returns:
        ``(ct_chunk, center_irc)`` as produced by ``Ct.getRawCandidate``.
    """
    chunk_and_center = getCt(series_uid).getRawCandidate(center_xyz, width_irc)
    ct_chunk, center_irc = chunk_and_center
    return ct_chunk, center_irc

class LunaDataset(Dataset):
    """Dataset yielding one cropped candidate volume per item.

    The candidate list comes from ``getCandidateInfoList()`` and can be
    restricted to one series and/or split into training and validation
    parts by taking (or dropping) every ``val_stride``-th entry.
    """

    def __init__(self,
                 val_stride=0,
                 isValSet_bool=None,
                 series_uid=None,
            ):
        # Work on a shallow copy so the list returned by the cached
        # function is never modified.
        info_list = copy.copy(getCandidateInfoList())

        if series_uid:
            info_list = [info for info in info_list if info.series_uid == series_uid]

        if isValSet_bool:
            # Validation split: keep every val_stride-th entry.
            assert val_stride > 0, val_stride
            info_list = info_list[::val_stride]
            assert info_list
        elif val_stride > 0:
            # Training split: drop every val_stride-th entry.
            info_list = [
                info for position, info in enumerate(info_list)
                if position % val_stride != 0
            ]
            assert info_list

        self.candidateInfo_list = info_list

        split_name = "validation" if isValSet_bool else "training"
        log.info("{!r}: {} {} samples".format(self, len(info_list), split_name))

    def __len__(self):
        return len(self.candidateInfo_list)

    def __getitem__(self, ndx):
        """Return ``(candidate_t, pos_t, series_uid, center_irc_t)`` for item ``ndx``."""
        info = self.candidateInfo_list[ndx]

        chunk_a, center_irc = getCtRawCandidate(
            info.series_uid,
            info.center_xyz,
            (32, 48, 48),
        )

        # float32 tensor with an extra leading dimension added by unsqueeze(0).
        candidate_t = torch.from_numpy(chunk_a).to(torch.float32).unsqueeze(0)

        # Two-element label: [not nodule, nodule].
        is_nodule = info.isNodule_bool
        pos_t = torch.tensor([not is_nodule, is_nodule], dtype=torch.long)

        return candidate_t, pos_t, info.series_uid, torch.tensor(center_irc)

### vis.py

In [27]:
import matplotlib
matplotlib.use('nbagg')

import numpy as np
import matplotlib.pyplot as plt

# (lower, upper) value range passed as `clim` to every imshow call in showCandidate.
clim=(-1000.0, 300)

def findPositiveSamples(start_ndx=0, limit=100):
    """Collect (and print) nodule candidates from a fresh ``LunaDataset``.

    Scanning stops as soon as ``limit`` positives have been gathered.

    Args:
        start_ndx: accepted but not used by this function.
        limit: number of collected positives at which scanning stops.

    Returns:
        The list of candidate tuples whose ``isNodule_bool`` is true.
    """
    ds = LunaDataset()

    positives = []
    for info in ds.candidateInfo_list:
        if info.isNodule_bool:
            # Printed index is the position the sample gets in the result.
            print(len(positives), info)
            positives.append(info)

        if not len(positives) < limit:
            break

    return positives

def showCandidate(series_uid, batch_ndx=None, **kwargs):
    """Plot one candidate of ``series_uid``: full-CT views, crop views, crop slices.

    Args:
        series_uid: series to display.
        batch_ndx: index into the series' candidate list; when ``None`` the
            first positive candidate is used, or index 0 if there is none.
        **kwargs: forwarded to ``LunaDataset``.
    """
    ds = LunaDataset(series_uid=series_uid, **kwargs)
    pos_list = [ndx for ndx, info in enumerate(ds.candidateInfo_list) if info.isNodule_bool]

    if batch_ndx is None:
        if pos_list:
            batch_ndx = pos_list[0]
        else:
            print("Warning: no positive samples found; using first negative sample.")
            batch_ndx = 0

    ct = Ct(series_uid)
    ct_t, pos_t, series_uid, center_irc = ds[batch_ndx]
    ct_a = ct_t[0].numpy()

    fig = plt.figure(figsize=(30, 50))

    # Crop slice indices shown in the lower three rows of the grid.
    group_list = [
        [9, 11, 13],
        [15, 16, 17],
        [19, 21, 23],
    ]
    grid_rows = len(group_list) + 2

    def draw_panel(position, title, image_a, flip_y):
        # One grid cell: title, enlarged tick labels, grayscale image.
        subplot = fig.add_subplot(grid_rows, 3, position)
        subplot.set_title(title, fontsize=30)
        for label in (subplot.get_xticklabels() + subplot.get_yticklabels()):
            label.set_fontsize(20)
        plt.imshow(image_a, clim=clim, cmap='gray')
        if flip_y:
            plt.gca().invert_yaxis()

    center_i = int(center_irc[0])
    center_r = int(center_irc[1])
    center_c = int(center_irc[2])

    panel_specs = [
        # Row 1: slices through the whole CT at the candidate center.
        ('index {}'.format(center_i), ct.hu_a[center_i], False),
        ('row {}'.format(center_r), ct.hu_a[:, center_r], True),
        ('col {}'.format(center_c), ct.hu_a[:, :, center_c], True),
        # Row 2: the middle slices of the cropped candidate.
        ('index {}'.format(center_i), ct_a[ct_a.shape[0]//2], False),
        ('row {}'.format(center_r), ct_a[:, ct_a.shape[1]//2], True),
        ('col {}'.format(center_c), ct_a[:, :, ct_a.shape[2]//2], True),
    ]
    for position, (title, image_a, flip_y) in enumerate(panel_specs, start=1):
        draw_panel(position, title, image_a, flip_y)

    # Rows 3-5: individual slices of the crop.
    for row, index_list in enumerate(group_list):
        for col, index in enumerate(index_list):
            draw_panel(row * 3 + col + 7, 'slice {}'.format(index), ct_a[index], False)

    print(series_uid, batch_ndx, bool(pos_t[0]), pos_list)


---
### Inline

In [25]:
# Full candidate list, without requiring the CT files to be on disk.
candidateInfo_list = getCandidateInfoList(requireOnDisk_bool=False)
# Keep entries whose first field (isNodule_bool) is true, then take their
# second field (diameter_mm).
positiveInfo_list = [x for x in candidateInfo_list if x[0]]
diameter_list = [x[1] for x in positiveInfo_list]

In [26]:
# Print every 100th diameter from diameter_list.
for i in range(0, len(diameter_list), 100):
    print("{:4} {:4.1f} mm".format(i, diameter_list[i]))

   0 32.3 mm
 100 17.7 mm
 200 13.0 mm
 300 10.0 mm
 400  8.2 mm
 500  7.0 mm
 600  6.3 mm
 700  5.7 mm
 800  5.1 mm
 900  4.7 mm
1000  4.0 mm
1100  0.0 mm
1200  0.0 mm
1300  0.0 mm


#### 10.5.4 데이터 렌더링

In [29]:
%matplotlib inline
positiveSample_list = findPositiveSamples()

2023-01-14 16:29:49,507 INFO     pid:36084 __main__:170:__init__ <__main__.LunaDataset object at 0x0000020868C23220>: 0 training samples


## 11. 종양 탐지를 위한 분류 모델 훈련 

### 11.1 기본 모델과 훈련 루프
- 모델을 초기화하고 데이터를 로딩

### 11.2 애플리케이션의 메인 진입점
이후 과정들은 제공되는 코드들 이용