## 肺癌のセグメンテーションと良悪性の判別モデル

１．CTデータをPyTorchで扱えるように読み込む

２．肺の腫瘍のみをセグメンテーション（タスクの焦点を絞るために）

３．関心ボクセルをグループ化して結節候補をまとめる

４．結節候補をConvolution（畳み込み）で分類する

５．結節ごとを患者ごとへの評価にする（今回は悪性度の最大値）

こぶの内悪性が結節
annotationsは結節フラグが立ったこぶの中心座標と大きさ（直径）
candidatesはすべてのこぶ情報

## annotationとcandidateで位置の統合

In [2]:
from collections import namedtuple
# Record describing one nodule candidate: nodule flag, diameter (mm),
# CT series UID, and the candidate centre as an (x, y, z) tuple.
CandidateInfoTuple = namedtuple(
    'CandidateInfoTuple',
    ['isNodule_bool', 'diameter_mm', 'series_uid', 'center_xyz'],
)

In [3]:
# Display the namedtuple class itself to confirm it was created.
CandidateInfoTuple

__main__.CandidateInfoTuple

In [73]:
#CTデータの内mhdファイルをリストにする
#メモリに残す関数:lru_cache

import functools
import glob
import os

# Alternative data location (kept for reference):
#mhd_list = glob.glob('data-unversioned/part2/luna/subset*/*.mhd')
# Collect every .mhd file found under the LUNA subset directories.
mhd_list = glob.glob('../deep-learning-with-pytorch-ja/data/part2/luna/subset*/*.mhd')

# Series UIDs available on disk: each file name with its last four
# characters (the ".mhd" suffix) removed.
presentOnDisk_set = set(
    os.path.basename(mhd_file)[:-4]
    for mhd_file in mhd_list
)

In [74]:
# Display the set of series UIDs that were found on disk.
presentOnDisk_set

{'1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.108197895896446896160048741492',
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524522225658609808059'}

In [47]:
import csv


# Map each series_uid to the list of its annotations, stored as
# (center_xyz, diameter_mm) pairs; one series can hold several entries.
diameter_dict = {}
# newline='' is what the csv module asks for when it is handed a file object.
with open('../deep-learning-with-pytorch-ja/data/part2/luna/annotations.csv', "r", newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row
    # Stream the rows instead of loading the whole file into a list first.
    for row in reader:
        if not row:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        series_uid = row[0]
        annotationCenter_xyz = tuple(float(x) for x in row[1:4])
        annotationDiameter_mm = float(row[4])

        diameter_dict.setdefault(series_uid, []).append(
            (annotationCenter_xyz, annotationDiameter_mm)
        )

In [16]:
#diameter_dict

#uid一つに複数のこぶがある場合もあり、こぶの中心座標と大きさがvalueに入る。

In [30]:
# Sample series UID used to try out dictionary lookups.
a = "1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016233746780170740405"

In [38]:
# dict.get returns the value stored under the key; when the key is
# missing it returns the second argument, which defaults to None.
b = diameter_dict.get(a, [])
print(b)

# Key that is not in the dictionary, with an explicit default.
s = diameter_dict.get("lll", [])
print(s)

# Same missing key, relying on the implicit default.
f = diameter_dict.get("lll")
print(f)


[((-24.0138242, 192.1024053, -391.0812764), 8.143261683), ((2.441546798, 172.4648812, -405.4937318), 18.54514997), ((90.93171321, 149.0272657, -426.5447146), 18.20857028), ((89.54076865, 196.4051593, -515.0733216), 16.38127631)]
[]
None


In [64]:
# Build one CandidateInfoTuple per row of candidates.csv. For each candidate,
# walk the annotations of the same series and compare the centres axis by
# axis; a matching annotation supplies the candidate's diameter.
candidateInfo_list = []
# newline='' is what the csv module asks for when it is handed a file object.
with open('../deep-learning-with-pytorch-ja/data/part2/luna/candidates.csv', "r", newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row
    # Stream the rows instead of loading the whole file into a list first.
    for row in reader:
        if not row:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        series_uid = row[0]
        # presentOnDisk_set holds the UIDs of the CT series found on disk;
        # candidates that belong to any other series are dropped entirely.
        if series_uid not in presentOnDisk_set:
            continue

        isNodule_bool = bool(int(row[4]))
        candidateCenter_xyz = tuple(float(x) for x in row[1:4])

        # Stays 0.0 when no annotation matches this candidate.
        candidateDiameter_mm = 0.0
        for annotationCenter_xyz, annotationDiameter_mm in diameter_dict.get(series_uid, []):
            # Tolerance per axis: a quarter of the diameter (half the radius).
            tolerance_mm = annotationDiameter_mm / 4
            too_far = any(
                abs(candidate_c - annotation_c) > tolerance_mm
                for candidate_c, annotation_c in zip(candidateCenter_xyz, annotationCenter_xyz)
            )
            if not too_far:
                # Centres agree on every axis: adopt the annotated diameter
                # and stop at the first matching annotation.
                candidateDiameter_mm = annotationDiameter_mm
                break

        candidateInfo_list.append(CandidateInfoTuple(
            isNodule_bool,
            candidateDiameter_mm,
            series_uid,
            candidateCenter_xyz,
        ))

In [65]:
# Preview the first five entries of candidateInfo_list.
candidateInfo_list[0:5]

[CandidateInfoTuple(isNodule_bool=False, diameter_mm=0.0, series_uid='1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260', center_xyz=(129.56815129, 45.3770967403, -277.835757804)),
 CandidateInfoTuple(isNodule_bool=False, diameter_mm=0.0, series_uid='1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260', center_xyz=(-97.26, 56.36, -201.93)),
 CandidateInfoTuple(isNodule_bool=False, diameter_mm=0.0, series_uid='1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260', center_xyz=(99.2304910833, -4.88216543725, -128.691304564)),
 CandidateInfoTuple(isNodule_bool=False, diameter_mm=0.0, series_uid='1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260', center_xyz=(-15.287525202, -59.1051002263, -251.303750179)),
 CandidateInfoTuple(isNodule_bool=False, diameter_mm=0.0, series_uid='1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260', center_xyz=(109.344666074, 1.89173714531, -237.284698998))]

In [66]:
# Sort in place, descending. Tuples compare field by field, so entries with
# isNodule_bool=True come first, and within those the larger diameter_mm.
candidateInfo_list.sort(reverse=True)

In [69]:
# Preview entries 1 through 4 after sorting (the slice starts at index 1,
# so the entry at index 0 is not shown).
candidateInfo_list[1:5]

[CandidateInfoTuple(isNodule_bool=True, diameter_mm=6.440878725, series_uid='1.3.6.1.4.1.14519.5.2.1.6279.6001.108197895896446896160048741492', center_xyz=(-100.46, 68.01, -230.55)),
 CandidateInfoTuple(isNodule_bool=True, diameter_mm=0.0, series_uid='1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524522225658609808059', center_xyz=(44.25, 52.17, -110.25)),
 CandidateInfoTuple(isNodule_bool=True, diameter_mm=0.0, series_uid='1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524522225658609808059', center_xyz=(36.54, 78.1, -122.92)),
 CandidateInfoTuple(isNodule_bool=False, diameter_mm=0.0, series_uid='1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524522225658609808059', center_xyz=(49.0692129504, 23.0584814498, -98.6156878657))]

## CTデータ取込み

In [71]:
import SimpleITK as sitk
import numpy as np

In [86]:
from util.util import XyzTuple, xyz2irc


# Locate the .mhd file of the series named by series_uid.
# The "{}" placeholder is required: without it str.format() ignores its
# argument and the glob just returns every .mhd file on disk, so the first
# one would be loaded no matter which series_uid was requested.
# NOTE: series_uid must name a series that is present on disk
# (for example candidateInfo_list[0].series_uid).
mhd_path_list = glob.glob(
    '../deep-learning-with-pytorch-ja/data/part2/luna/subset*/{}.mhd'.format(series_uid))
if not mhd_path_list:
    # Fail with a clear message instead of a bare IndexError on [0].
    raise FileNotFoundError(
        'no .mhd file found for series_uid {!r}'.format(series_uid))
mhd_path = mhd_path_list[0]

# Read the CT image and convert it to a float32 NumPy array.
ct_mhd = sitk.ReadImage(mhd_path)
ct_a = np.array(sitk.GetArrayFromImage(ct_mhd), dtype=np.float32)

# Clamp the values to [-1000, 1000] in place (the third argument is the
# output array, here ct_a itself).
ct_a.clip(-1000, 1000, ct_a)

# hu_a is another name for the same clamped array (no copy is made).
hu_a = ct_a

# Image geometry as reported by the image object.
origin_xyz = XyzTuple(*ct_mhd.GetOrigin())
vxSize_xyz = XyzTuple(*ct_mhd.GetSpacing())
direction_a = np.array(ct_mhd.GetDirection()).reshape(3, 3)
a