In [None]:
import os
import cv2
import glob
import ntpath
import numpy
import pandas
import SimpleITK

In [None]:
# Entry point of the notebook: run the nodule-label extraction over all subsets.
# NOTE(review): get_tp_patients is defined in a later cell of this export —
# presumably that cell has to be executed before this one; confirm cell order.
get_tp_patients()

In [None]:
def get_tp_patients():
    """Run ``get_tp_patient`` on every ``.mhd`` file of subsets 0 through 9.

    Each subset directory ``subset<N>/`` under the raw LUNA root is globbed
    for ``*.mhd`` files, and a 1-based progress counter is printed before
    each file is processed.
    """
    raw_root = "G:/LungCancerPredict/original/luna_raw/"

    for subset_index in range(10):
        subset_dir = raw_root + "subset" + str(subset_index) + "/"
        mhd_files = glob.glob(subset_dir + "*.mhd")

        # The progress counter restarts at 1 for every subset.
        for position, mhd_file in enumerate(mhd_files, start=1):
            print("Counter:", position)
            get_tp_patient(mhd_file)

def get_tp_patient(src_file):
    """Write the annotated nodules of one scan to ``<patient_id>_tp_nodule.csv``.

    The rows of ``annotations.csv`` whose ``seriesuid`` equals the scan's file
    name (without ``.mhd``) are converted from annotation coordinates into
    fractions of the image volume returned by ``load_patient_images`` and
    saved in the ``_labels`` directory. A scan without annotations still
    produces a CSV containing only the header row.

    Args:
        src_file: Path of the scan's ``.mhd`` file.

    Raises:
        AssertionError: If the entries of the direction matrix do not sum to
            approximately 3 once negative x/y axes have been normalized.
    """
    patient_id = ntpath.basename(src_file).replace(".mhd", "")

    df_annotations = pandas.read_csv("G:/LungCancerPredict/resources/luna16_annotations/annotations.csv")
    print("Search From:", df_annotations.shape)

    dst_path = "G:/LungCancerPredict/extracted/luna16_extracted_images/_labels/"
    os.makedirs(dst_path, exist_ok=True)

    # Only the header geometry is needed here, so the voxel data is not
    # converted to a numpy array (that copy was never used).
    itk_img = SimpleITK.ReadImage(src_file)
    origin = numpy.array(itk_img.GetOrigin())
    direction = numpy.array(itk_img.GetDirection())

    # Annotations belonging to this scan.
    df_nodules = df_annotations[df_annotations["seriesuid"] == patient_id]
    print("Nodule Number: ", len(df_nodules))

    # Normalize a negative x or y axis: direction[0] and direction[4] are the
    # x and y diagonal entries of the flattened direction matrix. When one is
    # -1 the matching origin component is negated and the nodule coordinate
    # is negated below, so everything is expressed along a positive axis.
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        origin[0], direction[0], flip_direction_x = -origin[0], 1, True
    if round(direction[4]) == -1:
        origin[1], direction[4], flip_direction_y = -origin[1], 1, True
    assert abs(sum(direction) - 3) < 0.01, "unexpected image direction for " + patient_id

    # Previously extracted slice images of this patient, stacked as (z, y, x).
    imgs = load_patient_images(patient_id)
    print("patient_imgs_shape：", imgs.shape)

    # Loop-invariant geometry of the extracted volume.
    volume_shape_xyz = imgs.swapaxes(0, 2).shape
    reference_extent = float(imgs.shape[1])
    # NOTE(review): presumably the extracted images were resampled to
    # 1.00 mm per voxel, which is why mm values are divided by 1.00 — confirm.
    voxel_mm = 1.00

    labels = []
    for anno_index, (_, annotation) in enumerate(df_nodules.iterrows()):
        node_x = annotation["coordX"]
        node_y = annotation["coordY"]
        node_z = annotation["coordZ"]
        diam_mm = annotation["diameter_mm"]
        if flip_direction_x:
            node_x = -node_x
        if flip_direction_y:
            node_y = -node_y

        # Offset from the volume origin, then expressed as a fraction of the
        # extracted volume's size along each axis.
        center_float = numpy.array([node_x, node_y, node_z])
        center_float_rescaled = (center_float - origin) / voxel_mm
        center_float_percent = center_float_rescaled / volume_shape_xyz
        diameter_pixels = diam_mm / voxel_mm
        diameter_percent = diameter_pixels / reference_extent
        print("(x,y,z，diam): ", (center_float_percent[0], center_float_percent[1], center_float_percent[2], diameter_percent))

        labels.append([
            anno_index,
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(diameter_percent, 4),
            int(diameter_pixels),
        ])

    # The last column is named "malscore" but holds the truncated integer
    # diameter; the name is kept so the CSV layout stays unchanged.
    df_out = pandas.DataFrame(labels, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "malscore"])
    df_out.to_csv(dst_path + patient_id + "_tp_nodule.csv", index=False)

def load_patient_images(patient_id):
    """Load a patient's extracted ``*_i.png`` slices as one stacked array.

    Args:
        patient_id: Name of the patient's sub-directory under the
            extracted-images root.

    Returns:
        A numpy array of shape ``(number_of_slices, height, width)`` holding
        the grayscale images in sorted file-name order.

    Raises:
        ValueError: If no ``*_i.png`` file exists for the patient.
        OSError: If OpenCV cannot read one of the files.
    """
    src_path = "G:/LungCancerPredict/extracted/luna16_extracted_images/" + patient_id + "/"
    src_files = sorted(glob.glob(src_path + "*_i.png"))

    # Fail with the offending path instead of numpy's generic
    # "need at least one array" error.
    if not src_files:
        raise ValueError("No '*_i.png' images found in " + src_path)

    imgs = []
    for src_file in src_files:
        img = cv2.imread(src_file, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals an unreadable file by returning None.
        if img is None:
            raise OSError("Could not read image " + src_file)
        imgs.append(img)

    # Stack the 2-D slices along a new leading axis.
    return numpy.stack(imgs)