In [None]:
import cv2
import math
import glob
import ntpath
import numpy
import pandas
import SimpleITK

In [None]:
# Run the false-positive label extraction for every scan in LUNA16 subsets 0-9.
# NOTE(review): in this export the call appears before the cell that defines
# get_fp_patients(); the defining cell has to be executed first.
get_fp_patients()

In [None]:
def get_fp_patients():
    """Run false-positive label extraction for every scan in subsets 0-9.

    Every ``.mhd`` file found under ``subset<N>/`` of the raw LUNA folder is
    passed to ``get_fp_patient``. A running counter is printed before each
    scan is processed.
    """
    raw_root = "G:/LungCancerPredict/original/luna_raw/"

    for subset_index in range(10):
        subset_dir = raw_root + "subset" + str(subset_index) + "/"
        mhd_paths = glob.glob(subset_dir + "*.mhd")

        # The printed counter restarts at 1 for each subset.
        for position, mhd_path in enumerate(mhd_paths, start=1):
            print("Counter:", position)
            get_fp_patient(mhd_path)

def get_fp_patient(src_file):
    """Write the false-positive (negative) candidate labels for one scan.

    Reads the scan's candidates with ``class == 0`` from ``candidates_V2.csv``,
    converts their world coordinates into the frame of the extracted slice
    images, drops every candidate that lies too close to a true-positive
    nodule listed in ``<patient_id>_tp_nodule.csv`` and saves the remaining
    ones to ``<patient_id>_fp_nodule.csv`` in the same labels folder.

    Args:
        src_file: Path to the scan's ``.mhd`` file. The file name without the
            ``.mhd`` extension is used as the patient / series id.
    """
    patient_id = ntpath.basename(src_file).replace(".mhd", "")

    dst_path = "G:/LungCancerPredict/extracted/luna16_extracted_images/_labels/"
    df_tp_labels = pandas.read_csv(dst_path + patient_id + "_tp_nodule.csv")

    # Load all candidates and keep this patient's negatives (class == 0).
    df_fp_labels = pandas.read_csv("G:/LungCancerPredict/resources/luna16_annotations/candidates_V2.csv")
    df_fp_labels = df_fp_labels[df_fp_labels["seriesuid"] == patient_id]
    df_fp_labels = df_fp_labels[df_fp_labels["class"] == 0]
    print("Nodule Number: ", len(df_fp_labels))

    # Read the raw scan only for its geometry (origin and axis directions).
    itk_img = SimpleITK.ReadImage(src_file)
    origin = numpy.array(itk_img.GetOrigin())
    direction = numpy.array(itk_img.GetDirection())

    # When an axis is stored mirrored, negate both the origin and (below) the
    # candidate coordinate on that axis so they stay in the same frame.
    flip_direction_x = round(direction[0]) == -1
    flip_direction_y = round(direction[4]) == -1
    if flip_direction_x:
        origin[0] = -origin[0]
    if flip_direction_y:
        origin[1] = -origin[1]

    # Load the extracted slice images, stacked as (z, y, x).
    # NOTE(review): the 1.00 divisors below assume these images were
    # resampled to 1 mm per voxel -- confirm against the extraction step.
    imgs = load_patient_images(patient_id)
    print("patient_imgs_shape：", imgs.shape)

    size_x = imgs.shape[2]
    size_y = imgs.shape[1]
    size_z = imgs.shape[0]
    volume_size_xyz = numpy.array([size_x, size_y, size_z])

    # Every negative candidate gets the same default diameter of 6 mm.
    diam_mm = 6
    diameter_pixels = diam_mm / 1.00
    diameter_percent = diameter_pixels / float(size_y)

    # True-positive nodules are stored as fractions of the volume size;
    # convert them to image coordinates once instead of once per candidate.
    tp_nodules = [
        (
            tp_row["coord_x"] * size_x,
            tp_row["coord_y"] * size_y,
            tp_row["coord_z"] * size_z,
            tp_row["diameter"] * size_x,
        )
        for _, tp_row in df_tp_labels.iterrows()
    ]

    # Extra clearance added to a true nodule's diameter when rejecting
    # nearby candidates.
    exclusion_margin = 48

    # False-positive labels kept for this patient.
    labels = []

    for _, candidate in df_fp_labels.iterrows():
        node_x = candidate["coordX"]
        node_y = candidate["coordY"]
        node_z = candidate["coordZ"]
        node_x *= -1 if flip_direction_x else 1
        node_y *= -1 if flip_direction_y else 1

        # Position in the extracted images, and the same position expressed
        # as a fraction of the volume size.
        center_float = numpy.array([node_x, node_y, node_z])
        center_float_rescaled = (center_float - origin) / 1.00
        center_float_percent = center_float_rescaled / volume_size_xyz

        coord_x = center_float_rescaled[0]
        coord_y = center_float_rescaled[1]
        coord_z = center_float_rescaled[2]

        # Skip candidates that fall inside the exclusion zone of any
        # true-positive nodule.
        too_close = False
        for pos_coord_x, pos_coord_y, pos_coord_z, diameter in tp_nodules:
            dist = math.sqrt(
                math.pow(pos_coord_x - coord_x, 2)
                + math.pow(pos_coord_y - coord_y, 2)
                + math.pow(pos_coord_z - coord_z, 2)
            )
            if dist < (diameter + exclusion_margin):
                too_close = True
                break
        if too_close:
            continue

        # anno_index is the running index among the kept candidates.
        labels.append([
            len(labels),
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(diameter_percent, 4),
            0,
        ])

    df_labels = pandas.DataFrame(labels, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "malscore"])
    df_labels.to_csv(dst_path + patient_id + "_fp_nodule.csv", index=False)

def load_patient_images(patient_id):
    """Load a patient's extracted slice images as a single 3-D array.

    Reads every ``*_i.png`` file in the patient's extracted-images folder as
    a grayscale image, ordered by sorted file name, and stacks them along a
    new leading axis.

    Args:
        patient_id: Name of the patient's folder under the extracted-images
            directory.

    Returns:
        numpy.ndarray of shape ``(num_slices, height, width)``.

    Raises:
        ValueError: If the folder holds no ``*_i.png`` file, if a file cannot
            be decoded as an image, or if the slices differ in shape.
    """
    src_path = "G:/LungCancerPredict/extracted/luna16_extracted_images/" + patient_id + "/"
    src_files = sorted(glob.glob(src_path + "*_i.png"))
    if not src_files:
        raise ValueError("No '*_i.png' slice images found in " + src_path)

    imgs = []
    for src_file in src_files:
        img = cv2.imread(src_file, cv2.IMREAD_GRAYSCALE)
        # cv2.imread reports failure by returning None instead of raising.
        if img is None:
            raise ValueError("Could not read slice image: " + src_file)
        imgs.append(img)

    return numpy.stack(imgs)