In [None]:
import os
import cv2
import glob
import numpy
import pandas

In [None]:
# NOTE(review): detect_nodule() and the helpers it calls (get_net,
# load_patient_images, load_patient_masks, normalize_and_expand, result_filter)
# are defined in cells further down this notebook. On a fresh kernel those
# cells must be executed first, otherwise this call raises NameError.
detect_nodule()

In [None]:
def _record_batch_predictions(out, coords, step, crop_size, volume_shape, scans_res, res_to_csv):
    """Store one batch of model outputs and collect the positive detections.

    Args:
        out: model output for the batch; row i belongs to coords[i] and only
            column 0 of each row is read.
        coords: list of (z, y, x) grid indices, one per predicted cube.
        step: stride (in voxels) between neighbouring cubes.
        crop_size: edge length (in voxels) of each cube.
        volume_shape: shape of the full patient volume, used to express the
            cube centre as a fraction of each axis.
        scans_res: 3-D array indexed by grid position; updated in place.
        res_to_csv: list of CSV rows; positive detections are appended in place.
    """
    for (z, y, x), prediction in zip(coords, out):
        chance = prediction[0]

        # Keep the raw prediction for this grid position
        scans_res[z, y, x] = chance

        # For cubes classified as positive, save the cube centre as a fraction
        # of each axis length together with the predicted probability
        if chance > 0.5:
            coord_z = round((z * step + crop_size / 2) / volume_shape[0], 4)
            coord_y = round((y * step + crop_size / 2) / volume_shape[1], 4)
            coord_x = round((x * step + crop_size / 2) / volume_shape[2], 4)

            res_to_csv.append([len(res_to_csv), coord_x, coord_y, coord_z, round(chance, 4)])


def detect_nodule():
    """Slide a 32-voxel cube over every patient volume and write detections to CSV.

    For each entry in the extracted-images directory the CT volume and its mask
    are loaded, the volume is scanned with stride 12, cubes whose mask sum is
    below 2000 are skipped, and the remaining cubes are classified in batches
    of 128. Cubes with a predicted value above 0.5 are collected, passed through
    result_filter and written to "<patient_id>.csv" in the detection directory.

    The last, partially filled batch is always predicted after the scan loop.
    Tying that flush to a counter that is not advanced for skipped cubes would
    silently drop the tail of every patient whenever any cube was skipped.
    """
    # Load the classifier
    model = get_net(load_weight_path="G:/LungCancerPredict/model/luna16_full_CNN_v1-0.0001.hd5")

    # Every entry of the data directory is treated as a patient ID
    patient_ids = os.listdir("G:/LungCancerPredict/extracted/ndsb3_extracted_images/")

    step = 12; crop_size = 32; batch_size = 128

    for patient_id in patient_ids:

        print("patient_id: ",patient_id)

        csv_target_path = "G:/LungCancerPredict/detected/cnn_detect_ndsb3/" + patient_id + ".csv"

        # Load the CT volume and the mask volume (a rescale step could be
        # inserted here to run detection on an enlarged volume)
        patient_img = load_patient_images(patient_id)
        patient_mask = load_patient_masks(patient_id)

        # Number of cube positions per axis; clamped at 0 so that a volume
        # smaller than one cube yields an empty scan instead of a negative size
        scans_mov = [max(0, (patient_img.shape[dim] - crop_size) // step + 1) for dim in range(3)]
        scans_num = scans_mov[0] * scans_mov[1] * scans_mov[2]

        # scans_res holds the prediction for every grid position
        scans_res = numpy.zeros(shape=(scans_mov[0],scans_mov[1],scans_mov[2]), dtype=float)
        print("scans_res shape:", scans_res.shape, "scans_num:", scans_num)

        batch_list = []; batch_list_coords = []; res_to_csv = []

        for z, y, x in numpy.ndindex(tuple(scans_mov)):

            z0, y0, x0 = z * step, y * step, x * step
            cube_img = patient_img[z0:z0 + crop_size, y0:y0 + crop_size, x0:x0 + crop_size]
            cube_mask = patient_mask[z0:z0 + crop_size, y0:y0 + crop_size, x0:x0 + crop_size]

            # Skip cubes that lie outside the lung
            if cube_mask.sum() < 2000:
                continue

            batch_list.append(normalize_and_expand(cube_img))
            batch_list_coords.append((z, y, x))

            # Predict as soon as a full batch has been collected
            if len(batch_list) == batch_size:
                out = model.predict(numpy.vstack(batch_list), batch_size=batch_size)
                _record_batch_predictions(out, batch_list_coords, step, crop_size,
                                          patient_img.shape, scans_res, res_to_csv)
                batch_list = []; batch_list_coords = []

        # Predict whatever is left over after the scan (fewer than batch_size cubes)
        if batch_list:
            out = model.predict(numpy.vstack(batch_list), batch_size=batch_size)
            _record_batch_predictions(out, batch_list_coords, step, crop_size,
                                      patient_img.shape, scans_res, res_to_csv)

        df = pandas.DataFrame(res_to_csv, columns=["anno_index", "coord_x", "coord_y", "coord_z", "nodule_chance"])

        # result_filter drops rows from df in place
        result_filter(df, patient_mask)

        df.to_csv(csv_target_path, index=False)

def _load_patient_slices(patient_id, suffix, base_dir):
    """Read every '*<suffix>' PNG of one patient as grayscale and stack them.

    Files are ordered by sorted path and stacked along a new first axis, so the
    result has one 2-D image per file.

    Raises:
        ValueError: if no matching file exists for the patient.
        IOError: if a matching file cannot be decoded by cv2.imread.
    """
    pattern = os.path.join(base_dir, patient_id, "*" + suffix)
    src_files = sorted(glob.glob(pattern))

    if not src_files:
        raise ValueError("no files matching " + pattern)

    slices = []
    for src_file in src_files:
        img = cv2.imread(src_file, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising
        if img is None:
            raise IOError("could not read image " + src_file)
        slices.append(img)

    return numpy.stack(slices)


def load_patient_images(patient_id, base_dir="G:/LungCancerPredict/extracted/ndsb3_extracted_images/"):
    """Load the CT slices ('*_i.png') of a patient as one stacked array.

    Args:
        patient_id: name of the patient's sub-directory inside base_dir.
        base_dir: directory holding one sub-directory per patient.

    Returns:
        Array with the grayscale slices stacked along the first axis, in
        sorted-filename order.
    """
    return _load_patient_slices(patient_id, "_i.png", base_dir)


def load_patient_masks(patient_id, base_dir="G:/LungCancerPredict/extracted/ndsb3_extracted_images/"):
    """Load the mask slices ('*_m.png') of a patient as one stacked array.

    Args:
        patient_id: name of the patient's sub-directory inside base_dir.
        base_dir: directory holding one sub-directory per patient.

    Returns:
        Array with the grayscale mask slices stacked along the first axis, in
        sorted-filename order.
    """
    return _load_patient_slices(patient_id, "_m.png", base_dir)

def normalize_and_expand(img):
    """Convert a 3-D cube to a normalised float32 batch of one.

    The cube is cast to float32, shifted by the mean pixel value (41) and
    divided by 255. The result is reshaped to (1, d0, d1, d2, 1), i.e. a
    leading and a trailing axis of length 1 are added around the three input
    axes. The input array itself is not modified.
    """
    depth, height, width = img.shape[0], img.shape[1], img.shape[2]

    # 41 is MEAN_PIXEL_VALUE
    scaled = (img.astype(numpy.float32) - 41) / 255.

    return scaled.reshape(1, depth, height, width, 1)

def result_filter(df, patient_mask):
    """Drop detections that miss the mask or lie in excluded regions.

    Each row's fractional coordinates (coord_z, coord_y, coord_x) are converted
    back to voxel positions using the shape of patient_mask. A row is removed
    when any of the following holds:

    * none of the slices center_z - 1, center_z, center_z + 1 has a 32x32
      mask window around the detection whose sum exceeds 255;
    * center_z is below 30;
    * z is in the outer quarters (z_perc > 0.75 or z_perc < 0.25) and
      y_perc > 0.85;
    * center_z is below 50 and y_perc < 0.30.

    Rows are dropped by index label, so the frame may carry any index
    (labels are assumed to be unique). Slices outside the volume are ignored
    rather than raising or wrapping around to the other end.

    Args:
        df: DataFrame with columns "coord_x", "coord_y", "coord_z".
        patient_mask: 3-D mask array indexed as [z, y, x].

    Returns:
        The same DataFrame object, modified in place.
    """
    crop_size = 32
    half_crop = crop_size // 2
    depth = patient_mask.shape[0]
    delete_labels = []

    for index, row in df.iterrows():

        z_perc, y_perc, x_perc = row["coord_z"], row["coord_y"], row["coord_x"]

        center_x = int(round(x_perc * patient_mask.shape[2]))
        center_y = int(round(y_perc * patient_mask.shape[1]))
        center_z = int(round(z_perc * depth))

        # Clamp at 0: a negative slice start would index from the far edge
        start_y = max(0, center_y - half_crop)
        start_x = max(0, center_x - half_crop)

        nodule_in_mask = False

        for z_index in (center_z - 1, center_z, center_z + 1):

            # Neighbouring slices beyond either end of the volume do not exist
            if not 0 <= z_index < depth:
                continue

            img_roi = patient_mask[z_index, start_y:start_y + crop_size, start_x:start_x + crop_size]

            if img_roi.sum() > 255:
                nodule_in_mask = True
                break

        if (
            not nodule_in_mask
            or center_z < 30
            or ((z_perc > 0.75 or z_perc < 0.25) and y_perc > 0.85)
            or (center_z < 50 and y_perc < 0.30)
        ):
            delete_labels.append(index)

    # iterrows yields index labels, so drop by label rather than by position
    df.drop(delete_labels, axis=0, inplace=True)

    return df

In [None]:
from keras.models import Model
from keras.optimizers import SGD 
from keras.metrics import binary_accuracy, binary_crossentropy
from keras.layers import Input, AveragePooling3D, Convolution3D, MaxPooling3D, Flatten

def get_net(input_shape=(32, 32, 32, 1), load_weight_path=None) -> Model:
    """Build and compile the 3-D convolutional classifier.

    The network applies an average pooling over the first spatial axis, four
    groups of 3x3x3 ReLU convolutions each followed by max pooling, a 2x2x2
    convolution with 64 filters and finally a 1x1x1 sigmoid convolution that is
    flattened into the "out_class" output.

    Args:
        input_shape: shape of one input sample (without the batch axis).
        load_weight_path: optional path to a weight file; when given, weights
            are loaded with by_name=False before the model is compiled.

    Returns:
        The compiled Keras model. The model summary is printed as a side effect.
    """

    def conv_relu(tensor, filters, layer_name):
        # 3x3x3 ReLU convolution with 'same' padding and unit stride
        return Convolution3D(filters, 3, 3, 3, activation='relu', border_mode='same',
                             name=layer_name, subsample=(1, 1, 1))(tensor)

    def max_pool(tensor, window, layer_name):
        # Max pooling whose stride equals its window
        return MaxPooling3D(pool_size=window, strides=window, border_mode='valid',
                            name=layer_name)(tensor)

    inputs = Input(shape=input_shape, name="input_1")
    net = AveragePooling3D(pool_size=(2, 1, 1), strides=(2, 1, 1), border_mode="same")(inputs)

    # 1st layer group
    net = conv_relu(net, 64, 'conv1')
    net = max_pool(net, (1, 2, 2), 'pool1')

    # 2nd layer group
    net = conv_relu(net, 128, 'conv2')
    net = max_pool(net, (2, 2, 2), 'pool2')

    # 3rd layer group
    net = conv_relu(net, 256, 'conv3a')
    net = conv_relu(net, 256, 'conv3b')
    net = max_pool(net, (2, 2, 2), 'pool3')

    # 4th layer group
    net = conv_relu(net, 512, 'conv4a')
    net = conv_relu(net, 512, 'conv4b')
    net = max_pool(net, (2, 2, 2), 'pool4')

    # output head
    last64 = Convolution3D(64, 2, 2, 2, activation="relu", name="last_64")(net)
    out_class = Convolution3D(1, 1, 1, 1, activation="sigmoid", name="out_class_last")(last64)
    out_class = Flatten(name="out_class")(out_class)

    model = Model(input=inputs, output=out_class)

    if load_weight_path is not None:
        model.load_weights(load_weight_path, by_name=False)

    model.compile(
        optimizer=SGD(lr=0.001, momentum=0.9, nesterov=True),
        loss={"out_class": "binary_crossentropy"},
        metrics={"out_class": [binary_accuracy, binary_crossentropy]},
    )
    model.summary(line_length=140)

    return model