In [1]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Cap this process at 30% of GPU memory so the card can be shared with other
# jobs, then register the resulting session as the one Keras uses.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.30)
)
set_session(tf.Session(config=config))

Using TensorFlow backend.


In [2]:
from keras.models import load_model
import numpy as np
import pandas as pd
import os
import time

In [3]:
def feature_vect(model, PATH_VOXELS, patient):
    """Build one patient's feature vector from the model's per-candidate predictions.

    Parameters
    ----------
    model : object exposing ``predict(x=..., batch_size=...)``
        Its outputs 0..3 are treated as the malignancy, spiculation,
        lobulation and diameter predictions respectively.
    PATH_VOXELS : str
        Directory prefix; it is concatenated directly with ``patient``, so it
        must end with a path separator.
    patient : str
        File name of the patient's archive, which must contain the arrays
        ``'vox'``, ``'cents'`` and ``'shape'``.

    Returns
    -------
    numpy.ndarray
        Concatenation, in this order, of: the max of each of the four outputs,
        the std of each of the four outputs, the normalized centroid of the
        candidate with the highest malignancy prediction, and the std of all
        normalized centroids.
    """
    # Read everything we need and close the archive right away; this function
    # is called once per patient, so leaked handles would pile up.
    with np.load(PATH_VOXELS + patient) as patient_array:
        voxels = patient_array['vox']
        centroids = patient_array['cents']
        shape = patient_array['shape']

    # Rescale intensities with (v + 1000) / 1400 and clamp to [0, 1].
    voxels = np.clip((voxels + 1000.) / 1400., 0, 1)

    preds = np.array(model.predict(x=voxels, batch_size=60))
    malig, spiculation, lobulation, diameter = preds[0], preds[1], preds[2], preds[3]

    # Flat argmax over the malignancy output.
    # NOTE(review): this is a row index only if that output holds one value
    # per candidate -- confirm against the model definition.
    ixs = np.argmax(malig)

    heads = (malig, spiculation, lobulation, diameter)
    maxima = [np.max(head, axis=0) for head in heads]
    # Fix: the original concatenated xmax_lobulation a second time in place of
    # xsd_lobulation, so the lobulation spread was never emitted.
    spreads = [np.std(head, axis=0) for head in heads]

    # Express centroids as fractions of the scan extent along each axis.
    normalized_locs = centroids.astype('float32') / shape.astype('float32')

    feats = np.concatenate(
        maxima + spreads + [normalized_locs[ixs], normalized_locs.std(axis=0)]
    )
    return feats


def score_model(PATH_MODEL,PATH_VOXELS, file_name):
    """Score every patient file in a directory and save the feature matrix as CSV.

    Parameters
    ----------
    PATH_MODEL : str
        Path handed to ``load_model``.
    PATH_VOXELS : str
        Directory whose entries are each passed to ``feature_vect`` as a
        patient file; every entry returned by ``os.listdir`` is processed.
    file_name : str
        Output path without extension; the matrix is written to
        ``file_name + '.csv'`` with the patient file names as the index.

    Progress (patient number, file name minus its last four characters and
    elapsed seconds) is printed every 50 patients. An empty directory yields
    a CSV containing only the header row.
    """
    model = load_model(PATH_MODEL)

    start = time.time()
    patients = os.listdir(PATH_VOXELS)
    print ("patient numbers: ", len(patients))

    col=['max_malig','max_spiculation','max_lobulation','max_diameter',\
         'xsd_malig', 'xsd_spiculation', 'xsd_lobulation','xsd_diameter',\
         'loc_from_malig_x','loc_from_malig_y','loc_from_malig_z','std_locs_x','std_locs_y','std_locs_z',]

    all_features = []
    for num, patient in enumerate(patients):
        all_features.append(feature_vect(model,PATH_VOXELS, patient))
        if num%50==0:
            print ("\nPatient %4d:" %num, patient[:-4])
            print ("\tIt took %d seconds"%(time.time()-start))

    # Stack once after the loop: re-stacking on every iteration copied the
    # whole matrix per patient, and left X unbound when the directory was empty.
    if all_features:
        X = np.stack(all_features)
    else:
        X = np.empty((0, len(col)))

    df = pd.DataFrame(data=X,index=patients, columns=col)
    df.to_csv(file_name + '.csv')
    print ("It took %d seconds"%(time.time()-start))

In [4]:
def get_feature_matrix(model_num, stage_num):
    """Run ``score_model`` for one (model, stage) combination.

    The model file, the voxel directory and the output file stem are all
    derived from the two integers using the project's fixed naming patterns.
    """
    output_stem = 'feature_matrix_model%d_stage%d'%(model_num,stage_num)
    voxel_dir = '../../data/stage%d_voxels_mask/'%(stage_num)
    model_file = "../Models/LUNA_model_v2_%d.h5"%(model_num)
    score_model(model_file, voxel_dir, output_stem)

In [5]:
def main():
    """Generate the feature matrix for models 1 and 2 on stages 1 and 2.

    Combinations run model-major: (1, 1), (1, 2), (2, 1), (2, 2).
    """
    for model_num in (1, 2):
        for stage_num in (1, 2):
            get_feature_matrix(model_num=model_num, stage_num=stage_num)

In [None]:
# Score only model 2 on the stage-1 voxels (main() covers all four combinations).
get_feature_matrix(model_num=2, stage_num=1)

  return cls(**config)


patient numbers:  1434

Patient    0: d777a77cc7a2ec2f1eed68799cc9075c
	It took 62 seconds

Patient  150: 573a661e2d784f9385a3b78c9757ddad
	It took 8372 seconds

Patient  200: e5cf847e616cc2fe94816ffa547d2614
	It took 10230 seconds

Patient  250: b4581f4f4cad292b5a013d35d1c39f24
	It took 12845 seconds

Patient  300: aa594c8968f06fcf374e76bc9a375477
	It took 15830 seconds

Patient  350: 6cb2908fd789700db727dd96526bc342
	It took 18982 seconds

Patient  400: fb52dd8152e53a4ca7da5403d6d0db13
	It took 21878 seconds

Patient  450: 31c090cf66ec7bf090801d14d63199b5
	It took 24626 seconds

Patient  500: 4d7df08f074b221eec6311c2617a5ba8
	It took 26707 seconds

Patient  550: 58e4646cbead2fc4a6a1f86bf806e76f
	It took 28188 seconds

Patient  600: c8a82a4e7e76702f5f10ece6508f2bbe
	It took 29706 seconds

Patient  650: be2be08151ef4d3aebd3ea4fcd5d364b
	It took 31214 seconds

Patient  700: f82560aeea0309873716efe3aa71ef0a
	It took 32712 seconds

Patient  750: 53ddfb7769619035ad7f8f58c72f697c
	It took 3

400 patients  at 02:39 am