In [1]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Cap this process at 48% of each GPU's memory so the device can be shared,
# then register the resulting TF session as the one Keras uses.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.48)
)
set_session(tf.Session(config=config))

from keras.models import load_model
# Location of the saved Keras model (HDF5), relative to the working directory.
PATH_MODEL = "../Models/LUNA_model_v2_2.h5"
# Load the trained model once; later cells call `model_v24.predict(...)`.
# NOTE(review): the variable is named "v24" but the file is "v2_2" — confirm
# which model version is actually intended here.
model_v24 = load_model(PATH_MODEL)

Using TensorFlow backend.
  return cls(**config)


In [2]:
import numpy as np
import pandas as pd
import os
import time
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
import matplotlib.pyplot as plt
%matplotlib inline

# Directory with one file per patient; every entry is opened with np.load and
# read for the keys 'vox', 'cents' and 'shape'. Keep the trailing slash so that
# plain string concatenation with a file name still yields a valid path.
PATH_VOXELS = '../../data/stage1_voxels_mask/'

In [None]:
def feature_vect(patient, model=None, voxels_dir=None):
    """Build the per-patient feature vector from the model's predictions.

    Parameters
    ----------
    patient : str
        File name (inside ``voxels_dir``) of the patient's archive. The
        archive must expose the keys ``'vox'``, ``'cents'`` and ``'shape'``
        (expected to be an ``.npz`` file).
    model : object, optional
        Anything with a ``predict(x=...)`` method. Defaults to the notebook
        global ``model_v24``.
    voxels_dir : str, optional
        Directory containing ``patient``. Defaults to the notebook global
        ``PATH_VOXELS``.

    Returns
    -------
    numpy.ndarray
        1-D concatenation, in this order, of:
        max of each of the four prediction heads, std of each of the four
        prediction heads, the normalized centroid of the candidate with the
        highest first-head score, and the per-axis std of all normalized
        centroids.

    Notes
    -----
    Assumes ``model.predict`` returns four outputs ordered malignancy,
    spiculation, lobulation, diameter, each of shape ``(n_candidates, 1)`` —
    TODO confirm against the model definition.
    """
    if model is None:
        model = model_v24
    if voxels_dir is None:
        voxels_dir = PATH_VOXELS

    # Use the archive as a context manager so its file handle is released
    # right away instead of leaking one open file per patient.
    with np.load(os.path.join(voxels_dir, patient)) as patient_array:
        voxels = patient_array['vox']
        centroids = patient_array['cents']
        shape = patient_array['shape']

    preds = np.array(model.predict(x=voxels))
    # Index of the candidate with the highest first-head (malignancy) score.
    ixs = np.argmax(preds[0])

    xmax_malig = np.max(preds[0], axis=0)
    xmax_spiculation = np.max(preds[1], axis=0)
    xmax_lobulation = np.max(preds[2], axis=0)
    xmax_diameter = np.max(preds[3], axis=0)

    xsd_malig = np.std(preds[0], axis=0)
    xsd_spiculation = np.std(preds[1], axis=0)
    xsd_lobulation = np.std(preds[2], axis=0)
    xsd_diameter = np.std(preds[3], axis=0)

    # Express centroids as fractions of the scan extent along each axis.
    normalized_locs = centroids.astype('float32') / shape.astype('float32')

    # Fix: the std group previously repeated xmax_lobulation, so the
    # lobulation std was computed but never made it into the features.
    feats = np.concatenate([
        xmax_malig, xmax_spiculation, xmax_lobulation, xmax_diameter,
        xsd_malig, xsd_spiculation, xsd_lobulation, xsd_diameter,
        normalized_locs[ixs], normalized_locs.std(axis=0),
    ])
    return feats

In [None]:
# Extract the feature vector of every patient file found in PATH_VOXELS and
# write the resulting matrix (one row per file) to CSV.
start = time.time()
patients = [f for f in os.listdir(PATH_VOXELS)]
print ("patient numbers: ", len(patients))

all_features = []
for num, patient in enumerate(patients):
    feats = feature_vect(patient)
    all_features.append(feats)
    # Progress marker every 100 patients; [:-4] drops the 4-char extension.
    if num%100==0:
        print ("Patient %4d:" %num, patient[:-4])

# Stack once after the loop: re-stacking on every iteration copied the whole
# matrix each time, making the loop quadratic in the number of patients.
X = np.stack(all_features)

# Column labels follow the order of the vector returned by feature_vect.
# NOTE(review): the 7th label reads 'xmax_lobulation' while its neighbours are
# 'xsd_*'; it is left byte-for-byte so existing readers of the CSV keep
# working — confirm the intended name.
col=['max_malig','max_spiculation','max_lobulation','max_diameter',\
     'xsd_malig', 'xsd_spiculation', 'xmax_lobulation','xsd_diameter',\
     'loc_from_malig_x','loc_from_malig_y','loc_from_malig_z','std_locs_x','std_locs_y','std_locs_z',]
df = pd.DataFrame(data=X,index=patients, columns=col)
df.to_csv('./model_v24_2_feature_matrix_14.csv')
print ("It took %d seconds"%(time.time()-start))

patient numbers:  1434
Patient    0: d777a77cc7a2ec2f1eed68799cc9075c
Patient  100: 3dfe8e80106f4136d2933ff72a16035c
Patient  200: e5cf847e616cc2fe94816ffa547d2614
Patient  300: aa594c8968f06fcf374e76bc9a375477
Patient  400: fb52dd8152e53a4ca7da5403d6d0db13
Patient  500: 4d7df08f074b221eec6311c2617a5ba8
Patient  600: c8a82a4e7e76702f5f10ece6508f2bbe
Patient  700: f82560aeea0309873716efe3aa71ef0a
Patient  800: 25c1c4f008e8addc4d386cab58815052
