In [1]:
import numpy as np
import pandas as pd
import pickle
import os
import time
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

import tensorflow as tf

from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.3
set_session(tf.Session(config=config))

from keras.models import load_model
PATH_MODEL = "../Models/LUNA_model_v2_2.h5"
model2 = load_model(PATH_MODEL)

Using TensorFlow backend.
  return cls(**config)


In [2]:
def feature_vect(patient_array):          

    preds = np.array(model2.predict(x = patient_array))
    
    ixs = np.argmax(preds[0])
    
    xmax_malig = np.max(preds[0], axis=0)
    xmax_spiculation = np.max(preds[1], axis=0)
    xmax_lobulation = np.max(preds[2], axis=0)
    xmax_diameter = np.max(preds[3], axis=0)
    
    xsd_malig = np.std(preds[0], axis=0)
    xsd_spiculation = np.std(preds[1], axis=0)
    xsd_lobulation = np.std(preds[2], axis=0)
    xsd_diameter = np.std(preds[3], axis=0)

    feats = (np.concatenate([xmax_malig,xmax_spiculation,xmax_lobulation,xmax_diameter,\
               xsd_malig,xsd_spiculation,xmax_lobulation,xsd_diameter])) 
    
    return feats

In [3]:
with open('../../data/voxel_to_patient_dict.pickle', 'rb') as handle:
    voxel_to_patient_dict = pickle.load(handle)

In [4]:
start = time.time()

all_features = []
labels = []

for directory in [d for d in os.listdir('../../data/') if 'subset' in d]:
    print (directory)
    patients_by_dir = [f.replace('.mhd','') for f in os.listdir('../../data/'+directory) if '.mhd' in f]
    Xtrue = np.load('../../data/LUNA_voxels/'+directory+'Xtrue.npy')
    Xrandom = np.load('../../data/LUNA_voxels/'+directory+'Xrandom.npy')
    count={'true':0,'random':0}
    
    for num, patient in enumerate(patients_by_dir):
        print (patient)
        array = []
        
        try:
            
            lowerBoundary = count['true']
            upperBoundary = count['true'] + int(voxel_to_patient_dict[directory][patient]['true'])
            
            print ('\tLower boundary for True is ',lowerBoundary)
            print ('\tUpper boundary for True is ',upperBoundary)
            
            array.append(Xtrue[lowerBoundary:upperBoundary])
            labels.append(1)
            
            count['true'] = upperBoundary
            
            print(1)
            
        except:
            
            labels.append(0)
            print(0)
        
        lowerBoundary = count['random']
        upperBoundary = count['random'] + int(voxel_to_patient_dict[directory][patient]['random'])

        print ('\tLower boundary for Random is ',lowerBoundary)
        print ('\tUpper boundary for Random is ',upperBoundary)

        array.append(Xrandom[lowerBoundary:upperBoundary])        
        count['random'] = upperBoundary
        
        array = np.expand_dims(np.vstack(array),1)
        array = (array+1000.)/1400.
        array = np.clip(array,0,1)
        
        print ('Array shape',array.shape)
        
        feats = feature_vect(array)
        all_features.append(feats)
        
#         if num%100==0:
#             print ("Patient %4d:" %num, patient)
    
X = np.stack(all_features)

col=['max_malig','max_spiculation','max_lobulation','max_diameter',\
     'xsd_malig', 'xsd_spiculation', 'xmax_lobulation','xsd_diameter']

df = pd.DataFrame(data=X, columns=col)
df['labels'] = labels

df.to_csv('./feature_matrix_model2_luna.csv')

print ("It took %d seconds"%(time.time()-start))

subset2
1.3.6.1.4.1.14519.5.2.1.6279.6001.156579001330474859527530187095
0
	Lower boundary for Random is  0
	Upper boundary for Random is  6
Array shape (6, 1, 64, 64, 64)
1.3.6.1.4.1.14519.5.2.1.6279.6001.133378195429627807109985347209
	Lower boundary for True is  0
	Upper boundary for True is  12
1
	Lower boundary for Random is  6
	Upper boundary for Random is  18
Array shape (24, 1, 64, 64, 64)
1.3.6.1.4.1.14519.5.2.1.6279.6001.216526102138308489357443843021
0
	Lower boundary for Random is  18
	Upper boundary for Random is  24
Array shape (6, 1, 64, 64, 64)
1.3.6.1.4.1.14519.5.2.1.6279.6001.191301539558980174217770205256
0
	Lower boundary for Random is  24
	Upper boundary for Random is  30
Array shape (6, 1, 64, 64, 64)
1.3.6.1.4.1.14519.5.2.1.6279.6001.172845185165807139298420209778
0
	Lower boundary for Random is  30
	Upper boundary for Random is  36
Array shape (6, 1, 64, 64, 64)
1.3.6.1.4.1.14519.5.2.1.6279.6001.227796349777753378641347819780
	Lower boundary for True is  12
	Upp

In [5]:
X.shape

(888, 8)

In [8]:
df

Unnamed: 0,max_malig,max_spiculation,max_lobulation,max_diameter,xsd_malig,xsd_spiculation,xmax_lobulation,xsd_diameter,labels
0,0.496121,0.371900,0.358551,0.293208,0.099515,0.083970,0.358551,0.062682,0
1,0.512953,0.343098,0.334395,0.341244,0.090750,0.078329,0.334395,0.058964,1
2,0.447176,0.376131,0.333259,0.276387,0.020049,0.083681,0.333259,0.049480,0
3,0.442950,0.374566,0.366240,0.283358,0.084090,0.095962,0.366240,0.053300,0
4,0.431455,0.212259,0.308027,0.283699,0.083341,0.042448,0.308027,0.052760,0
5,0.507354,0.348568,0.361303,0.296878,0.113290,0.062295,0.361303,0.051358,1
6,0.514261,0.312580,0.342795,0.295014,0.127858,0.068341,0.342795,0.059625,0
7,0.481898,0.271938,0.341182,0.320418,0.130495,0.055012,0.341182,0.072845,1
8,0.531098,0.367647,0.318483,0.339589,0.135875,0.096811,0.318483,0.063378,1
9,0.522617,0.370673,0.375371,0.318399,0.112951,0.076792,0.375371,0.057575,1
