In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from copy import deepcopy
import time
import pickle

In [2]:

def top_patients(NUM_patients=1,MODEL='model2_feature_matrix_luna.csv'):
    df = pd.read_csv(MODEL).sort_values(['max_malig'],ascending=[False])
    top_patients_dict = {}
    for i in range(NUM_patients):
        
        patient = df.iloc[i]['Unnamed: 0']
        top_patients_dict[int(patient)] = {}
        
        malignancy = df.iloc[i]['max_malig']
        top_patients_dict[int(patient)]['max_malig'] = malignancy
        
        print ('Patient',i+1,':\t',patient,'\nMalignancy',i+1,':\t',malignancy)
        
        with open('./LUNA_model_v2_LUNA/dict_top_patients.pickle', 'wb') as handle:
            pickle.dump(top_patients_dict,handle,protocol=pickle.HIGHEST_PROTOCOL)
        print ('\tDictionary SAVED for..',patient)
        
    return top_patients_dict


In [3]:

top_patients_dict = top_patients()


Patient 1 :	 472.0 
Malignancy 1 :	 0.533818006516
	Dictionary SAVED for.. 472.0


In [4]:
def get_LUNA_patches(top_patients_DICT):
    labels = []
    num_patients = 0
    
    with open('../data/voxel_to_patient_dict.pickle', 'rb') as handle:
        voxel_to_patient_dict = pickle.load(handle)

    for directory in [d for d in os.listdir('../data/') if 'subset' in d]:
        patients_by_dir = [f.replace('.mhd','') for f in os.listdir('../data/'+directory) if '.mhd' in f]
        Xtrue = np.load('../data/LUNA_voxels/'+directory+'Xtrue.npy')
        Xrandom = np.load('../data/LUNA_voxels/'+directory+'Xrandom.npy')
        count={'true':0,'random':0}

        for num, patient in enumerate(patients_by_dir):
            num_patients += 1
            array = []
            try:

                lowerBoundary = count['true']
                upperBoundary = count['true'] + int(voxel_to_patient_dict[directory][patient]['true'])

                if num_patients == list(top_patients_DICT.keys())[0]+1:
                    print ('FOUND PATIENT',list(top_patients_DICT.keys())[0],'!_______________________')
                    print ('\tLower boundary for True is ',lowerBoundary)
                    print ('\tUpper boundary for True is ',upperBoundary)
                    array.append(Xtrue[lowerBoundary:upperBoundary])
                    labels.append(1)

                count['true'] = upperBoundary

            except:

                labels.append(0)

            lowerBoundary = count['random']
            upperBoundary = count['random'] + int(voxel_to_patient_dict[directory][patient]['random'])

            if num_patients == list(top_patients_DICT.keys())[0]+1:
                print ('\tLower boundary for Random is ',lowerBoundary)
                print ('\tUpper boundary for Random is ',upperBoundary)
                array.append(Xrandom[lowerBoundary:upperBoundary])

            count['random'] = upperBoundary

            if num_patients == list(top_patients_dict.keys())[0]+1:
                break
        if num_patients == list(top_patients_dict.keys())[0]+1:
            break

    array = np.vstack(array)
    print ('Array shape before expansion',array.shape)
    array = np.expand_dims(array,1)
    array = (array+1000.)/1400.
    array = np.clip(array,0,1)
    print ('Array shape',array.shape)
    
    return array

In [5]:

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
from keras.models import load_model

config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.15
set_session(tf.Session(config=config))


Using TensorFlow backend.


In [6]:

def top_patients_predict(top_patients_DICT,MODEL,TOP=1):
    top_patients_dict = deepcopy(top_patients_DICT)
    for patient in top_patients_dict.keys():
        print ('Patient..',patient)
        voxels = get_LUNA_patches(top_patients_dict)
        print ('\tNumber of voxels to predict..',voxels.shape[0])
        
        preds = np.array(MODEL.predict(x=voxels,batch_size=5))
        top_patients_dict[patient]['preds'] = preds
        np.save('./LUNA_model_v2/preds_'+str(int(patient))+'.npy',preds)
        print ('\tVoxels predicted..',len(preds))
        
        top_ixs = np.argsort(preds[0],axis=0)[-TOP:]
        top_ixs = [i[0] for i in top_ixs]
        top_patients_dict[patient]['top_ixs'] = top_ixs
        print ('\tNumber of top voxels for visualization..',len(top_ixs))
        
        top_patients_dict[patient]['top_voxels'] = np.vstack([voxels[i] for i in top_ixs])
        with open('./LUNA_model_v2_LUNA/dict_top_patients_predict.pickle', 'wb') as handle:
            pickle.dump(top_patients_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
        print ('\tDictionary SAVED for..',patient)
        
    return top_patients_dict


In [7]:

LUNA_model_v2 = load_model('../LungCancer/Models/LUNA_model_v2.h5')
top_patients_dict = top_patients_predict(top_patients_dict,LUNA_model_v2,TOP=1)


  return cls(**config)


Patient.. 472
FOUND PATIENT 472 !_______________________
	Lower boundary for True is  192
	Upper boundary for True is  204
	Lower boundary for Random is  252
	Upper boundary for Random is  264
Array shape before expansion (24, 64, 64, 64)
Array shape (24, 1, 64, 64, 64)
	Number of voxels to predict.. 24
	Voxels predicted.. 4
	Number of top voxels for visualization.. 1
	Dictionary SAVED for.. 472


In [None]:

def top_voxels_predict(top_patients_DICT,MODEL):
    top_patients_dict = deepcopy(top_patients_DICT)
    for patient in top_patients_dict.keys():
        print ('Patient..',patient)
        
        top_voxels = top_patients_dict[patient]['top_voxels']
        
        for i in range(top_voxels.shape[0]):
            print ('\tPredicting voxel',i+1)
            start = time.time()
            
            voxel = np.squeeze(top_voxels[i])
            count = 0
            preds_top_voxels = []
            
            for e in np.nditer(voxel,op_flags=['readwrite']):
                e_original = e.copy()
                e[...] = 0
                preds = MODEL.predict(x=np.expand_dims(np.expand_dims(voxel,axis=0),axis=0),batch_size=1)
                preds = [p[0][0] for p in preds]
                preds_top_voxels.append(preds)
                e[...] = e_original
                count +=1
                if count%1000==0:
                    print ('\t\tOut of',64*64*64,',',count,'are done in',time.time()-start)
                    start = time.time()
            top_patients_dict[patient][i+1] = preds_top_voxels
        
        with open('./LUNA_model_v2_LUNA/dict_top_voxels_predict.pickle', 'wb') as handle:
            pickle.dump(top_patients_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
        print ('\tDictionary SAVED for..',patient)
        
    return top_patients_dict


In [None]:

top_patients_dict = top_voxels_predict(top_patients_dict,LUNA_model_v2)


Patient.. 472
	Predicting voxel 1
		Out of 262144 , 1000 are done in 23.59552240371704
		Out of 262144 , 2000 are done in 20.02323031425476
		Out of 262144 , 3000 are done in 23.76174759864807
		Out of 262144 , 4000 are done in 23.23139214515686
		Out of 262144 , 5000 are done in 23.226093292236328
		Out of 262144 , 6000 are done in 20.140151977539062
		Out of 262144 , 7000 are done in 19.859532594680786
		Out of 262144 , 8000 are done in 22.237860679626465
		Out of 262144 , 9000 are done in 19.792877674102783
		Out of 262144 , 10000 are done in 23.052852392196655
		Out of 262144 , 11000 are done in 23.854332447052002
		Out of 262144 , 12000 are done in 21.5396990776062
		Out of 262144 , 13000 are done in 22.092008590698242
		Out of 262144 , 14000 are done in 20.085764169692993
		Out of 262144 , 15000 are done in 19.96256995201111
		Out of 262144 , 16000 are done in 23.306487798690796
		Out of 262144 , 17000 are done in 20.47577214241028
		Out of 262144 , 18000 are done in 22.030323266