In [1]:
import gc
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tnrange, tqdm_notebook
from time import time
import random

from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score

from CNN_New import get_model

import keras
from keras import backend as K
from keras.utils import multi_gpu_model
from keras.optimizers import SGD, Adam
from keras.utils.np_utils import to_categorical

import tensorflow as tf
from tensorflow.python.client import device_lib

Using TensorFlow backend.


In [2]:
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 3674329292543417123
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 9061465457
locality {
  bus_id: 1
  links {
    link {
      device_id: 1
      type: "StreamExecutor"
      strength: 1
    }
  }
}
incarnation: 10776930260252075812
physical_device_desc: "device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:0a:00.0, compute capability: 7.5"
, name: "/device:GPU:1"
device_type: "GPU"
memory_limit: 9061465457
locality {
  bus_id: 1
  links {
    link {
      type: "StreamExecutor"
      strength: 1
    }
  }
}
incarnation: 127884176242487866
physical_device_desc: "device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:41:00.0, compute capability: 7.5"
]


In [3]:
src_path = os.getcwd()
src_path

'C:\\Users\\rogerswilliam\\projects\\RadiomicsCNN'

In [4]:
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0',
 '/job:localhost/replica:0/task:0/device:GPU:1']

In [5]:
dta_path = os.path.join(src_path, 'data')
os.listdir(dta_path)

['cropped',
 'ids.csv',
 'images_small.npy',
 'imgs.npy',
 'labels.csv',
 'Lung1.clinical.csv',
 'msks.npy',
 'radiomics_features.csv']

In [6]:
cpd_path = os.path.join(dta_path, 'cropped')
nodule_id = os.listdir(cpd_path)

In [7]:
labels = pd.read_csv(os.path.join(dta_path, 'labels.csv'))
print("Total labels:", len(labels))
labels.head()
#labels = labels['survival']
#print(labels[:5])

Total labels: 422


Unnamed: 0,ids,survival
0,LUNG1-001,2165
1,LUNG1-002,155
2,LUNG1-003,256
3,LUNG1-004,141
4,LUNG1-005,353


In [8]:
df = pd.read_csv(os.path.join(dta_path, 'Lung1.clinical.csv'))
df.head()

Unnamed: 0,PatientID,age,clinical.T.Stage,Clinical.N.Stage,Clinical.M.Stage,Overall.Stage,Histology,gender,Survival.time,deadstatus.event
0,LUNG1-001,78.7515,2.0,3,0,IIIb,large cell,male,2165,1
1,LUNG1-002,83.8001,2.0,0,0,I,squamous cell carcinoma,male,155,1
2,LUNG1-003,68.1807,2.0,3,0,IIIb,large cell,male,256,1
3,LUNG1-004,70.8802,2.0,1,0,II,squamous cell carcinoma,male,141,1
4,LUNG1-005,80.4819,4.0,2,0,IIIb,squamous cell carcinoma,male,353,1


In [9]:
df['deadstatus.event'].value_counts()

1    244
0    178
Name: deadstatus.event, dtype: int64

In [10]:
divide = 491
bad  = len(df[(df['Survival.time'] < divide) & (df['deadstatus.event'] == 1)])
ignr = len(df[(df['Survival.time'] < divide) & (df['deadstatus.event'] == 0)])
good = len(df[df['Survival.time'] >= divide])
print(bad, ignr, good,  '=', bad+good+ignr)

172 77 173 = 422


In [11]:
conditions = [
    (df['Survival.time'] < divide) & (df['deadstatus.event'] == 1),
    (df['Survival.time'] < divide) & (df['deadstatus.event'] == 0),
    (df['Survival.time'] >= divide)
]

choices = [0, 2, 1]

df['goodstatus'] = np.select(conditions, choices, default=3)

In [12]:
df['goodstatus'].value_counts()

1    173
0    172
2     77
Name: goodstatus, dtype: int64

In [13]:
new_labels = []
for n, id in enumerate(nodule_id):
    survival = int(df.loc[df.PatientID == id[:9], 'goodstatus'].values[0])
    row = [id, survival]
    new_labels.append(row)

In [14]:
new_labels

[['LUNG1-001', 1],
 ['LUNG1-002', 0],
 ['LUNG1-003', 0],
 ['LUNG1-004', 0],
 ['LUNG1-005', 0],
 ['LUNG1-006', 0],
 ['LUNG1-007', 0],
 ['LUNG1-008', 0],
 ['LUNG1-009', 0],
 ['LUNG1-010', 1],
 ['LUNG1-011', 1],
 ['LUNG1-012', 0],
 ['LUNG1-013', 1],
 ['LUNG1-014', 1],
 ['LUNG1-015', 1],
 ['LUNG1-016', 0],
 ['LUNG1-017', 0],
 ['LUNG1-018', 1],
 ['LUNG1-019', 0],
 ['LUNG1-020', 0],
 ['LUNG1-021', 0],
 ['LUNG1-022', 0],
 ['LUNG1-023', 0],
 ['LUNG1-024', 1],
 ['LUNG1-025', 1],
 ['LUNG1-026', 0],
 ['LUNG1-027', 1],
 ['LUNG1-028', 0],
 ['LUNG1-029', 0],
 ['LUNG1-030', 0],
 ['LUNG1-031', 1],
 ['LUNG1-032', 1],
 ['LUNG1-033', 0],
 ['LUNG1-034', 1],
 ['LUNG1-035', 0],
 ['LUNG1-036', 0],
 ['LUNG1-037', 0],
 ['LUNG1-038', 0],
 ['LUNG1-039', 0],
 ['LUNG1-040', 1],
 ['LUNG1-041', 0],
 ['LUNG1-042', 0],
 ['LUNG1-043', 0],
 ['LUNG1-044', 0],
 ['LUNG1-045', 1],
 ['LUNG1-046', 0],
 ['LUNG1-047', 1],
 ['LUNG1-048', 1],
 ['LUNG1-049', 1],
 ['LUNG1-050', 0],
 ['LUNG1-051', 0],
 ['LUNG1-052', 0],
 ['LUNG1-053

In [15]:
columns = ['id', 'survival']
new_labels = pd.DataFrame(new_labels, columns = columns)
print("Labels length:", len(new_labels))
new_labels.head()

Labels length: 424


Unnamed: 0,id,survival
0,LUNG1-001,1
1,LUNG1-002,0
2,LUNG1-003,0
3,LUNG1-004,0
4,LUNG1-005,0


In [16]:
imgs = np.load( os.path.join(dta_path,'images_small.npy') )
#imgs = np.rollaxis(imgs, 3, 1)  
imgs = imgs.astype('float16') # Using fp16 capable gpus
print("Images shape:", imgs.shape)
print("Array type:  ", imgs.dtype)

Images shape: (424, 15, 50, 50)
Array type:   float16


In [17]:
'''
for n, data in enumerate(zip(imgs, new_labels.iterrows())):
    print(n+1, data[0].shape)
    print(data[1][1].id, ' - Survival:', data[1][1].survival)
    print()
'''

mask = np.zeros(len(imgs), dtype=bool)
keep = np.array(new_labels.loc[(new_labels['survival'] == 0) | (new_labels['survival'] == 1)].index)
mask[keep] = True
imgs = imgs[mask]

In [18]:
imgs.shape

(346, 15, 50, 50)

In [19]:
y = new_labels.loc[(new_labels['survival'] == 0) | (new_labels['survival'] == 1)]
y = np.array(y.survival)
y


array([1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1,
       1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1,
       0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0,
       0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0,

In [20]:
len(y)

346

In [21]:
"""
path = os.path.join(cpd_path, new_labels.loc[200].id)
path = os.path.join(path, 'image.npy')
img = np.load(path)
plt.imshow(img[:, :, 0])
"""

"\npath = os.path.join(cpd_path, new_labels.loc[200].id)\npath = os.path.join(path, 'image.npy')\nimg = np.load(path)\nplt.imshow(img[:, :, 0])\n"

In [22]:
imgs.shape
imgs = imgs.reshape((346, 15, 50, 50, 1))
print("Shape:", imgs.shape)

Shape: (346, 15, 50, 50, 1)


In [23]:
X_train, X_test, y_train, y_test = train_test_split(imgs, y, test_size=0.245, train_size=0.755)

In [24]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(261, 15, 50, 50, 1) (85, 15, 50, 50, 1) (261,) (85,)


In [25]:
def generator(X, y, batch_size):
    total_batches = 0
    while True:
        #batch = random.sample(range(0, X.shape[0]), batch_size)
        batch = [random.choice(range(0, X.shape[0])) for x in range(batch_size)]
        total_batches += len(batch)
        X_batch = X[batch]
        y_batch = y[batch]
        yield X_batch, y_batch

In [26]:
n_epochs = 32
batch_size = 32
learning_rate = .0000000000000000000000000000000000000001
decay_rate = 5e-6
momentum = 0.5
is_regression = True

model = get_model((15,50,50, 1))

In [27]:
model = multi_gpu_model(model, gpus=2)
#sgd = SGD(lr=learning_rate,momentum=momentum, decay=decay_rate, nesterov=False)

adam = Adam(lr=learning_rate)

model.compile(loss='binary_crossentropy', 
              optimizer=adam, 
              #metrics=['mean_absolute_error']
             )

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
conv3d_1_input (InputLayer)     (None, 15, 50, 50, 1 0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 15, 50, 50, 1 0           conv3d_1_input[0][0]             
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 15, 50, 50, 1 0           conv3d_1_input[0][0]             
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 1)            117519825   lambda_1[0][0]                   
                                                                 lambda_2[0][0]                   
__________

In [28]:
model.fit_generator(generator(X_train, y_train, batch_size), 
                    epochs = n_epochs,
                    steps_per_epoch = int(X_train.shape[0]/batch_size), 
                    validation_data = generator(X_test, y_test, batch_size*2),
                    validation_steps = X_train.shape[0]/batch_size*2,
                    #use_multiprocessing = True,
                    #workers=16
                    )

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32


Epoch 32/32


<keras.callbacks.History at 0x185bc09bcc0>

In [29]:
del model
gc.collect()

13542