# Environment Setup

ModuleNotFoundError: No module named 'sksurv'

In [1]:
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skimage.transform import resize
import tensorflow as tf 
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, Input, MaxPool2D
from tensorflow.keras.optimizers import Adam
import tensorflow.compat.v2.summary as summary
from tensorflow.python.ops import summary_ops_v2
from tqdm import tqdm
from typing import Any, Dict, Iterable, Sequence, Tuple, Optional, Union

In [2]:
import sksurv

ModuleNotFoundError: No module named 'sksurv'

In [2]:
pip install import_ipynb

Note: you may need to restart the kernel to use updated packages.


In [3]:
import import_ipynb
from patient_data_split import pat_train_test_split
import Polsterl_tutorial

importing Jupyter notebook from Polsterl_tutorial.ipynb


ModuleNotFoundError: No module named 'sksurv'

# Data Loading and Setup

In [None]:
# Constants for development
# Upper bound on how many image files the loading loop reads.
FILESTOLOAD = 100
imdim_from_preprocessing = 299 # must match opt.ImageSize in image preprocessing configuration files
# Side length (pixels) every image is resized to; also the height/width of the network Input layer.
imdim_for_network = 1024
# Seed handed to pat_train_test_split when splitting slices into train and test sets.
random_seed = 16

In [None]:
# CSV files that connect each patient id to its slices and RFS label
# (one file for the "zero" variant, one for the "NaN" variant).
zero_info_path = '/Users/katyscott/Documents/ICC/Data/Labels/RFS_all_tumors_zero.csv'
nan_info_path = '/Users/katyscott/Documents/ICC/Data/Labels/RFS_all_tumors_NaN.csv'

# Tumor image folders; the sub-folder is named after the preprocessing image size.
zero_image_path = f'/Users/katyscott/Documents/ICC/Data/Images/Tumors/{imdim_from_preprocessing}/Zero/'
nan_image_path = f'/Users/katyscott/Documents/ICC/Data/Images/Tumors/{imdim_from_preprocessing}/NaN/'


In [None]:
# Read the label table for the zero background images and unpack its first
# five columns, by position, into separate NumPy arrays.
# Column order as implied by the variable names: image file name, patient
# number, slice number, RFS code, RFS time.
info = pd.read_csv(zero_info_path)
image_fnames, pat_num, slice_num, rfs_code, rfs_time = (
    np.asarray(info.iloc[:, column]) for column in range(5)
)

# Quick look at what was read: number of rows and one RFS time value.
print(rfs_code.shape)
print(rfs_time[1])

In [None]:
# Load at most FILESTOLOAD images for development and stack them into a single
# array of shape (n_loaded, imdim_for_network, imdim_for_network).
#
# The resized images are collected in a list and stacked once at the end.
# Growing the array with np.append copies every previously loaded image on each
# iteration, and required a placeholder first row that had to be deleted again.
loaded_images = []
# Slice up front so the progress bar total matches the number of files actually read
for image_file in tqdm(image_fnames[:FILESTOLOAD]):
    # Load the file as a flat numpy array (np.fromfile default dtype)
    img = np.fromfile(zero_image_path + image_file)
    # Reshape image from 1D to 2D array - need to not hardcode this, square root?
    img_2D = np.reshape(img, (imdim_from_preprocessing, imdim_from_preprocessing))
    # Scale image to the network dimension with anti-aliasing enabled; borders are
    # padded with the reflection of the values along each axis
    img_final = resize(img_2D, (imdim_for_network, imdim_for_network), anti_aliasing=True, mode='reflect')
    loaded_images.append(img_final)

if loaded_images:
    # New leading axis indexes the images: images[i] is the i-th loaded image
    images = np.stack(loaded_images, axis=0)
else:
    # Nothing was loaded: keep the same 3D layout with zero images
    images = np.empty((0, imdim_for_network, imdim_for_network))

In [None]:
# Confirming images loaded in: show the image at index 1 with the 'Greys' colormap
plt.imshow(images[1], cmap='Greys')

## Splitting data for training/testing

In [None]:
# Split the loaded slices into training and testing index sets.
# Only the first FILESTOLOAD patient numbers / RFS codes are passed, matching
# the number of images that were loaded.
# NOTE(review): 0.7 is presumably the training fraction - confirm against pat_train_test_split.
train_slice_indices, test_slice_indices = pat_train_test_split(
    pat_num[:FILESTOLOAD],
    rfs_code[:FILESTOLOAD],
    0.7,
    random_seed,
)

# Report the shape of each index set
for split_name, split_indices in (("Train: ", train_slice_indices), ("Test: ", test_slice_indices)):
    print(split_name, np.asarray(split_indices).shape)

In [None]:
def _take_split(slice_indices):
    """Return the images and RFS-time labels selected by ``slice_indices``.

    The images are picked along the first axis of ``images`` and then squeezed,
    which drops every axis of length 1. The labels are read from ``rfs_time``
    using ``tuple(slice_indices)`` as the index.

    NOTE(review): both steps presumably rely on the index layout returned by
    pat_train_test_split (a leading axis of size 1) - confirm.
    """
    slices = images[slice_indices, :, :].squeeze()  # Remove first dim of size 1
    labels = rfs_time[tuple(slice_indices)]
    return slices, labels


train_slices, train_labels = _take_split(train_slice_indices)
print("Training set: ", train_slices.shape)
print("Training labels: ", train_labels.shape)

test_slices, test_labels = _take_split(test_slice_indices)
print("Testing set: ", test_slices.shape)
print("Testing labels: ", test_labels.shape)


In [None]:
# Visual check of the split: show the first training slice with the 'Greys' colormap
plt.imshow(train_slices[0], cmap='Greys')

In [None]:
# Label of the first training slice (an entry of rfs_time selected by the training indices)
train_labels[0]

# Model Setup

## Survival Analysis for Deep Learning tutorial - Sebastian Pölsterl

https://k-d-w.org/blog/2019/07/survival-analysis-for-deep-learning/

TensorFlow 2 version of the tutorial code:

https://nbviewer.jupyter.org/github/sebp/survival-cnn-estimator/blob/master/tutorial_tf2.ipynb

In [None]:
# Small CNN for the tutorial section: two convolution + max-pooling stages
# followed by three dense layers, ending in a single linear output unit.
model = Sequential()

# Convolutional feature extractor
model.add(Conv2D(filters=6, kernel_size=5, activation='relu', name='conv_1'))
model.add(MaxPool2D(pool_size=2))
model.add(Conv2D(filters=16, kernel_size=5, activation='relu', name='conv_2'))
model.add(MaxPool2D(pool_size=2))

# Dense head
model.add(Flatten())
model.add(Dense(units=120, activation='relu', name='dense_1'))
model.add(Dense(units=84, activation='relu', name='dense_2'))
model.add(Dense(units=1, activation='linear', name='dense_3'))

In [None]:
# Build the training input function (shuffled, with drop_last enabled).
# NOTE(review): InputFunction, x_train, time_train and event_train are not defined
# under these names in any cell visible here. InputFunction presumably lives in
# Polsterl_tutorial, which is imported only as a module - confirm where these names
# come from, otherwise this cell raises NameError on a fresh kernel.
train_fn = InputFunction(x_train, time_train, event_train, drop_last = True, shuffle=True)

## DeepConvSurv 
(Zhu, Yao, & Huang, 2016)

https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=7822579&casa_token=gVFzncUVfTgAAAAA:hGgPWCTzS2pUnsFG8xEZzZe7lAIupB_Z7SkKDnFJbilFmX1W6Ge5qCipPjiqCynL1lfSs64bVV4

In [None]:
# DeepConvSurv-style CNN built with the Keras functional API.
# Input: one single-channel image of imdim_for_network x imdim_for_network pixels.
img_in = Input(shape=(imdim_for_network, imdim_for_network, 1))

# Stage 1: 32 filters with a 686x686 kernel, then 2x2 max pooling.
network = Conv2D(32, (686, 686), padding='valid', activation='relu')(img_in)
network = MaxPool2D((2, 2))(network)

# Stage 2: two stacked convolutions (88x88 then 43x43 kernels), then 2x2 max pooling.
network = Conv2D(32, (88, 88), padding='valid', activation='relu')(network)
network = Conv2D(32, (43, 43), padding='valid', activation='relu')(network)
network = MaxPool2D((2, 2))(network)

# Head: flatten, 32-unit dense layer, single output with exponential activation.
network = Flatten()(network)
network = Dense(units=32, activation='relu')(network)
output = Dense(units=1, activation='exponential')(network)

model = Model(inputs=img_in, outputs=output, name="deepconvsurv")

# Compiled with a loss only; no metrics are tracked during training.
# NOTE(review): binary cross-entropy is paired with an exponential output while the
# training cell fits against train_labels (taken from rfs_time) - confirm this is
# the intended survival loss.
model.compile(loss='binary_crossentropy', optimizer=Adam())
model.summary()

## Model Training

In [None]:
# Training hyper-parameters
epochs = 10
batch_size = 32

print(f"Training model: {model.name}")

# Fit the current model on the training slices; 15% of the training data is
# held out for validation. The returned History object keeps the per-epoch values.
history = model.fit(
    train_slices,
    train_labels,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.15,
    verbose=1,
)

In [None]:
# Plot a per-epoch training curve (training vs. validation).
# The deepconvsurv model is compiled without `metrics`, so history.history only
# contains 'loss' and 'val_loss'; indexing 'auc' directly raises a KeyError.
# Plot AUC when it was tracked and fall back to the loss otherwise.
metric = 'auc' if 'auc' in history.history else 'loss'
metric_label = 'AUC' if metric == 'auc' else metric

plt.plot(history.history[metric])
plt.plot(history.history['val_' + metric])
plt.title('model ' + metric_label + ' - ' + model.name)
plt.ylabel(metric_label)
plt.xlabel('epoch')
# The second curve comes from validation_split, not from the held-out test slices
plt.legend(['train', 'validation'], loc='upper left')
plt.show()