In [1]:
# This is a sample notebook that shows the basic premise of how
# a model can combine both imaging and covariate features. 
#
# Due to the sample nature, none of the models are trained and a
# find prediction cannot be shown and would not be accurate at all. 

In [2]:
# Imports

import numpy as np
import os
import pandas as pd
from pathlib import Path
from sklearn.linear_model import LogisticRegression

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
from tensorflow.keras.optimizers import Adam

# Do not use a GPU
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [3]:
# Build simple data generator

# Additional patient features

covariate_columns = [
    'Age',
    'Sex_F', 
    'Sex_M',
    'APOE A1_2', 
    'APOE A1_3', 
    'APOE A1_4', 
    'APOE A2_2',
    'APOE A2_3', 
    'APOE A2_4', 
    'LEFT_HIPPOCAMPUS_VOLUME',
    'RIGHT_HIPPOCAMPUS_VOLUME', 
    'MMSE Total Score',
    'ADAS13',
    'AD', 
    'CN', 
    'EMCI', 
    'LMCI', 
    'MCI',
    'SMC'
]

class DataGenerator_Gaussian_Labels(tf.keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(
        self, 
        target, 
        batch_size, 
        dim,
        shuffle
    ):
        
        # Get data dir and subject ids
        self.data_dir = Path('data/matched_images/{}/mri_final/{}_input_{}x{}x{}_trimmed'.format(
            target, target, dim[0], dim[1], dim[2]
        ))
        self.subject_ids = [int(x.split('.npy')[0]) for x in os.listdir(self.data_dir) if '.npy' in x]
        
        # Get labels and covariates
        label_df = pd.read_csv('csv/generated/{}_complete_updated_gaussian.csv'.format(target))
        self.covariates = label_df[covariate_columns].T.to_dict('list')
        self.labels = label_df['A_GAUSSIAN_CLS'].to_dict()
    
        self.dim = dim
        self.batch_size = batch_size
    
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.subject_ids) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        temp_subject_ids = [self.subject_ids[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(temp_subject_ids)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.subject_ids))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, temp_subject_ids):
        # Initialization
        X_imgs = np.empty((self.batch_size, *self.dim))
        X_covariates = np.empty((self.batch_size, len(covariate_columns)), dtype=float)
        y = np.empty((self.batch_size), dtype=int)

        # Generate data
        for i, ID in enumerate(temp_subject_ids):
            img = np.load(self.data_dir / '{:04d}.npy'.format(ID))
            
            # Store sample
            X_imgs[i,] = img
            X_covariates[i] = self.covariates[ID]

            # Store class
            y[i] = self.labels[ID]

        return (X_imgs, X_covariates), y

In [4]:
# Create generator

train_gen = DataGenerator_Gaussian_Labels(
    target = 'A_train', 
    batch_size = 1, 
    dim = [182, 182, 218], 
    shuffle = True
)

In [5]:
# Define a model that returns features generated from image

def Image_Feature_Generator(width=182, height=182, depth=218, dropout=0.5, image_features=50):
    image_input = layers.Input((width, height, depth, 1))

    x = layers.Conv3D(filters=8, kernel_size=3, padding="same")(image_input)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    
    x = layers.Conv3D(filters=16, kernel_size=3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPool3D(pool_size=2)(x)

    x = layers.Conv3D(filters=32, kernel_size=3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPool3D(pool_size=2)(x)

    x = layers.Conv3D(filters=64, kernel_size=3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPool3D(pool_size=2)(x)

    x = layers.Conv3D(filters=128, kernel_size=3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    
    x = layers.Flatten()(x)
    x = layers.Dropout(dropout)(x)
    
    x = layers.Dense(1300, activation='relu')(x)
    outputs = layers.Dense(image_features, activation='relu')(x)
    
    # Define the model.
    model = keras.Model(image_input, outputs)
    return model

In [6]:
# Create model

network = Image_Feature_Generator(
    dropout = 0.5, 
    image_features = 50
) 
network.compile(
    optimizer = Adam(learning_rate=3e-4),
    loss=tf.keras.losses.binary_crossentropy, 
    metrics=['binary_accuracy']
)

In [7]:
# Get sample image features

for x, y in train_gen:
    x_images, x_covariates = x
    
    image_features = network(x_images)
    
    break
  
image_features = tf.cast(tf.squeeze(image_features), tf.float64)

image_features

<tf.Tensor: shape=(50,), dtype=float64, numpy=
array([0.19271973, 0.10543033, 0.        , 0.        , 0.        ,
       0.        , 0.10589558, 0.        , 0.18437165, 0.38739026,
       0.        , 0.15440817, 0.        , 0.        , 0.        ,
       0.06293368, 0.        , 0.09729211, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.12967616, 0.13662846, 0.14129728, 0.        , 0.        ,
       0.07223156, 0.        , 0.        , 0.        , 0.11627224,
       0.        , 0.0899682 , 0.01199139, 0.        , 0.08200311,
       0.075515  , 0.        , 0.02645394, 0.15401304, 0.        ,
       0.        , 0.        , 0.02565606, 0.        , 0.09327351])>

In [8]:
# Display the covariate features
x_covariates = tf.squeeze(x_covariates)

x_covariates

<tf.Tensor: shape=(19,), dtype=float64, numpy=
array([0.38235294, 1.        , 0.        , 0.        , 1.        ,
       0.        , 0.        , 1.        , 0.        , 0.3664525 ,
       0.39123905, 0.91304348, 0.32394366, 0.        , 1.        ,
       0.        , 0.        , 0.        , 0.        ])>

In [9]:
# We can now take those image features and add them to the covariate data

image_plus_covariate_input = tf.concat([image_features, x_covariates], axis=0)

image_plus_covariate_input

<tf.Tensor: shape=(69,), dtype=float64, numpy=
array([0.19271973, 0.10543033, 0.        , 0.        , 0.        ,
       0.        , 0.10589558, 0.        , 0.18437165, 0.38739026,
       0.        , 0.15440817, 0.        , 0.        , 0.        ,
       0.06293368, 0.        , 0.09729211, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.12967616, 0.13662846, 0.14129728, 0.        , 0.        ,
       0.07223156, 0.        , 0.        , 0.        , 0.11627224,
       0.        , 0.0899682 , 0.01199139, 0.        , 0.08200311,
       0.075515  , 0.        , 0.02645394, 0.15401304, 0.        ,
       0.        , 0.        , 0.02565606, 0.        , 0.09327351,
       0.38235294, 1.        , 0.        , 0.        , 1.        ,
       0.        , 0.        , 1.        , 0.        , 0.3664525 ,
       0.39123905, 0.91304348, 0.32394366, 0.        , 1.        ,
       0.        , 0.        , 0.        , 0.        ])>

In [10]:
# Feed all of this data through a simple logistic regression model

# Define model
classifier = LogisticRegression()

# Predict
pred = classifier.predict(image_plus_covariate_input)

# Show prediction
pred

NotFittedError: This LogisticRegression instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.