In [1]:
# Import data from Excel sheet
import pandas as pd
# Read the 'sample' sheet of the ADNI workbook and pull out the covariate columns.
df = pd.read_excel('ADNI combined.xlsx', sheet_name='sample')
#print(df)
sid, grp, age, sex, tiv, field = (
    df[column_name]
    for column_name in (
        'RID',
        'Group at scan date (1=CN, 2=EMCI, 3=LMCI, 4=AD, 5=SMC)',
        'Age at scan',
        'Sex (1=female)',
        'TIV',
        'MRI_Field_Strength',
    )
)
# Binary label: group code 1 is CN, so every code above 1 becomes True.
grpbin = grp.gt(1)

In [2]:
# Scan for nifti file names
import glob
# Collect the file names of each diagnostic group, each list in sorted order.
dataAD = glob.glob('mwp1_MNI/AD/*.nii.gz')
dataAD.sort()
dataLMCI = glob.glob('mwp1_MNI/LMCI/*.nii.gz')
dataLMCI.sort()
dataCN = glob.glob('mwp1_MNI/CN/*.nii.gz')
dataCN.sort()

# Concatenation order (AD, LMCI, CN) defines the file index used by later cells.
dataFiles = [*dataAD, *dataLMCI, *dataCN]
numfiles = len(dataFiles)
print('Found ', str(numfiles), ' nifti files')

Found  663  nifti files


In [3]:
import re
# Map every scan file to the Excel row that holds its covariates.
debug = False
# One slot per scan file; -1 marks "no Excel row matched (yet)".
cov_idx = [-1] * numfiles # list; array: np.full((numfiles, 1), -1, dtype=int)
print('Matching covariates for loaded files ...')
for row, rid in enumerate(sid):
    # The pattern looks for the subject ID as a zero-padded four-digit token between underscores.
    # Compile once per subject instead of re-parsing the pattern for every file.
    id_pattern = re.compile('_%04d_' % rid)
    p = [j for j, x in enumerate(dataFiles) if id_pattern.search(x)] # indices of files carrying this ID
    if len(p)==0:
        if debug: print('Did not find %04d' % rid) # did not find Excel sheet subject ID in loaded file selection
    else:
        if debug: print('Found %04d in %s: %s' % (rid, p[0], dataFiles[p[0]]))
        cov_idx[p[0]] = row # store Excel row index for data file index p[0] (first match only)

num_unmatched = sum(x<0 for x in cov_idx)
print('Checking for scans not found in Excel sheet: ', num_unmatched)
if num_unmatched:
    # A remaining -1 would be read by .iloc as "last row" and silently attach
    # another subject's covariates to the scan, so stop here instead.
    unmatched_files = [dataFiles[j] for j, x in enumerate(cov_idx) if x < 0]
    raise ValueError('No Excel row matched %d scan file(s): %s' % (num_unmatched, unmatched_files))

# Reorder the covariates so that row n belongs to dataFiles[n].
labels = pd.DataFrame({'Group':grpbin}).iloc[cov_idx, :]
grps = pd.DataFrame({'Group':grp, 'RID':sid}).iloc[cov_idx, :]

Matching covariates for loaded files ...
Checking for scans not found in Excel sheet:  0


In [4]:
#Load residualized data from disk
import h5py
import numpy as np
# Read both datasets fully into memory. The context manager closes the file
# even when a read fails, and indexing with [] raises KeyError for a missing
# dataset instead of silently yielding np.array(None).
with h5py.File('residuals_wb_mwp1_MNI.hdf5', 'r') as hf:
    labels = np.array(hf['labels']) # note: was of data frame type before
    images = np.array(hf['images'])

In [5]:
import tensorflow as tf
print(tf.__version__)
# Disable the 'tensorflow' logger (intended to hide the deprecation warnings).
import logging
logging.getLogger('tensorflow').disabled = True
from keras.backend.tensorflow_backend import set_session

# GPU options: memory fraction cap of 1 plus on-demand growth of the allocation.
# (device_count = {'GPU': 1} was left disabled in the original configuration.)
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=1, allow_growth=True)
)
# Register a session built from this configuration as the Keras backend session.
session = tf.Session(config=config)
set_session(session)

1.15.0


Using TensorFlow backend.


In [6]:
import nibabel as nib

# define FOV to reduce required memory size
x_range_from = 10; x_range_to = 110
y_range_from = 10; y_range_to = 130
z_range_from = 5; z_range_to = 105

# Mask files in sorted order; the position in this list is the first index of hippo_overlay.
data_overlay = sorted(glob.glob('Hippocampus_masks/Hipp*_HarvOx_GMmasked0.5_MNI.nii*'))
hippo_overlay = np.zeros((len(data_overlay), z_range_to-z_range_from, x_range_to-x_range_from, y_range_to-y_range_from, 1), dtype=np.float32) # numfiles× z × x × y ×1; avoid 64bit types

for i, overlay_path in enumerate(data_overlay):
    img = nib.load(overlay_path)
    # get_data() is deprecated (and removed in nibabel 5); np.asanyarray(img.dataobj)
    # is the replacement nibabel documents for obtaining the same array.
    img = np.asanyarray(img.dataobj)[x_range_from:x_range_to, y_range_from:y_range_to, z_range_from:z_range_to]
    img = np.transpose(img, (2, 0, 1)) # reorder dimensions to match coronal view z*x*y in MRIcron etc.
    img = np.flip(img) # flip all positions (no axis given: every axis is reversed)
    hippo_overlay[i, :,:,:, 0] = np.nan_to_num(img) # NaNs become 0 before storing as float32

In [7]:
# Check which mask file sits at which position of the sorted `data_overlay` list.
# A later cell relies on index 0 = both, 1 = left and 2 = right hippocampus;
# only index 2 is displayed here.
data_overlay[2]

'Hippocampus_masks\\HippR_HarvOx_GMmasked0.5_MNI.nii'

In [8]:
# Split the stacked overlays into one 3-D mask each, dropping the trailing
# singleton axis. The index-to-region mapping follows the sorted file list
# (NOTE(review): only index 2 = right is confirmed by the displayed file name).
hippo_both = hippo_overlay[0, ..., 0]
hippo_left = hippo_overlay[1, ..., 0]
hippo_right = hippo_overlay[2, ..., 0]

In [9]:
# Load CNN model from disk
import csv

#!pip install innvestigate
import innvestigate
import innvestigate.utils as iutils
import numpy as np
import scipy
import scipy.ndimage
from keras.models import load_model, Model
from matplotlib import pyplot as plt


# see https://github.com/albermax/innvestigate/blob/master/examples/notebooks/imagenet_compare_methods.ipynb for a list of alternative methods
# Each entry is a tuple: (method name, analyzer keyword arguments, display label).
# Alternatives that are currently switched off:
#   ("deconvnet",            {},                      "Deconvnet")
#   ("guided_backprop",      {},                      "Guided Backprop")
#   ("deep_taylor.bounded",  {"low": -1, "high": 1},  "DeepTaylor")
#   ("input_t_gradient",     {},                      "Input * Gradient")
#   ("lrp.z",                {},                      "LRP-Z")
#   ("lrp.epsilon",          {"epsilon": 1},          "LRP-epsilon")
methods = [
    ("lrp.alpha_1_beta_0", dict(neuron_selection_mode="index"), "LRP-alpha1beta0"),
]


# For every cross-validation model: compute a relevance map per subject and
# write the relevance summed inside the right / left / both hippocampus masks
# to one CSV file per fold.

# Subject IDs in scan order, converted once because `grps` does not change
# inside the loops. The builtin `int` replaces `np.int`, which was only an
# alias for it and was removed in NumPy 1.24.
subject_ids = grps.RID.to_numpy(dtype=int)

for k in range(20): #range(17,19):
    mymodel = load_model('newmodel/newmodel_wb_cv%d.hdf5' % (k+1))
    mymodel.name = 'newmodel_wb_cv%d_orig' % (k+1)
    # iutils.keras.graph.model_wo_softmax(mymodel) sometimes raises
    # 'ValueError: The name "dense_1" is used 2 times in the model. All layer names should be unique.'
    # Instead, the last layer's activation is set to linear and the model is
    # saved and reloaded (presumably so the rebuilt model actually uses it).
    mymodel.layers[-1].activation=tf.keras.activations.linear
    mymodel.save('tmp_wo_softmax.hdf5')
    model_wo_softmax = load_model('tmp_wo_softmax.hdf5')
    if (k==0):
        model_wo_softmax.summary()

    # create analyzer
    analyzers = []  # stays empty: only the `analyzer` created last in the loop below is used
    for method in methods:
        analyzer = innvestigate.create_analyzer(method[0], model_wo_softmax, **method[1])
        # Some analyzers require training (analyzer.fit(...)); not needed for the active method.

    # The with-statement closes the file on exit, so no explicit close() is needed.
    with open(('hipp_act_newmodel_cv%d.csv' % (k+1)), 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow(["subject_ID", "Sum_activation_of_right_hippocampal_volume", "Sum_activation_of_left_hippocampal_volume", "Sum_activation_of_both_hippocampal_volume"])
        if (k==7):
            # Fold 8 is additionally echoed to the notebook output.
            print('hipp_act_newmodel_cv%d.csv' % (k+1))
            print("subject_ID Sum_activation_of_right_hippocampal_volume Sum_activation_of_left_hippocampal_volume Sum_activation_of_both_hippocampal_volume ")

        for indx in range(len(grps)):
            test_img = images[indx]
            test_img = np.reshape(test_img, (1,)+ test_img.shape) # add first subj index again to mimic original array structure

            # Relevance for output neuron 1 (per the original notes the second
            # output column is MCI/AD - TODO confirm), reshaped to the 3-D volume.
            a = np.reshape(analyzer.analyze(test_img, neuron_selection=1), test_img.shape[1:4])
            np.clip(a, a_min=0, a_max=None, out=a) # keep positive relevance only
            # scipy.ndimage.filters is a deprecated namespace; call the public function directly.
            a = scipy.ndimage.gaussian_filter(a, sigma=0.8) # smooth activity image
            scale = np.quantile(a, 0.99) # no need for abs(a)
            if scale!=0:  # ignore if relevance maps contains only zeros, output will be zero as well
                a = (a/scale)

            # Weight the relevance map with each hippocampus mask.
            overlay_act_both = hippo_both * a
            overlay_act_l = hippo_left * a
            overlay_act_r = hippo_right * a

            row = [subject_ids[indx], np.sum(overlay_act_r), np.sum(overlay_act_l), np.sum(overlay_act_both)]
            csvwriter.writerow(row)
            if (k==7):
                print(*row)