In [None]:
import os
import numpy as np
import tensorflow as tf
import random
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import tensorflow.keras.backend as K

In [None]:
# Mount Google Drive at /content/drive; the dataset archive is read from there
# and the trained model is written back to it later in the notebook.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Shell command: extract the ADNI archive from the mounted Drive into /content.
!unzip "/content/drive/MyDrive/ColabNotebooks/ADNI.zip" -d "/content"

In [None]:
def image_list(path_folder_images):
    """List the entries of a folder in sorted order and report how many there are.

    Args:
        path_folder_images: Path of the folder whose entries are listed.

    Returns:
        Tuple ``(names, total)``: the sorted entry names of the folder and
        their number. The number is also printed.
    """
    names = os.listdir(path_folder_images)
    names.sort()
    total = len(names)
    print("Number of images: " + str(total))
    return names, total

In [None]:
def split_data(list_image, split_ratio):
    """Split a list of image names into a training and a validation part.

    The split is positional: the leading ``split_ratio`` share of
    ``list_image`` becomes the training part and the remainder the validation
    part, so the order of ``list_image`` decides which entry lands where.

    Args:
        list_image: Sequence of image file names.
        split_ratio: Fraction of the entries used for training, within [0, 1].

    Returns:
        Tuple ``(train_data, valid_data, number_train, number_valid)``.

    Raises:
        ValueError: If ``split_ratio`` lies outside [0, 1].
    """
    if not 0 <= split_ratio <= 1:
        raise ValueError("split_ratio must be between 0 and 1, got " + str(split_ratio))

    train_number_split = int(len(list_image) * split_ratio)
    train_data = list_image[:train_number_split]  # leading part -> training
    valid_data = list_image[train_number_split:]  # trailing part -> validation
    number_train = len(train_data)
    number_valid = len(valid_data)

    # Only the sizes are reported; printing the complete file lists floods the
    # notebook output. The message is class-neutral because the function is
    # called for more than one class.
    print("Number of training images: " + str(number_train))
    print("Number of validation images: " + str(number_valid))
    return train_data, valid_data, number_train, number_valid

In [None]:
def load_images(path_folder_images, chunk_images, height, width):
    """Load a chunk of images as one normalised numpy array.

    Each image is read in grayscale mode, resized to ``(height, width)``,
    converted to an array and divided by 255.

    Args:
        path_folder_images: Folder that contains the image files. A trailing
            path separator is optional.
        chunk_images: Iterable of file names inside ``path_folder_images``.
        height: Target image height passed to the loader.
        width: Target image width passed to the loader.

    Returns:
        ``np.ndarray`` with one entry per file name, in the order of
        ``chunk_images`` (presumably of shape (n, height, width, 1) for
        grayscale input -- TODO confirm against the Keras loader).
    """
    image_array = []
    for image_name in chunk_images:
        # os.path.join works with and without a trailing separator on the folder
        path_image = os.path.join(path_folder_images, image_name)
        img = tf.keras.utils.load_img(path_image, target_size=(height, width), color_mode="grayscale")
        img = tf.keras.utils.img_to_array(img)
        image_array.append(img / 255)  # scale by the 8-bit maximum

    return np.array(image_array)

In [None]:
def load_images_train_generator(path_folder_images1, path_folder_images2,  list_images1, list_images2, number_images1, number_images2, height, width, batch_size):
    """Endlessly yield shuffled batches of labelled image pairs for training.

    For every position of a batch one anchor image of each class is combined
    with one randomly drawn partner image of each class, giving four pairs:
    class1/class1 (label 1), class1/class2 (label 0), class2/class2 (label 1)
    and class2/class1 (label 0). In this notebook class 1 is AD and class 2 is
    NC. The pairs of a batch are shuffled before they are yielded.

    Args:
        path_folder_images1: Folder with the images of the first class.
        path_folder_images2: Folder with the images of the second class.
        list_images1: File names of the first class. Not modified.
        list_images2: File names of the second class. Not modified.
        number_images1: Unused; kept so existing calls keep working.
        number_images2: Unused; kept so existing calls keep working.
        height: Target image height.
        width: Target image width.
        batch_size: Number of anchor images per class and batch; a batch
            holds up to ``4 * batch_size`` pairs.

    Yields:
        ``([image1_array, image2_array], label_array)`` where both image
        arrays and the label array have one entry per pair.

    Raises:
        ValueError: If one of the name lists is empty or ``batch_size`` is
            smaller than 1 (the generator could never yield a batch).
    """
    if len(list_images1) == 0 or len(list_images2) == 0:
        raise ValueError("list_images1 and list_images2 must both contain at least one image name")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Shuffle private copies so the caller's lists keep their order.
    names_first = list(list_images1)
    names_second = list(list_images2)

    while True:
        random.shuffle(names_first)
        random.shuffle(names_second)
        for start in range(0, len(names_first), batch_size):
            chunk_first = names_first[start:start + batch_size]
            chunk_second = names_second[start:start + batch_size]

            # Anchors are paired position by position, so only as many as the
            # shorter chunk provides can be used; skip the batch if the second
            # list is already exhausted.
            pair_count = min(len(chunk_first), len(chunk_second))
            if pair_count == 0:
                continue

            # One random partner image per anchor position and class.
            partners_first = [random.choice(names_first) for _ in range(pair_count)]
            partners_second = [random.choice(names_second) for _ in range(pair_count)]

            anchors_ad = load_images(path_folder_images1, chunk_first[:pair_count], height, width)
            anchors_nc = load_images(path_folder_images2, chunk_second[:pair_count], height, width)
            partners_ad = load_images(path_folder_images1, partners_first, height, width)
            partners_nc = load_images(path_folder_images2, partners_second, height, width)

            image1_list = []
            image2_list = []
            label_list = []
            for ad1_data, nc1_data, ad2_data, nc2_data in zip(anchors_ad, anchors_nc, partners_ad, partners_nc):
                # same class -> label 1, different class -> label 0
                image1_list += [ad1_data, ad1_data, nc1_data, nc1_data]
                image2_list += [ad2_data, nc2_data, nc2_data, ad2_data]
                label_list += [1, 0, 1, 0]

            # Shuffle the pairs together with their labels.
            order = list(range(len(label_list)))
            random.shuffle(order)
            image1_array = np.array(image1_list)[order]
            image2_array = np.array(image2_list)[order]
            label_array = np.array(label_list)[order]
            yield [image1_array, image2_array], label_array

In [None]:
def valid_order(valid_data, seed=None):
    """Return the indices ``0 .. len(valid_data) - 1`` in random order.

    The validation/test generator uses this order to pick the second image of
    each pair, so the pairs stay the same every time the data is iterated.

    Args:
        valid_data: Sequence whose positions are permuted; only its length
            is used.
        seed: Optional seed. If given, the order is reproducible and the
            global ``random`` state is left untouched; if ``None`` the global
            ``random`` generator is used.

    Returns:
        List of shuffled indices.
    """
    data_order = list(range(len(valid_data)))
    if seed is None:
        random.shuffle(data_order)
    else:
        random.Random(seed).shuffle(data_order)
    return data_order

In [None]:
def load_images_valid_generator(path_folder_images1, path_folder_images2,  list_images1, list_images2, number_images1, number_images2, shuffel_list1, shuffel_list2, height, width, batch_size):
    """Endlessly yield batches of labelled image pairs in a fixed order.

    Works like the training generator but without randomness: the anchor
    images are taken in list order and the partner of position ``i`` is the
    image at index ``shuffel_list1[i]`` (first class) or ``shuffel_list2[i]``
    (second class). Per position four pairs are built: class1/class1
    (label 1), class1/class2 (label 0), class2/class2 (label 1) and
    class2/class1 (label 0).

    Args:
        path_folder_images1: Folder with the images of the first class.
        path_folder_images2: Folder with the images of the second class.
        list_images1: File names of the first class.
        list_images2: File names of the second class.
        number_images1: Unused; kept so existing calls keep working.
        number_images2: Unused; kept so existing calls keep working.
        shuffel_list1: Partner indices into ``list_images1``.
        shuffel_list2: Partner indices into ``list_images2``.
        height: Target image height.
        width: Target image width.
        batch_size: Number of anchor images per class and batch; a batch
            holds up to ``4 * batch_size`` pairs.

    Yields:
        ``([image1_array, image2_array], label_array)`` with one entry per pair.

    Raises:
        ValueError: If one of the four lists is empty or ``batch_size`` is
            smaller than 1 (the generator could never yield a batch).
    """
    if min(len(list_images1), len(list_images2), len(shuffel_list1), len(shuffel_list2)) == 0:
        raise ValueError("image name lists and index lists must all contain at least one entry")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    while True:
        for start in range(0, len(list_images1), batch_size):
            stop = start + batch_size
            chunk_AD = list_images1[start:stop]
            chunk_NC = list_images2[start:stop]
            indexAD = shuffel_list1[start:stop]
            indexNC = shuffel_list2[start:stop]

            # Pairs are built position by position, so the shortest of the four
            # slices limits the batch; skip it when one of them is exhausted
            # instead of yielding an empty batch.
            pair_count = min(len(chunk_AD), len(chunk_NC), len(indexAD), len(indexNC))
            if pair_count == 0:
                continue

            anchors_ad = load_images(path_folder_images1, chunk_AD[:pair_count], height, width)
            anchors_nc = load_images(path_folder_images2, chunk_NC[:pair_count], height, width)
            partners_ad = load_images(path_folder_images1, [list_images1[k] for k in indexAD[:pair_count]], height, width)
            partners_nc = load_images(path_folder_images2, [list_images2[k] for k in indexNC[:pair_count]], height, width)

            image1_list = []
            image2_list = []
            label_list = []
            for ad1_data, nc1_data, ad2_data, nc2_data in zip(anchors_ad, anchors_nc, partners_ad, partners_nc):
                # same class -> label 1, different class -> label 0
                image1_list += [ad1_data, ad1_data, nc1_data, nc1_data]
                image2_list += [ad2_data, nc2_data, nc2_data, ad2_data]
                label_list += [1, 0, 1, 0]

            image1_array = np.array(image1_list)
            image2_array = np.array(image2_list)
            label_array = np.array(label_list)

            yield [image1_array, image2_array], label_array

In [None]:
#Adapted from the Keras example
#[Image similarity estimation using a Siamese Network with a contrastive loss]
#https://keras.io/examples/vision/siamese_contrastive/
def contrastive_loss(y_true_label, y_pred_label, margin=1.0):
    """Contrastive loss averaged over all pairs.

    Per pair the loss is ``label * max(margin - pred, 0)**2 +
    (1 - label) * pred**2``: pairs labelled 1 are penalised for predictions
    below ``margin``, pairs labelled 0 for predictions away from 0.

    Args:
        y_true_label: Pair labels; cast to float32 before use.
        y_pred_label: Model output for each pair.
        margin: Value that predictions of label-1 pairs should reach.

    Returns:
        Mean loss over all entries.
    """
    labels = tf.cast(y_true_label, dtype=tf.float32)
    shortfall = tf.maximum(margin - y_pred_label, 0)
    per_pair = labels * tf.square(shortfall) + (1 - labels) * tf.square(y_pred_label)
    return tf.reduce_mean(per_pair)

In [None]:
def siamese_network(height,width,dimension):
    """Build and compile the Siamese network.

    Both inputs run through the same VGG16-style convolutional encoder
    (shared weights) that ends in a 512-unit sigmoid dense layer. The
    element-wise absolute difference of the two feature vectors feeds a
    single sigmoid unit, which is the model output. The model summary is
    printed and the model is compiled with ``contrastive_loss``, Adam
    (learning rate 0.0001) and the accuracy metric.

    Args:
        height: Input image height.
        width: Input image width.
        dimension: Number of input channels.

    Returns:
        The compiled ``tf.keras.Model`` with two inputs and one output.
    """
    # two inputs of identical shape, one per branch
    input_shape = (height, width, dimension)
    left_input = layers.Input(input_shape)
    right_input = layers.Input(input_shape)

    # VGG16 convolutional stages as (filters, number of conv layers);
    # every stage is closed by a 2x2 max pooling
    stage_layout = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
    encoder_layers = []
    for stage_filters, conv_count in stage_layout:
        for _ in range(conv_count):
            encoder_layers.append(
                layers.Conv2D(filters=stage_filters, kernel_size=3, padding='same', activation='relu'))
        encoder_layers.append(layers.MaxPool2D(pool_size=2, strides=2, padding='same'))

    # the dense layers of VGG16 are replaced by one 512-unit feature layer
    encoder_layers.append(layers.Flatten())
    encoder_layers.append(layers.Dense(512, activation='sigmoid'))
    vgg16 = tf.keras.Sequential(encoder_layers)

    # the same encoder instance processes both inputs
    feature_vector_left_output = vgg16(left_input)
    feature_vector_right_output = vgg16(right_input)

    # element-wise absolute difference of the two feature vectors
    distance_layer = layers.Lambda(lambda pair: tf.abs(pair[0] - pair[1]))(
        [feature_vector_left_output, feature_vector_right_output])

    # single sigmoid unit on top of the distance vector
    output = layers.Dense(1, activation='sigmoid')(distance_layer)
    model = tf.keras.Model(inputs=[left_input, right_input], outputs=output)

    model.summary()

    # configure loss function, optimizer and metrics for training
    model.compile(
        loss=contrastive_loss,
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
        metrics=['accuracy'],
    )

    return model

In [None]:
#define variables: input image size, number of channels and anchor images per class and batch
height = 128
width = 128
dimension = 1
batch_size = 32

#define paths of the extracted dataset (one folder per split and class; trailing slash included)

path_train_images_AD = "/content/ADNI/AD_NC/train/AD/"
path_train_images_NC = "/content/ADNI/AD_NC/train/NC/"

path_test_images_AD = "/content/ADNI/AD_NC/test/AD/"
path_test_images_NC = "/content/ADNI/AD_NC/test/NC/"

In [None]:
#list the sorted file names of the train and test folders of both classes
#image_list() also prints the number of entries of each folder
list_train_AD, number_AD = image_list(path_train_images_AD)
list_train_NC, number_NC = image_list(path_train_images_NC)

list_test_AD, number_test_AD = image_list(path_test_images_AD)
list_test_NC, number_test_NC = image_list(path_test_images_NC)

In [None]:
#split the training file names of each class into a training and a validation part
#the first 80 % of each sorted list are used for training, the rest for validation
split_ratio_training = 0.8
train_data_AD, valid_data_AD, number_train_AD, number_valid_AD = split_data(list_train_AD, split_ratio_training)
train_data_NC, valid_data_NC, number_train_NC, number_valid_NC = split_data(list_train_NC, split_ratio_training)

In [None]:
#draw one shuffled index order per class
#the validation generator uses it to pick the second image of every pair
valid_data_order_AD = valid_order(valid_data_AD)
valid_data_order_NC = valid_order(valid_data_NC)

In [None]:
#create the data generators; images are read from disk batch by batch while iterating

#NOTE(review): these two lists are not used anywhere in the visible notebook
train_images_AD = []
train_images_NC = []

#training generator (random pairs) and validation generator (fixed pairs),
#both reading from the train folders but from disjoint file name lists
train_images = load_images_train_generator(path_train_images_AD, path_train_images_NC, train_data_AD, train_data_NC, number_train_AD, number_train_NC, height, width, batch_size)
valid_images = load_images_valid_generator(path_train_images_AD, path_train_images_NC, valid_data_AD, valid_data_NC, number_valid_AD, number_valid_NC, valid_data_order_AD, valid_data_order_NC, height, width, batch_size)

In [None]:
#define callbacks: stop when val_loss has not improved for 6 epochs (checked from epoch 5 on)
#and restore the weights of the best epoch
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss",min_delta=0,patience=6,verbose=0,mode="auto",baseline=None,restore_best_weights=True,start_from_epoch=5)
#define model
siamese_model = siamese_network(height,width,dimension)
#training of the neural network
#the generators never end, so the number of batches per epoch is given explicitly;
#the integer division drops a final partial batch
history = siamese_model.fit(x=train_images,
                            validation_data = valid_images,
                            steps_per_epoch = len(train_data_AD)//batch_size,
                            validation_steps = len(valid_data_AD)//batch_size,
                            shuffle = False, epochs=50, callbacks=[callback])

In [None]:
# Per-epoch training and validation curves recorded by fit()
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
# One x position per trained epoch
epochs = range(len(acc))

# One figure per quantity: accuracy first, then loss
curve_sets = (
    (acc, val_acc, 'Training and validation accuracy', 'Accuracy'),
    (loss, val_loss, 'Training and validation loss', 'Loss'),
)
for train_curve, valid_curve, figure_title, y_label in curve_sets:
    plt.figure()
    plt.plot(epochs, train_curve)
    plt.plot(epochs, valid_curve)
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper right')
    plt.show()


In [None]:
#save the trained model as an .h5 file on the mounted Google Drive
siamese_model.save('/content/drive/MyDrive/ColabNotebooks/model.h5')

In [None]:
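#optional: load the saved model (architecture and weights) instead of training again
#the model was compiled with a custom loss, so that function has to be passed via custom_objects
#siamese_model = tf.keras.saving.load_model("/content/drive/MyDrive/ColabNotebooks/model.h5", custom_objects={"contrastive_loss": contrastive_loss})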

In [None]:
# evaluate the model on the validation dataset
# The number of steps is derived from the validation list (as in fit());
# using the training list length would cycle the endless validation generator
# several times over the same pairs.
metrics_valid = siamese_model.evaluate(valid_images, steps = len(valid_data_AD)//batch_size)
print('Loss of {} and Accuracy is {} %'.format(metrics_valid[0], metrics_valid[1] * 100))

In [None]:
# Test the model with the test dataset
#draw the shuffled index order that selects the second image of every test pair
test_data_order_AD = valid_order(list_test_AD)
test_data_order_NC = valid_order(list_test_NC)

#create the generator for the test images (same fixed-pair generator as used for validation)
test_images = load_images_valid_generator(path_test_images_AD, path_test_images_NC, list_test_AD, list_test_NC, number_test_AD, number_test_NC,test_data_order_AD, test_data_order_NC,height, width, batch_size= batch_size)

#evaluate the model on the test pairs; the integer division drops a final partial batch
metrics_test = siamese_model.evaluate(test_images,steps = len(list_test_AD)//batch_size)
print('Loss of {} and Accuracy is {} %'.format(metrics_test[0], metrics_test[1] * 100))

In [None]:
#build a second, differently ordered copy of each test list (AD2 / NC2):
#the first entry is moved to the end and the remaining entries are shuffled with a fixed seed
list_test_AD2 = list_test_AD[1:number_test_AD]
list_test_NC2 = list_test_NC[1:number_test_NC]
random.Random(42).shuffle(list_test_AD2)
random.Random(42).shuffle(list_test_NC2)
list_test_AD2.append(list_test_AD[0])
list_test_NC2.append(list_test_NC[0])

#running totals over all scored pairs
number_of_correct = 0
number_of_incorrect = 0

#only complete batches are evaluated; the shorter class list limits the range
#so that every slice below really holds batch_size images
number_of_usable_images = min(number_test_AD, number_test_NC) // batch_size * batch_size

#test loop for accuracy
for a in range(0, number_of_usable_images, batch_size):
    #1 load batch_size images from each of the four lists
    test_image_AD1 = load_images(path_test_images_AD, list_test_AD[a:a+batch_size], height, width)
    test_image_NC1 = load_images(path_test_images_NC, list_test_NC[a:a+batch_size], height, width)
    test_image_AD2 = load_images(path_test_images_AD, list_test_AD2[a:a+batch_size], height, width)
    test_image_NC2 = load_images(path_test_images_NC, list_test_NC2[a:a+batch_size], height, width)

    #2 create four image pairs per position:
    #AD1/AD2 (label 1), AD1/NC1 (label 0), NC1/NC2 (label 1), NC2/AD2 (label 0)
    testimage1_list = []
    testimage2_list = []
    label_test_list = []
    for ad1, ad2, nc1, nc2 in zip(test_image_AD1, test_image_AD2, test_image_NC1, test_image_NC2):
        testimage1_list += [ad1, ad1, nc1, nc2]
        testimage2_list += [ad2, nc1, nc2, ad2]
        label_test_list += [1, 0, 1, 0]

    testimage1_list_array = np.array(testimage1_list)
    testimage2_list_array = np.array(testimage2_list)
    label_array = np.array(label_test_list)

    #3 make prediction
    test_pair = [testimage1_list_array, testimage2_list_array]
    prediction = siamese_model.predict(test_pair, steps = 1)

    #4 scores of at least 0.5 are classified as "same class" (1), everything else as 0
    prediction_label = (prediction[:,0] >= 0.50).astype(int)

    #5 compare prediction label with correct label
    is_correct = prediction_label == label_array
    text_prediction = ["correct" if hit else "incorrect" for hit in is_correct]
    number_of_correct = number_of_correct + int(is_correct.sum())
    number_of_incorrect = number_of_incorrect + int((~is_correct).sum())

    #short progress line instead of dumping file names and raw predictions
    print("images %d-%d: %d correct / %d incorrect so far" % (a, a + batch_size - 1, number_of_correct, number_of_incorrect))

In [None]:
#calculate the accuracy of the test result
#Divide by the pairs that were actually scored: the test loop only processes
#complete batches, so len(list_test_AD)*4 would count pairs that were never
#predicted and understate the accuracy.
number_of_scored_pairs = number_of_correct + number_of_incorrect
accuray_test = number_of_correct / number_of_scored_pairs if number_of_scored_pairs else float("nan")
print("The overall accuracy of the saimese network is %.2f."% (accuray_test))

In [None]:
#show the first ten image pairs of the last evaluated batch, each with predicted and actual label
for pair_index in range(10):
    titel = "The prediciton of the saimese network is %0.f and the actual label is %.0f. \nAs a result the prediciton is %s."% (
        prediction_label[pair_index], label_array[pair_index], text_prediction[pair_index])
    fig = plt.figure()
    plt.suptitle(titel)
    #left: first image of the pair, right: second image of the pair
    ax1 = fig.add_subplot(2, 2, 1)
    ax1.imshow(testimage1_list_array[pair_index, :, :, :])
    ax2 = fig.add_subplot(2, 2, 2)
    ax2.imshow(testimage2_list_array[pair_index, :, :, :])