## Sections:

1.   Import packages
2.   Load data
3.   Data preprocessing

    *   Standardization
    *   Resizing

4.   Data Augmentation

    *   Rotate 180°
  
5.   Neural Network
    
    *   AlexNet
    *   ResNet

6.   Running the code

# Import packages


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random 
from os import listdir
from os.path import isfile, join
import glob
from PIL import Image
from pathlib import Path
import tensorflow as tf
import tensorflow_hub as hub 
from tensorflow import keras
import time
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, ZeroPadding2D,\
     Flatten, BatchNormalization, AveragePooling2D, Dense, Activation, Add 
from tensorflow.keras.models import Model
from tensorflow.keras import activations
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
import datetime

In [None]:
# Mount Google Drive into the Colab runtime at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

# Load data


In [None]:
def open_image(file_name):
    """Read an image file and hand it back as a float32 tensor.

    input:   String of the full path of the image that is to be loaded
    output:  Tensor (float32) holding the pixel data of the loaded image
    """
    pixels = np.array(Image.open(file_name))
    return tf.convert_to_tensor(pixels, dtype=tf.float32)

In [None]:
def get_file_list(input_dir = '', test_predict = False):
    """Collect the image paths found below ``input_dir``.

    input:   String of the input dir which should contain the folders
             [Chondrocytes, Stemcells, Test]. A trailing path separator is optional.
    output:  A list of the full paths of the images inside the folders
    info:    If test_predict = True the (sorted) paths inside Test/Test-both are returned.
             Otherwise the Chondrocyte and Stemcell paths are merged and shuffled
             with the fixed seed 13.
    """
    if test_predict:
        return sorted(glob.glob(os.path.join(input_dir, 'Test', 'Test-both', '*')))

    file_list_all = (glob.glob(os.path.join(input_dir, 'Chondrocytes', '*'))
                     + glob.glob(os.path.join(input_dir, 'Stemcells', '*')))

    # glob yields entries in an arbitrary, file-system dependent order. Sorting
    # first makes the seeded shuffle below produce the same order everywhere.
    file_list_all.sort()

    np.random.seed(13)
    np.random.shuffle(file_list_all)

    return file_list_all

In [None]:
def compute_is_stemcell(filename):
    """Derive the class label of an image from its path.

    input:   String of the full path of an image
    output:  float32 tensor built from a one-element array: 1 if the path
             contains 'Stemcell', otherwise 0 (treated as Chondrocyte)
    """
    flag = 1 if 'Stemcell' in filename else 0
    return tf.convert_to_tensor(np.array([flag]), dtype = tf.float32)

# Data preprocessing

In [None]:
def data_preprocessing(data, data_standardization_switch = False, grayscale_switch = False, edge_detection_switch = False):
    """Run the enabled preprocessing steps on one image.

    input:   Tensor of the raw data of an image; Booleans selecting which
             preprocessing methods are to be applied to the data
    output:  Tensor of the preprocessed data of an image
    info:    Steps run in a fixed order: grayscale, edge detection, standardization.
             With all switches off the input is returned unchanged.
    """
    # Lambdas keep the helper lookup lazy: a helper is only resolved when its switch is on.
    steps = (
        (grayscale_switch, lambda d: rgb_to_grayscale(d)),
        (edge_detection_switch, lambda d: edge_detection(d)),
        (data_standardization_switch, lambda d: data_standardization(d)),
    )
    for enabled, apply_step in steps:
        if enabled:
            data = apply_step(data)

    return data

In [None]:
def edge_detection(data):
    """Apply TensorFlow's Sobel filter to one image.

    input:   Tensor of the data of an image
    output:  float32 tensor of the edge detection data of the image
    info:    Only index 0 of the last axis of the Sobel result is kept.
             NOTE(review): per the tf.image.sobel_edges docs that is one of the
             two gradient directions - confirm that a single direction is intended.
    """
    # sobel_edges expects a batch, so add a leading axis of size 1.
    batched = tf.convert_to_tensor(np.expand_dims(data.numpy(), axis=0))
    sobel = tf.image.sobel_edges(batched)
    return tf.convert_to_tensor(sobel[0, :, :, :, 0], dtype=tf.float32)

In [None]:
def data_standardization(data):
    """Standardize one image with tf.image.per_image_standardization.

    input:   Tensor of the data of an image
    output:  float32 tensor of the standardized data of the image
    """
    standardized = tf.image.per_image_standardization(data)
    return tf.convert_to_tensor(standardized, dtype=tf.float32)

In [None]:
def rgb_to_grayscale(data):
    """Convert an image to grayscale while keeping three channels.

    input:   Tensor of the data of an image
    output:  float32 tensor in which the grayscale plane is repeated
             three times along the last axis
    """
    gray_plane = tf.image.rgb_to_grayscale(data).numpy()[:, :, 0]
    # Replicate the single plane so the result keeps a 3-channel layout.
    three_channel = np.stack([gray_plane, gray_plane, gray_plane], axis=-1)
    return tf.convert_to_tensor(three_channel, dtype=tf.float32)

In [None]:
def print_preprocessing_progress(f, number_of_files):
    """Print the preprocessing progress (e.g. "Picture 4/19 processed.").

    input:   Integer of the current (zero-based) position in the loop over the
             file list; Integer of the number of files inside the file list
    output:  None
    """
    print(f'Picture {f + 1}/{number_of_files} processed.')

In [None]:
def resize_image(data, x_shape, y_shape):
    """Resize image data with tf.image.resize.

    input:   Tensor of the data of an image; desired shape of the image (x and y),
             passed to tf.image.resize as [x_shape, y_shape]
    output:  float32 tensor of the resized data
    """
    target_size = [x_shape, y_shape]
    resized = tf.image.resize(data, target_size)
    return tf.convert_to_tensor(resized, dtype=tf.float32)

In [None]:
def concatenate_data(dataset, image, label = None, augmented = False, test_predict = False):
    """Append the passed image(s) to the dataset collected so far.

    input:   tf.data.Dataset of the already preprocessed images (or a falsy value
             such as False when nothing has been collected yet); Tensor of the data
             of an image(s); Tensor of the label(s); Booleans describing the
             current loop iteration
    output:  tf.data.Dataset containing the previous elements plus the new one(s)
    info:    test_predict -> the whole `image` array is stored without a label.
             augmented    -> one (image[i], label[i]) element per entry of the first axis.
             otherwise    -> a single (image[0], label) element; note that the label
                             is stored whole here, not indexed as in the augmented case.
    """
    def _extend(current, piece):
        # A falsy `current` marks the very first element.
        return current.concatenate(piece) if current else piece

    if test_predict:
        return _extend(dataset, tf.data.Dataset.from_tensors((image.numpy())))

    if not augmented:
        single = tf.data.Dataset.from_tensors((image.numpy()[0], label.numpy()))
        return _extend(dataset, single)

    for idx in range(tf.shape(image)[0]):
        image_i = tf.convert_to_tensor(image, dtype = tf.float32).numpy()[idx]
        label_i = tf.convert_to_tensor(label, dtype = tf.float32).numpy()[idx]
        dataset = _extend(dataset, tf.data.Dataset.from_tensors((image_i, label_i)))

    return dataset

## Data augmentation


In [None]:
 def data_augmentation(data, label, progress,
                      rotate_180_switch = False, mirroring_switch = False, zoom_switch = False,
                      rotate_180_rate = 0.2, mirroring_rate = 0.2,
                      zoom_rate = 0.2, zoom_factor = 2):
  #input:   Tensor of the data of an image; Tensor of the label of this image; Float of the progress of the loop;
  #         Booleans containing the information on which data augmentation methods are to be applied to the data
  #         Floats contating the information of, on how much of the data the augmentation methods are to be applied.
  #output:  Tensor containing the data of the original image and the augmented image(s);
  #         Tensor containing the label of the original image and the augmented image(s).

    data_new = []
    label_new = []
    data_new.append(data)
    label_new.append(label)
    augmented = False
    
    if rotate_180_switch and (progress <= rotate_180_rate):
        data_rotated_180 = rotate_180(data)
        data_new.append(data_rotated_180)
        label_new.append(label)
        augmented = True
        
    if mirroring_switch and (progress <= mirroring_rate):
        data_mirrored = mirroring(data)
        data_new.append(data_mirrored)
        label_new.append(label)
        augmented = True
        
    if zoom_switch and (progress <= zoom_rate):
        data_zoomed = zoom(data, zoom_factor)
        for i in range(tf.shape(data_zoomed)[0]):
            data_new.append(data_zoomed[i])
            label_new.append(label)
        augmented = True
            
    return tf.convert_to_tensor(data_new, dtype = tf.float32), tf.convert_to_tensor(label_new, dtype = tf.float32), augmented

In [None]:
def zoom(data, zoom_factor = 2):
    """Cut an image into tiles and scale every tile back to the original size.

    input:   Tensor of the data of an image; zooming factor
             (e.g. =2 -> 4 images created; =4 -> 16 images created; =1.5 -> 1 image created)
    output:  float32 tensor of the data of the zoomed image(s), stacked along a new first axis
    info:    The number of tiles per axis is int(zoom_factor), so fractional factors
             are accepted; the tile size is always dimension / zoom_factor.
    """
    # Plain Python ints keep the index arithmetic independent of tensor dtypes.
    x_dim = int(tf.shape(data)[0])
    y_dim = int(tf.shape(data)[1])
    tiles_per_axis = int(zoom_factor)  # range() rejects floats such as 1.5
    data_zoomed = []

    for i in range(tiles_per_axis):
        for j in range(tiles_per_axis):
            snippet = data[int(i * x_dim / zoom_factor):int((i + 1) * x_dim / zoom_factor),
                           int(j * y_dim / zoom_factor):int((j + 1) * y_dim / zoom_factor)]
            data_zoomed.append(tf.image.resize(snippet, (x_dim, y_dim)))

    return tf.convert_to_tensor(data_zoomed, dtype = tf.float32)

In [None]:
def mirroring(data):
    """Flip an image left to right.

    input:   Tensor of the data of an image
    output:  float32 tensor of the data of the mirrored image
    """
    flipped = tf.image.flip_left_right(data)
    return tf.convert_to_tensor(flipped, dtype=tf.float32)

In [None]:
def rotate_180(data):
    """Rotate an image by 180 degrees (two 90 degree rotations).

    input:   Tensor of the data of an image
    output:  float32 tensor of the data of the rotated image
    """
    turned = tf.image.rot90(data, k=2)
    return tf.convert_to_tensor(turned, dtype=tf.float32)

In [None]:
def graph_pict(data,is_stemcell):
    """Display a 5x5 grid of the first 25 images with their labels as titles.

    input:   data = indexable collection of images (e.g. a tensor) with at least 25 entries;
             is_stemcell = the matching labels
    output:  None
    """
    plt.figure(figsize=(10,10))
    labels=tf.experimental.numpy.uint64(is_stemcell)
    for i in range(25):
        plt.subplot(5,5,i+1)
        # Plot from the `data` argument; the previous code read an undefined
        # global (`AUG_data`) and ignored the parameter.
        plt.imshow(data[i])
        plt.title(str(labels[i]))
        plt.axis("off")
    return

# Neural networks

In [None]:
def runNet(data, aug_size, data_test_predict=None, epochs=30, batch_size=10, testsize=0.15, kfold=5, learn_rate=0.001, early_stopping_patience=20, shape=[227,227,3], ResNet=True, AlexNet=False):
    """Train and evaluate a CNN with k-fold cross validation.

    The first part of `data` is split off as test set; the rest is divided into
    `kfold` folds. For every fold a fresh model is built, trained on the other
    folds, validated on the current fold and evaluated on the test set.
    Optionally an additional, unlabeled dataset is predicted after each fold.

    input:   data = tf.data.Dataset of (image, label) elements;
             aug_size = number of dataset elements per source picture, the test
             size is rounded up to a multiple of it;
             data_test_predict = optional dataset to predict (None or empty = skip);
             epochs, batch_size, learn_rate, early_stopping_patience = training settings;
             testsize = fraction of the data used as test set; kfold = number of folds;
             shape = input shape handed to resnet50;
             ResNet / AlexNet = architecture switches (AlexNet wins if both are set).
    output:  (model of the last fold, batched test dataset, unbatched training dataset)
    raises:  ValueError if neither ResNet nor AlexNet is selected.
    """
    if not (ResNet or AlexNet):
        raise ValueError("Select a network: set ResNet=True or AlexNet=True.")

    #Number of folds specified
    num_folds = kfold

    #Get test size (rounded up to a whole number of aug_size groups)
    test_size = int(aug_size*(np.ceil(testsize*len(data)/aug_size)))

    #Include callbacks
    filepath='weights.best.hdf5'
    checkpointer = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max') # Callback to save the Keras model or model weights
    earlystopper = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=early_stopping_patience, verbose=1,mode='min') # Early stopping function to prevent overfitting
    callback = [checkpointer, earlystopper]

    #Split in test and train data
    test = data.take(test_size)
    train = data.skip(test_size)

    #Get size
    foldsize = int(len(train)/num_folds)
    train_size = tf.data.experimental.cardinality(train).numpy()
    test_size = tf.data.experimental.cardinality(test).numpy()
    print("Training data size:", int(train_size *(kfold-1)/kfold))
    print("Test data size:", test_size)
    print("Validation data size:", int(train_size /kfold))

    #Shuffle test data and divide into batches.
    #reshuffle_each_iteration=False keeps the order stable, so separate passes over the
    #returned test set (e.g. predict and a later label extraction) stay aligned.
    test = test.shuffle(buffer_size = test_size, seed = 13, reshuffle_each_iteration = False)
    test = test.batch(batch_size=batch_size, drop_remainder=True)

    #Iterate over folds
    for fold in range(num_folds):

        print("\n{}".format("Fold number"),fold+1)

        #Define model (AlexNet takes precedence when both switches are set)
        if AlexNet:
            model = alexNet()
        else:
            model = resnet50(shape)

        #Compile model (`learning_rate` replaces the deprecated `lr` alias)
        model.compile(loss='binary_crossentropy', optimizer=tf.optimizers.SGD(learning_rate=learn_rate), metrics=['accuracy'])

        #Split training data in training and validation data for current fold
        valfold = train.take(foldsize*(fold+1)).skip(foldsize*fold)
        trainfold = train.take(foldsize*fold).concatenate(train.skip(foldsize*(fold+1)))

        #Shuffle data
        trainfold = trainfold.shuffle(buffer_size = train_size, seed = 13)
        valfold = valfold.shuffle(buffer_size = train_size, seed = 13)

        #Divide into batches
        trainfold = trainfold.batch(batch_size=batch_size, drop_remainder=True)
        valfold = valfold.batch(batch_size=batch_size, drop_remainder=True)

        #Train network
        model.fit(trainfold,
                epochs=epochs,
                validation_data=valfold,
                validation_freq=1,
                callbacks = callback)

        #Evaluate test data
        model.evaluate(test)

        #Print prediction of the additional test data
        if data_test_predict is not None and len(data_test_predict) > 0:
            prediction = model.predict(data_test_predict.batch(batch_size=1))
            print("\n{}".format("Predictions for the additional test set:"))
            print(prediction)

    return model,test,train

In [None]:
def get_run_logdir(root_logdir):
    """Build a per-run log directory path named after the current local time.

    Input:   Directory where all TensorBoard files are stored
    Output:  root_logdir joined with "run_<year>_<month>_<day>-<hour>_<minute>_<second>"
    """
    return os.path.join(root_logdir, time.strftime("run_%Y_%m_%d-%H_%M_%S"))

## AlexNet

In [None]:
def alexNet():
    """Define the AlexNet model for 227x227x3 inputs and return it (uncompiled).

    Five convolutions (each followed by batch normalization, three of them
    also by max pooling), then two 4096-unit dense layers with dropout and a
    single sigmoid output unit.
    """
    L = keras.layers
    stack = [
        L.Conv2D(filters=96, kernel_size=(11,11), strides=(4,4), activation='relu', input_shape=(227,227,3)),
        L.BatchNormalization(),
        L.MaxPool2D(pool_size=(3,3), strides=(2,2)),
        L.Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), activation='relu', padding="same"),
        L.BatchNormalization(),
        L.MaxPool2D(pool_size=(3,3), strides=(2,2)),
        L.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
        L.BatchNormalization(),
        L.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
        L.BatchNormalization(),
        L.Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
        L.BatchNormalization(),
        L.MaxPool2D(pool_size=(3,3), strides=(2,2)),
        L.Flatten(),
        L.Dense(4096, activation='relu'),
        L.Dropout(0.5),
        L.Dense(4096, activation='relu'),
        L.Dropout(0.5),
        L.Dense(1, activation='sigmoid'),
    ]
    return keras.models.Sequential(stack)

In [None]:
def AlexNet(data, data_test_predict = None, epochs = 10, batch_size=10, train_split_rate = 0.7, learn_rate = 0.0001):
    """Train and evaluate AlexNet on a single train/test/validation split.

    input:   data = tf.data.Dataset of (image, label) elements;
             data_test_predict = optional dataset to predict (None or empty = skip);
             epochs, batch_size, learn_rate = training settings;
             train_split_rate = fraction of the data used for training, the
             remainder is halved into test and validation data.
    output:  None (sizes, the model summary, the evaluation and optional
             predictions are printed)
    """
    # Split in train, test and validation
    print(data)
    a = int(len(data) * train_split_rate)
    train = data.take(a)
    data = data.skip(a)
    b = int(len(data)/2)
    test = data.take(b)
    validation = data.skip(b)

    #Shuffle data
    train = train.shuffle(buffer_size = 100000, seed = 13)
    test = test.shuffle(buffer_size = 100000, seed = 13)
    validation = validation.shuffle(buffer_size = 100000, seed = 13)

    print(train)

    #Get size
    train_size = tf.data.experimental.cardinality(train).numpy()
    test_size = tf.data.experimental.cardinality(test).numpy()
    validation_size = tf.data.experimental.cardinality(validation).numpy()
    print("Training data size:", train_size)
    print("Test data size:", test_size)
    print("Validation data size:", validation_size)

    #Divide into batches
    train = train.batch(batch_size=batch_size, drop_remainder=True)
    test = test.batch(batch_size=1, drop_remainder=True)
    validation = validation.batch(batch_size=1, drop_remainder=True)

    #Include callback TensorBoard
    root_logdir = os.path.join(os.curdir, "logs//fit//")
    run_logdir = get_run_logdir(root_logdir)
    tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
    earlystopper = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, verbose=1,mode='min') # Early stopping function to prevent overfitting

    #Define model: reuse the shared architecture instead of a second inline copy
    model = alexNet()

    #Compile model (`learning_rate` replaces the deprecated `lr` alias)
    model.compile(loss='binary_crossentropy', optimizer=tf.optimizers.SGD(learning_rate=learn_rate), metrics=['accuracy'])
    model.summary()

    #Train network
    model.fit(train,
              epochs=epochs,
              validation_data=validation,
              validation_freq=1,
              callbacks=[tensorboard_cb, earlystopper])

    #Evaluate test data
    model.evaluate(test)

    #Predict the additional test data, if any was passed
    if data_test_predict is not None and len(data_test_predict) > 0:
        prediction = model.predict(data_test_predict.batch(batch_size=1))
        print(prediction)

    return

## ResNet

In [None]:
def res_identity(x, filters):
    """ResNet identity block: three conv/batch-norm stages plus an unchanged skip path.

    input:   x = input tensor; filters = pair (f1, f2) with the filter counts of
             the first two convolutions (f1) and of the last convolution (f2)
    output:  relu(main path + x)
    """
    f1, f2 = filters
    shortcut = x  # added back onto the main path at the end

    def _conv_bn(tensor, n_filters, kernel, padding):
        # Convolution (stride 1, L2-regularized kernel) followed by batch normalization.
        tensor = Conv2D(n_filters, kernel_size=kernel, strides=(1, 1), padding=padding,
                        kernel_regularizer=l2(0.001))(tensor)
        return BatchNormalization()(tensor)

    # 1x1 convolution
    out = _conv_bn(x, f1, (1, 1), 'valid')
    out = Activation(activations.relu)(out)

    # 3x3 convolution; 'same' padding keeps the spatial size
    out = _conv_bn(out, f1, (3, 3), 'same')
    out = Activation(activations.relu)(out)

    # 1x1 convolution; the activation follows only after the addition
    out = _conv_bn(out, f2, (1, 1), 'valid')

    out = Add()([out, shortcut])
    return Activation(activations.relu)(out)

In [None]:
def res_conv(x, s, filters):
    """ResNet convolutional block: like the identity block, but with a projected skip path.

    input:   x = input tensor; s = stride of the first main-path convolution and
             of the shortcut convolution; filters = pair (f1, f2) of filter counts
    output:  relu(main path + projected shortcut)
    """
    f1, f2 = filters
    shortcut = x

    def _conv_bn(tensor, n_filters, kernel, stride, padding):
        # Convolution (L2-regularized kernel) followed by batch normalization.
        tensor = Conv2D(n_filters, kernel_size=kernel, strides=(stride, stride), padding=padding,
                        kernel_regularizer=l2(0.001))(tensor)
        return BatchNormalization()(tensor)

    # main path: strided 1x1 -> 3x3 -> 1x1
    out = _conv_bn(x, f1, (1, 1), s, 'valid')
    out = Activation(activations.relu)(out)

    out = _conv_bn(out, f1, (3, 3), 1, 'same')
    out = Activation(activations.relu)(out)

    out = _conv_bn(out, f2, (1, 1), 1, 'valid')

    # shortcut: 1x1 convolution with the same stride and f2 filters so both paths can be added
    shortcut = _conv_bn(shortcut, f2, (1, 1), s, 'valid')

    out = Add()([out, shortcut])
    return Activation(activations.relu)(out)

In [None]:
def resnet50(shape = [227, 227, 3]):
    """Construct the ResNet-50 style model for binary classification.

    input:   shape = [height, width, channels] of the input images
    output:  uncompiled Keras Model named 'Resnet50' with one sigmoid output unit
    """
    input_im = Input(shape=(shape[0], shape[1], shape[2]))  # image size
    x = ZeroPadding2D(padding=(3, 3))(input_im)

    # 1st stage: 7x7 convolution followed by max pooling
    x = Conv2D(64, kernel_size=(7, 7), strides=(2, 2))(x)
    x = BatchNormalization()(x)
    x = Activation(activations.relu)(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Stages 2-5: one conv block followed by a number of identity blocks.
    # Each entry: (stride of the conv block, filters, number of identity blocks)
    stages = (
        (1, (64, 256), 2),
        (2, (128, 512), 3),
        (2, (256, 1024), 5),
        (2, (512, 2048), 2),
    )
    for stride, stage_filters, identity_blocks in stages:
        x = res_conv(x, s=stride, filters=stage_filters)
        for _ in range(identity_blocks):
            x = res_identity(x, filters=stage_filters)

    # Average pooling and dense connection at the end
    x = AveragePooling2D((2, 2), padding='same')(x)
    x = Flatten()(x)
    x = Dense(1, activation='sigmoid', kernel_initializer='he_normal')(x)  # binary classification

    return Model(inputs=input_im, outputs=x, name='Resnet50')

In [None]:
def save_model(model, suffix=None):
    """
    Saves a given model in a models directory and appends a suffix (str)
    for clarity and reuse.

    The file name is "<YYYYmmdd-HHMMSS>[-<suffix>].h5" below
    "drive/My Drive/Data/models"; the suffix part is left out when suffix is None.
    Returns the path the model was saved to.
    """
    # %S (seconds) is the portable directive; the previous %s is a
    # platform-specific extension that yields epoch seconds where it is supported.
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    modeldir = os.path.join("drive/My Drive/Data/models", timestamp)
    suffix_part = "" if suffix is None else "-" + suffix
    model_path = modeldir + suffix_part + ".h5" # save format of model
    print(f"Saving model to: {model_path}...")
    model.save(model_path)
    return model_path

In [None]:
def load_model(model_path):
    """
    Load a saved Keras model from `model_path` and return it.

    hub.KerasLayer is registered as custom object for deserialization.
    """
    print(f"Loading saved model from: {model_path}")
    custom = {"KerasLayer":hub.KerasLayer}
    return tf.keras.models.load_model(model_path, custom_objects=custom)

In [None]:
def unbatchify(data,test=False):
    """
    Unbatch a data set and return two lists: the images and the labels.

    With test=True only the first entry of every label is kept.
    """
    images, labels = [], []
    for image, label in data.unbatch().as_numpy_iterator():
        images.append(image)
        labels.append(label[0] if test else label)
    return images, labels

In [None]:
def runResNet(data, epochs, batch_size, shape = [227, 227, 3], learn_rate = 0.0001, early_stopping_patience = 20, testsize=0.2, kfold=5):
    """Train and evaluate the ResNet model with k-fold cross validation.

    input:   data = tf.data.Dataset of (image, label) elements;
             epochs, batch_size, learn_rate, early_stopping_patience = training settings;
             shape = input shape handed to resnet50;
             testsize = fraction of the data used as test set; kfold = number of folds.
    output:  (model of the last fold, batched test dataset, unbatched training dataset)
    """
    num_samples = len(data) # number of sample images
    num_folds = kfold   #number of folds specified

    test = data.take(int(testsize*num_samples)) # testdataset
    train = data.skip(int(testsize*num_samples)) # training dataset including validation data

    foldsize = int(len(train)/num_folds)  # number of images in one fold

    #Get size for printing
    train_size = tf.data.experimental.cardinality(train).numpy()
    test_size = tf.data.experimental.cardinality(test).numpy()
    print("Training data size:", int(train_size *(kfold-1)/kfold))
    print("Test data size:", test_size)
    print("Validation data size:", int(train_size /kfold))

    # Shuffle once with a stable order: without reshuffle_each_iteration=False every
    # pass over the returned test set would yield a different order, so predictions
    # and labels collected in separate passes would not line up.
    test = test.shuffle(buffer_size = test_size, seed = 13, reshuffle_each_iteration = False)
    test = test.batch(batch_size=batch_size, drop_remainder=True)   #batch the testdataset

    # iterate over folds
    for fold in range(num_folds):

        print('Fold number', fold+1)

        model = resnet50(shape) # build model
        # `learning_rate` replaces the deprecated `lr` alias
        model.compile(loss='binary_crossentropy', optimizer=tf.optimizers.SGD(learning_rate=learn_rate), metrics=['accuracy'])

        # split training data in training and validation data for current fold
        valfold = train.take(foldsize*(fold+1)).skip(foldsize*fold)
        trainfold = train.take(foldsize*fold).concatenate(train.skip(foldsize*(fold+1)))

        #Shuffle data in folds
        trainfold = trainfold.shuffle(buffer_size = train_size, seed = 13)
        valfold = valfold.shuffle(buffer_size = train_size, seed = 13)

        # batch data
        trainfold = trainfold.batch(batch_size=batch_size, drop_remainder=True)
        valfold = valfold.batch(batch_size=batch_size, drop_remainder=True)

        #Early stopping callback
        earlystopper = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=early_stopping_patience, verbose=1,mode='min') # Early stopping function to prevent overfitting

        #Train network
        model.fit(trainfold,
                  epochs=epochs,
                  validation_data=valfold,
                  validation_freq=1, callbacks = [earlystopper])

        #Evaluate test data
        model.evaluate(test)

    return model, test, train

# Running the code

In [None]:
def run_preprocessing(input_dir, x_shape = 227, y_shape = 227,
                      data_standardization_switch = False, grayscale_switch = False, edge_detection_switch = False,
                      rotate_180_switch = False, mirroring_switch = False, zoom_switch = False,
                      rotate_180_rate = 1, mirroring_rate = 1,
                      zoom_rate = 1, zoom_factor = 2, test_predict = False):
    """Load, preprocess, augment and resize all images below `input_dir`.

    input:   input_dir = directory handed to get_file_list; x_shape / y_shape =
             target size for resize_image; the *_switch booleans select the
             preprocessing and augmentation methods; the *_rate floats give the
             share of the file list (by position) an augmentation is applied to;
             zoom_factor is forwarded to the zoom augmentation;
             test_predict = additionally build the unlabeled test dataset.
    output:  (dataset, aug_size) or, if test_predict is set,
             (dataset, aug_size, dataset_test_predict).
             aug_size is the number of images produced for the last processed file
             (original + augmentations); it is 0 when no file was found.
    """
    #Getting a list of all files in the input directory
    filenames = get_file_list(input_dir)
    number_of_files = len(filenames)

    #Initializing the dataset (concatenate_data treats a falsy value as "empty")
    dataset = False
    aug_size = 0

    for f, filename in enumerate(filenames):

        #Printing the progress while running the preprocessing
        print_preprocessing_progress(f, number_of_files)

        #Loading the current file
        image = open_image(filename)

        #Computing the label (is a stemcell (1) or not (0))
        label = compute_is_stemcell(filename)

        #Preprocessing the image
        image = data_preprocessing(image, data_standardization_switch, grayscale_switch, edge_detection_switch)

        #Applying the data augmentation methods
        image, label, augmented = data_augmentation(image, label, f/number_of_files, rotate_180_switch,
                                                    mirroring_switch, zoom_switch, rotate_180_rate,
                                                    mirroring_rate, zoom_rate, zoom_factor)

        #Resizing the data to the requested shape
        image = resize_image(image, x_shape, y_shape)

        #Get number of augmentations + original picture
        aug_size = int(tf.shape(image)[0])

        #Concatenating the input data to a tf.ConcatenateDataset
        dataset = concatenate_data(dataset, image, label, augmented)

    if not test_predict:
        return dataset, aug_size

    #Getting a list of all testing files in the input directory
    filenames_test_predict = get_file_list(input_dir, test_predict = test_predict)

    #Initializing the dataset
    dataset_test_predict = False

    for f, filename in enumerate(filenames_test_predict):

        #Printing the progress while running the preprocessing
        print_preprocessing_progress(f, len(filenames_test_predict))

        #Loading the current test file (the previous code indexed the training
        #file list here and therefore never read the test images)
        image = open_image(filename)

        #Preprocessing the image
        image = data_preprocessing(image, data_standardization_switch, grayscale_switch, edge_detection_switch)

        #Resizing the data to the requested shape
        image = resize_image(image, x_shape, y_shape)

        #Concatenating the input data to a tf.ConcatenateDataset
        dataset_test_predict = concatenate_data(dataset_test_predict, image, test_predict = test_predict)

    return dataset, aug_size, dataset_test_predict

In [None]:
# Build the training dataset: standardized images, each additionally rotated by 180 degrees.
# The path has to start with the mount point used by drive.mount ('/content/gdrive').
input_dir = '/content/gdrive/MyDrive/Colab Notebooks/Pictures_Project_B/'
data, aug_size = run_preprocessing(input_dir, x_shape = 227, y_shape = 227,
                      data_standardization_switch = True, grayscale_switch = False, edge_detection_switch = False,
                      rotate_180_switch = True, mirroring_switch = False, zoom_switch = False,
                      rotate_180_rate = 1, mirroring_rate = 1,
                      zoom_rate = 1, zoom_factor = 2, test_predict = False)

In [None]:
# Train and evaluate AlexNet on a single 70 % training split (the rest is halved into test and validation).
AlexNet(data, epochs = 30, batch_size = 10,  train_split_rate = 0.7)

In [None]:
# Train the ResNet with the default k-fold settings; keeps the last model and the test/train datasets.
model,test,train = runResNet(data, epochs = 30, batch_size = 20)

In [None]:
# Train the ResNet via the generic runNet routine (5 folds, 15 % test data, no additional prediction set).
model, test, train = runNet(data, aug_size, data_test_predict=[], epochs=100, batch_size=10, testsize=0.15, kfold=5, learn_rate=0.001, early_stopping_patience=20, shape=[227,227,3], ResNet=True, AlexNet=False)

In [None]:
# Persist the model returned by the last training run.
save_model(model, suffix="Resnet-Full_Dataset-NoAug20-3")

In [None]:
# Reload a previously saved model and predict on the test dataset.
# NOTE(review): `test` is already a batched tf.data.Dataset - confirm that batch_size=40 has any effect here.
full_set=load_model("drive/My Drive/Data/models/20210126-08491611650999-Resnet-Full_Dataset-NoAug20-3.h5")
predictions=full_set.predict(test,batch_size=40, verbose=1)

In [None]:
# Evaluate the reloaded model on the test dataset.
full_set.evaluate(test)

In [None]:
# Extract the test images and labels; test=True keeps only the first entry of every label.
xtest,ytest= unbatchify(test,test=True)

In [None]:
# Convert the lists returned by unbatchify into numpy arrays.
xtest=np.asarray(xtest)
ytest=np.asarray(ytest)

In [None]:
# Remove all size-1 axes from the predictions.
predictions=tf.squeeze(predictions)

In [None]:
# Put predictions and labels side by side.
# NOTE(review): assumes both were produced in the same element order - verify.
df=pd.DataFrame({"Pred":predictions,"Labels":ytest})

In [None]:
# Threshold the predicted values at 0.5 into class labels (1 / 0).
df['Pred'] = np.where((df['Pred'] >= 0.5),1,0)

In [None]:
# Mark correct predictions with 1 and wrong ones with NaN, so that count() yields the number of hits.
df['Result'] = np.where((df['Pred'] == df['Labels']),1, np.nan)

In [None]:
# Display the result table.
df

In [None]:
# Non-NaN entries per column; for "Result" this is the number of correct predictions.
df.count()

In [None]:
def create_dataset(input_dir):
    """Build a tf.data.Dataset of (raw image, label) pairs from the files below input_dir.

    Images are stored as loaded (no preprocessing, no resizing). The label is
    [1] if the path contains 'Stemcell', otherwise [0]. Progress is printed
    for every file.
    """
    filenames = get_file_list(input_dir)
    total = len(filenames)

    for position, path in enumerate(filenames, start=1):
        print(position,'von',total)

        pixels = np.array(Image.open(path))
        label = np.array([1]) if 'Stemcell' in path else np.array([0])
        element = tf.data.Dataset.from_tensors((pixels,label))

        # The first element starts the dataset; later ones are appended.
        if position == 1:
            dataset = element
        else:
            dataset = dataset.concatenate(element)

    return dataset

In [None]:
def create_dataset_s(input_dir):
    """Build a tf.data.Dataset of preprocessed image slices from the files below input_dir.

    Every image is passed through `preprocessing` and then cut up by
    `slice_into_x_y(..., x = 227, y = 227)`; the resulting (image, label) slices
    are added element-wise. The label is [1] if the path contains 'Stemcell',
    otherwise [0]. Progress is printed for every file.

    NOTE(review): `preprocessing` and `slice_into_x_y` are not defined in the
    visible part of this notebook - confirm they exist before running.
    """
    filenames = get_file_list(input_dir)
    total = len(filenames)

    for position, path in enumerate(filenames, start=1):
        print(position,'von',total)

        pixels = preprocessing(np.array(Image.open(path)))
        label = np.array([1]) if 'Stemcell' in path else np.array([0])

        pixels, label = slice_into_x_y(pixels, label, x = 227, y = 227)
        slices = tf.data.Dataset.from_tensor_slices((pixels,label))

        # The first file starts the dataset; later ones are appended.
        if position == 1:
            dataset = slices
        else:
            dataset = dataset.concatenate(slices)

    return dataset

In [None]:
# Build the sliced dataset once; the result is not assigned here (the notebook only displays it).
create_dataset_s(input_dir="/content/gdrive/MyDrive/Colab Notebooks/Pictures_Project_B/")

In [None]:
# Build the sliced dataset and save it to the 'dataset/' folder next to the pictures.
input_dir = '/content/gdrive/MyDrive/Colab Notebooks/Pictures_Project_B/'

datadir = input_dir + 'dataset/'

data = create_dataset_s(input_dir)

# Write the dataset to datadir so it can be reloaded with tf.data.experimental.load.
tf.data.experimental.save(data,datadir)

In [None]:
# For loading the dataset after saving it.
# You might have to adjust the shape depending on how you slice the data.
# elspec describes one dataset element: (uint8 image of shape 1920x2560x3, int32 label of shape (1,)).
# NOTE(review): this shape looks like unsliced full-size images - confirm it matches what was saved.
elspec = (tf.TensorSpec(shape=(1920, 2560, 3), dtype=tf.uint8, name=None),tf.TensorSpec(shape=(1,), dtype=tf.int32, name=None))
data = tf.data.experimental.load(datadir, elspec)
