In [None]:
from tensorflow import keras
from sklearn.model_selection import KFold
import tensorflow as tf
from tensorflow.keras import layers
import os
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix,f1_score,recall_score,precision_score,classification_report

In [None]:
# Notebook-wide parameters
IMG_PATH = 'train_neoplastic'  # dataset root; one sub-directory per class is read from below it
IMG_HEIGHT = 512  # image height (pixels) after resizing
IMG_WIDTH = 512  # image width (pixels) after resizing
IMG_CHANNELS = 3  # channel count used for the backbones' input_shape
num_folds = 10  # number of K-fold cross-validation splits
BATCH_SIZE = 4  # batch size of the training / validation generators

# Read the image file paths (the train / validation / test split is done later, inside the k-fold loop)

In [None]:
import random,os
def get_images_path_list(data_type):
    """Return the paths of all files below ``IMG_PATH/<data_type>`` in random order.

    Parameters
    ----------
    data_type : str
        Name of the sub-directory of ``IMG_PATH`` to scan (recursively).

    Returns
    -------
    list of str
        One ``os.path.join(root, name)`` entry per file found by ``os.walk``,
        shuffled in place with ``random.shuffle``.
    """
    class_dir = os.path.join(IMG_PATH, data_type)
    collected = []
    # Walk the directory tree and gather every file, whatever its extension.
    for folder, _subdirs, filenames in os.walk(class_dir):
        collected.extend(os.path.join(folder, fname) for fname in filenames)
    random.shuffle(collected)
    return collected

In [None]:
# Shuffled file paths found under the 'neoplastic' directory; the cell displays how many there are.
SSA_images_path_list=get_images_path_list('neoplastic')
len(SSA_images_path_list)

In [None]:
# Shuffled file paths found under the 'non_neoplastic' directory; the cell displays how many there are.
TA_images_path_list=get_images_path_list('non_neoplastic')
len(TA_images_path_list)

In [None]:
# Concatenate both classes: all 'neoplastic' paths first, then all 'non_neoplastic' paths.
train_images_path_list = SSA_images_path_list+TA_images_path_list
len(train_images_path_list)

# Create image features

In [None]:
from PIL import Image
import numpy as np

In [None]:
def imagelist_to_np_array(images_path_list):
    """Load image files into a single ``uint8`` array.

    Parameters
    ----------
    images_path_list : sequence of str
        Paths of the image files to load; row ``i`` of the result holds the
        image read from ``images_path_list[i]``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(images_path_list), IMG_HEIGHT, IMG_WIDTH, 3)``
        and dtype ``uint8`` containing the resized RGB pixel data.
    """
    img_num = len(images_path_list)
    img_np_array = np.empty((img_num, IMG_HEIGHT, IMG_WIDTH, 3), dtype=np.uint8)
    for i, image_file_path in enumerate(images_path_list):
        # The context manager closes the underlying file once the pixels are read.
        with Image.open(image_file_path) as raw_img:
            # Force three channels so grayscale / RGBA / palette files fit the buffer.
            rgb_img = raw_img.convert('RGB')
        # PIL's resize expects (width, height), not (height, width).
        resized = rgb_img.resize((IMG_WIDTH, IMG_HEIGHT), Image.BILINEAR)
        img_np_array[i] = np.asarray(resized)
    return img_np_array

In [None]:
# Load every training image into memory as uint8.
x_train_image = imagelist_to_np_array(train_images_path_list)
# Scale pixels to [0, 1]. Casting to float32 first avoids the float64 array that
# `uint8 / 255` would create (twice the memory for the same data Keras uses as float32).
x_train_normalize = x_train_image.astype(np.float32) / 255
print('train:',x_train_normalize.shape)

# Create label tags

In [None]:
def prepare_label(images_path_list):
    """Derive a binary label for each path from its class directory.

    A plain substring test cannot be used here: 'neoplastic' is also contained
    in 'non_neoplastic' (and in the dataset root 'train_neoplastic'), so every
    path would be labelled 1. Whole path components are compared instead.

    Parameters
    ----------
    images_path_list : sequence of str
        File paths; both '/' and '\\' are accepted as separators.

    Returns
    -------
    numpy.ndarray
        One entry per path: 1 if the path has a component named exactly
        'neoplastic' and none named 'non_neoplastic', otherwise 0.
    """
    y_label = []
    for file_path in images_path_list:
        # Normalise separators so Windows- and POSIX-style paths split the same way.
        components = file_path.replace('\\', '/').split('/')
        is_positive = ('neoplastic' in components
                       and 'non_neoplastic' not in components)
        y_label.append(1 if is_positive else 0)
    return np.array(y_label)

In [None]:
# One label per path, in the same order as train_images_path_list.
y_train_label =prepare_label(train_images_path_list)
print('train label:',len(y_train_label) )

# K-fold cross-validation setup

In [None]:
# Number of cross-validation splits (same value as in the parameter cell at the top of the notebook).
num_folds = 10

In [None]:
# shuffle=True without a random_state: the assignment of samples to folds differs between runs.
kfold = KFold(n_splits=num_folds, shuffle=True)

In [None]:
# Define per-fold score containers
# NOTE(review): the cross-validation loop further down re-creates both lists
# for every model, so this cell looks redundant.
acc_per_fold = []
loss_per_fold = []

In [None]:
def load_pretraind_model(model_name):
    """Build an ImageNet-pretrained backbone without its classification head.

    Parameters
    ----------
    model_name : str
        One of "Xception", "InceptionResNetV2", "ResNet101" or
        "EfficientNetB4" (the constructor name in ``keras.applications``).

    Returns
    -------
    The Keras model created with ``weights="imagenet"``,
    ``include_top=False`` and
    ``input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)``.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names. Returning a
        placeholder instead would only fail later, when the caller tries to
        set ``trainable`` on it.
    """
    supported = ("Xception", "InceptionResNetV2", "ResNet101", "EfficientNetB4")
    if model_name not in supported:
        raise ValueError(
            f"Unsupported model_name {model_name!r}; expected one of {supported}"
        )
    # Every supported name is also the name of its constructor in keras.applications.
    build_backbone = getattr(keras.applications, model_name)
    return build_backbone(
        weights="imagenet",  # Load weights pre-trained on ImageNet.
        input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS),
        include_top=False,  # Drop the ImageNet classifier head.
    )

In [None]:
#model_names=["Xception","ResNet101","NASNetLarge","EfficientNetB4","InceptionResNetV2"]

In [None]:
# Architectures to cross-validate; each name is passed to load_pretraind_model.
model_names=["InceptionResNetV2"]

# Perform 10-fold cross-validation for every model in model_names

In [None]:
# For every architecture: run K-fold cross-validation, training a frozen
# pretrained backbone with a sigmoid head on each fold and collecting
# loss / accuracy / sensitivity / specificity / F1 on the held-out fold.
for model_name in model_names:
    # Per-fold score containers, reset for each architecture.
    acc_per_fold = []
    loss_per_fold = []
    sensitivity_per_fold = []
    specificity_per_fold = []
    F1_per_fold = []

    fold_no = 1

    for train, test in kfold.split(x_train_normalize, y_train_label):

        # Drop the graph / layers of the previous fold so memory does not
        # grow with every newly built model.
        keras.backend.clear_session()

        # ---- data preparation ------------------------------------------
        # Hold out 10% of the training part of this fold for validation.
        Ori_train, Ori_val, Ori_train_label, Ori_val_label = train_test_split(
            x_train_normalize[train], y_train_label[train],
            test_size=0.1, random_state=42)

        # Augmentation is applied to the training stream only. The generator
        # must not be replaced by a plain one afterwards, otherwise these
        # settings are silently discarded. No datagen.fit() is needed because
        # no feature-wise statistics are requested.
        datagen = keras.preprocessing.image.ImageDataGenerator(
            rotation_range=10,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True,
            vertical_flip=False)

        data_flow_train = datagen.flow(
            x=Ori_train,
            y=Ori_train_label,
            batch_size=BATCH_SIZE
        )

        # Validation images go through a generator without augmentation.
        datagen_val = keras.preprocessing.image.ImageDataGenerator()

        data_flow_val = datagen_val.flow(
            x=Ori_val,
            y=Ori_val_label,
            batch_size=BATCH_SIZE
        )

        # ---- model definition ------------------------------------------
        base_model = load_pretraind_model(model_name)

        # Freeze the backbone: only the new classification head is trained.
        base_model.trainable = False

        model = tf.keras.Sequential([
            base_model,
            layers.GlobalAveragePooling2D(),
            layers.Dense(1, activation='sigmoid')
        ])

        model.compile(
            optimizer=keras.optimizers.Adam(),
            loss=keras.losses.BinaryCrossentropy(),
            metrics=['accuracy'],
        )

        # ---- callbacks -------------------------------------------------
        # os.path.join keeps the path valid on every OS (a literal "\\"
        # separator only works on Windows).
        save_dir = os.path.join("SaveModel", model_name)
        os.makedirs(save_dir, exist_ok=True)
        filepath = os.path.join(save_dir, model_name + str(fold_no) + ".h5")

        # The callbacks are constructed but not passed to fit(); add
        # callbacks=[early_stopping, checkpoint] below to enable early
        # stopping and best-checkpoint saving.
        early_stopping = keras.callbacks.EarlyStopping(patience=15, verbose=1)
        checkpoint = keras.callbacks.ModelCheckpoint(
            filepath, monitor='val_accuracy', verbose=1,
            save_best_only=True, mode='max')

        # ---- training --------------------------------------------------
        # Model.fit accepts generators directly; fit_generator is deprecated.
        train_history = model.fit(
            data_flow_train,
            steps_per_epoch=max(1, Ori_train.shape[0] // BATCH_SIZE),
            epochs=20,
            validation_data=data_flow_val,
            verbose=0
        )

        # ---- evaluation on the held-out fold ---------------------------
        # Index once: fancy indexing copies the (large) image array each time.
        x_test_fold = x_train_normalize[test]
        y_test_fold = y_train_label[test]

        scores = model.evaluate(x_test_fold, y_test_fold,
                                batch_size=BATCH_SIZE, verbose=0)
        print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
        acc_per_fold.append(scores[1] * 100)
        loss_per_fold.append(scores[0])

        # Threshold the sigmoid output at 0.5 and flatten (n, 1) -> (n,).
        y_pred = (model.predict(x_test_fold, batch_size=BATCH_SIZE) > 0.5).astype("int32").ravel()

        # ssa is 1, ta is 0. labels=[0, 1] forces a 2x2 matrix even when a
        # fold happens to contain a single class, so the unpacking is safe.
        confusion = confusion_matrix(y_test_fold, y_pred, labels=[0, 1])
        tn, fp, fn, tp = confusion.ravel()
        # A fold without positives (or negatives) has no defined
        # sensitivity (or specificity): record NaN instead of dividing by 0.
        sensitivity = tp / (tp + fn) if (tp + fn) > 0 else float('nan')
        specificity = tn / (tn + fp) if (tn + fp) > 0 else float('nan')
        F1 = f1_score(y_test_fold, y_pred)

        sensitivity_per_fold.append(sensitivity)
        specificity_per_fold.append(specificity)
        F1_per_fold.append(F1)

        # Increase fold number
        fold_no += 1

    # ---- report: per-fold results, then averages over all folds --------
    print('------------------------------------------------------------------------')
    print('Score per fold')
    for i in range(0, len(acc_per_fold)):
        print('------------------------------------------------------------------------')
        print(f'> Fold {i+1} - Loss: {round(loss_per_fold[i],2)} - Accuracy: {round(acc_per_fold[i],2)}% - Sensitivity: {round(sensitivity_per_fold[i],2)} - Specificity: {round(specificity_per_fold[i],2)} - F1: {round(F1_per_fold[i],2)}')
    print('------------------------------------------------------------------------')
    print('Average scores for all folds:')
    print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
    # nanmean / nanstd skip folds whose metric was undefined (NaN above).
    print(f'> Sensitivity: {np.nanmean(sensitivity_per_fold)} (+- {np.nanstd(sensitivity_per_fold)})')
    print(f'> Specificity: {np.nanmean(specificity_per_fold)} (+- {np.nanstd(specificity_per_fold)})')
    print(f'> F1: {np.mean(F1_per_fold)} (+- {np.std(F1_per_fold)})')
    print(f'> Loss: {np.mean(loss_per_fold)}')
    print('------------------------------------------------------------------------')

# Load saved model weights

In [None]:
# Change `filepath` to the path of the weights file you want to load.
# NOTE(review): `model` and `filepath` are whatever the last fold of the
# training loop left behind, and the ModelCheckpoint callback that would write
# this file is commented out in that loop — confirm the file actually exists.
model.load_weights(filepath)

In [None]:
# NOTE(review): x_test_normalize / y_test_label are not defined in the visible
# part of this notebook — they must be created before this cell can run.
scores = model.evaluate(x_test_normalize,y_test_label, verbose=1)
scores[1]

# predict & plot

In [None]:
# Sequential.predict_classes no longer exists in current tf.keras. For a single
# sigmoid output it was equivalent to thresholding the probabilities at 0.5,
# which also keeps the (n, 1) shape that plot_images_labels_prediction indexes.
prediction=(model.predict(x_test_normalize) > 0.5).astype("int32")

In [None]:
# Peek at the first ten predicted class labels.
prediction[:10]

In [None]:
import matplotlib.pyplot as plt
label_dict={0:"ta",1:"ssa"}
def plot_images_labels_prediction(images,labels,prediction,
                                  idx=0,num=140):
    """Show consecutive images in a 5x5 grid, titled with true (and predicted) class.

    Parameters
    ----------
    images : indexable collection of images accepted by ``Axes.imshow``.
    labels : indexable collection of keys of ``label_dict`` (true classes).
    prediction : indexable collection whose items are indexed with ``[0]``
        to obtain a ``label_dict`` key; pass an empty sequence to show the
        true label only.
    idx : int
        Index of the first image to display.
    num : int
        Number of images requested. At most 25 are drawn (grid size), and
        never more than remain from ``idx`` to the end of ``images``.
    """
    fig = plt.gcf()
    fig.set_size_inches(12, 14)
    # Clamp to the grid capacity and to the images actually available, so a
    # short test set does not raise IndexError.
    num = max(0, min(num, 25, len(images) - idx))
    for i in range(num):
        ax = plt.subplot(5, 5, 1 + i)
        ax.imshow(images[idx], cmap='binary')
        title = label_dict[labels[idx]]
        if len(prediction) > 0:
            # Title format: "<true>=><predicted>"
            title += '=>' + label_dict[prediction[idx][0]]
        ax.set_title(title, fontsize=10)
        ax.set_xticks([])
        ax.set_yticks([])
        idx += 1
    plt.show()

In [None]:
# Display test images starting at index 20 (the helper caps the grid at 25 images).
plot_images_labels_prediction(x_test_normalize,y_test_label,prediction,idx=20)