# Breast Cancer Histology Imaging
mkfold.py -- from: https://web.inf.ufpr.br/vri/databases/breast-cancer-histopathological-database-breakhis/
(Spanhol et al., 2016)

In [None]:
# Run only one time!

# %run mkfold.py

# Keras Modeling
Processing and SimpleCNN models adapted from: https://www.analyticsvidhya.com/blog/2020/10/create-image-classification-model-python-keras/

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

import cv2
import os

import numpy as np
import pandas as pd

import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout 
from tensorflow.keras.layers import GlobalAveragePooling2D ,BatchNormalization
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizer_v1 import Adam
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import TensorBoard
import tensorflow as tf

import warnings
warnings.filterwarnings("ignore")

In [None]:
## Labeling multiclass classification -- 8 classes
img_size = 224

# Subtype code (third "_"-separated field of the filename part before the
# first "-") -> integer class id.
_MC_CLASS_IDS = {'A': 0, 'F': 1, 'PT': 2, 'TA': 3,
                 'DC': 4, 'LC': 5, 'MC': 6, 'PC': 7}


def get_data_MC(data_dir):
    """Load every image in ``data_dir`` as a resized RGB array plus its class id.

    The class is read from the filename: the text before the first "-" is
    split on "_" and its third field is looked up in ``_MC_CLASS_IDS``.

    Files are skipped (with a printed message) instead of aborting the load
    when the filename does not have that layout, when the subtype code is
    unknown, or when the image cannot be read/resized. Previously an unknown
    code silently reused the class id of the preceding file.

    Args:
        data_dir: Directory whose entries are the image files to load.

    Returns:
        An object array of shape (n_images, 2); column 0 holds the
        (img_size, img_size, 3) RGB image, column 1 the integer class id.
    """
    data = []
    for img in os.listdir(data_dir):
        fields = img.split("-")[0].split("_")
        if len(fields) < 3:
            print(f"skipping {img!r}: unexpected filename layout")
            continue

        class_num = _MC_CLASS_IDS.get(fields[2])
        if class_num is None:
            print(f"skipping {img!r}: unknown label {fields[2]!r}")
            continue

        try:
            img_arr = cv2.imread(os.path.join(data_dir, img))[..., ::-1]  # convert BGR to RGB format
            resized_arr = cv2.resize(img_arr, (img_size, img_size))  # Reshaping images to preferred size
        except Exception as e:
            # cv2.imread returns None for unreadable files, so the indexing
            # above raises; keep the load best-effort as before.
            print(e)
            continue
        data.append((resized_arr, class_num))

    # Fill the object array explicitly: np.array() on ragged [image, int]
    # pairs raises ValueError on recent NumPy versions.
    packed = np.empty((len(data), 2), dtype=object)
    for row, (pixels, class_num) in enumerate(data):
        packed[row, 0] = pixels
        packed[row, 1] = class_num
    return packed

# Import Data for all Magnifications

In [None]:
# 40X MAG
# Folds 1-5 are loaded in order; within a fold the train split is read
# before the test split.

_folds_40x = [
    (get_data_MC(f'./fold{k}/train/40X'), get_data_MC(f'./fold{k}/test/40X'))
    for k in range(1, 6)
]

train41, val41 = _folds_40x[0]
train42, val42 = _folds_40x[1]
train43, val43 = _folds_40x[2]
train44, val44 = _folds_40x[3]
train45, val45 = _folds_40x[4]

# Per-fold lists, index 0 == fold 1
train_set40 = [fold_train for fold_train, _ in _folds_40x]
val_set40 = [fold_val for _, fold_val in _folds_40x]

In [None]:
# 100X MAG
# Same layout as the other magnifications: one (train, test) pair per fold,
# read in fold order with the train split first.

_folds_100x = [
    (get_data_MC(f'./fold{k}/train/100X'), get_data_MC(f'./fold{k}/test/100X'))
    for k in range(1, 6)
]

train1, val1 = _folds_100x[0]
train2, val2 = _folds_100x[1]
train3, val3 = _folds_100x[2]
train4, val4 = _folds_100x[3]
train5, val5 = _folds_100x[4]

# Per-fold lists, index 0 == fold 1
train_set100 = [fold_train for fold_train, _ in _folds_100x]
val_set100 = [fold_val for _, fold_val in _folds_100x]

In [None]:
# 200X Mag
# One (train, test) pair per fold, read in fold order with the train split
# first.

_folds_200x = [
    (get_data_MC(f'./fold{k}/train/200X'), get_data_MC(f'./fold{k}/test/200X'))
    for k in range(1, 6)
]

train21, val21 = _folds_200x[0]
train22, val22 = _folds_200x[1]
train23, val23 = _folds_200x[2]
train24, val24 = _folds_200x[3]
train25, val25 = _folds_200x[4]

# Per-fold lists, index 0 == fold 1
train_set200 = [fold_train for fold_train, _ in _folds_200x]
val_set200 = [fold_val for _, fold_val in _folds_200x]

In [None]:
# 400X Mag
# One (train, test) pair per fold, read in fold order with the train split
# first.
_folds_400x = [
    (get_data_MC(f'./fold{k}/train/400X'), get_data_MC(f'./fold{k}/test/400X'))
    for k in range(1, 6)
]

train421, val421 = _folds_400x[0]
train422, val422 = _folds_400x[1]
train423, val423 = _folds_400x[2]
train424, val424 = _folds_400x[3]
train425, val425 = _folds_400x[4]

# Per-fold lists, index 0 == fold 1
train_set400 = [fold_train for fold_train, _ in _folds_400x]
val_set400 = [fold_val for _, fold_val in _folds_400x]

# SimpleCNN and SimpleCNN2 Functions
Based on: https://www.analyticsvidhya.com/blog/2020/10/create-image-classification-model-python-keras/

In [None]:
# Data Preprocessing and Augmentation

def processing(train, val):
    """Split (image, label) pairs into arrays and divide the pixels by 255.

    Two pieces of dead code were removed: the ``reshape`` calls whose results
    were discarded, and an ``ImageDataGenerator`` that was built and fitted
    but never used or returned. The returned values are unchanged, and no
    augmentation is (or ever was) applied to them.

    Args:
        train: Iterable of (image, label) pairs for training.
        val: Iterable of (image, label) pairs for validation.

    Returns:
        Tuple ``(x_train, y_train, x_val, y_val)`` of NumPy arrays; the x
        arrays are the stacked images divided by 255, the y arrays the labels
        in the same order.
    """
    # Normalize the data
    x_train = np.array([feature for feature, _ in train]) / 255
    x_val = np.array([feature for feature, _ in val]) / 255

    y_train = np.array([label for _, label in train])
    y_val = np.array([label for _, label in val])

    print(np.shape(x_train))
    print(np.shape(x_val))

    return x_train, y_train, x_val, y_val

In [None]:
# Simple CNN: three Conv2D + max-pooling stages (32, 32, 64 filters), a dropout layer (to limit overfitting), then a dense classifier
def simpleCNN(num_classes=8, input_shape=(224, 224, 3)):
    """Build the small baseline CNN and print its summary.

    Args:
        num_classes: Number of units in the final softmax layer. Defaults to
            8, the value that used to be hard-coded.
        input_shape: Shape of a single input image. Defaults to
            (224, 224, 3), the value that used to be hard-coded.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Conv2D(32, 3, padding="same", activation="relu", input_shape=input_shape))
    model.add(MaxPool2D())

    model.add(Conv2D(32, 3, padding="same", activation="relu"))
    model.add(MaxPool2D())

    model.add(Conv2D(64, 3, padding="same", activation="relu"))
    model.add(MaxPool2D())

    model.add(Dropout(0.4))
    model.add(Flatten())

    model.add(Dense(128, activation="relu"))
    model.add(Dense(num_classes, activation="softmax"))

    model.summary()

    return model

In [None]:
# Wider variant of the simple CNN: three Conv2D + max-pooling stages (64, 128, 256 filters), a dropout layer (to limit overfitting), then a dense classifier
def simpleCNN_2(num_classes=8, input_shape=(224, 224, 3)):
    """Build the wider baseline CNN and print its summary.

    Args:
        num_classes: Number of units in the final softmax layer. Defaults to
            8, the value that used to be hard-coded; pass 4 for the
            4-class label sets.
        input_shape: Shape of a single input image. Defaults to
            (224, 224, 3), the value that used to be hard-coded.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Conv2D(64, 3, padding="same", activation="relu", input_shape=input_shape))
    model.add(MaxPool2D())

    model.add(Conv2D(128, 3, padding="same", activation="relu"))
    model.add(MaxPool2D())

    model.add(Conv2D(256, 3, padding="same", activation="relu"))
    model.add(MaxPool2D())

    model.add(Dropout(0.4))
    model.add(Flatten())
    model.add(Dense(128, activation="relu"))
    model.add(Dense(num_classes, activation="softmax"))

    model.summary()

    return model

In [None]:
def run_CNN(x_train, y_train, x_val, y_val, model, epochs):
    """Compile and train ``model``, then plot its curves and report validation metrics.

    The model is compiled with Adam (learning rate 0.000001) and categorical
    cross-entropy, fitted for ``epochs`` epochs with ``(x_val, y_val)`` as
    validation data, and evaluated on ``x_val``. Accuracy and loss curves, a
    classification report and a confusion matrix are shown; nothing is
    returned.

    Args:
        x_train, y_train: Training inputs and one-hot labels.
        x_val, y_val: Validation inputs and one-hot labels.
        model: Keras model to compile and fit (modified in place).
        epochs: Number of training epochs.
    """
    # Adam optimizer with CategoricalCrossentropy loss
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.000001),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'],
    )

    history = model.fit(x_train, y_train, epochs=epochs, validation_data=(x_val, y_val))

    curves = history.history
    acc, val_acc, loss, val_loss = (
        curves[key] for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
    )
    epochs_range = range(epochs)

    # Left panel: accuracy; right panel: loss
    plt.figure(figsize=(15, 15))
    panels = (
        (1, acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
         'lower right', 'Training and Validation Accuracy'),
        (2, loss, 'Training Loss', val_loss, 'Validation Loss',
         'upper right', 'Training and Validation Loss'),
    )
    for slot, train_curve, train_name, val_curve, val_name, legend_loc, title in panels:
        plt.subplot(2, 2, slot)
        plt.plot(epochs_range, train_curve, label=train_name)
        plt.plot(epochs_range, val_curve, label=val_name)
        plt.legend(loc=legend_loc)
        plt.title(title)
    plt.show()

    # Class index with the highest softmax score, per validation sample
    predictions = np.argmax(model.predict(x_val), axis=1).ravel()

    # One-hot labels back to class indices
    rounded_labels = np.argmax(y_val, axis=1)

    print(classification_report(rounded_labels, predictions))

    ConfusionMatrixDisplay(
        confusion_matrix=confusion_matrix(rounded_labels, predictions)
    ).plot()
    plt.show()

## Run SimpleCNN on 8-class Dataset

In [None]:
# 200X

for i in range(len(train_set200)):
    print("THIS IS FOLD", i+1)
    # Validate on the fold's held-out split. The training split was previously
    # passed for both arguments, so the reported validation metrics were
    # really training metrics.
    x_train, y_train, x_val, y_val = processing(train_set200[i], val_set200[i])
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=8)
    y_val = tf.keras.utils.to_categorical(y_val, num_classes=8)
    model = simpleCNN()
    run_CNN(x_train, y_train, x_val, y_val, model, 100)

## Run SimpleCNN2 on 8-class Dataset

In [None]:
# 200X CNN2 (Folds 1-2)
# The loop itself walks every fold in train_set200 / val_set200.

for i, (fold_train, fold_val) in enumerate(zip(train_set200, val_set200)):
    print("THIS IS FOLD", i+1)
    x_train, y_train, x_val, y_val = processing(fold_train, fold_val)
    # One-hot encode the 8 class ids
    y_train, y_val = (
        tf.keras.utils.to_categorical(labels, num_classes=8)
        for labels in (y_train, y_val)
    )
    model = simpleCNN_2()
    run_CNN(x_train, y_train, x_val, y_val, model, 100)

In [None]:
train_set200_2 = [train23, train24, train25]
val_set200_2 = [val23, val24, val25]

# 200X CNN2 (folds 3-5 after force quit)

for i in range(len(train_set200_2)):
    print("THIS IS FOLD", i+3)
    # Validate on the matching held-out split. The training split was
    # previously passed twice, which left val_set200_2 unused and made the
    # validation metrics meaningless.
    x_train, y_train, x_val, y_val = processing(train_set200_2[i], val_set200_2[i])
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=8)
    y_val = tf.keras.utils.to_categorical(y_val, num_classes=8)
    model = simpleCNN_2()
    run_CNN(x_train, y_train, x_val, y_val, model, 100)

# SimpleCNN2 but with Split (Benign and Malignant) Datasets (4-Class)

In [None]:
# Collect the class name of every sample in train_1B so the class balance
# can be plotted.
# NOTE(review): train_1B is created in a later cell (the 200X 4-class
# split); that cell has to run before this one.

# Class id -> name for the benign 4-class labelling. Ids 4-7 (DC, LC, MC, PC)
# only exist in the 8-class labelling and are not mapped here.
_benign_names = {0: "A", 1: "F", 2: "PT", 3: "TA"}

x = [
    _benign_names[sample[1]]
    for sample in train_1B
    if sample[1] in _benign_names
]

In [None]:
sns.set_style('darkgrid')
# Pass the labels by keyword: in current seaborn the first positional
# argument of countplot is `data`, not `x`.
sns.countplot(x=x)

In [None]:
# Test 200X CNN2-- Benign 

epochs = 40
for i in range(len(train_set200B)):
    print("THIS IS FOLD", i+1)
    x_train, y_train, x_val, y_val = processing(train_set200B[i], val_set200B[i])
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=4)
    y_val = tf.keras.utils.to_categorical(y_val, num_classes=4)
    model = simpleCNN_2()
    # simpleCNN_2() ends in an 8-unit softmax, but the labels here are 4-wide
    # one-hot vectors; fitting would fail on the shape mismatch. Swap the
    # head for a 4-unit softmax before training.
    model.pop()
    model.add(Dense(4, activation="softmax"))
    run_CNN(x_train, y_train, x_val, y_val, model, epochs)

In [None]:
# 200X CNN2-- Malignant 

epochs = 40

for i in range(len(train_set200M)):
    print("THIS IS FOLD", i+1)
    x_train, y_train, x_val, y_val = processing(train_set200M[i], val_set200M[i])
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=4)
    y_val = tf.keras.utils.to_categorical(y_val, num_classes=4)
    model = simpleCNN_2()
    # simpleCNN_2() ends in an 8-unit softmax, but the labels here are 4-wide
    # one-hot vectors; fitting would fail on the shape mismatch. Swap the
    # head for a 4-unit softmax before training.
    model.pop()
    model.add(Dense(4, activation="softmax"))
    run_CNN(x_train, y_train, x_val, y_val, model, epochs)

# DenseNet
Model adapted from: https://www.pluralsight.com/guides/introduction-to-densenet-with-tensorflow

In [None]:
def DenseNetModel(x_train, y_train, x_val, y_val, epochs):
    """Train a new classifier head on top of an ImageNet-pretrained DenseNet121.

    The DenseNet121 backbone (``include_top=False``) is followed by global
    average pooling, batch-norm, dropout, two dense layers, batch-norm,
    dropout and a softmax layer. Only those eight added layers are trainable;
    every earlier layer is frozen.

    Changes from the previous version:
      * the softmax width is taken from ``y_train`` instead of being
        hard-coded to 8, so the 4-class runs no longer hit a shape mismatch;
      * ``ReduceLROnPlateau`` used ``min_lr=1e-3``, which equals Adam's
        default learning rate, so the rate could never be lowered. The floor
        is now 1e-6.

    Args:
        x_train: Training images, shape (n, 224, 224, 3).
        y_train: One-hot training labels, shape (n, n_classes).
        x_val: Validation images.
        y_val: One-hot validation labels.
        epochs: Number of training epochs.

    Returns:
        The Keras ``History`` object returned by ``model.fit``.

    Side effects:
        Writes the best checkpoint (ModelCheckpoint with
        ``save_best_only=True``) to ``model.h5``, overwriting any earlier
        file of that name.
    """
    # One output unit per one-hot label column.
    num_classes = int(np.shape(y_train)[-1])

    model_d = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    x = model_d.output

    x = GlobalAveragePooling2D()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)

    preds = Dense(num_classes, activation='softmax')(x)

    model = Model(model_d.input, preds)

    # The last eight layers are exactly the head added above; freeze the rest.
    for layer in model.layers[:-8]:
        layer.trainable = False

    for layer in model.layers[-8:]:
        layer.trainable = True

    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Halve the learning rate after 5 epochs without val_accuracy improvement.
    anne = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=5, verbose=1, min_lr=1e-6)
    checkpoint = ModelCheckpoint('model.h5', verbose=1, save_best_only=True)

    # Fits-the-model
    history = model.fit(x_train, y_train,
                        epochs=epochs,
                        verbose=1,
                        callbacks=[anne, checkpoint],
                        validation_data=(x_val, y_val))

    return history

In [None]:
# 4-Class Summary
# NOTE(review): DenseNetModel is declared with five parameters
# (x_train, y_train, x_val, y_val, epochs); this call passes none, so it
# raises TypeError unless that signature gives them defaults.
DenseNetModel()

In [None]:
# 8-Class Summary
# NOTE(review): DenseNetModel is declared with five parameters
# (x_train, y_train, x_val, y_val, epochs); this call passes none, so it
# raises TypeError unless that signature gives them defaults.
DenseNetModel()

In [None]:
def runDenseNet(history, val_data=None):
    """Plot training curves and report validation metrics for a finished fit.

    The model being evaluated is taken from ``history.model`` (the model the
    History callback was attached to) and the x axis from the length of the
    recorded curves. Previously the function read notebook-level ``model``
    and ``epochs`` variables; the DenseNet cells never assign ``model``, so
    it was either undefined or a stale model from an earlier cell.

    Args:
        history: Keras ``History`` object returned by ``model.fit``.
        val_data: Optional ``(x_val, y_val)`` tuple with one-hot labels. When
            omitted, the notebook-level ``x_val`` and ``y_val`` are used, as
            before.
    """
    if val_data is None:
        # Backward-compatible default: the current fold's notebook globals.
        val_data = (x_val, y_val)
    val_inputs, val_labels = val_data

    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs_range = range(len(acc))

    plt.figure(figsize=(15, 15))
    plt.subplot(2, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(2, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.show()

    # Predicted class index per validation sample
    predict_x = history.model.predict(val_inputs)
    predictions = np.argmax(predict_x, axis=1)

    # One-hot labels back to class indices
    rounded_labels = np.argmax(val_labels, axis=1)

    print(classification_report(rounded_labels, predictions))

    cm = confusion_matrix(rounded_labels, predictions)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot()
    plt.show()

In [None]:
## Benign only (4-class) -- DenseNet

epochs = 30

for i, (fold_train, fold_val) in enumerate(zip(train_set200B, val_set200B)):
    print("THIS IS FOLD", i+1)
    x_train, y_train, x_val, y_val = processing(fold_train, fold_val)
    # One-hot encode the 4 class ids
    y_train, y_val = (
        tf.keras.utils.to_categorical(labels, num_classes=4)
        for labels in (y_train, y_val)
    )
    history = DenseNetModel(x_train, y_train, x_val, y_val, epochs)
    runDenseNet(history)

In [None]:
# Malignant Only (4-class)-- DenseNet

epochs = 20
for i, (fold_train, fold_val) in enumerate(zip(train_set200M, val_set200M)):
    print("THIS IS FOLD", i+1)
    x_train, y_train, x_val, y_val = processing(fold_train, fold_val)
    # One-hot encode the 4 class ids
    y_train, y_val = (
        tf.keras.utils.to_categorical(labels, num_classes=4)
        for labels in (y_train, y_val)
    )
    history = DenseNetModel(x_train, y_train, x_val, y_val, epochs)
    runDenseNet(history)

In [None]:
## ALL 8 Categories

epochs = 30
for i, (fold_train, fold_val) in enumerate(zip(train_set200, val_set200)):
    print("THIS IS FOLD", i+1)
    x_train, y_train, x_val, y_val = processing(fold_train, fold_val)
    # One-hot encode the 8 class ids
    y_train, y_val = (
        tf.keras.utils.to_categorical(labels, num_classes=8)
        for labels in (y_train, y_val)
    )
    history = DenseNetModel(x_train, y_train, x_val, y_val, epochs)
    runDenseNet(history)

# Different Multiclass Data Splitting (4-class and Even # of Images)

In [None]:
## Split into two 4-class datasets
img_size = 224

# Group code ('B' / 'M', second "_"-separated filename field) -> subtype code
# (third field) -> class id. Ids run 0-3 independently inside each group.
_MBC_CLASS_IDS = {
    'B': {'A': 0, 'F': 1, 'PT': 2, 'TA': 3},
    'M': {'DC': 0, 'LC': 1, 'MC': 2, 'PC': 3},
}


def _mbc_pair_array(pairs):
    """Pack (image, class_id) pairs into an (n, 2) object array."""
    # Filled explicitly: np.array() on ragged [image, int] pairs raises
    # ValueError on recent NumPy versions.
    packed = np.empty((len(pairs), 2), dtype=object)
    for row, (pixels, class_num) in enumerate(pairs):
        packed[row, 0] = pixels
        packed[row, 1] = class_num
    return packed


def get_data_MBC(data_dir):
    """Load ``data_dir`` into separate 'B' and 'M' datasets of 4 classes each.

    The text before the first "-" in each filename is split on "_"; field 1
    selects the group ('B' or 'M') and field 2 the subtype, which is mapped
    to a class id 0-3 within that group.

    Files whose group is neither 'B' nor 'M' are ignored, as before. Files
    with an unexpected filename layout, a subtype that does not belong to
    their group, or unreadable image data are skipped with a printed message.
    Previously an unknown subtype silently reused the class id of the
    preceding file.

    Args:
        data_dir: Directory whose entries are the image files to load.

    Returns:
        Tuple ``(data_B, data_M)`` of object arrays of shape (n, 2); column 0
        holds the (img_size, img_size, 3) RGB image, column 1 the class id.
    """
    samples = {'B': [], 'M': []}
    for img in os.listdir(data_dir):
        fields = img.split("-")[0].split("_")
        if len(fields) < 3:
            print(f"skipping {img!r}: unexpected filename layout")
            continue
        classif, label = fields[1], fields[2]

        class_ids = _MBC_CLASS_IDS.get(classif)
        if class_ids is None:
            continue

        class_num = class_ids.get(label)
        if class_num is None:
            print(f"skipping {img!r}: label {label!r} is not a {classif!r} class")
            continue

        try:
            img_arr = cv2.imread(os.path.join(data_dir, img))[..., ::-1]  # convert BGR to RGB format
            resized_arr = cv2.resize(img_arr, (img_size, img_size))  # Reshaping images to preferred size
        except Exception as e:
            # cv2.imread returns None for unreadable files, so the indexing
            # above raises; keep the load best-effort as before.
            print(e)
            continue
        samples[classif].append((resized_arr, class_num))

    return _mbc_pair_array(samples['B']), _mbc_pair_array(samples['M'])

In [None]:
# 200X Mag 4-class splits
# get_data_MBC returns (benign, malignant); each fold is stored as
# ((train_B, train_M), (val_B, val_M)), loaded train split first.

_splits_200x = [
    (get_data_MBC(f'./fold{k}/train/200X'), get_data_MBC(f'./fold{k}/test/200X'))
    for k in range(1, 6)
]

(train_1B, train_1M), (val_1B, val_1M) = _splits_200x[0]
(train_2B, train_2M), (val_2B, val_2M) = _splits_200x[1]
(train_3B, train_3M), (val_3B, val_3M) = _splits_200x[2]
(train_4B, train_4M), (val_4B, val_4M) = _splits_200x[3]
(train_5B, train_5M), (val_5B, val_5M) = _splits_200x[4]

# Per-fold lists, index 0 == fold 1
train_set200B = [fold_train[0] for fold_train, _ in _splits_200x]
val_set200B = [fold_val[0] for _, fold_val in _splits_200x]

train_set200M = [fold_train[1] for fold_train, _ in _splits_200x]
val_set200M = [fold_val[1] for _, fold_val in _splits_200x]

In [None]:
# Create a dataset with at most 45 images of each type

img_size = 224

# Subtype code (third "_"-separated field of the filename part before the
# first "-") -> integer class id.
_EVEN_CLASS_IDS = {'A': 0, 'F': 1, 'PT': 2, 'TA': 3,
                   'DC': 4, 'LC': 5, 'MC': 6, 'PC': 7}


def get_data_even(data_dir, per_class=45):
    """Load at most ``per_class`` images of each of the 8 classes.

    Files are visited in ``os.listdir`` order and the first ``per_class``
    readable images of each class are kept. Files with an unknown label or an
    unexpected filename layout are ignored; unreadable images are reported
    with ``print`` and do not count towards the cap.

    This replaces eight copy-pasted branches with one lookup. It also works
    when a class has no images: the previous ``np.concatenate`` over
    per-class arrays failed in that case because an empty array has a
    different rank.

    Args:
        data_dir: Directory whose entries are the image files to load.
        per_class: Maximum number of images kept per class. Defaults to 45,
            the value that used to be hard-coded.

    Returns:
        Tuple ``(x, y)``: ``x`` stacks the (img_size, img_size, 3) RGB images
        grouped by ascending class id, ``y`` holds the matching class ids.
    """
    class_ids = sorted(_EVEN_CLASS_IDS.values())
    images_by_class = {class_num: [] for class_num in class_ids}

    for img in os.listdir(data_dir):
        fields = img.split("-")[0].split("_")
        if len(fields) < 3:
            continue
        class_num = _EVEN_CLASS_IDS.get(fields[2])
        if class_num is None:
            continue

        bucket = images_by_class[class_num]
        if len(bucket) >= per_class:
            continue

        try:
            img_arr = cv2.imread(os.path.join(data_dir, img))[..., ::-1]  # convert BGR to RGB format
            resized_arr = cv2.resize(img_arr, (img_size, img_size))  # Reshaping images to preferred size
        except Exception as e:
            # cv2.imread returns None for unreadable files, so the indexing
            # above raises; keep the load best-effort as before.
            print(e)
            continue
        bucket.append(resized_arr)

    # Same ordering as before: all class-0 images, then class 1, and so on.
    x = np.array([pixels for class_num in class_ids
                  for pixels in images_by_class[class_num]])
    y = np.array([class_num for class_num in class_ids
                  for _ in images_by_class[class_num]])

    return x, y

In [None]:
# Class-balanced 200X data: get_data_even returns (x, y); each fold is stored
# as ((train_x, train_y), (val_x, val_y)), loaded train split first.
_even_200x = [
    (get_data_even(f'./fold{k}/train/200X'), get_data_even(f'./fold{k}/test/200X'))
    for k in range(1, 6)
]

(train_1x, train_1y), (val_1x, val_1y) = _even_200x[0]
(train_2x, train_2y), (val_2x, val_2y) = _even_200x[1]
(train_3x, train_3y), (val_3x, val_3y) = _even_200x[2]
(train_4x, train_4y), (val_4x, val_4y) = _even_200x[3]
(train_5x, train_5y), (val_5x, val_5y) = _even_200x[4]

# Per-fold lists, index 0 == fold 1
train_set200x = [fold_train[0] for fold_train, _ in _even_200x]
val_set200x = [fold_val[0] for _, fold_val in _even_200x]

train_set200y = [fold_train[1] for fold_train, _ in _even_200x]
val_set200y = [fold_val[1] for _, fold_val in _even_200x]

In [None]:
# Normalize train and test data

def proc_normalize(x_train, y_train, x_val, y_val):
    """Convert the inputs to arrays and divide the image data by 255.

    Two pieces of dead code were removed: the ``reshape`` calls whose results
    were discarded, and an ``ImageDataGenerator`` that was built and fitted
    but never used or returned. The returned values are unchanged, and no
    augmentation is (or ever was) applied to them.

    Args:
        x_train: Training images.
        y_train: Training labels.
        x_val: Validation images.
        y_val: Validation labels.

    Returns:
        Tuple ``(x_train, y_train, x_val, y_val)`` of NumPy arrays; the x
        arrays are divided by 255, the y arrays are returned unscaled.
    """
    # Normalize the data
    x_train = np.array(x_train) / 255
    x_val = np.array(x_val) / 255

    y_train = np.array(y_train)
    y_val = np.array(y_val)

    return x_train, y_train, x_val, y_val

In [None]:
# Test 200X SimpleCNN2 on even dataset. 

epochs = 5

_even_folds = zip(train_set200x, train_set200y, val_set200x, val_set200y)
for i, (fold_x, fold_y, fold_val_x, fold_val_y) in enumerate(_even_folds):
    print("THIS IS FOLD", i+1)
    x_train, y_train, x_val, y_val = proc_normalize(fold_x, fold_y, fold_val_x, fold_val_y)
    # One-hot encode the 8 class ids
    y_train, y_val = (
        tf.keras.utils.to_categorical(labels, num_classes=8)
        for labels in (y_train, y_val)
    )
    model = simpleCNN_2()
    run_CNN(x_train, y_train, x_val, y_val, model, epochs)

In [None]:
# Run 200X DenseNet on even dataset

epochs = 30
# Bound the loop by the even-dataset fold list it actually indexes. It used
# len(train_set200), an unrelated list that only happens to have the same
# length.
for i in range(len(train_set200x)):
    print("THIS IS FOLD", i+1)
    x_train, y_train, x_val, y_val = proc_normalize(train_set200x[i], train_set200y[i], val_set200x[i], val_set200y[i])
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=8)
    y_val = tf.keras.utils.to_categorical(y_val, num_classes=8)
    history = DenseNetModel(x_train, y_train, x_val, y_val, epochs)
    runDenseNet(history)