## Imports

In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold

In [5]:
class DataPreparator():
    '''
    Keeps a train/test split of samples and labels and offers helpers to
    preview, scale, reshape and one-hot encode them. Every transforming
    method overwrites the stored arrays and also returns the new pair.
    '''

    def __init__(self, X, y):
        '''
        X is a pair (X_train, X_test) and y is a pair (y_train, y_test)
        '''
        self.X_train, self.X_test = X
        self.y_train, self.y_test = y

    def draw_images(self, num_of_images):
        '''
        This function draws random images from the train data, by specifying how many images to draw.
        Images are laid out 10 per row; the number of rows grows with num_of_images,
        so any positive count can be drawn. Nothing is drawn for a count <= 0.
        '''
        if num_of_images <= 0:
            return
        n_cols = 10
        # enough rows to hold every requested image (a fixed 5x10 grid overflowed past 50)
        n_rows = int(np.ceil(num_of_images / n_cols))
        random_indexes = np.random.randint(0, len(self.X_train), size=num_of_images)
        # squeeze=False keeps the axes array 2-D even when there is a single row
        fig, axis = plt.subplots(n_rows, n_cols, squeeze=False)
        fig.set_figwidth(15)
        # 3 units per row gives the 15x15 figure for the 50-image (5 row) layout
        fig.set_figheight(3 * n_rows)
        cells = axis.ravel()
        # hide every cell first so unused trailing cells do not show empty frames
        for cell in cells:
            cell.axis('off')
        for cell, idx in zip(cells, random_indexes):
            cell.imshow(self.X_train[idx], cmap='binary')

    def normalize_data(self, value):
        '''
        This function normalizes the data by dividing it by the value
        '''
        self.X_train = self.X_train / value
        self.X_test = self.X_test / value
        return self.X_train, self.X_test

    def reshape_data(self, axis):
        '''
        This function inserts a new dimension of size 1 at the given axis of both train and test data
        '''
        self.X_train = np.expand_dims(self.X_train, axis=axis)
        self.X_test = np.expand_dims(self.X_test, axis=axis)
        return self.X_train, self.X_test

    @staticmethod
    def _one_hot(labels, classes):
        '''
        One-hot encode labels with exactly one column per entry of classes,
        including classes that do not occur in labels.
        '''
        # keep the caller's index when the labels arrive as a Series
        index = labels.index if isinstance(labels, pd.Series) else None
        categorical = pd.Categorical(labels, categories=classes)
        encoded = pd.get_dummies(pd.Series(categorical, index=index))
        # plain column labels, as get_dummies yields for non-categorical input
        encoded.columns = pd.Index(classes)
        return encoded

    def encode_labels(self):
        '''
        This function one-hot encodes the train and test labels.
        Both results share the same columns (the sorted union of the labels seen
        in either split), so a class missing from one split still gets a column.
        '''
        if isinstance(self.y_train, pd.DataFrame) or isinstance(self.y_test, pd.DataFrame):
            # labels are already tabular (e.g. encoded by an earlier call): defer to pandas as-is
            self.y_train = pd.get_dummies(self.y_train)
            self.y_test = pd.get_dummies(self.y_test)
            return self.y_train, self.y_test
        classes = np.union1d(self.y_train, self.y_test)
        self.y_train = self._one_hot(self.y_train, classes)
        self.y_test = self._one_hot(self.y_test, classes)
        return self.y_train, self.y_test

In [6]:
class TrainLogs():
    '''
    Training diagnostics for a model exposing a Keras-style fit():
    per-epoch curves, k fold cross validation and a printed report.
    '''

    def __init__(self, model):
        # default model used by k_fold_cross_validation when none is passed in
        self.model = model

    # Plot Utility
    def train_curves(self, history, special_title):
        '''
        This function draws accuracy and loss curves for each epoch between train and validation data.
        history must expose a .history dict with the keys 'loss', 'val_loss',
        'accuracy' and 'val_accuracy'; special_title is appended to both plot titles.
        '''
        title_loss = 'Model loss per epoch ' + special_title
        title_accuracy = 'Model accuracy per epoch ' + special_title
        fig, axis = plt.subplots(nrows=1, ncols=2)
        # dimensions of figure
        fig.set_figheight(6)
        fig.set_figwidth(14)
        metrics = history.history
        # left panel: loss, right panel: accuracy
        panels = (('loss', title_loss), ('accuracy', title_accuracy))
        for panel, (metric, title) in zip(axis, panels):
            panel.plot(metrics[metric], label='Train')
            panel.plot(metrics['val_' + metric], label='Validation')
            panel.set_xlabel('epoch')
            panel.set_ylabel(metric)
            panel.set_title(title)
            panel.legend()

    # Evaluation Utilities
    def k_fold_cross_validation(self, X, y, k, batch_size=256, compiled_model=None, epochs=25):
        '''
        This function evaluates the model by performing stratified k fold cross validation.

        X, y: samples and their class labels (y is one-hot encoded internally)
        k: number of folds
        batch_size: batch size passed to fit
        compiled_model: model to train; falls back to the model given at construction
        epochs: number of epochs to train on each fold

        Returns the list of per-fold history objects followed by the mean train
        accuracy, mean validation accuracy, mean train loss and mean validation
        loss. Each mean is taken over every epoch of every fold.
        '''
        model = self.model if compiled_model is None else compiled_model
        # snapshot the starting weights so every fold trains from the same state;
        # otherwise later folds validate on samples the model has already been fit on
        initial_weights = model.get_weights()
        # encode once on the full label set so every fold shares the same columns
        y_encoded = pd.get_dummies(y)
        # instantiate k_fold
        k_fold = StratifiedKFold(k, shuffle=True, random_state=42)
        # output lists that will hold accuracy and loss for both train and validation
        # for each fold
        history_list = []
        accuracy_list = []
        val_accuracy_list = []
        val_loss_list = []
        loss_list = []
        # loop through the k folds
        for train_idx, val_idx in k_fold.split(X, y):
            # an empty snapshot means the model had no weights yet, so there is nothing to restore
            # NOTE(review): only weights are restored; optimizer state may still carry over - confirm
            if initial_weights:
                model.set_weights(initial_weights)
            # one fold is held out for validation, the remaining folds are used for training
            X_train, X_val = X[train_idx], X[val_idx]
            y_train, y_val = y_encoded.iloc[train_idx], y_encoded.iloc[val_idx]
            # training...
            history = model.fit(x=X_train, y=y_train, epochs=epochs, batch_size=batch_size,
                                validation_data=(X_val, y_val), verbose=0)
            # per-epoch accuracy and loss recorded during this fold
            history_list.append(history)
            accuracy_list.append(history.history['accuracy'])
            loss_list.append(history.history['loss'])
            val_accuracy_list.append(history.history['val_accuracy'])
            val_loss_list.append(history.history['val_loss'])
        # get the mean of results for all folds
        mean_accuracy = np.mean(accuracy_list)
        mean_val_accuracy = np.mean(val_accuracy_list)
        mean_loss = np.mean(loss_list)
        mean_val_loss = np.mean(val_loss_list)
        return history_list, mean_accuracy, mean_val_accuracy, mean_loss, mean_val_loss

    def cross_validation_report(self, history_list, mean_accuracy, mean_val_accuracy, mean_loss,  mean_val_loss, k, title):
        '''
        This function prints the mean metrics of a k fold cross validation run and
        draws the train curves of every fold, numbering the folds from 1.
        '''
        print('The mean accuracy of the model after', k, 'fold cross validation is:', mean_accuracy)
        print('The mean validation accuracy of the model after', k, 'fold cross validation is:', mean_val_accuracy)
        print('The mean loss of the model after', k, 'fold cross validation is:', mean_loss)
        print('The mean validation loss of the model after', k, 'fold cross validation is:', mean_val_loss)
        for i, history in enumerate(history_list, start=1):
            self.train_curves(history, 'of fold-' + str(i) + title)