## Importing libraries

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pickle
import string

from keras import callbacks
from tensorflow.keras import optimizers
from keras.models import load_model
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from tensorflow.keras.utils import load_img
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

## Metaparameters

In [7]:
# Size every image is resized to by the generators (passed as `target_size`).
IMAGE_WIDTH = 64
IMAGE_HEIGHT = 64
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)
BATCH_SIZE = 128
# Shared seed for the train/val/test split and for the generators.
SEED = 99
EPOCHS = 10

# Build the path from components instead of a "..\\Datasets\\..." literal:
# backslashes are only path separators on Windows, so the literal resolved to a
# single oddly named entry on Linux/macOS.
dataset_dir = os.path.abspath(os.path.join("..", "Datasets", "TrainingDatasets"))
# Pixel values are scaled by 1/255 for every generator built from `datagen`.
datagen = ImageDataGenerator(rescale=1.0/255)
# Keyword arguments shared by every flow_from_dataframe call on the training dataset.
data_gen_args = dict(
    directory=dataset_dir,
    x_col='images',
    y_col='labels',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    seed=SEED,
)

## Choose a model's training records

In [8]:
# Select ONE training-history file by leaving exactly one assignment uncommented.
# The path is built with os.path.join so it also resolves on non-Windows systems
# (the previous 'TrainingStatistics\\...' literal only worked on Windows).
HISTORY_PATH = os.path.join('TrainingStatistics', 'BaselineTraining.pkl')
#HISTORY_PATH = os.path.join('TrainingStatistics', 'CbamTraining.pkl')
#HISTORY_PATH = os.path.join('TrainingStatistics', 'SeTraining.pkl')

# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# history files that were produced by your own training runs.
with open(HISTORY_PATH, 'rb') as f:
    history = pickle.load(f)

FileNotFoundError: [Errno 2] No such file or directory: 'TrainingStatistics\\BaselineTraining.pkl'

## Loading dataset

In [5]:
def get_labels_images(path):
    """Index an image dataset laid out as ``path/<directory>/<label>/<image>``.

    Entries are visited in sorted order so the resulting frame (and therefore
    any seeded split made from it) is reproducible; ``os.listdir`` alone gives
    no ordering guarantee. Stray files at the directory or label level (for
    example ``.DS_Store`` or ``Thumbs.db``) are skipped instead of raising
    ``NotADirectoryError``, and only regular files are recorded as images.

    Parameters
    ----------
    path : str
        Root folder of the dataset.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``directories`` (top-level folder
        name), ``labels`` (class folder name) and ``images`` (path relative to
        ``path``, always joined with ``'/'``).
    """
    records = []

    for directory in sorted(os.listdir(path)):
        directory_path = os.path.join(path, directory)
        if not os.path.isdir(directory_path):
            continue
        for label in sorted(os.listdir(directory_path)):
            label_path = os.path.join(directory_path, label)
            if not os.path.isdir(label_path):
                continue
            for image in sorted(os.listdir(label_path)):
                if not os.path.isfile(os.path.join(label_path, image)):
                    continue
                # Keep '/' in the stored relative path, as the original did.
                records.append((directory, label, directory + '/' + label + '/' + image))

    return pd.DataFrame(records, columns=['directories', 'labels', 'images'])

In [6]:
# Index every image found under the training dataset folder.
df = get_labels_images(path=dataset_dir)

NameError: name 'dataset_dir' is not defined

In [None]:
def split_data(data, test_size=0.10, val_size=0.15, random_state=None):
    """Split ``data`` into training, validation and test frames.

    The test split is taken from ``data`` first; the validation split is then
    taken from what remains, so ``val_size`` is a fraction of the non-test
    rows, not of the whole frame. All three frames get a fresh 0..n-1 index,
    and the size of each split is printed.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to split.
    test_size : float, optional
        Passed to the first ``train_test_split`` call (default 0.10).
    val_size : float, optional
        Passed to the second ``train_test_split`` call (default 0.15).
    random_state : int or None, optional
        Seed for both splits. ``None`` (the default) uses the notebook-level
        ``SEED`` so that ``split_data(df)`` behaves exactly as before.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_df, val_df, test_df)``.
    """
    if random_state is None:
        # Resolved at call time so the function can be defined before SEED is.
        random_state = SEED

    train_df, test_df = train_test_split(data, test_size=test_size, random_state=random_state)
    train_df, val_df = train_test_split(train_df, test_size=val_size, random_state=random_state)

    train_df = train_df.reset_index(drop=True)
    val_df = val_df.reset_index(drop=True)
    test_df = test_df.reset_index(drop=True)

    print('----------------------------------------------------------------')
    print("The Number of Samples per Split")
    print('----------------------------------------------------------------')
    print('Number of   training samples : {}'.format(train_df.shape[0]))
    print('Number of validation samples : {}'.format(val_df.shape[0]))
    print('Number of       test samples : {}'.format(test_df.shape[0]))
    print('----------------------------------------------------------------')

    return train_df, val_df, test_df

In [None]:
def define_image_generators(train_df, val_df, test_df):
    """Create the training, validation and test image generators.

    All three generators are built from the notebook-level ``datagen`` with the
    keyword arguments in ``data_gen_args``. The same explicit ``classes`` list
    (the sorted union of the labels in the three frames) is passed to each
    one. Without it every generator infers its class list from its own frame,
    so a class missing from one split would shift that split's label indices
    relative to the others.

    Parameters
    ----------
    train_df, val_df, test_df : pandas.DataFrame
        Frames holding the image-path and label columns named by ``x_col`` and
        ``y_col`` in ``data_gen_args``.
        NOTE(review): assumes the label column holds one hashable label per
        row (not lists of labels) - confirm if multi-label data is ever used.

    Returns
    -------
    tuple
        ``(train_generator, val_generator, test_generator)``; only the test
        generator is created with ``shuffle=False``.
    """
    # Work on a copy so the shared notebook-level dict is never mutated.
    gen_args = dict(data_gen_args)

    if 'classes' not in gen_args:
        label_col = gen_args.get('y_col', 'class')
        all_labels = pd.concat([train_df[label_col], val_df[label_col], test_df[label_col]])
        # Sorted order matches what the generators infer when every class is present.
        gen_args['classes'] = sorted(all_labels.unique())

    train_generator = datagen.flow_from_dataframe(train_df, **gen_args)
    val_generator = datagen.flow_from_dataframe(val_df, **gen_args)
    # Keep the test order fixed so predictions line up with generator.classes/filenames.
    test_generator = datagen.flow_from_dataframe(test_df, **gen_args, shuffle=False)

    return train_generator, val_generator, test_generator

In [None]:
# Sort the rows by label first so that unique() returns the class names in sorted order.
temp_df = df.sort_values('labels')
alphabet_labels = np.array([*string.ascii_lowercase])
print_labels = np.array(temp_df['labels'].unique())
# Lowercase letters that do not occur among the dataset labels.
set_diff = np.setdiff1d(alphabet_labels, print_labels)

In [None]:
# Split the indexed dataset into training, validation and test frames.
train_df, val_df, test_df = split_data(data=df)

In [None]:
# Build one image generator per split.
train_generator, val_generator, test_generator = define_image_generators(
    train_df=train_df,
    val_df=val_df,
    test_df=test_df,
)

## Model Evaluation

In [None]:
def plot_accuracy_loss(history):
    """Draw training vs. validation accuracy and loss curves side by side.

    Parameters
    ----------
    history : mapping
        Must provide the keys ``'accuracy'``, ``'val_accuracy'``, ``'loss'``
        and ``'val_loss'``, each holding one value per epoch.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    train_acc = history['accuracy']
    valid_acc = history['val_accuracy']
    train_loss = history['loss']
    valid_loss = history['val_loss']

    # Epochs are numbered from 1 on the x axis.
    epoch_axis = range(1, len(train_acc) + 1)

    def _draw_panel(position, train_values, valid_values, train_label, valid_label, title, y_label):
        """Plot one training/validation pair in the given slot of the 1x2 grid."""
        ax = plt.subplot(1, 2, position)
        ax.plot(epoch_axis, train_values, c='b', label=train_label)
        ax.plot(epoch_axis, valid_values, c='g', label=valid_label)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()

    plt.figure(figsize=(10, 7))
    _draw_panel(1, train_acc, valid_acc,
                'Training Accuracy', 'Validation Accuracy',
                'Training vs. Validation Accuracy', 'Accuracy')
    _draw_panel(2, train_loss, valid_loss,
                'Training Loss', 'Validation Loss',
                'Training vs. Validation Loss', 'Loss')
    plt.show()

In [None]:
def show_accuracy_loss(model, test_generator):
    """Evaluate ``model`` on ``test_generator`` and print the loss and accuracy.

    ``model.evaluate`` must return exactly two values, loss first and accuracy
    second; the accuracy is printed multiplied by 100.

    Parameters
    ----------
    model : object
        Anything exposing ``evaluate(test_generator)``.
    test_generator : object
        Passed straight through to ``model.evaluate``.

    Returns
    -------
    None
    """
    print('---------------Evaluation Against Test Data---------------------')
    metrics = model.evaluate(test_generator)
    eval_loss, eval_acc = metrics
    accuracy_percent = eval_acc * 100
    summary = 'Evaluation Loss: {:.4f}, Evaluation Accuracy: {:.2f}'.format(eval_loss, accuracy_percent)
    print(summary)
    print('----------------------------------------------------------------')

In [None]:
def show_predictions(model, test_generator, directory):
    """Predict on the test generator and report accuracy, metrics and samples.

    Prints the accuracy and the classification report, draws the confusion
    matrix normalised by the total number of samples, and shows up to 20 test
    images titled with their actual and predicted labels.

    Changes from the earlier version:
    * the index -> label mapping is read from ``test_generator.class_indices``
      rather than the notebook-level ``print_labels``, so titles stay correct
      even if the generator's class list differs from ``print_labels``
      (``print_labels`` is only used when the generator has no mapping);
    * the sample grid is limited to the number of available test images
      instead of always indexing 20 entries;
    * preview images are loaded at the generator's ``target_size`` rather than
      a hard-coded ``(64, 64)``.

    Parameters
    ----------
    model : object
        Must expose ``predict(test_generator)`` returning a 2-D array of
        per-class scores (``argmax(axis=1)`` is applied to it).
    test_generator : object
        Must expose ``classes`` and ``filenames``. It should have been created
        with ``shuffle=False`` so predictions line up with those attributes.
    directory : str
        Folder that the entries of ``test_generator.filenames`` are relative to.

    Returns
    -------
    None
    """
    preds = model.predict(test_generator)
    y_test = test_generator.classes
    y_pred_binary = preds.argmax(axis=1)

    # Invert {label: index} from the generator so indices map back to names.
    class_indices = getattr(test_generator, 'class_indices', None)
    if class_indices:
        index_to_label = {index: name for name, index in class_indices.items()}
    else:
        index_to_label = dict(enumerate(print_labels))

    print('---------------Predictions against Test Data---------------------')
    print("The Accuracy of the model with the given test sample is : ", accuracy_score(y_test, y_pred_binary)*100, "%")
    print('----------------------------------------------------------------')
    print('')

    print('-----------------Classification Report--------------------------')
    print(classification_report(y_test, y_pred_binary))
    print('----------------------------------------------------------------')
    print('')

    print('-----------------Confusion Matrix-------------------------------')
    cm = confusion_matrix(y_test, y_pred_binary)
    plt.subplots(figsize=(18, 6))
    # Each cell is shown as a share of ALL test samples, not of its row.
    sns.heatmap(cm/np.sum(cm), annot=True, fmt='.2%', cmap='Blues')
    plt.show()
    print('')

    print('----------------Actual vs Predicted Figures---------------------')
    filenames = test_generator.filenames
    preview_size = tuple(getattr(test_generator, 'target_size', (64, 64)))
    # The 4x5 grid holds at most 20 previews; show fewer when the test set is smaller.
    n_shown = min(20, len(filenames))
    plt.figure(figsize=(25, 20))
    for i in range(n_shown):
        plt.subplot(4, 5, i + 1)
        image = load_img(os.path.join(directory, filenames[i]), target_size=preview_size)
        plt.imshow(image)
        plt.title('Actual: {} - Predicted: {}'.format(index_to_label[y_test[i]], index_to_label[y_pred_binary[i]]))
    plt.show()


In [None]:
# Full evaluation: training curves, test-set metrics, then prediction samples.
# The previous version wrapped these calls in sklearn's Pipeline, but Pipeline
# steps must be estimator objects. The functions were executed while the list
# was being built and their None return values became the "steps", so the
# resulting `pipeline` object was never usable. Calling the functions directly
# produces the same output without the misleading object.
# NOTE(review): `model` is not defined in any visible cell - load it (for
# example with the already imported `load_model`) before running this cell.
plot_accuracy_loss(history)
show_accuracy_loss(model, test_generator)
show_predictions(model, test_generator, dataset_dir)

In [None]:
# Evaluation without the training curves (for when no history file is available).
# The calls are made directly rather than inside sklearn's Pipeline: Pipeline
# steps must be estimator objects, and the previous version only stored the
# functions' None return values in it.
# NOTE(review): `model` is not defined in any visible cell - load it before
# running this cell.
show_accuracy_loss(model, test_generator)
show_predictions(model, test_generator, dataset_dir)

## Testing model against Custom Test Data

In [None]:
# Fit a label binarizer on the class names and keep the binarized labels.
lbl_binarizer = LabelBinarizer()
lbl_binarizer.fit(print_labels)
labels = lbl_binarizer.transform(print_labels)

# Save the fitted binarizer next to the notebook so it can be loaded again later.
with open("lbl_binarizer.pkl", 'wb') as file:
    pickle.dump(lbl_binarizer, file)

In [None]:
# Folder holding the custom test images (relative to the working directory);
# it is listed by prepare_custom_images and used as the generator's `directory`.
CUSTOM_TEST_PATH = "custom_dataset"

def prepare_custom_images(filepath):
    """List the custom test images in ``filepath`` with labels from their names.

    Each file's label is its name without the extension (``'a.png'`` ->
    ``'a'``). Entries are returned in sorted order so the frame is the same on
    every run (``os.listdir`` gives no ordering guarantee), and sub-directories
    are skipped because they cannot be loaded as images.

    Parameters
    ----------
    filepath : str
        Folder containing the image files.

    Returns
    -------
    pandas.DataFrame
        Columns ``labels`` and ``images`` (file names relative to ``filepath``).
    """
    images = [
        name
        for name in sorted(os.listdir(filepath))
        if os.path.isfile(os.path.join(filepath, name))
    ]
    labels = [os.path.splitext(name)[0] for name in images]

    return pd.DataFrame({'labels': labels, 'images': images})

# Build the frame of custom test images and their filename-derived labels.
custom_test_df = prepare_custom_images(filepath=CUSTOM_TEST_PATH)

In [None]:
# Generator arguments for the custom test images. They get their own name
# rather than overwriting the notebook-level `data_gen_args`, so re-running the
# earlier training/validation/test generator cell after this one still reads
# from the training dataset directory.
custom_gen_args = dict(directory=CUSTOM_TEST_PATH,
                       x_col='images',
                       y_col='labels',
                       target_size=IMAGE_SIZE,
                       class_mode='categorical',
                       batch_size=BATCH_SIZE,
                       seed=SEED)

# shuffle=False keeps predictions aligned with `custom_test_generator.filenames`.
custom_test_generator = datagen.flow_from_dataframe(custom_test_df,
                                                    **custom_gen_args,
                                                    shuffle=False,
                                                    validate_filenames=False)

In [None]:
# Predict on the custom images; the generator was created with shuffle=False, so
# the rows of `custom_preds` follow `custom_test_generator.filenames`.
# NOTE(review): `model` is not defined in any visible cell - confirm where it is loaded.
custom_preds = model.predict(custom_test_generator)

In [None]:
# Index of the highest-scoring class for each custom image.
y_pred_binary = custom_preds.argmax(axis=1)

# Size the grid from the number of images. The previous fixed 4x5 grid made
# plt.subplot raise ValueError as soon as there were more than 20 custom images.
# For 20 images or fewer the layout and figure size are unchanged.
n_images = len(custom_preds)
n_cols = 5
n_rows = max(4, -(-n_images // n_cols))  # ceiling division without importing math

plt.figure(figsize=(25, 5 * n_rows))
for i in range(n_images):
    plt.subplot(n_rows, n_cols, i + 1)
    filename = custom_test_generator.filenames[i]
    # Load the preview at the same size the generator uses, not a hard-coded (64, 64).
    image = load_img(os.path.join(CUSTOM_TEST_PATH, filename), target_size=IMAGE_SIZE)
    plt.imshow(image)
    plt.title('Actual: {} - Predicted: {}'.format(os.path.splitext(filename)[0], print_labels[y_pred_binary[i]]))
plt.show()