In [None]:
# Import necessary libraries and modules

import pandas as pd
import matplotlib.pyplot as plt
import json
import numpy as np
import seaborn as sns
import dataframe_image as dfi
from sklearn.metrics import confusion_matrix

In [None]:
# Global display configuration: let pandas render up to 100 dataframe rows
# and use Times New Roman as the font family for matplotlib figures.

pd.set_option('display.max_rows', 100)
plt.rcParams.update({"font.family": "Times New Roman"})

In [None]:
# Function to read metrics from json files

def json_reader(file):
    """Read a JSON file and return its decoded content.

    Parameters
    ----------
    file : str or path-like
        Path of the JSON file to open.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the file's content.
    """
    with open(file) as handle:
        return json.load(handle)

In [None]:
# Load each model's metrics JSON file into its own dataframe.

(benchmark_training_df, family_training_df, canidae_training_df,
 felinae_training_df, sciuridae_training_df) = (
    pd.DataFrame.from_dict(json_reader(metrics_file))
    for metrics_file in ('benchmark.json', 'family.json', 'canidae.json',
                         'felinae.json', 'sciuridae.json')
)

In [None]:
# Function to clean up dataframe

def dataframe_tidy(df):
    """Relabel a metrics dataframe for presentation.

    Rows are renumbered ``1..len(df)`` under an index named ``Epoch`` (the
    number of rows is taken from the dataframe rather than fixed at 50), and
    the six columns are given human-readable titles.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with exactly six columns. The titles are assigned by
        position, so this assumes the columns arrive in the order loss,
        accuracy, top-5 accuracy, validation loss, validation accuracy,
        validation top-5 accuracy -- TODO confirm against the JSON files.

    Returns
    -------
    pandas.DataFrame
        The same dataframe object, modified in place.

    Raises
    ------
    ValueError
        If ``df`` does not have six columns (raised by pandas).
    """
    # One row per epoch, numbered from 1.
    df.index = range(1, len(df) + 1)
    df.index.name = 'Epoch'
    df.columns = ["Training Loss", "Training Accuracy", "Top 5 Training Accuracy",
                  "Validation Loss", "Validation Accuracy", "Validation Top 5 Accuracy"]
    return df

In [None]:
# Apply the presentation clean-up to every model's metrics dataframe.
(benchmark_training_df, family_training_df, canidae_training_df,
 felinae_training_df, sciuridae_training_df) = map(
    dataframe_tidy,
    (benchmark_training_df, family_training_df, canidae_training_df,
     felinae_training_df, sciuridae_training_df),
)

In [None]:
# Export every training-metrics dataframe as a PNG image for the report.

for _frame, _png_path in [
    (benchmark_training_df, 'results_images/benchmark_training_df.png'),
    (family_training_df, 'results_images/family_training_df.png'),
    (canidae_training_df, 'results_images/canidae_training_df.png'),
    (felinae_training_df, 'results_images/felinae_training_df.png'),
    (sciuridae_training_df, 'results_images/sciuridae_training_df.png'),
]:
    dfi.export(_frame, _png_path)

In [None]:
# For each model, extract the metrics of its chosen epoch (the epoch numbers
# are hard-coded here) as a one-row dataframe.

benchmark_best_epoch = benchmark_training_df.loc[48, :].to_frame().T
family_best_epoch = family_training_df.loc[45, :].to_frame().T
canidae_best_epoch = canidae_training_df.loc[43, :].to_frame().T
felinae_best_epoch = felinae_training_df.loc[40, :].to_frame().T
sciuridae_best_epoch = sciuridae_training_df.loc[35, :].to_frame().T

In [None]:
# Stack the one-row best-epoch frames into a single table for visualisation.

best_epoch_df = pd.concat(
    objs=[
        benchmark_best_epoch,
        family_best_epoch,
        canidae_best_epoch,
        felinae_best_epoch,
        sciuridae_best_epoch,
    ]
)

In [None]:
# Move the epoch numbers from the index into an 'Epoch' column, label each
# row with its classifier, round to two decimals and export as an image.

best_epoch_df = best_epoch_df.reset_index().rename(columns={'index': 'Epoch'})
best_epoch_df.index = ['Benchmark', 'Family-Level', 'Canidae',
                       'Felinae', 'Sciuridae']
best_epoch_df.index.name = 'Classifier'
best_epoch_df = best_epoch_df.round(2)
dfi.export(best_epoch_df, 'results_images/best_epoch_df.png')

In [None]:
# Function to plot training/validation progress

def train_val_plot(df, best_epoch, acc_text_y, acc_arrow_y, val_text_y, val_arrow_y, filename):
    """Plot accuracy and loss curves for one model and save the figure.

    The left panel shows training vs. validation accuracy and the right panel
    training vs. validation loss, both read from ``df``. A dashed vertical
    line, a text label and an arrow mark ``best_epoch`` on each panel.

    Parameters
    ----------
    df : pandas.DataFrame
        Metrics with the columns "Training Accuracy", "Validation Accuracy",
        "Training Loss" and "Validation Loss"; rows are plotted against
        1..len(df).
    best_epoch : int
        Epoch (x position) to highlight.
    acc_text_y, acc_arrow_y : float
        y positions of the annotation text and arrow on the accuracy panel.
    val_text_y, val_arrow_y : float
        y positions of the annotation text and arrow on the loss panel.
    filename : str
        Path the figure is saved to.
    """
    plt.style.use("ggplot")
    fig = plt.figure(figsize=(15.5, 5.5))
    epochs = np.arange(1, len(df) + 1)

    # Left panel: accuracy.
    ax1 = fig.add_subplot(1, 2, 1)
    ax1.plot(epochs, df["Training Accuracy"], label="Training Accuracy")
    ax1.plot(epochs, df["Validation Accuracy"], label="Validation Accuracy")
    ax1.set_title("Training and Validation Accuracy", pad=20, size=18)
    ax1.set_xlabel("Epoch", size=14)
    ax1.set_ylabel("Accuracy", size=14)
    ax1.axvline(x=best_epoch, color='k', linestyle='dashed', linewidth=0.5)
    # Text and arrow sit to the left of the marker line (fixed x offsets).
    ax1.annotate('Epoch used for evaluation', xy=(best_epoch-23, acc_text_y), size=12)
    ax1.arrow(x=best_epoch-13, y=acc_arrow_y, dx=12, dy=0, color='k', head_width=0.02,
              width=0.005, head_length=0.4, alpha=0.5)
    ax1.legend(fontsize=12)

    # Right panel: loss. Reads from `df` (not the global benchmark dataframe)
    # so each model's figure shows its own loss curves.
    ax2 = fig.add_subplot(1, 2, 2)
    ax2.plot(epochs, df["Training Loss"], label="Training Loss")
    ax2.plot(epochs, df["Validation Loss"], label="Validation Loss")
    ax2.set_title("Training and Validation Loss", pad=20, size=18)
    ax2.set_xlabel("Epoch", size=14)
    ax2.set_ylabel("Loss", size=14)
    ax2.axvline(x=best_epoch, color='k', linestyle='dashed', linewidth=0.5)
    ax2.annotate('Epoch used for evaluation', xy=(best_epoch-23, val_text_y), size=12)
    ax2.arrow(x=best_epoch-13, y=val_arrow_y, dx=12, dy=0, color='k', head_width=0.1,
              width=0.025, head_length=0.4, alpha=0.5)
    ax2.legend(fontsize=12)

    plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.3)
    plt.savefig(filename)
    plt.show()

In [None]:
# Plot the benchmark model's training/validation progress and save it as an image.

train_val_plot(df=benchmark_training_df, best_epoch=48,
               acc_text_y=0.55, acc_arrow_y=0.53,
               val_text_y=2.25, val_arrow_y=2.15,
               filename="results_images/benchmark_plot.png")

In [None]:
# Training/validation progress of the family-level model.
train_val_plot(df=family_training_df, best_epoch=45,
               acc_text_y=0.55, acc_arrow_y=0.53,
               val_text_y=2.25, val_arrow_y=2.15,
               filename="results_images/family_plot.png")

In [None]:
# Training/validation progress of the Canidae model.
train_val_plot(df=canidae_training_df, best_epoch=43,
               acc_text_y=0.55, acc_arrow_y=0.53,
               val_text_y=2.25, val_arrow_y=2.15,
               filename="results_images/canidae_plot.png")

In [None]:
# Training/validation progress of the Felinae model.
train_val_plot(df=felinae_training_df, best_epoch=40,
               acc_text_y=0.55, acc_arrow_y=0.53,
               val_text_y=2.25, val_arrow_y=2.15,
               filename="results_images/felinae_plot.png")

In [None]:
# Training/validation progress of the Sciuridae model (different accuracy
# annotation heights from the other models).
train_val_plot(df=sciuridae_training_df, best_epoch=35,
               acc_text_y=0.925, acc_arrow_y=0.915,
               val_text_y=2.25, val_arrow_y=2.15,
               filename="results_images/sciuridae_plot.png")

In [None]:
# Function to create dataframe showing test performance by model

def test_scores(file_list):
    df = pd.read_csv(file_list[0])
    for file in file_list[1:]:
        test_scores_df = pd.concat([df,pd.read_csv(file)])
        df = test_scores_df
    test_scores_df.drop(columns='Unnamed: 0', inplace=True)
    test_scores_df.index = ['Benchmark', 'Family-Level', 'Canidae', 
                            'Felinae', 'Sciuridae']
    test_scores_df.index.name = 'Classifier'
    test_scores_df = test_scores_df.round(2)
    return test_scores_df

In [None]:
# Build the in-sample test score table and export it as an image.
test_scores_df = test_scores(
    file_list=[
        "./Results/output_benchmark_score.csv",
        "./Results/family_data_output_score.csv",
        "./Results/canidae_output_canidae_score.csv",
        "./Results/felinae_output_felinae_score.csv",
        "./Results/sciuridae_output_sciuridae_score.csv",
    ]
)
dfi.export(test_scores_df, 'results_images/test_scores_df.png')

In [None]:
#Function to plot test performance

def test_plot(df, filename):
    """Draw a grouped horizontal bar chart of the test metrics and save it.

    The 'Loss' column is dropped, the remaining columns are melted into
    long form (one bar per classifier and metric), each bar is labelled with
    its value to two decimals, and the figure is written to ``filename``
    before being shown.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a 'Loss' column, whose index becomes a 'Classifier'
        column after ``reset_index`` (i.e. the index is named 'Classifier').
    filename : str
        Path the figure is saved to.
    """
    long_df = (
        df.drop(columns='Loss')
          .reset_index()
          .melt(id_vars='Classifier')
    )
    long_df.columns = ['Classifier', 'Metric', 'Accuracy']

    plt.figure(figsize=(12, 7))
    ax = sns.barplot(data=long_df, x='Accuracy', y='Classifier', hue='Metric',
                     orient='h', palette='deep')
    ax.set_title('Test Accuracy', pad=20, size=18)
    # x axis extends past 1.0, leaving room to the right of the bars
    ax.set_xlim(0, 1.3)
    ax.set_xlabel('Accuracy', size=14, weight='bold')
    ax.set_ylabel('Classifier', size=14, weight='bold')
    for bars in ax.containers:
        ax.bar_label(bars, fmt='%.2f', padding=5, size=11)
    ax.legend(fontsize=12)
    plt.savefig(filename)
    plt.show()

In [None]:
# Plot and save the in-sample test accuracy chart.
test_plot(df=test_scores_df, filename="results_images/test_scores_plot.png")

In [None]:
# Function to create two dataframes: (1) precision, recall and F1 score by species (2) weighted average
# across all species

def test_evaluation(file, index_label, name):
    df = pd.read_csv(file)
    df.columns=[index_label, "Precision", "Recall", "F1-Score", "No. Images"]
    df.set_index(index_label, inplace=True)
    df = df.round(2)
    df2 = df.tail(1)
    df2.index=[name]
    df2.index.name = 'Classifier'
    df.drop(df.tail(3).index,inplace = True)
    return df, df2

In [None]:
# For every model: split its evaluation CSV into a per-class table and a
# summary row, and export the per-class table as an image. The summary rows
# are then stacked into one table and exported too.
# NOTE(review): the family model is labelled 'Family' here but 'Family-Level'
# in the score tables -- confirm which label is intended.
benchmark_test_evaluation_df, benchmark_avg = test_evaluation(
    file="./Results/output_benchmark_evaluation.csv",
    index_label='Species', name='Benchmark')
dfi.export(benchmark_test_evaluation_df, 'results_images/benchmark_test_evaluation_df.png')

family_test_evaluation_df, family_avg = test_evaluation(
    file="./Results/family_data_output_evaluation.csv",
    index_label='Family', name='Family')
dfi.export(family_test_evaluation_df, 'results_images/family_test_evaluation_df.png')

canidae_test_evaluation_df, canidae_avg = test_evaluation(
    file="./Results/canidae_output_canidae_evaluation.csv",
    index_label='Species', name='Canidae')
dfi.export(canidae_test_evaluation_df, 'results_images/canidae_test_evaluation_df.png')

felinae_test_evaluation_df, felinae_avg = test_evaluation(
    file="./Results/felinae_output_felinae_evaluation.csv",
    index_label='Species', name='Felinae')
dfi.export(felinae_test_evaluation_df, 'results_images/felinae_test_evaluation_df.png')

sciuridae_test_evaluation_df, sciuridae_avg = test_evaluation(
    file="./Results/sciuridae_output_sciuridae_evaluation.csv",
    index_label='Species', name='Sciuridae')
dfi.export(sciuridae_test_evaluation_df, 'results_images/sciuridae_test_evaluation_df.png')

weighted_average_df = pd.concat(
    objs=[benchmark_avg, family_avg, canidae_avg, felinae_avg, sciuridae_avg]
)
dfi.export(weighted_average_df, 'results_images/weighted_average_df.png')

In [None]:
# Function to plot weighted average metrics

def weighted_average_plot(df, filename):
    """Draw a grouped horizontal bar chart of the summary metrics and save it.

    The 'No. Images' column is dropped, the remaining columns are melted to
    long form, each bar is labelled with its value to two decimals, and the
    figure is written to ``filename`` before being shown.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a 'No. Images' column, whose index becomes a 'Classifier'
        column after ``reset_index``.
    filename : str
        Path the figure is saved to.
    """
    without_counts = df.drop(columns='No. Images').reset_index()
    long_df = without_counts.melt(id_vars='Classifier')
    long_df.columns = ['Classifier', 'Metric', 'Value']

    plt.figure(figsize=(12, 7))
    ax = sns.barplot(data=long_df, x='Value', y='Classifier', hue='Metric',
                     orient='h', palette='deep')
    ax.set_title('Test Evaluation', pad=20, size=18)
    ax.set_xlim(0, 1.2)
    ax.set_xlabel('Value', size=14, weight='bold')
    ax.set_ylabel('Classifier', size=14, weight='bold')
    for bars in ax.containers:
        ax.bar_label(bars, fmt='%.2f', padding=5, size=11)
    ax.legend(fontsize=12)
    plt.savefig(filename)
    plt.show()

In [None]:
# Plot and save the in-sample summary metrics chart.
weighted_average_plot(df=weighted_average_df,
                      filename="results_images/weighted_average_plot.png")

In [None]:
# Function to create mappings between numeric labels and equivalent species name

def create_mappings(df):
    """Map each row position of ``df`` to the index label at that position.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe whose index holds the labels.

    Returns
    -------
    dict
        ``{0: df.index[0], 1: df.index[1], ...}``; empty for an empty frame.
    """
    return {position: df.index[position] for position in range(len(df))}

In [None]:
# Function to create:
# (1) dataframe of incorrect classifications
# (2) dataframe showing filenames of incorrect classifications
# (3) dataframe showing filenames of correct classifications
# (4) confusion matrix for predictions vs. truth

def test_mistakes(file, mapping):
    """Summarise a model's predictions against the true labels.

    Parameters
    ----------
    file : str
        CSV with "Actual", "Prediction" and "File" columns; the first CSV
        column is read as the index. "Actual"/"Prediction" hold the keys of
        ``mapping``.
    mapping : dict
        Numeric class label -> class name (see ``create_mappings``).

    Returns
    -------
    counts : pandas.DataFrame
        One row per (Actual, Prediction) pair that occurs, with a 'Count'
        column. Correct pairs are included as well as mistakes.
    misclassified_images : pandas.DataFrame
        Rows where the prediction differs from the actual class.
    correct_classifications : pandas.DataFrame
        Rows where the prediction equals the actual class.
    conf_mat : numpy.ndarray
        Confusion matrix (rows = actual, columns = predicted) with one
        row/column per entry of ``mapping``, ordered by numeric class label.
    """
    # Class names in numeric-label order. Passing them to confusion_matrix
    # fixes the matrix size and ordering to the full class list, so it lines
    # up with tick labels taken from the same list even when a class is
    # absent from the data or the names are not in sorted order.
    class_names = [mapping[key] for key in sorted(mapping)]

    results = pd.read_csv(file, index_col=0)
    # Reorder columns; copy so the assignments below act on an independent
    # frame instead of a selection of the one just read.
    results = results[["Actual", "Prediction", "File"]].copy()
    # Change integers to labels
    results['Prediction'] = results['Prediction'].map(mapping)
    results['Actual'] = results['Actual'].map(mapping)

    misclassified_images = results[results["Prediction"] != results["Actual"]]
    correct_classifications = results[results["Prediction"] == results["Actual"]]

    conf_mat = confusion_matrix(results['Actual'], results['Prediction'],
                                labels=class_names)
    counts = (
        results.groupby(['Actual', 'Prediction'])
        .size()
        .reset_index(name='Count')
    )
    return counts, misclassified_images, correct_classifications, conf_mat

In [None]:
# Function to create and save labelled confusion matrix 

def conf_matrix(matrix, labels, filename):
    """Render a confusion matrix as an annotated heatmap and save it.

    Parameters
    ----------
    matrix : array-like
        Confusion matrix values; rows are labelled "Actual" and columns
        "Prediction".
    labels : list
        Tick labels used for both axes.
    filename : str
        Path the figure is saved to.

    Notes
    -----
    ``sns.set`` changes global plotting settings, so calling this function
    also affects figures drawn afterwards.
    """
    sns.set(font_scale=1.2)
    plt.figure(figsize=(10, 7))
    heatmap_ax = sns.heatmap(matrix, annot=True, fmt='g', cmap='Blues', cbar=False,
                             xticklabels=labels, yticklabels=labels)
    # Vertical class names on the x axis, horizontal on the y axis.
    heatmap_ax.set_xticklabels(heatmap_ax.get_xticklabels(), rotation=90)
    heatmap_ax.set_yticklabels(heatmap_ax.get_yticklabels(), rotation=0)
    heatmap_ax.set_title('Predicted vs. Actual Class', pad=30, size=18)
    heatmap_ax.set_xlabel("Prediction", weight='bold')
    heatmap_ax.set_ylabel("Actual", weight='bold')
    plt.savefig(filename)

In [None]:
# Benchmark model: label mapping, prediction summary and confusion matrix.
benchmark_mapping = create_mappings(df=benchmark_test_evaluation_df)
(benchmark_mistakes_df, benchmark_mistake_files,
 benchmark_correct, benchmark_conf_mat) = test_mistakes(
    file="./Results/output_benchmark_mistakes.csv",
    mapping=benchmark_mapping,
)
conf_matrix(matrix=benchmark_conf_mat,
            labels=list(benchmark_test_evaluation_df.index),
            filename="results_images/benchmark_conf_mat.png")

In [None]:
# Canidae model: label mapping, prediction summary (exported) and confusion matrix.
canidae_mapping = create_mappings(df=canidae_test_evaluation_df)
(canidae_mistakes_df, canidae_mistake_files,
 canidae_correct, canidae_conf_mat) = test_mistakes(
    file="./Results/canidae_output_canidae_mistakes.csv",
    mapping=canidae_mapping,
)
dfi.export(canidae_mistakes_df, 'results_images/canidae_mistakes_df.png')
conf_matrix(matrix=canidae_conf_mat,
            labels=list(canidae_test_evaluation_df.index),
            filename="results_images/canidae_conf_mat.png")

In [None]:
# Felinae model: label mapping, prediction summary (exported) and confusion matrix.
felinae_mapping = create_mappings(df=felinae_test_evaluation_df)
(felinae_mistakes_df, felinae_mistake_files,
 felinae_correct, felinae_conf_mat) = test_mistakes(
    file="./Results/felinae_output_felinae_mistakes.csv",
    mapping=felinae_mapping,
)
dfi.export(felinae_mistakes_df, 'results_images/felinae_mistakes_df.png')
conf_matrix(matrix=felinae_conf_mat,
            labels=list(felinae_test_evaluation_df.index),
            filename="results_images/felinae_conf_mat.png")

In [None]:
# Sciuridae model: label mapping, prediction summary (exported) and confusion matrix.
sciuridae_mapping = create_mappings(df=sciuridae_test_evaluation_df)
(sciuridae_mistakes_df, sciuridae_mistake_files,
 sciuridae_correct, sciuridae_conf_mat) = test_mistakes(
    file="./Results/sciuridae_output_sciuridae_mistakes.csv",
    mapping=sciuridae_mapping,
)
dfi.export(sciuridae_mistakes_df, 'results_images/sciuridae_mistakes_df.png')
conf_matrix(matrix=sciuridae_conf_mat,
            labels=list(sciuridae_test_evaluation_df.index),
            filename="results_images/sciuridae_conf_mat.png")

In [None]:
# Repeat entire test process for out-of-sample datasets

In [None]:

def oos_scores(file_list, labels=None):
    """Combine per-model out-of-sample score CSV files into one table.

    Each CSV is read with ``pandas.read_csv``, the frames are stacked in the
    order given, the ``'Unnamed: 0'`` column is dropped and the rows are
    labelled. Values are not rounded.

    Parameters
    ----------
    file_list : list of str
        Paths of the score CSV files, one per classifier. A single-element
        list is supported.
    labels : list of str, optional
        Row labels, one per resulting row. Defaults to the four classifiers
        used here: Benchmark, Canidae, Felinae, Sciuridae.

    Returns
    -------
    pandas.DataFrame
        Scores indexed by ``Classifier``.

    Raises
    ------
    ValueError
        If ``file_list`` is empty or the number of labels does not match the
        number of rows (raised by pandas).
    KeyError
        If no ``'Unnamed: 0'`` column is present.
    """
    if labels is None:
        labels = ['Benchmark', 'Canidae',
                  'Felinae', 'Sciuridae']
    # Read everything first and concatenate once; this also works when only
    # one file is supplied.
    oos_scores_df = pd.concat([pd.read_csv(file) for file in file_list])
    oos_scores_df = oos_scores_df.drop(columns='Unnamed: 0')
    oos_scores_df.index = list(labels)
    oos_scores_df.index.name = 'Classifier'
    return oos_scores_df

In [None]:
# Build the out-of-sample score table and export it as an image.
oos_scores_df = oos_scores(
    file_list=[
        "./Results/output_benchmark_oos_score.csv",
        "./Results/canidae_output_canidae_oos_score.csv",
        "./Results/felinae_output_felinae_oos_score.csv",
        "./Results/sciuridae_output_sciuridae_oos_score.csv",
    ]
)
dfi.export(oos_scores_df, 'results_images/oos_scores_df.png')

In [None]:
# Function to plot out-of-sample scores

def oos_plot(df, filename):
    """Draw a grouped horizontal bar chart of out-of-sample metrics and save it.

    The 'Loss' column is dropped, the remaining columns are melted to long
    form, each bar is labelled with its value to two decimals, and the figure
    is written to ``filename`` before being shown.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a 'Loss' column, whose index becomes a 'Classifier'
        column after ``reset_index``.
    filename : str
        Path the figure is saved to.
    """
    metrics_only = df.drop(columns='Loss').reset_index()
    long_df = metrics_only.melt(id_vars='Classifier')
    long_df.columns = ['Classifier', 'Metric', 'Accuracy']

    plt.figure(figsize=(12, 7))
    ax = sns.barplot(data=long_df, x='Accuracy', y='Classifier', hue='Metric',
                     orient='h', palette='deep')
    ax.set_title('Out-of-Sample Accuracy', pad=20, size=18)
    ax.set_xlim(0, 1.3)
    ax.set_xlabel('Accuracy', size=14, weight='bold')
    ax.set_ylabel('Classifier', size=14, weight='bold')
    for bars in ax.containers:
        ax.bar_label(bars, fmt='%.2f', padding=5, size=11)
    ax.legend(fontsize=12)
    plt.savefig(filename)
    plt.show()

In [None]:
# Plot and save the out-of-sample accuracy chart.
oos_plot(df=oos_scores_df, filename="results_images/oos_scores_plot.png")

In [None]:
# Reuse test_evaluation on the out-of-sample evaluation CSVs to get, per
# model, the per-class table and the summary row.

benchmark_oos_evaluation_df, benchmark_oos_avg = test_evaluation(
    file="./Results/output_benchmark_oos_evaluation.csv",
    index_label='Species', name='Benchmark')
canidae_oos_evaluation_df, canidae_oos_avg = test_evaluation(
    file="./Results/canidae_output_canidae_oos_evaluation.csv",
    index_label='Species', name='Canidae')
felinae_oos_evaluation_df, felinae_oos_avg = test_evaluation(
    file="./Results/felinae_output_felinae_oos_evaluation.csv",
    index_label='Species', name='Felinae')
sciuridae_oos_evaluation_df, sciuridae_oos_avg = test_evaluation(
    file="./Results/sciuridae_output_sciuridae_oos_evaluation.csv",
    index_label='Species', name='Sciuridae')

In [None]:
# Export each out-of-sample per-class evaluation table as an image.
for _frame, _png_path in [
    (benchmark_oos_evaluation_df, 'results_images/benchmark_oos_evaluation_df.png'),
    (canidae_oos_evaluation_df, 'results_images/canidae_oos_evaluation_df.png'),
    (felinae_oos_evaluation_df, 'results_images/felinae_oos_evaluation_df.png'),
    (sciuridae_oos_evaluation_df, 'results_images/sciuridae_oos_evaluation_df.png'),
]:
    dfi.export(_frame, _png_path)

In [None]:
# Stack the out-of-sample summary rows into one table and export it.
weighted_average_oos_df = pd.concat(
    objs=[
        benchmark_oos_avg,
        canidae_oos_avg,
        felinae_oos_avg,
        sciuridae_oos_avg,
    ]
)
dfi.export(weighted_average_oos_df, 'results_images/weighted_average_oos_df.png')

In [None]:
def weighted_average_oos_plot(df, filename):
    """Draw a grouped horizontal bar chart of out-of-sample summary metrics.

    The 'No. Images' column is dropped, the remaining columns are melted to
    long form, each bar is labelled with its value to two decimals, and the
    figure is written to ``filename`` before being shown.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a 'No. Images' column, whose index becomes a 'Classifier'
        column after ``reset_index``.
    filename : str
        Path the figure is saved to.
    """
    long_df = (
        df.drop(columns='No. Images')
          .reset_index()
          .melt(id_vars='Classifier')
    )
    long_df.columns = ['Classifier', 'Metric', 'Value']

    plt.figure(figsize=(12, 7))
    ax = sns.barplot(data=long_df, x='Value', y='Classifier', hue='Metric',
                     orient='h', palette='deep')
    ax.set_title('Out-of-Sample Evaluation', pad=20, size=18)
    ax.set_xlim(0, 1.3)
    ax.set_xlabel('Value', size=14, weight='bold')
    ax.set_ylabel('Classifier', size=14, weight='bold')
    for bars in ax.containers:
        ax.bar_label(bars, fmt='%.2f', padding=5, size=11)
    ax.legend(fontsize=12)
    plt.savefig(filename)
    plt.show()

In [None]:
# Plot and save the out-of-sample summary metrics chart.
weighted_average_oos_plot(df=weighted_average_oos_df,
                          filename="results_images/weighted_average_oos_plot.png")

In [None]:
def oos_mistakes(file, mapping):
    """Summarise out-of-sample predictions against the true labels.

    Parameters
    ----------
    file : str
        CSV with "Actual", "Prediction" and "File" columns; the first CSV
        column is read as the index. "Actual"/"Prediction" hold the keys of
        ``mapping``.
    mapping : dict
        Numeric class label -> class name (see ``create_mappings``).

    Returns
    -------
    counts : pandas.DataFrame
        One row per (Actual, Prediction) pair that occurs, with a 'Count'
        column and class names in place of numeric labels. Correct pairs are
        included as well as mistakes.
    misclassified_images : pandas.DataFrame
        Rows where the prediction differs from the actual class (names).
    correct_classifications : pandas.DataFrame
        Rows where the prediction equals the actual class (names).
    conf_mat : numpy.ndarray
        Confusion matrix (rows = actual, columns = predicted) with one
        row/column per key of ``mapping``, in ascending key order. Classes
        that never occur get all-zero rows/columns.
    """
    def _with_names(frame):
        # New frame with both label columns translated to class names;
        # avoids assigning into a filtered slice of `results`.
        return frame.assign(Prediction=frame['Prediction'].map(mapping),
                            Actual=frame['Actual'].map(mapping))

    results = pd.read_csv(file, index_col=0)
    # reorder columns
    results = results[["Actual", "Prediction", "File"]]

    misclassified_images = _with_names(
        results[results["Prediction"] != results["Actual"]])
    correct_classifications = _with_names(
        results[results["Prediction"] == results["Actual"]])

    # Passing the full class list gives a matrix of the right size directly.
    # The previous manual zero-padding only looked at which classes were
    # never *predicted*, so a class present in "Actual" but never predicted
    # received an extra row/column and shifted the matrix out of alignment.
    class_ids = sorted(mapping)
    conf_mat = confusion_matrix(results['Actual'], results['Prediction'],
                                labels=class_ids)

    counts = (
        results.groupby(['Actual', 'Prediction'])
        .size()
        .reset_index(name='Count')
    )
    counts['Prediction'] = counts['Prediction'].map(mapping)
    counts['Actual'] = counts['Actual'].map(mapping)
    return counts, misclassified_images, correct_classifications, conf_mat

In [None]:
# Numeric-label -> class-name mappings taken from the out-of-sample
# evaluation tables.
benchmark_oos_mapping = create_mappings(df=benchmark_oos_evaluation_df)
canidae_oos_mapping = create_mappings(df=canidae_oos_evaluation_df)
felinae_oos_mapping = create_mappings(df=felinae_oos_evaluation_df)
sciuridae_oos_mapping = create_mappings(df=sciuridae_oos_evaluation_df)

In [None]:
# Summarise the out-of-sample predictions of each model.
(benchmark_oos_mistakes_df, benchmark_oos_mistake_files,
 benchmark_oos_correct, benchmark_oos_conf_mat) = oos_mistakes(
    file="./Results/output_benchmark_oos_mistakes.csv",
    mapping=benchmark_oos_mapping,
)
(canidae_oos_mistakes_df, canidae_oos_mistake_files,
 canidae_oos_correct, canidae_oos_conf_mat) = oos_mistakes(
    file="./Results/canidae_output_canidae_oos_mistakes.csv",
    mapping=canidae_oos_mapping,
)
# NOTE: the confusion-matrix variable is spelled `feliane_oos_conf_mat`
# (sic); the plotting cell further down uses the same spelling, so the name
# is kept as is.
(felinae_oos_mistakes_df, felinae_oos_mistake_files,
 felinae_oos_correct, feliane_oos_conf_mat) = oos_mistakes(
    file="./Results/felinae_output_felinae_oos_mistakes.csv",
    mapping=felinae_oos_mapping,
)
(sciuridae_oos_mistakes_df, sciuridae_oos_mistake_files,
 sciuridae_oos_correct, sciuridae_oos_conf_mat) = oos_mistakes(
    file="./Results/sciuridae_output_sciuridae_oos_mistakes.csv",
    mapping=sciuridae_oos_mapping,
)

In [None]:
# Export each out-of-sample prediction summary and draw its confusion matrix.
# NOTE(review): tick labels come from the in-sample *_test_evaluation_df
# indexes rather than the *_oos_evaluation_df ones -- presumably the class
# lists are identical; confirm.
dfi.export(benchmark_oos_mistakes_df, 'results_images/benchmark_oos_mistakes_df.png')
conf_matrix(matrix=benchmark_oos_conf_mat,
            labels=list(benchmark_test_evaluation_df.index),
            filename="results_images/benchmark_oos_conf_mat.png")

dfi.export(canidae_oos_mistakes_df, 'results_images/canidae_oos_mistakes_df.png')
conf_matrix(matrix=canidae_oos_conf_mat,
            labels=list(canidae_test_evaluation_df.index),
            filename="results_images/canidae_oos_conf_mat.png")

dfi.export(felinae_oos_mistakes_df, 'results_images/felinae_oos_mistakes_df.png')
conf_matrix(matrix=feliane_oos_conf_mat,
            labels=list(felinae_test_evaluation_df.index),
            filename="results_images/felinae_oos_conf_mat.png")

dfi.export(sciuridae_oos_mistakes_df, 'results_images/sciuridae_oos_mistakes_df.png')
conf_matrix(matrix=sciuridae_oos_conf_mat,
            labels=list(sciuridae_test_evaluation_df.index),
            filename="results_images/sciuridae_oos_conf_mat.png")

In [None]:
# Function to choose random images from dataframe

def random_files(df_list, random_state=None):
    """Pick one random row from each dataframe and stack the picks.

    Parameters
    ----------
    df_list : list of pandas.DataFrame
        Dataframes to sample from; each must have at least one row. A
        single-element list is supported.
    random_state : int, optional
        Passed to every ``DataFrame.sample`` call so the selection can be
        reproduced. The default ``None`` keeps the selection random.

    Returns
    -------
    pandas.DataFrame
        One sampled row per input dataframe, in input order.

    Raises
    ------
    ValueError
        If ``df_list`` is empty or a dataframe has no rows (raised by pandas).
    """
    # Sample first, concatenate once.
    return pd.concat(
        [frame.sample(1, random_state=random_state) for frame in df_list]
    )

In [None]:
# Sample one misclassified image from each in-sample and out-of-sample
# result set and collect the samples in a single dataframe.

misclassified_files = random_files(
    df_list=[
        benchmark_mistake_files, benchmark_oos_mistake_files,
        canidae_mistake_files, canidae_oos_mistake_files,
        felinae_mistake_files, felinae_oos_mistake_files,
        sciuridae_mistake_files, sciuridae_oos_mistake_files,
    ]
)

In [None]:
# Give the File column a minimum width so the file paths are not squeezed
# when the dataframe is displayed. The CSS property is `min-width`;
# `width-min` is not a CSS property.

misclassified_files.style.set_properties(subset=['File'], **{'min-width': '300px'})

In [None]:
# Same sampling, this time from the correctly classified images.

correct_files = random_files(
    df_list=[
        benchmark_correct, benchmark_oos_correct,
        canidae_correct, canidae_oos_correct,
        felinae_correct, felinae_oos_correct,
        sciuridae_correct, sciuridae_oos_correct,
    ]
)

In [None]:
# Give the File column a minimum width for display. The CSS property is
# `min-width`; `width-min` is not a CSS property.
correct_files.style.set_properties(subset=['File'], **{'min-width': '300px'})