# AUEB M.Sc. in Data Science

- Course: **Deep Learning**

- Semester: Spring 2020

- Instructor: Prof. P. Malakasiotis

- Author: S. Politis (p3351814)

- Homework: 2

# Model evaluation

## Import required libraries and custom packages

In [1]:
# Load IPython's autoreload extension and enable mode 2, so that edits to
# imported source modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import os
import pathlib
import platform
import re
import sys
import time

import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
import sklearn.metrics
import tensorflow as tf
import tensorflow_addons as tfa

In [3]:
# Add the sibling "src" directory (relative to the current working directory)
# to the module search path; presumably this is where the AUEB package
# imported below lives -- the relative path only resolves when the notebook is
# run from its own directory.
sys.path.append("../src/")

from AUEB.DL.H002 import Env, Experiment, Logger, Metrics, Reporting, Visualization
from AUEB.DL.H002.Data import Ingest
from AUEB.DL.H002.Models import DenseNet

In [4]:
# Notebook-wide logger, created through the project's Logger helper with the
# INFO level.
log = Logger.create_logger(level = logging.INFO, name = "msc-ds-dl-h-002")

In [5]:
# Lift all pandas display limits so that report data frames are rendered in
# full (every row, every column, unlimited line width and cell width).
# NOTE: "display.max_colwidth" must be None for "no limit"; the legacy value -1
# is deprecated since pandas 1.0 (FutureWarning) and rejected by newer releases.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.max_colwidth", None)

  after removing the cwd from sys.path.


### Evaluation report (all models without data augmentation)

In [None]:
# Build the evaluation report data frame. No specific architecture is passed
# (architecture = None); presumably this covers all trained models -- confirm
# against Reporting.evaluate_to_df.
evaluation_report_df = Reporting.evaluate_to_df(
    architecture = None
)

In [None]:
# Persist the evaluation report as a tab-separated file inside the report
# summaries directory, resolved relative to the parent of the working directory.
evaluation_report_path = pathlib.Path(
    os.path.join(
        pathlib.Path.cwd().parents[0],
        os.path.normpath(Env.OUTPUT_REPORT_SUMMARIES),
        os.path.normpath('CNN_DenseNet_evaluation_report.csv')
    )
)
evaluation_report_df.to_csv(str(evaluation_report_path), sep = "\t")

### Predictions report (all models without data augmentation)

In [None]:
# Build the predictions report data frame. No specific architecture is passed
# (architecture = None); presumably this covers all trained models -- confirm
# against Reporting.prediction_report_to_df.
predictions_report_df = Reporting.prediction_report_to_df(architecture = None)

In [None]:
# Persist the predictions report as a tab-separated file inside the report
# summaries directory, resolved relative to the parent of the working directory.
predictions_report_path = pathlib.Path(
    os.path.join(
        pathlib.Path.cwd().parents[0],
        os.path.normpath(Env.OUTPUT_REPORT_SUMMARIES),
        os.path.normpath('CNN_DenseNet_predictions_report.csv')
    )
)
predictions_report_df.to_csv(str(predictions_report_path), sep = "\t")

### Graphs

In [None]:
# Delegate confusion-matrix generation to the project's Reporting helper.
# NOTE(review): which models are covered and where the figures are written is
# decided inside Reporting.generate_confusion_matrices -- not visible here.
Reporting.generate_confusion_matrices()

## Evaluation of best model (DenseNet201) with data augmentation

In [6]:
# Get the test data generator.
test_data_frame_iterator = Ingest.test_binary(
    batch_size = 32, 
    target_size = (224, 224), 
    shuffle = False, 
    filter_classes = None
)

Found 3197 validated image filenames belonging to 2 classes.


In [11]:
# Identifier of the best model (DenseNet201 trained with data augmentation);
# it doubles as the checkpoint file name (without the ".hdf5" extension).
model_id = "DenseNet201_da_1_ts_224_224_t_0_s_1_bs_4_all_classes_binary"

# Checkpoint and report directories, both resolved relative to the parent of
# the current working directory.
model_checkpoints_path = pathlib.Path(os.path.join(pathlib.Path.cwd().parents[0], os.path.normpath(Env.OUTPUT_MODEL_BASE_DIR), os.path.normpath(Env.OUTPUT_MODEL_CHECKPOINT_DIR)))
# Report summaries directory (not used further in this cell).
report_summaries_path = pathlib.Path(os.path.join(pathlib.Path.cwd().parents[0], os.path.normpath(Env.OUTPUT_REPORT_SUMMARIES)))

# Lazy, recursive search for the checkpoint file of this model.
model_checkpoint_path = model_checkpoints_path.rglob(f"{model_id}.hdf5")

log.info(f"Evaluating {model_id}")

# Fail with an explicit message instead of a bare IndexError when the
# checkpoint is missing.
matching_checkpoints = list(model_checkpoint_path)
if not matching_checkpoints:
    raise FileNotFoundError(
        f"No checkpoint named {model_id}.hdf5 found under {model_checkpoints_path}"
    )

# Load the first matching checkpoint.
model = tf.keras.models.load_model(
    matching_checkpoints[0]
)

# The model is expected to report exactly three values; they are taken to be
# loss, accuracy and AUC (order follows the metrics the model was compiled
# with -- TODO confirm against the training code).
test_loss, test_accuracy, test_auc = model.evaluate(test_data_frame_iterator)

# Build the one-row report from a plain list of lists. Wrapping the row in
# np.array() would coerce the mixed str/float values to a single string dtype,
# leaving the metric columns as text instead of numbers.
evaluation_report_df = pd.DataFrame(
    data = [
        [model_id, test_loss, test_accuracy, test_auc]
    ],
    columns = [
        "model_id",
        "test_loss",
        "test_accuracy",
        "test_auc"
    ]
)

display(evaluation_report_df)

2020-07-07 18:16:54,874	[INFO]	msc-ds-dl-h-002	<ipython-input-11-f17937db2df0>.<module>: Evaluating DenseNet201_da_1_ts_224_224_t_0_s_1_bs_4_all_classes_binary




Unnamed: 0,model_id,test_loss,test_accuracy,test_auc
0,DenseNet201_da_1_ts_224_224_t_0_s_1_bs_4_all_classes_binary,0.6111918687820435,0.7037848234176636,0.7481727004051208


In [13]:
# Persist the data-augmentation evaluation report as a tab-separated file
# inside the report summaries directory, resolved relative to the parent of
# the working directory.
da_evaluation_report_path = pathlib.Path(
    os.path.join(
        pathlib.Path.cwd().parents[0],
        os.path.normpath(Env.OUTPUT_REPORT_SUMMARIES),
        os.path.normpath('DenseNet_Data_Augmentation_evaluation_report.csv')
    )
)
evaluation_report_df.to_csv(str(da_evaluation_report_path), sep = "\t")

## Predictions of best model (DenseNet201) with data augmentation

In [14]:
# Identifier of the best model (DenseNet201 trained with data augmentation);
# it doubles as the checkpoint file name (without the ".hdf5" extension).
model_id = "DenseNet201_da_1_ts_224_224_t_0_s_1_bs_4_all_classes_binary"

# Checkpoint and report directories, both resolved relative to the parent of
# the current working directory.
model_checkpoints_path = pathlib.Path(os.path.join(pathlib.Path.cwd().parents[0], os.path.normpath(Env.OUTPUT_MODEL_BASE_DIR), os.path.normpath(Env.OUTPUT_MODEL_CHECKPOINT_DIR)))
# Report summaries directory (not used further in this cell).
report_summaries_path = pathlib.Path(os.path.join(pathlib.Path.cwd().parents[0], os.path.normpath(Env.OUTPUT_REPORT_SUMMARIES)))

# Lazy, recursive search for the checkpoint file of this model.
model_checkpoint_path = model_checkpoints_path.rglob(f"{model_id}.hdf5")

log.info(f"Predictions for {model_id}")

# Fail with an explicit message instead of a bare IndexError when the
# checkpoint is missing.
matching_checkpoints = list(model_checkpoint_path)
if not matching_checkpoints:
    raise FileNotFoundError(
        f"No checkpoint named {model_id}.hdf5 found under {model_checkpoints_path}"
    )

# Load the first matching checkpoint.
model = tf.keras.models.load_model(
    matching_checkpoints[0]
)

# Compute true and predicted classes. The predicted class is the index of the
# largest model output along axis 1 (assumes one output column per class --
# TODO confirm against the model definition).
y_true = test_data_frame_iterator.classes
y_pred = model.predict(test_data_frame_iterator)
y_pred = tf.math.argmax(
    input = y_pred,
    axis = 1,
    output_type = tf.dtypes.int64,
    name = "y_pred"
).numpy()

# Compute classification report (as a nested dict) for the two classes.
classification_report = sklearn.metrics.classification_report(
    y_true,
    y_pred,
    labels = [0, 1],
    target_names = ["negative", "positive"],
    output_dict = True
)

# Overall accuracy plus macro-averaged precision, recall and F1.
accuracy = classification_report["accuracy"]
precision = classification_report["macro avg"]["precision"]
recall = classification_report["macro avg"]["recall"]
f1_score = classification_report["macro avg"]["f1-score"]

# Compute Cohen Kappa.
cohen_kappa = sklearn.metrics.cohen_kappa_score(
    y_true,
    y_pred,
    labels = [0, 1]
)

# One-row summary of the prediction metrics for this model.
predictions_report_df = pd.DataFrame(
    data = [
        [model_id, accuracy, precision, recall, f1_score, cohen_kappa]
    ],
    columns = [
        "model_id", "accuracy", "precision", "recall", "f1_score", "cohen_kappa"
    ]
)

display(predictions_report_df)

2020-07-07 18:18:22,966	[INFO]	msc-ds-dl-h-002	<ipython-input-14-f342087c1ca9>.<module>: Predictions for DenseNet201_da_1_ts_224_224_t_0_s_1_bs_4_all_classes_binary


Unnamed: 0,model_id,accuracy,precision,recall,f1_score,cohen_kappa
0,DenseNet201_da_1_ts_224_224_t_0_s_1_bs_4_all_classes_binary,0.703785,0.74285,0.694551,0.684831,0.395984


In [16]:
# Persist the data-augmentation predictions report as a tab-separated file
# inside the report summaries directory, resolved relative to the parent of
# the working directory.
da_predictions_report_path = pathlib.Path(
    os.path.join(
        pathlib.Path.cwd().parents[0],
        os.path.normpath(Env.OUTPUT_REPORT_SUMMARIES),
        os.path.normpath('DenseNet_Data_Augmentation_predictions_report.csv')
    )
)
predictions_report_df.to_csv(str(da_predictions_report_path), sep = "\t")

---