# Evaluation of Logs
TensorBoard logs play a major role in evaluating our model's performance.
This script extracts the relevant scalar metrics from a log directory so that the model's performance can be assessed on an informed basis.

In [6]:
import os
import numpy as np
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

def summarize_scores(scores):
    """Return ``(mean, min, max)`` of a non-empty sequence of scalar values.

    Raises:
        ValueError: if ``scores`` is empty (NumPy would otherwise emit a
            warning for the mean and an opaque error for min/max).
    """
    if len(scores) == 0:
        raise ValueError("cannot summarize an empty list of scalar values")
    return np.mean(scores), np.min(scores), np.max(scores)


def select_best_epoch(f1_events, companion_events):
    """Pick the step with the highest F1 and look up other metrics at that step.

    Args:
        f1_events: sequence of objects exposing ``.step`` and ``.value``.
        companion_events: mapping ``name -> sequence`` of objects exposing
            ``.step`` and ``.value``.

    Returns:
        ``(best_index, best_step, best_f1, companions)`` where ``best_index``
        is the position of the first maximum in ``f1_events``, ``best_f1`` is
        rounded to 4 decimals and ``companions`` maps each name to its value
        at ``best_step``, rounded to 4 decimals.

    Raises:
        ValueError: if ``f1_events`` is empty.
        KeyError: if a companion series has no entry at ``best_step``.
    """
    if len(f1_events) == 0:
        raise ValueError("no F1 values were logged")

    best_index = int(np.argmax([event.value for event in f1_events]))
    best_event = f1_events[best_index]

    companions = {}
    for name, events in companion_events.items():
        # Match on the logged step instead of the list position: positional
        # indexing silently returns the wrong epoch when series differ in
        # length or offset. If a step occurs more than once, the last value
        # recorded for it is used.
        value_by_step = {event.step: event.value for event in events}
        if best_event.step not in value_by_step:
            raise KeyError(
                f"'{name}' has no value logged at step {best_event.step}"
            )
        companions[name] = round(value_by_step[best_event.step], 4)

    return best_index, best_event.step, round(best_event.value, 4), companions


# Guarded so the helpers above can be imported without touching the log
# directory; when run as a script or notebook cell the names assigned below
# are still module-level globals.
if __name__ == "__main__":
    log_dir = r"C:\Users\Admin\Documents\MasterThesis\results\ResNet_BCos_224\seed_0\tensorboard_logs_fold_1"
    event_acc = EventAccumulator(log_dir)
    event_acc.Reload()

    # List all available scalars
    available_tags = event_acc.Tags()["scalars"]
    print("Available scalars:", available_tags)

    # Extract the logged scalar events for each metric tag
    precision_values = event_acc.Scalars("Metrics/Precision")
    accuracy_values = event_acc.Scalars("Accuracy/Validation")
    f1_values = event_acc.Scalars("Metrics/F1")
    recall_values = event_acc.Scalars("Metrics/Recall")
    auc_values = event_acc.Scalars("Metrics/AUC")

    # Steps are taken from the F1 series because the best epoch is chosen
    # from it.
    epochs = [x.step for x in f1_values]
    accuracy_scores = [x.value for x in accuracy_values]
    f1_scores = [x.value for x in f1_values]
    precision_scores = [x.value for x in precision_values]
    recall_scores = [x.value for x in recall_values]

    # Mean, min, max precision over all logged steps. This is a plain mean of
    # the per-step precision values, not the mAP metric.
    mean_precision, minimum_precision, maximum_precision = summarize_scores(
        precision_scores
    )

    print(f"Mean Precision (over epochs): {mean_precision}")
    print(f"Min Precision: {minimum_precision}")
    print(f"Max Precision: {maximum_precision}")

    # Mean, min, max validation accuracy over all logged steps
    mean_accuracy, minimum_accuracy, maximum_accuracy = summarize_scores(
        accuracy_scores
    )

    print()
    print(f"Mean Accuracy: {mean_accuracy}")
    print(f"Min Accuracy: {minimum_accuracy}")
    print(f"Max Accuracy: {maximum_accuracy}")

    # Find the step with the highest F1 score and the other metrics at it
    best_f1_idx, best_epoch, best_f1, best_metrics = select_best_epoch(
        f1_values,
        {
            "precision": precision_values,
            "recall": recall_values,
            "accuracy": accuracy_values,
            "auc": auc_values,
        },
    )
    best_precision = best_metrics["precision"]
    best_recall = best_metrics["recall"]
    best_accuracy = best_metrics["accuracy"]
    best_auc = best_metrics["auc"]

    print()
    print(f"Corresponding Accuracy: {best_accuracy}")
    print(f"Corresponding Precision: {best_precision}")
    print(f"Corresponding Recall: {best_recall}")
    print(f"Highest F1 Score: {best_f1} at Epoch {best_epoch}")
    print(f"Corresponding AUC: {best_auc}")

Available scalars: ['Loss/Train', 'Accuracy/Train', 'Loss/Validation', 'Accuracy/Validation', 'Metrics/Precision', 'Metrics/Recall', 'Metrics/F1', 'Metrics/AUC', 'Learning_Rate']
Mean Average Precision (mAP): 0.7615803480148315
Min Precision: 0.6440078616142273
Max Precision: 0.8868072032928467

Mean Accuracy: 0.8490406890710195
Min Accuracy: 0.8051604628562927
Max Accuracy: 0.8685081005096436

Corresponding Accuracy: 0.8621
Corresponding Precision: 0.7894
Corresponding Recall: 0.7687
Highest F1 Score: 0.7789 at Epoch 18
Corresponding AUC: 0.9098
