# Training and Validation Loss Curves for K-fold training history pickle files
<br> This code creates a graph of the training and validation loss per epoch, averaged across all folds.
<br> The shaded region behind each mean curve represents one standard deviation.
<br> The validation loss is also individually plotted per fold. 

In [None]:
# Import Libraries
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np

"""LOAD PICKLE FILE"""
# User settings: the title drawn above the plot and the name of the pickle to read.
graphtitle = 'This is the Title of your graph' 
filename = 'filename.pkl'

# Switch the working directory to the folder holding the pickle files,
# so that the relative `filename` above is resolved inside it.
os.chdir('C:/set/the/directory/of/picklefiles/')  # Specify Input Path

# Deserialize the stored training history (used below as a sequence with one
# history dictionary per fold).
# NOTE: only unpickle files you trust; pickle can run arbitrary code on load.
with open(filename, 'rb') as file:
    modelhistory = pickle.load(file)

"""SANITY CHECKS"""
if modelhistory:  # Only inspect the history when something was actually loaded.
    print("Keys in a dictionary:", list(modelhistory[0].keys()))  # Key names in the first dictionary.
    print("Number of Folds =", len(modelhistory))  # Check if the number of folds is correct.
    print("Number of Epochs =", len(modelhistory[0]['loss']))  # Epoch count of the first fold.

    # The per-epoch mean/std computed later in this script average the folds
    # element-wise, which only works when every fold has the same number of
    # epochs. Report a mismatch here (e.g. caused by early stopping) so the
    # cause is visible before NumPy fails with a less helpful error.
    epochs_per_fold = [len(fold['loss']) for fold in modelhistory]
    if len(set(epochs_per_fold)) > 1:
        print("Warning: folds have different numbers of epochs:", epochs_per_fold)
else:
    # Previously an empty history passed this check silently and the script
    # only failed later when indexing the first fold.
    print("Warning: the loaded history is empty; there is nothing to plot.")

"""SET GRAPH VARIABLES"""
# The x-axis runs from epoch 1 up to the epoch count of the first fold.
epochs = range(1, len(modelhistory[0]['loss']) + 1)

# One entry per fold; each entry is that fold's list of per-epoch loss values.
all_train_losses = [fold_history['loss'] for fold_history in modelhistory]
all_val_losses = [fold_history['val_loss'] for fold_history in modelhistory]

# Stack the folds as rows so that reducing along axis 0 combines the values
# that share the same epoch index across all folds.
train_matrix = np.asarray(all_train_losses)
val_matrix = np.asarray(all_val_losses)

# Per-epoch mean and standard deviation over the folds.
mean_train_loss = train_matrix.mean(axis=0)
mean_val_loss = val_matrix.mean(axis=0)
std_train_loss = train_matrix.std(axis=0)
std_val_loss = val_matrix.std(axis=0)

"""PLOT GRAPHS"""
# Plot the average training and validation loss curves; the shaded band around
# each curve spans one standard deviation above and below the mean.
plt.plot(epochs, mean_train_loss, label='Average Training Loss')
plt.fill_between(epochs, mean_train_loss - std_train_loss, mean_train_loss + std_train_loss, alpha=0.2)  # alpha = level of transparency between 0 (transparent) and 1 (opaque).
plt.plot(epochs, mean_val_loss, label='Average Validation Loss')
plt.fill_between(epochs, mean_val_loss - std_val_loss, mean_val_loss + std_val_loss, alpha=0.2)

# Plot the individual validation loss curves with slightly transparent green dashed lines.
for idx, val_loss in enumerate(all_val_losses):
    # Only the first line gets a real label, so the legend shows a single
    # entry for all folds; '_nolegend_' hides the remaining lines.
    label = 'Validation Loss per Fold' if idx == 0 else '_nolegend_'
    plt.plot(epochs, val_loss, 'g--', alpha=0.3, label=label)

plt.title(graphtitle)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.yticks(np.arange(0.05, 0.325, 0.025))  # Fixed tick positions starting at 0.05 in steps of 0.025; adjust to your loss range.

plt.grid(True, linestyle='--', alpha=0.5)  # Add gridlines.
plt.legend()

os.chdir('C:/Directory/to/save/your/image')  # Specify Output Path

# The title is free text, but it is also used in the file name. Replace the
# characters that are not allowed in Windows file names (this also covers the
# '/' path separator) so a title such as "Model A: run 1/2" still saves.
# Titles without such characters produce the same file name as before.
invalid_filename_chars = '<>:"/\\|?*'
safe_title = ''.join('_' if ch in invalid_filename_chars else ch for ch in graphtitle)
save_name = f'trainval_curves_{safe_title}.png'
plt.savefig(save_name, format='png')  # Save before show(), while the figure is still current.

plt.show()
