## Post processing

The purpose of this notebook is to post-process the checkpoints after a training run has finished. It allows you to select the best checkpoint (i.e. the best epoch) for each fold.

#### Set up

In [1]:
import numpy as np
import pandas as pd
from glob import glob
import os
import math
from tqdm import tqdm
import pickle
import gzip
import shutil

In [4]:
# Name of the test (training run) whose results and checkpoints are processed below
test_name = "msnlt_conf_t13"

#### Determine best epoch for each fold

In [6]:
# Rename result file
#
# Drops the '_0' suffix from the results pickle of the current test.
# The cell is safe to re-run: if the file was already renamed it does nothing,
# and it only fails when neither the old nor the new file can be found.

results_dir = './models/results/conf_t/'
old_filename = results_dir + test_name + '_0.pkl'
new_filename = results_dir + test_name + '.pkl'

if os.path.exists(old_filename):
    # Rename the file
    os.rename(old_filename, new_filename)
elif os.path.exists(new_filename):
    # Already renamed by a previous run of this cell: nothing left to do
    print('Result file already renamed: ' + new_filename)
else:
    raise FileNotFoundError(
        'Result file not found: ' + old_filename + ' (nor ' + new_filename + ')'
    )

In [7]:
# Make a dictionary with best epochs and print them

# Load the results from the pickle file
# NOTE(review): pickle.load can execute arbitrary code while loading;
# only open result files that you produced yourself.
with open('./models/results/conf_t/' + test_name + '.pkl', 'rb') as f:
    dic = pickle.load(f)

# For every fold: the 1-based epoch with the highest test_r2 and that R2 value
best_epochs = {}

# Iterate through the folds
for fold, fold_results in dic.items():
    # nanargmax ignores NaN entries; plain argmax would return the position of
    # a NaN and report it as the "best" epoch. It raises ValueError if every
    # test_r2 of the fold is NaN, i.e. when there is no usable epoch at all.
    best_index = int(np.nanargmax(fold_results['test_r2']))

    best_epochs[fold] = {
        'best_test_r2_epoch': best_index + 1,  # Adjust for 0-based index
        'best_test_r2_value': fold_results['test_r2'][best_index]  # R2 value at the best epoch
    }

# Fold names, taken from the results themselves so that the cell also works
# when the results do not contain exactly the folds 'A' to 'E'
fold_names = list(best_epochs)

# Dictionary containing the best epochs and R2 values for each fold
# (an independent copy of best_epochs, one inner dict per fold)
best_epochs_dict = {
    fold_name: dict(best_epochs[fold_name]) for fold_name in fold_names
}

# Print the best epochs based on test_r2 in the specified format
print(test_name)
print("Best epochs based on test_r2:")
for fold_name, epochs in best_epochs_dict.items():
    print(f"Fold {fold_name}: Epoch {epochs['best_test_r2_epoch']} (R2_val = {epochs['best_test_r2_value']:.4f})")


msnlt_conf_t13
Best epochs based on test_r2:
Fold A: Epoch 1 (R2_val = 0.2521)
Fold B: Epoch 1 (R2_val = -2.1369)
Fold C: Epoch 1 (R2_val = 0.2042)
Fold D: Epoch 1 (R2_val = 0.3418)
Fold E: Epoch 1 (R2_val = 0.3176)


In [8]:
# Save the best epoch checkpoint files in a folder

# Path to the folder containing all checkpoints
all_checkpoints_folder = './models/checkpoints/conf_t/all'

# Path to the folder where the best checkpoints will be copied
best_checkpoints_folder = './models/checkpoints/conf_t/' + test_name + '_best'

# Create the destination directory once, before copying anything
os.makedirs(best_checkpoints_folder, exist_ok=True)

# Copy the best checkpoint of every fold exactly once.
# The loop variable is deliberately not called `best_epochs_dict`, so the
# dictionary of that name built in the previous cell is not overwritten.
for fold, fold_best in best_epochs.items():
    # Extract the best epoch for test_r2
    best_test_r2_epoch = fold_best['best_test_r2_epoch']

    # Checkpoint files are named '<test_name>_<fold>_<epoch>.pth'
    checkpoint_filename = f"{test_name}_{fold}_{best_test_r2_epoch}.pth"

    # Source path of the best checkpoint file
    source_path = os.path.join(all_checkpoints_folder, checkpoint_filename)

    # Destination path where the best checkpoint will be copied
    destination_path = os.path.join(best_checkpoints_folder, checkpoint_filename)

    # Copy the best checkpoint to the 'best' folder
    shutil.copy(source_path, destination_path)

# Inform the user that the operation is completed
print('Best checkpoints copied to '+ best_checkpoints_folder)


Best checkpoints copied to ./models/checkpoints/conf_t/msnlt_conf_t13_best


In [10]:
# Rename checkpoints
#
# Shortens '<model>_..._<fold>_<epoch>.pth' to '<model>_<fold>.pth'.
# Files that do not have that shape (already renamed checkpoints, other files)
# are left alone, so re-running the cell cannot collapse every checkpoint
# into a single '<model>_<model>.pth' file.

# Path to the folder containing the copied checkpoints
copied_checkpoints_folder = best_checkpoints_folder

# Iterate through the files in the "best" folder and rename them
# (sorted only to make the printed order reproducible)
for filename in sorted(os.listdir(copied_checkpoints_folder)):
    print(filename)

    # Extract the relevant parts for the new name
    stem, extension = os.path.splitext(filename)
    parts = stem.split('_')

    # A file that still needs renaming has at least model, fold and a numeric
    # epoch; an already renamed '<model>_<fold>.pth' only has two parts
    if extension != '.pth' or len(parts) < 3 or not parts[-1].isdigit():
        print("Skipped (already renamed or not an epoch checkpoint)")
        continue

    model = parts[0]
    fold = parts[-2]

    # Construct the new filename based on the extracted parts
    new_filename = f"{model}_{fold}.pth"

    # Source path of the checkpoint file
    source_path = os.path.join(copied_checkpoints_folder, filename)

    # Destination path with the new filename
    destination_path = os.path.join(copied_checkpoints_folder, new_filename)

    # Rename the file
    os.rename(source_path, destination_path)

    # Print the new filename
    print("Renamed to:", new_filename)

        

msnlt__A_1.pth
Renamed to: msnlt_A.pth
msnlt__B_1.pth
Renamed to: msnlt_B.pth
msnlt__C_1.pth
Renamed to: msnlt_C.pth
msnlt__D_1.pth
Renamed to: msnlt_D.pth
msnlt__E_1.pth
Renamed to: msnlt_E.pth


#### Other

In [None]:
# Helper that prints the structure of a (possibly nested) dictionary

def print_dict_str(d, indent=0):
    """Print the keys of ``d`` together with the type name of each value.

    Values that are themselves dictionaries are labelled ``(dict)`` and their
    content is printed recursively, indented by four additional spaces.

    Args:
        d: Dictionary whose structure is printed.
        indent: Number of spaces written in front of every line of this level.
    """
    prefix = " " * indent
    for name, entry in d.items():
        if not isinstance(entry, dict):
            # Leaf value: show only its type, not its content
            print(prefix + f"{name}: {type(entry).__name__}")
            continue

        # Nested dictionary: print a header, then descend one level
        print(prefix + f"{name}: (dict)")
        print_dict_str(entry, indent + 4)


In [None]:
# Show the structure of one results pickle
# NOTE(review): this path starts with '../models' while the other cells use
# './models', and the test name is hard-coded instead of using `test_name`;
# confirm which working directory this cell is meant to be run from.
results_path = '../models/results/conf_t/msnlt_conf_t2.pkl'

with open(results_path, 'rb') as f:
    dic = pickle.load(f)  # overwrites the `dic` loaded earlier in the notebook

print_dict_str(dic)