In [1]:
import os
import csv


def find_max_f1_in_file(file_path):
    """
    Find the largest value in the last column (F1-macro_all) of a CSV file.

    The first row is treated as a header and skipped. For each remaining
    non-blank row, the first column is parsed as an integer epoch and the last
    column as a float score.

    Args:
        file_path: Path of the CSV file to scan.

    Returns:
        A tuple ``(max_f1, max_epoch)``. When several rows share the maximum,
        the epoch of the first such row is returned. If the file has no data
        rows (empty file, or header only), ``(-inf, None)`` is returned.

    Raises:
        ValueError: If an epoch or score cell cannot be parsed as a number.
    """
    max_f1 = -float('inf')
    max_epoch = None

    # newline='' lets the csv module do its own line-ending handling.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        # A default avoids StopIteration when the file is completely empty.
        next(reader, None)  # Skip the first row (header)

        for row in reader:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            if not row:
                continue
            epoch = int(row[0])
            f1_value = float(row[-1])
            # Strict '>' keeps the earliest epoch on ties.
            if f1_value > max_f1:
                max_f1 = f1_value
                max_epoch = epoch

    return max_f1, max_epoch


def find_epoch_values_in_file(file_path, target_epoch):
    """
    Look up the acc_avg and F1-macro_all values recorded for one epoch.

    The first row is treated as a header and skipped. For each remaining
    non-blank row, the first column is parsed as an integer epoch; the first
    row whose epoch equals `target_epoch` supplies the result.

    Args:
        file_path: Path of the CSV file to scan.
        target_epoch: Epoch number to look for (compared against ``int(row[0])``).

    Returns:
        A tuple ``(acc_avg, f1_value)`` taken from the second and last columns
        of the matching row, or ``(None, None)`` if no row matches (including
        when the file is empty or contains only a header).

    Raises:
        ValueError: If an epoch cell, or a value cell of the matching row,
            cannot be parsed as a number.
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        # A default avoids StopIteration when the file is completely empty.
        next(reader, None)  # Skip the first row (header)

        for row in reader:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            if not row:
                continue
            if int(row[0]) == target_epoch:
                acc_avg = float(row[1])
                f1_value = float(row[-1])
                return acc_avg, f1_value

    return None, None


def process_directory(root_dir, sub_dir_str, file_name):
    """
    Report the best validation epoch among matching runs and its other metrics.

    Walks `root_dir` recursively. For every subdirectory whose name contains
    `sub_dir_str` and that holds a file named `file_name`, the maximum
    F1-macro_all value and its epoch are read with `find_max_f1_in_file`. The
    run with the overall highest value wins (the first one found on ties).

    For the winning run, the acc_avg and F1-macro_all values at that same epoch
    are then read from `test_eval.csv`, `id_val_eval.csv` and
    `id_test_eval.csv` located in the same directory, and everything is
    printed. A companion file that does not exist is reported with `None`
    values instead of aborting the report.

    Args:
        root_dir: Directory tree to search.
        sub_dir_str: Substring that a subdirectory name must contain.
        file_name: Name of the validation results file inside each matching
            subdirectory (e.g. ``val_eval.csv``).

    Returns:
        None. Results are written to stdout.
    """
    overall_max_f1 = -float('inf')
    overall_max_file = ""
    overall_max_epoch = None

    for root, dirs, _files in os.walk(root_dir):
        for dir_name in dirs:
            if sub_dir_str not in dir_name:
                continue
            val_file_path = os.path.join(root, dir_name, file_name)
            if not os.path.isfile(val_file_path):
                continue
            max_f1, max_epoch = find_max_f1_in_file(val_file_path)
            if max_f1 > overall_max_f1:
                overall_max_f1 = max_f1
                overall_max_file = val_file_path
                overall_max_epoch = max_epoch

    if overall_max_epoch is None:
        print("No valid epochs found.")
        return

    # Build companion paths from the run directory rather than by substring
    # replacement on the full path: replacement silently does nothing when
    # `file_name` is not "val_eval.csv" and could also rewrite a directory
    # component that happens to contain that text.
    run_dir = os.path.dirname(overall_max_file)

    def epoch_values(companion_name):
        """Return (acc_avg, f1) at the best epoch, or (None, None) if the file is absent."""
        companion_path = os.path.join(run_dir, companion_name)
        if not os.path.isfile(companion_path):
            return None, None
        return find_epoch_values_in_file(companion_path, overall_max_epoch)

    test_acc, test_f1 = epoch_values("test_eval.csv")
    id_val_acc, id_val_f1 = epoch_values("id_val_eval.csv")
    id_test_acc, id_test_f1 = epoch_values("id_test_eval.csv")

    print(f"The overall max F1 value in {overall_max_file} is: {overall_max_f1} at epoch {overall_max_epoch}")
    print(f"Corresponding test_eval acc_avg: {test_acc}, F1 value: {test_f1}")
    print(f"Corresponding id_val_eval acc_avg: {id_val_acc}, F1 value: {id_val_f1}")
    print(f"Corresponding id_test_eval acc_avg: {id_test_acc}, F1 value: {id_test_f1}")

if __name__ == "__main__":
    root_directory = "/graft2/code/yufei/Ancient_Artifact_Dating_front/wilds/logs"  # root directory path
    target_file_name = "val_eval.csv"  # the target file name

    # Substrings to look for in run-directory names, processed in this order.
    # The first four select every run of one algorithm; the last entry is a
    # full directory name and therefore selects a single specific run.
    run_name_substrings = (
        "deepCORAL",
        "ERM",
        "IRM",
        "groupDRO",
        "t23_front_resnet_50_bs_32_res_448_CORAL_lr_3e-5_weight_decay_0_seed_2_n_epochs_30",
    )

    for sub_directory_string in run_name_substrings:
        print(f"Processing {sub_directory_string}...")
        process_directory(root_directory, sub_directory_string, target_file_name)




Processing deepCORAL...
The overall max F1 value in /graft2/code/yufei/Ancient_Artifact_Dating_front/wilds/logs/t23_iwildcam_deepCORAL_30_epoch_seed_2/val_eval.csv is: 0.5530638522549984 at epoch 23
Corresponding test_eval acc_avg: 0.8939467072486877, F1 value: 0.40456152053834327
Corresponding id_val_eval acc_avg: 0.9091801643371582, F1 value: 0.4861033686363874
Corresponding id_test_eval acc_avg: 0.9305413961410522, F1 value: 0.5250553742600103
Processing ERM...
The overall max F1 value in /graft2/code/yufei/Ancient_Artifact_Dating_front/wilds/logs/t23_front_ERM_lr_3e-5_weight_decay_0_n_epochs_30/val_eval.csv is: 0.47926998718551367 at epoch 10
Corresponding test_eval acc_avg: 0.8736077547073364, F1 value: 0.3482170402855048
Corresponding id_val_eval acc_avg: 0.9518900513648987, F1 value: 0.805990818294073
Corresponding id_test_eval acc_avg: 0.9473953247070312, F1 value: 0.6868888348615441
Processing IRM...
The overall max F1 value in /graft2/code/yufei/Ancient_Artifact_Dating_front/