In [2]:
import os
import re
import yaml
import glob
import pandas as pd

In [19]:
def find_files_by_patern(folder, pattern):
    """Return the paths directly inside ``folder`` that match a glob ``pattern``.

    Parameters
    ----------
    folder : str
        Directory to search. It is taken literally: glob metacharacters
        (``*``, ``?``, ``[``) in the folder name are escaped, not expanded.
    pattern : str
        Glob pattern applied to the entries of ``folder``, e.g. ``'config*.yml'``.

    Returns
    -------
    list of str
        Matching paths (``folder`` joined with the entry name) in sorted
        order; an empty list when nothing matches.
    """
    # Escape the folder so that only `pattern` is interpreted as a glob;
    # otherwise a directory such as "run[1]" would never match itself.
    search_pattern = os.path.join(glob.escape(folder), pattern)

    # glob.glob returns matches in arbitrary order. Callers index the result
    # with [0], so sort to make that choice reproducible between runs.
    return sorted(glob.glob(search_pattern))

def read_acc_from_log(log_path, tail_lines=8):
    """Parse the "Overall" metrics section at the end of a prediction log.

    Only the last ``tail_lines`` non-blank-terminated lines of the file are
    scanned. Once a line containing ``"Overall"`` is seen, every following
    line of the form ``<prefix> - <metric name> ----> <value>`` is parsed.

    Parameters
    ----------
    log_path : str
        Path to the log file.
    tail_lines : int or None, optional
        Positive number of lines at the end of the file to scan. The default
        of 8 covers one "Overall" header line plus the seven metric lines.
        ``None`` scans the whole file.

    Returns
    -------
    pandas.DataFrame
        Columns ``Mean IoU``, ``Micro IoU``, ``Macro IoU``, ``Accuracy``,
        ``Recall``, ``Precision`` and ``F1``, one row per parsed set of
        values. The frame is empty when no "Overall" section is found.

    Raises
    ------
    ValueError
        If a metric value cannot be converted to ``float``, or if the metrics
        were not all found the same number of times (raised by pandas).
    """
    separator = "---->"

    # Metric label as written in the log -> column name in the returned frame.
    label_to_column = {
        "Mean IOU": "Mean IoU",
        "Micro IOU": "Micro IoU",
        "Macro IOU": "Macro IoU",
        "Accuracy": "Accuracy",
        "Recall": "Recall",
        "Precision": "Precision",
        "F1": "F1",
    }
    metrics = {column: [] for column in label_to_column.values()}

    with open(log_path, 'r') as file:
        lines = file.readlines()

    # Trailing blank lines would otherwise push the "Overall" header out of
    # the tail window and silently produce an empty result.
    while lines and not lines[-1].strip():
        lines.pop()

    tail = lines if tail_lines is None else lines[-tail_lines:]

    inside_section = False
    for line in tail:
        if "Overall" in line:
            inside_section = True
        elif inside_section:
            if separator not in line:
                # Blank or unrelated line inside the section: nothing to parse.
                continue
            metric_name, metric_value = line.rsplit(separator, 1)
            # The label is the last dash-separated field before the separator.
            metric_name = metric_name.split('-')[-1].strip()
            column = label_to_column.get(metric_name)
            if column is not None:
                metrics[column].append(float(metric_value.strip()))

    return pd.DataFrame(metrics)

def read_param_from_config(config_path):
    """Flatten the sections of a YAML config file into a one-row DataFrame.

    Every top-level entry whose value is a mapping is treated as a section,
    and its key/value pairs become columns. Top-level entries that are not
    mappings are ignored. If two sections define the same key, the value from
    the section that appears later in the file wins.

    Parameters
    ----------
    config_path : str
        Path to the YAML configuration file.

    Returns
    -------
    pandas.DataFrame
        A single row with one column per extracted parameter (no columns when
        the file is empty or has no mapping-valued sections).
    """
    with open(config_path, 'r') as f:
        # safe_load returns None for an empty document; treat that as a
        # config without sections instead of failing on `.items()`.
        config = yaml.safe_load(f) or {}

    extracted_params = {}
    for params in config.values():
        if isinstance(params, dict):
            extracted_params.update(params)

    return pd.DataFrame([extracted_params])


In [24]:
print(os.getcwd())

# File names looked up inside each experiment / fine-tune folder.
log_file = 'preds.log'
config_file = 'config.yml'

exp_root = 'experiments'

exp_paths = [os.path.join(exp_root, exp) for exp in os.listdir(exp_root)]
print(exp_paths)

# One single-row DataFrame (config parameters + metrics) per fine-tune run.
# Created once, outside the loops, so rows from every visited experiment are
# kept instead of being discarded when the next experiment starts.
dfs = []

# NOTE: the [:1] slices restrict the run to the first experiment and its first
# fine-tune folder; remove them to process everything.
for exp_path in exp_paths[:1]:

    init_config = find_files_by_patern(exp_path, 'config*.yml')[0]
    init_accuracy = find_files_by_patern(exp_path, log_file)[0]
    print(init_accuracy)

    fine_tune_paths = [
        os.path.join(exp_path, folder)
        for folder in os.listdir(exp_path)
        if folder.startswith('fine_tune')
    ]

    for fine_tune_path in fine_tune_paths[:1]:
        fine_tune_config = find_files_by_patern(fine_tune_path, 'config*.yml')[0]
        fine_tune_accuracy = find_files_by_patern(fine_tune_path, log_file)[0]

        print(fine_tune_accuracy)
        df_acc = read_acc_from_log(fine_tune_accuracy)
        df_param = read_param_from_config(fine_tune_config)

        # Put parameters and metrics side by side in one row. Concatenating
        # along columns keeps each column's dtype and tolerates null config
        # values, unlike stacking two NaN-padded rows into one.
        df_ft = pd.concat([df_param, df_acc], axis=1)
        dfs.append(df_ft)

    

        
        


/exports/csce/datastore/geos/users/s2135982/rivertools/mlfluv/script
['experiments/3001', 'experiments/3002', 'experiments/2001', 'experiments/2002', 'experiments/1001', 'experiments/1002']
experiments/3001/preds.log
experiments/3001/fine_tune_17/preds.log
            name                       test_paths     train_paths which_label  \
0  MLFluvDataset  data/labelled_data/test_dataset  data/fold_data       ESAWC   

   convert_to_tif  fluv_point_only  handle_nan_in_sentinel  move_data  \
0            True            False                   False       True   

   remap_to_sedi  distill_lamda  ...    train_fold  valid_fold  window_size  \
0          False           0.75  ...  [0, 1, 2, 3]         [4]        512.0   

   Mean IoU Micro IoU Macro IoU Accuracy  Recall Precision     F1  
0     0.333     0.822     0.333    0.972   0.902     0.902  0.902  

[1 rows x 42 columns]
