In [1]:
import os
import pandas as pd

# Root directory; each immediate subfolder is scanned for CSV files.
parent_folder = 'benchmarking_val/metrics'

# Filename postfixes to group by: a file belongs to a group when its name
# ends with "<postfix>.csv".
postfixes = ['_1', '_2', '_3', '_4']

# Maps each postfix to the list of dataframes read from its matching files.
grouped_dataframes = {postfix: [] for postfix in postfixes}

# os.listdir() returns entries in arbitrary, filesystem-dependent order, so
# both levels are sorted to keep the order of the collected frames (and thus
# the row order of anything concatenated from them) reproducible.
for folder in sorted(os.listdir(parent_folder)):
    folder_path = os.path.join(parent_folder, folder)

    # Only subdirectories are scanned; plain files in the parent are skipped.
    if not os.path.isdir(folder_path):
        continue

    for file in sorted(os.listdir(folder_path)):
        for postfix in postfixes:
            # Matching on "<postfix>.csv" already implies the ".csv" extension.
            if not file.endswith(f"{postfix}.csv"):
                continue

            file_path = os.path.join(folder_path, file)
            # Best-effort load: the inputs are parsed as tab-separated, and a
            # file that cannot be read is reported and skipped rather than
            # aborting the whole scan.
            try:
                df = pd.read_csv(file_path, delimiter="\t")
            except Exception as e:
                print(f"Error reading {file_path}: {e}")
            else:
                grouped_dataframes[postfix].append(df)

# Write one merged CSV per postfix into the output directory, creating the
# directory first if it does not exist yet.
output_folder = 'grouped_csvs'
os.makedirs(output_folder, exist_ok=True)

for postfix in grouped_dataframes:
    frames = grouped_dataframes[postfix]
    if not frames:
        # No dataframes were collected for this postfix, so no file is written.
        continue

    # Stack the collected frames with a fresh 0..n-1 index, then save without
    # writing that index as a column.
    output_file = os.path.join(output_folder, f"group{postfix}.csv")
    combined_df = pd.concat(frames, ignore_index=True)
    combined_df.to_csv(output_file, index=False)
    print(f"Saved {output_file}")


Saved grouped_csvs/group_1.csv
Saved grouped_csvs/group_2.csv
Saved grouped_csvs/group_3.csv
Saved grouped_csvs/group_4.csv
