In [8]:
import os
import pandas as pd

def merge_csv_files_with_timestamps(base_dir, num_key_columns=4):
    """Merge every CSV found under *base_dir* side by side into one DataFrame.

    Each CSV's columns after the first ``num_key_columns`` are renamed to
    ``"<prefix>_<column>"``, where ``<prefix>`` is the name of the parent of
    the directory that contains the file (i.e. for
    ``base_dir/<prefix>/<subdir>/file.csv`` the prefix is ``<prefix>``).
    NOTE(review): the prefix is presumably a time-step directory name;
    confirm against the actual output layout.

    The first CSV that has rows contributes all of its columns (key columns
    plus renamed columns); every later CSV contributes only its renamed
    columns. Frames are joined by row position (``pd.concat(axis=1)``), not
    by the values in the key columns, so all files are assumed to list their
    rows in the same order.

    Directories and files are visited in sorted name order so that the
    column order of the result is reproducible across runs and filesystems.

    Args:
        base_dir: Root directory to search recursively for ``.csv`` files.
        num_key_columns: Number of leading columns that are kept as-is
            (not renamed). Defaults to 4.

    Returns:
        The merged DataFrame, or an empty DataFrame if no CSV was found.
    """
    frames = []
    # True once the merged result would contain at least one row.
    has_rows = False

    for root, dirs, files in os.walk(base_dir):
        # Sorting in place steers os.walk's descent, making the order stable.
        dirs.sort()

        # Prefix is taken from the directory one level above `root`.
        time_step = os.path.basename(os.path.dirname(root))

        for file_name in sorted(files):
            if not file_name.endswith('.csv'):
                continue

            df = pd.read_csv(os.path.join(root, file_name))

            renamed_columns = {
                col: f"{time_step}_{col}"
                for col in df.columns[num_key_columns:]
            }
            df = df.rename(columns=renamed_columns)

            if not has_rows:
                # Nothing with rows has been seen yet: (re)start from this
                # frame so the key columns come from the first CSV that
                # actually contains data.
                frames = [df]
                has_rows = not df.empty
            else:
                frames.append(df[list(renamed_columns.values())])

    if not frames:
        return pd.DataFrame()

    # Single concatenation avoids re-copying the accumulated frame per file.
    return pd.concat(frames, axis=1)

def main(
    base_dir='/mnt/petrelfs/hujucheng/integrated_eval/ocplayground/outputs/hu_bench_v1_0/chat_objective',
    output_file='merged_output_with_timestamps.csv',
):
    """Merge all CSVs under *base_dir* and write the result to *output_file*.

    Args:
        base_dir: Directory searched recursively for CSV files. Defaults to
            the path this script was originally written for.
        output_file: Path of the CSV file to write (without the index
            column). Defaults to a file in the current working directory.
    """
    # Merge all CSV files with renamed model columns
    merged_df = merge_csv_files_with_timestamps(base_dir)

    # Save the merged DataFrame to a new CSV file
    merged_df.to_csv(output_file, index=False)
    print(f"Merged CSV saved to {output_file}")

# Run main() only when this module is executed directly, not when imported.
if __name__ == "__main__":
    main()


Merged CSV saved to merged_output_with_timestamps.csv
