In [9]:
import os
import pandas as pd
import glob

# Root directory of the workspace. Not referenced by the cells visible here.
hdir = '/home/workspace'

# NOTE(review): leftover scratch line. `e1440` is not defined in any visible
# cell, so uncommenting this as-is would raise NameError. TODO: remove it or
# restore the missing path fragment.
# df = pd.read_csv(hdir + e1440 + 'aggregated_metrics.csv')

In [10]:
def aggregate_metrics(directory_path, name, max_rows=16):
    """Combine per-sample ``*_metrics_summary.csv`` files into one wide table.

    Every file in ``directory_path`` whose name ends in
    ``_metrics_summary.csv`` is read; the part of the file name before that
    suffix is used as the sample ID. Each file must have ``Metric Name`` and
    ``Metric Value`` columns, otherwise it is skipped with a warning.

    The result has one row per metric and one column per sample, preceded by
    an ``EXP #`` row whose value in every sample column is ``name``. Rows are
    the union of the metrics found in all files, in first-seen order (files
    are visited in sorted path order). A metric that a sample does not report
    is left as an empty string. If a metric name occurs more than once within
    a file, a single row is produced and the last value read wins.

    Parameters
    ----------
    directory_path : str
        Directory that holds the ``*_metrics_summary.csv`` files.
    name : str
        Experiment label. Used as the value of the ``EXP #`` row and as the
        prefix of the output file name.
    max_rows : int or None, default 16
        Only the first ``max_rows`` data rows of each file are used. Pass
        ``None`` to use every row.

    Returns
    -------
    pandas.DataFrame
        Table with a ``Metric Name`` column followed by one column per
        sample ID, sorted by sample ID.

    Side effects
    ------------
    Writes the table to ``<name>_aggregated_metrics.csv`` relative to the
    current working directory and prints a confirmation. Files that cannot
    be processed are reported with ``print`` and skipped; they do not raise.
    """
    pattern = os.path.join(directory_path, "*_metrics_summary.csv")
    # glob makes no ordering guarantee; sorting keeps the row order of the
    # result reproducible across machines and runs.
    csv_files = sorted(glob.glob(pattern))
    output_file = name + "_aggregated_metrics.csv"

    if not csv_files:
        print(f"Warning: No files matching {pattern} were found.")

    # metric name -> {sample_id: value}. Dict insertion order doubles as the
    # first-seen order of the metrics, so no separate name list is needed and
    # metrics that appear only in later files are not lost.
    aggregated_data = {}

    for csv_file in csv_files:
        sample_id = os.path.basename(csv_file).split('_metrics_summary.csv')[0]

        try:
            df = pd.read_csv(csv_file, sep=',')

            if 'Metric Name' not in df.columns or 'Metric Value' not in df.columns:
                print(f"Warning: Required columns not found in {csv_file}. Skipping file.")
                continue

            if max_rows is not None:
                df = df.head(max_rows)

            for metric_name, metric_value in zip(df['Metric Name'], df['Metric Value']):
                aggregated_data.setdefault(metric_name, {})[sample_id] = metric_value

        except Exception as e:
            # Deliberate best effort: one unreadable file must not abort the
            # aggregation of the remaining samples.
            print(f"Error processing {csv_file}: {e}")

    metric_names = list(aggregated_data)
    sample_ids = sorted(
        {sample for per_sample in aggregated_data.values() for sample in per_sample}
    )

    # Build all columns first and create the frame once, instead of inserting
    # columns one by one into an existing DataFrame.
    columns = {"Metric Name": ["EXP #"] + metric_names}
    for sample in sample_ids:
        # First cell is the experiment label for the "EXP #" row.
        columns[sample] = [name] + [
            aggregated_data[metric].get(sample, '') for metric in metric_names
        ]

    result_df = pd.DataFrame(columns)

    result_df.to_csv(output_file, index=False)
    print(f"Aggregated metrics saved to {output_file}")

    return result_df

if __name__ == "__main__":
    # Aggregate the metrics summaries of experiment EXP-01244. The returned
    # frame is not kept here; the CSV written by the function is the result.
    directory_path = "/home/workspace/mm_bm_chip_organoids/EXP-01244/metrics_summaries"
    aggregate_metrics(directory_path=directory_path, name="EXP-01244")

Aggregated metrics saved to EXP-01244_aggregated_metrics.csv
