In [5]:
import pandas as pd
import numpy as np

def process_metric_strings(list_of_metric_strings):
    # Initialize an empty DataFrame to store all the data
    aggregated_data = pd.DataFrame()

    # Function to process each metric string into a DataFrame
    def string_to_df(metric_string):
        # Split the string by lines and process into a DataFrame
        lines = metric_string.strip().split("\n")
        data = []
        for line in lines[1:]:  # Skip the header line
            parts = line.split(maxsplit=2)
            if len(parts) == 3:
                index, metric, values = parts
                training, validation = values.split()
                data.append([metric, training, validation])
        df = pd.DataFrame(data, columns=['Metric', 'Training', 'Validation'])
        df['Training'] = pd.to_numeric(df['Training'], errors='coerce')
        df['Validation'] = pd.to_numeric(df['Validation'], errors='coerce')
        return df

    # Concatenate data from each string into a single DataFrame
    for metric_string in list_of_metric_strings:
        df = string_to_df(metric_string)
        aggregated_data = pd.concat([aggregated_data, df], ignore_index=True)

    # Group by 'Metric' and calculate the mean and std for each metric
    summary = aggregated_data.groupby('Metric').agg({
        'Training': [('Training Mean', 'mean'), ('Training Std', 'std')],
        'Validation': [('Validation Mean', 'mean'), ('Validation Std', 'std')]
    })

    # Flatten the multi-level columns and handle missing values
    summary.columns = [' '.join(col).strip() for col in summary.columns.values]
    summary.reset_index(inplace=True)
    
    # Replace NaN in tuples with the original 'NaN' and format the results
    formatted_results = []
    for _, row in summary.iterrows():
        formatted_results.append({
            'Metric': row['Metric'],
            'Training': (f"{row['Training Mean']:.3f}", f"{row['Training Std']:.3f}") if pd.notna(row['Training Mean']) else ('NaN', 'NaN'),
            'Validation': (f"{row['Validation Mean']:.3f}", f"{row['Validation Std']:.3f}") if pd.notna(row['Validation Mean']) else ('NaN', 'NaN')
        })

    result_df = pd.DataFrame(formatted_results)
    
    # Convert DataFrame to string maintaining the table format
    return result_df.to_string(index=False)

# Example usage
metric_tables = [
    """                              Metric  Training  Validation
0                   Average Accuracy     0.760       0.655
1     Standard Deviation of Accuracy     0.223       0.146
2                      Best Accuracy     0.959       0.773
3                      Last Accuracy     0.956       0.773
4                        Overall AUC       NaN       0.489
5                   Maximum F1 Score       NaN       0.770
6                       Minimum Loss       NaN       0.757
7  Difference in Average Loss Last N       NaN       0.846
8  Standard Deviation of Loss Last N       NaN       0.119""",

"""                              Metric  Training  Validation
0                   Average Accuracy     0.629       0.552
1     Standard Deviation of Accuracy     0.227       0.154
2                      Best Accuracy     0.907       0.714
3                      Last Accuracy     0.906       0.709
4                        Overall AUC       NaN       0.512
5                   Maximum F1 Score       NaN       0.708
6                       Minimum Loss       NaN       0.796
7  Difference in Average Loss Last N       NaN       0.735
8  Standard Deviation of Loss Last N       NaN       0.212""",

"""                              Metric  Training  Validation
0                   Average Accuracy     0.663       0.601
1     Standard Deviation of Accuracy     0.227       0.163
2                      Best Accuracy     0.925       0.783
3                      Last Accuracy     0.897       0.697
4                        Overall AUC       NaN       0.597
5                   Maximum F1 Score       NaN       0.778
6                       Minimum Loss       NaN       0.856
7  Difference in Average Loss Last N       NaN       0.711
8  Standard Deviation of Loss Last N       NaN       0.125"""



    # More tables would follow here
]

result = process_metric_strings(metric_tables)
print(result)


ValueError: too many values to unpack (expected 2)