In [2]:
import pandas as pd
import numpy as np

In [79]:


def process_metric_tables(metric_tables):
    """
    Aggregates several plain-text metric tables into one "(mean, std)" summary table.

    Each table is expected to consist of a header line naming three columns
    (the statistics step looks them up as 'Metric', 'Training' and
    'Validation'), followed by one line per metric made of a row index, the
    metric name (which may contain spaces) and two values. Values that cannot
    be read as numbers (e.g. 'NaN') are treated as missing.

    Parameters:
    - metric_tables (list[str]): One multi-line table string per run.

    Returns:
    - str: A table rendered with ``DataFrame.to_string(index=False)``. Metrics
      appear in the order of the first input table; every cell is either
      "(mean, std)" with three decimals or NaN when no run provided a value.
      The std is the pandas default (sample) standard deviation, so it is
      ``nan`` when only a single value is available.

    Raises:
    - ValueError: If a table's header line does not name three columns.
    """
    value_columns = ['Training', 'Validation']

    def parse_tables_manual(metric_tables):
        # Turn every table string into a DataFrame with numeric value columns.
        dataframes = []
        for table in metric_tables:
            lines = table.strip().splitlines()
            headers = lines[0].split(maxsplit=2) if lines else []
            if len(headers) != 3:
                raise ValueError(
                    f"Expected a header with three column names, got: {headers!r}"
                )
            data = []
            for line in lines[1:]:
                # Drop the leading row index; blank lines or lines holding only
                # an index carry no metric and are skipped instead of crashing.
                tokens = line.split(maxsplit=1)
                if len(tokens) < 2:
                    continue
                # Split from the right so metric names may contain spaces.
                parts = tokens[1].rsplit(maxsplit=2)
                if len(parts) == 3:
                    data.append(parts)
                else:
                    data.append([parts[0], np.nan, np.nan])
            df = pd.DataFrame(data, columns=headers)
            for column in headers[1:]:
                # 'NaN' and any other non-numeric text become missing values.
                df[column] = pd.to_numeric(df[column], errors='coerce')
            dataframes.append(df)
        return dataframes

    def calculate_statistics_ordered(dataframes):
        # The first table defines which metrics are reported and in what order.
        if not dataframes:
            return []
        result_list = []
        for metric in dataframes[0]['Metric']:
            metric_stats = {}
            for col in value_columns:
                valid_data = pd.concat(
                    [df.loc[df['Metric'] == metric, col] for df in dataframes]
                ).dropna()
                # None marks "no run had a value for this metric/column".
                if valid_data.empty:
                    metric_stats[col] = None
                else:
                    metric_stats[col] = (valid_data.mean(), valid_data.std())
            result_list.append((metric, metric_stats))
        return result_list

    def format_output_ordered(statistics_ordered):
        # Collect all rows first and build the frame once, rather than growing
        # a DataFrame with repeated concatenation.
        rows = []
        for metric, values in statistics_ordered:
            row = {'Metric': metric}
            for col in value_columns:
                stats = values.get(col)
                if stats is None:
                    row[col] = np.nan
                else:
                    row[col] = f"({stats[0]:.3f}, {stats[1]:.3f})"
            rows.append(row)
        output_df = pd.DataFrame(rows, columns=['Metric'] + value_columns)
        return output_df.to_string(index=False)

    # Process the input tables
    dataframes = parse_tables_manual(metric_tables)
    statistics = calculate_statistics_ordered(dataframes)
    output_table = format_output_ordered(statistics)
    return output_table
# Example usage: five per-run metric tables as plain text. Each table has a
# header line (Metric / Training / Validation) followed by rows consisting of
# a row index, the metric name and the two values; 'NaN' marks a value that
# is not available for that column.
metric_tables = [
    """                              Metric  Training  Validation
0                   Average Accuracy     0.624       0.576
1     Standard Deviation of Accuracy     0.242       0.203
2                      Best Accuracy     0.944       0.803
3                      Last Accuracy     0.944       0.803
4                        Overall AUC       NaN       0.464
5                   Maximum F1 Score       NaN       0.809
6                       Minimum Loss       NaN       0.650
7  Difference in Average Loss Last N       NaN       0.420
8  Standard Deviation of Loss Last N       NaN       0.093""",


"""                              Metric  Training  Validation
0                   Average Accuracy     0.339       0.326
1     Standard Deviation of Accuracy     0.005       0.004
2                      Best Accuracy     0.341       0.337
3                      Last Accuracy     0.341       0.327
4                        Overall AUC       NaN       0.518
5                   Maximum F1 Score       NaN       0.000
6                       Minimum Loss       NaN       1.099
7  Difference in Average Loss Last N       NaN      -0.001
8  Standard Deviation of Loss Last N       NaN       0.000""",


"""                              Metric  Training  Validation
0                   Average Accuracy     0.344       0.334
1     Standard Deviation of Accuracy     0.004       0.018
2                      Best Accuracy     0.361       0.414
3                      Last Accuracy     0.337       0.323
4                        Overall AUC       NaN       0.547
5                   Maximum F1 Score       NaN       0.000
6                       Minimum Loss       NaN       1.090
7  Difference in Average Loss Last N       NaN      -0.002
8  Standard Deviation of Loss Last N       NaN       0.002""", 

"""                              Metric  Training  Validation
0                   Average Accuracy     0.819       0.697
1     Standard Deviation of Accuracy     0.176       0.111
2                      Best Accuracy     0.966       0.778
3                      Last Accuracy     0.960       0.773
4                        Overall AUC       NaN       0.508
5                   Maximum F1 Score       NaN       0.778
6                       Minimum Loss       NaN       0.836
7  Difference in Average Loss Last N       NaN       1.190
8  Standard Deviation of Loss Last N       NaN       0.182""",

"""                              Metric  Training  Validation
0                   Average Accuracy     0.367       0.392
1     Standard Deviation of Accuracy     0.027       0.056
2                      Best Accuracy     0.412       0.480
3                      Last Accuracy     0.401       0.465
4                        Overall AUC       NaN       0.489
5                   Maximum F1 Score       NaN       0.010
6                       Minimum Loss       NaN       1.066
7  Difference in Average Loss Last N       NaN      -0.006
8  Standard Deviation of Loss Last N       NaN       0.013"""



    # Further per-run tables can be appended here.
]

# Aggregate the tables into one "(mean, std)" summary per metric and column,
# then print the resulting text table.
result = process_metric_tables(metric_tables)
print(result)


                           Metric       Training     Validation
                 Average Accuracy (0.499, 0.215) (0.465, 0.164)
   Standard Deviation of Accuracy (0.091, 0.111) (0.078, 0.081)
                    Best Accuracy (0.605, 0.321) (0.562, 0.214)
                    Last Accuracy (0.597, 0.325) (0.538, 0.235)
                      Overall AUC            NaN (0.505, 0.031)
                 Maximum F1 Score            NaN (0.319, 0.433)
                     Minimum Loss            NaN (0.948, 0.199)
Difference in Average Loss Last N            NaN (0.320, 0.520)
Standard Deviation of Loss Last N            NaN (0.058, 0.079)


In [160]:
import re
import csv

def parse_metric_from_table(table, metric_name):
    """
    Extracts the Validation mean of one metric from a "(mean, std)" summary table.

    The table is expected to hold one metric per line, with cells written as
    "(mean, std)" and the Validation column last. Only a tuple that ends the
    metric's line is used, so a line whose Validation cell is NaN yields no
    value instead of silently falling back to the Training tuple. Means may
    be negative, and the std may be "nan".

    Parameters:
    - table (str): Multi-line string containing the table of metrics.
    - metric_name (str): Name of the metric to parse (e.g., 'Overall AUC', 'Best Accuracy').

    Returns:
    - Tuple[None, float | None]: (None, validation_mean). The first element is
      always None because Training values are not extracted; the second is
      None as well when the metric has no Validation tuple in the table.
    """
    number = r"[-+]?\d+\.\d+"
    pattern = re.compile(
        # Metric name, then anything on the same line (lazily) ...
        rf"{re.escape(metric_name)}[ \t]+[^\n]*?"
        # ... up to a "(mean, std)" tuple that is the last thing on the line.
        rf"\([ \t]*({number})[ \t]*,[ \t]*(?:{number}|nan)[ \t]*\)[ \t\r]*$",
        re.MULTILINE,
    )
    match = pattern.search(table)
    if match:
        return (None, float(match.group(1)))  # Only validation values are returned, as training values are not needed
    return (None, None)

def calculate_percentage_increase(standard, augmented):
    """
    Computes the relative change of a metric from the standard to the augmented dataset, in percent.

    Parameters:
    - standard (float): Metric value from the standard dataset (the baseline).
    - augmented (float): Metric value from the augmented dataset.

    Returns:
    - float: Percentage change relative to the baseline, rounded to three decimal
      places, or None if either value is None or the baseline is zero.
    """
    # A missing value or a zero baseline leaves the ratio undefined.
    if standard is None or augmented is None or standard == 0:
        return None

    relative_change = (augmented - standard) / standard
    return round(relative_change * 100, 3)

def write_results_to_csv(path, method, weather, percentage_auc, percentage_accuracy):
    """
    Appends one row of calculated results to a CSV file.

    The file is opened in append mode, so existing rows are kept and no header
    row is written. A missing parent directory is created first. Values that
    are None are written as empty fields by the csv module.

    Parameters:
    - path (str): Location of the CSV file to append to.
    - method (str): Description of the method (e.g., "Standard", "Augmented").
    - weather (str): Description of the weather condition (e.g., "Sunny", "Cloudy").
    - percentage_auc (float): Calculated percentage increase for AUC.
    - percentage_accuracy (float): Calculated percentage increase for Best Accuracy.
    """
    import os  # local import so this function does not depend on the file's import cell

    # Make sure the target directory exists; open() would otherwise raise
    # FileNotFoundError on the first write into a fresh results folder.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # newline='' lets the csv module control line endings; the explicit
    # encoding keeps the output independent of the platform default.
    with open(path, 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([method, weather, percentage_auc, percentage_accuracy])


# Example string tables: two "(mean, std)" summary tables with one metric per
# line and the columns Metric / Training / Validation.
table1 = """                           Metric       Training     Validation
                 Average Accuracy (0.508, 0.209) (0.491, 0.159)
   Standard Deviation of Accuracy (0.103, 0.107) (0.097, 0.068)
                    Best Accuracy (0.633, 0.280) (0.630, 0.173)
                    Last Accuracy (0.608, 0.303) (0.550, 0.204)
                      Overall AUC            NaN (0.525, 0.026)
                 Maximum F1 Score            NaN (0.392, 0.406)
                     Minimum Loss            NaN (0.938, 0.203)
Difference in Average Loss Last N            NaN (0.316, 0.483)
Standard Deviation of Loss Last N            NaN (0.065, 0.093)"""

table2 = """                           Metric       Training     Validation
                 Average Accuracy (0.499, 0.215) (0.465, 0.164)
   Standard Deviation of Accuracy (0.091, 0.111) (0.078, 0.081)
                    Best Accuracy (0.605, 0.321) (0.562, 0.214)
                    Last Accuracy (0.597, 0.325) (0.538, 0.235)
                      Overall AUC            NaN (0.505, 0.031)
                 Maximum F1 Score            NaN (0.319, 0.433)
                     Minimum Loss            NaN (0.948, 0.199)
Difference in Average Loss Last N            NaN (0.320, 0.520)
Standard Deviation of Loss Last N            NaN (0.058, 0.079)"""

# Extract the metrics of interest from both tables. Each result is a 2-tuple
# whose element 0 is always None and whose element 1 is the Validation mean.
auc_values1 = parse_metric_from_table(table1, "Overall AUC")
auc_values2 = parse_metric_from_table(table2, "Overall AUC")

best_accuracy_values1 = parse_metric_from_table(table1, "Best Accuracy")
best_accuracy_values2 = parse_metric_from_table(table2, "Best Accuracy")

# Labels written alongside the results in the CSV row.
method = "contrast(2)"
weather = "45"

# table1 is passed as the `standard` baseline and table2 as the `augmented`
# value, so a negative result means table2's Validation mean is lower.
percentage_increase_auc = calculate_percentage_increase(auc_values1[1], auc_values2[1])
percentage_increase_best_accuracy = calculate_percentage_increase(best_accuracy_values1[1], best_accuracy_values2[1])

# Write to CSV (one row is appended each time this cell runs)
path = "../results/round1/percent_increase.csv"

write_results_to_csv(path, method, weather, percentage_increase_auc, percentage_increase_best_accuracy)

print("% Increase AUC:", percentage_increase_auc)
print("% Increase Best Accuracy:", percentage_increase_best_accuracy)

% Increase AUC: -3.81
% Increase Best Accuracy: -10.794
