In [5]:
# %%
import os
import re
import pandas as pd

# Folder containing one plain-text evaluation report per run.
folder_path = "evaluations/SMD2"

# One dict of parsed metrics is appended here for every report that matched.
metrics_list = []

# Regular expressions used to pull each metric out of a report.
# Every pattern has exactly one capture group holding the numeric value;
# percentage metrics are captured without the trailing "%" sign.
# The insertion order of the keys determines the column order of the table.
metric_patterns = {
    "TP": r"True Positives:\s*(\d+)",
    "FP": r"False Positives:\s*(\d+)",
    "FN": r"False Negatives:\s*(\d+)",
    "TN": r"True Negatives:\s*(\d+)",
    "Precision": r"Precision:\s*([\d\.]+)%",
    "Recall": r"Recall:\s*([\d\.]+)%",
    "F1": r"F1 Score:\s*([\d\.]+)%",
    # The lookbehind stops this pattern from matching the tail of a
    # "Balanced Accuracy: ..." line, which would otherwise win whenever that
    # line appears before the plain "Accuracy: ..." line in a report.
    "Accuracy": r"(?<!Balanced )Accuracy:\s*([\d\.]+)%",
    "Balanced_Accuracy": r"Balanced Accuracy:\s*([\d\.]+)%",
    "ROC_AUC": r"ROC AUC:\s*([\d\.]+)%"
}

# Parse every regular file in the folder. Sorting the names keeps the row
# order of the resulting table stable, since os.listdir() returns entries in
# arbitrary order.
for filename in sorted(os.listdir(folder_path)):
    file_path = os.path.join(folder_path, filename)

    if not os.path.isfile(file_path):
        continue  # skip directories and other non-file entries

    # Decode leniently with a fixed encoding: the patterns only contain ASCII,
    # so undecodable bytes in a stray non-text file must not abort the loop,
    # and the result should not depend on the platform's default locale.
    with open(file_path, "r", encoding="utf-8", errors="replace") as f:
        content = f.read()

    # The file is closed by now; extract whichever metrics the report has.
    metrics = {}
    for key, pattern in metric_patterns.items():
        match = re.search(pattern, content)
        if match:
            metrics[key] = float(match.group(1))

    # Files in which no pattern matched contribute no row at all.
    if metrics:
        metrics_list.append(metrics)

# Build one row per parsed report; metrics missing from a report become NaN.
df_metrics = pd.DataFrame(metrics_list)

# Metrics that the reports express in percent; rescale them to fractions.
percentage_columns = ["Precision", "Recall", "F1", "Accuracy", "Balanced_Accuracy", "ROC_AUC"]
for col in percentage_columns:
    if col not in df_metrics.columns:
        # No report contained this metric (or no report was parsed at all):
        # add an all-NaN float column instead of failing with a KeyError, so
        # later selections by percentage_columns keep working.
        df_metrics[col] = pd.Series(dtype="float64")
    df_metrics[col] = df_metrics[col] / 100.0

# Show the dataframe
df_metrics.head()

# %%
# Column-wise average of every metric over all parsed reports.
mean_metrics = df_metrics.mean()

# Same averages, but with the fractional metrics scaled back to percent for
# display; the raw counts (TP, FP, FN, TN) are left as they are.
mean_metrics_percentage = mean_metrics.copy()
mean_metrics_percentage.loc[percentage_columns] = mean_metrics.loc[percentage_columns].mul(100)
mean_metrics_percentage


TP                    18.500
FP                    10.500
FN                    14.500
TN                   507.500
Precision             63.794
Recall                56.065
F1                    59.676
Accuracy              95.461
Balanced_Accuracy     77.016
ROC_AUC               89.046
dtype: float64