<a href="https://colab.research.google.com/github/maverick98/TopoGAT/blob/master/report.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Utility code

In [114]:
import os
import re
import pandas as pd
from IPython.display import display, HTML

class DatasetVariantComparerGAT:
    """Build a TopoGAT-versus-GAT comparison report for one dataset.

    ``dataset_path`` is scanned for one sub-folder per TopoGAT variant. From
    each folder the class reads ``summary_topogat.csv`` and ``summary_gat.csv``
    (first column used as the index, a ``mean`` column holding the metric
    values) and, when present, ``paired_ttest_results.txt``.
    """

    # Label of the synthetic baseline row appended to the performance table.
    _BASELINE_LABEL = 'GAT (baseline)'

    # Plain or scientific-notation float, e.g. "-3.2", "1e-05", "1.2e+01".
    _FLOAT = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    _TTEST_RE = re.compile(
        "T=(" + _FLOAT + "), p-value=(" + _FLOAT + "), Cohen's d=(" + _FLOAT + ")"
    )

    def __init__(self, dataset_path):
        """Store the dataset folder and the presentation settings.

        Args:
            dataset_path: Directory containing one sub-folder per variant.
        """
        self.dataset_path = dataset_path
        self.metrics = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc', 'log_loss']
        self.metric_icons = {
            "accuracy": "🎯",
            "precision": "📏",
            "recall": "📡",
            "f1": "📊",
            "roc_auc": "🚦",
            "log_loss": "📉"
        }
        self.icon_up = '🟢'
        self.icon_down = '🔴'
        self.icon_significant = '🌟'

    @staticmethod
    def _is_missing(value):
        """Return True when a metric value is absent (``None`` or NaN)."""
        return value is None or pd.isna(value)

    @staticmethod
    def _with_rank(label, rank, is_baseline):
        """Append the rank badge to a formatted metric cell when one applies."""
        # Baseline cells, empty cells and unranked ("N/A") cells stay untouched.
        if is_baseline or pd.isna(label) or pd.isna(rank):
            return label
        return f"{label} <span style='color:gray;font-size:smaller;'>({rank})</span>"

    def load_csv(self, folder, filename):
        """Read ``folder/filename`` with its first column as index.

        Returns:
            The DataFrame, or ``None`` when the file does not exist.
        """
        path = os.path.join(folder, filename)
        return pd.read_csv(path, index_col=0) if os.path.exists(path) else None

    def parse_ttest(self, folder):
        """Parse ``paired_ttest_results.txt`` found in ``folder``.

        A line starting with ``=====`` and containing ``TopoGAT vs GAT for``
        selects the metric; the following ``T=..., p-value=..., Cohen's d=...``
        line supplies its statistics.

        Returns:
            ``{metric: {'T': float, 'p': float, 'd': float}}``; empty when the
            file is missing or nothing could be parsed.
        """
        path = os.path.join(folder, "paired_ttest_results.txt")
        results = {}
        if not os.path.exists(path):
            return results
        with open(path, "r") as file:
            lines = file.readlines()

        current_metric = None
        for line in lines:
            line = line.strip()
            if line.startswith("=====") and "TopoGAT vs GAT for" in line:
                # Reset on every header: a header naming an unknown metric must
                # not let its statistics overwrite the previous metric's entry.
                current_metric = next((m for m in self.metrics if m in line), None)
            elif line.startswith("T=") and current_metric:
                match = self._TTEST_RE.search(line)
                if match:
                    t, p, d = map(float, match.groups())
                    results[current_metric] = {'T': t, 'p': p, 'd': d}
        return results

    def compare_variant(self, variant_folder):
        """Compare one variant folder against its GAT baseline.

        Returns:
            ``(row, ttest_row, gat_metrics)`` where ``row`` holds the formatted
            metric cells plus raw ``<metric>_val`` / ``_accuracy`` helper keys,
            ``ttest_row`` the formatted statistics, and ``gat_metrics`` the
            baseline means. ``(None, None, None)`` when either summary CSV is
            missing.
        """
        topo_df = self.load_csv(variant_folder, 'summary_topogat.csv')
        gat_df = self.load_csv(variant_folder, 'summary_gat.csv')
        if topo_df is None or gat_df is None:
            return None, None, None

        topo_metrics = topo_df['mean'].to_dict()
        gat_metrics = gat_df['mean'].to_dict()
        ttest_result = self.parse_ttest(variant_folder)

        # normpath so a trailing separator does not yield an empty name.
        variant_name = os.path.basename(os.path.normpath(variant_folder))
        row = {'Variant': variant_name}
        ttest_row = {'Variant': variant_name}

        for metric in self.metrics:
            topo_val = topo_metrics.get(metric)
            gat_val = gat_metrics.get(metric)
            stat = ttest_result.get(metric)

            if self._is_missing(topo_val) or self._is_missing(gat_val):
                row[f'{metric}_val'] = None
                row[f'{metric}'] = 'N/A'
                ttest_row[f'{metric}'] = "Not Available"
                continue

            # log_loss is the only metric where lower is better.
            if metric == 'log_loss':
                improved = topo_val < gat_val
            else:
                improved = topo_val > gat_val
            icon = self.icon_up if improved else self.icon_down

            row[f'{metric}_val'] = topo_val
            row[f'{metric}'] = f"{topo_val:.4f} {icon}"

            if stat:
                line = f"T={stat['T']:.4f}, p-value={stat['p']:.4f}, Cohen's d={stat['d']:.4f}"
                # NOTE(review): this rule can never star log_loss, where an
                # improvement presumably yields a negative d; confirm intent.
                if stat['p'] < 0.05 and stat['d'] > 0.5:
                    line += f" {self.icon_significant}"
                ttest_row[f'{metric}'] = line
            else:
                ttest_row[f'{metric}'] = "Not Available"

        row['_accuracy'] = topo_metrics.get('accuracy', -1)
        return row, ttest_row, gat_metrics

    def generate_table(self):
        """Render the performance and significance tables for every variant.

        Displays (via IPython) the ranked performance table, the paired t-test
        table and a one-line verdict naming the most accurate variant.

        Returns:
            ``(df_display, df_ttest)``: the performance table (variants sorted
            by accuracy, baseline last) and the t-test table. Two empty
            DataFrames when no variant folder contains both summary CSVs.
        """
        variant_folders = sorted(
            os.path.join(self.dataset_path, name)
            for name in os.listdir(self.dataset_path)
            if not name.startswith(".")
            and os.path.isdir(os.path.join(self.dataset_path, name))
        )

        rows, ttest_rows, gat_values = [], [], None
        for folder in variant_folders:
            row, t_row, gat_metrics = self.compare_variant(folder)
            if row is None:
                continue
            rows.append(row)
            ttest_rows.append(t_row)
            # The baseline row is taken from the first variant that loaded.
            if gat_values is None:
                gat_values = gat_metrics

        if not rows:
            # Nothing to rank; previously this path crashed on gat_values.get.
            display(HTML("<b>No TopoGAT variant results found.</b>"))
            return pd.DataFrame(), pd.DataFrame()

        n_variants = len(rows)

        # Add GAT (baseline) row once
        gat_row = {'Variant': self._BASELINE_LABEL}
        for metric in self.metrics:
            val = gat_values.get(metric)
            if not self._is_missing(val):
                gat_row[f'{metric}_val'] = val
                gat_row[f'{metric}'] = f"{val:.4f}"
        gat_row['_accuracy'] = gat_values.get('accuracy', -1)
        rows.append(gat_row)

        df = pd.DataFrame(rows)
        # Positional mask: the baseline is always the last appended row.
        is_baseline = pd.Series([False] * n_variants + [True], index=df.index)

        # Rank every row (baseline included) and embed the rank in the cell.
        for metric in self.metrics:
            values = pd.to_numeric(df[f"{metric}_val"], errors='coerce')
            ranks = values.rank(
                ascending=(metric == 'log_loss'),
                method='min'
            ).astype('Int64')
            df[metric] = [
                self._with_rank(label, rank, baseline)
                for label, rank, baseline in zip(df[metric], ranks, is_baseline)
            ]

        drop_cols = [f"{m}_val" for m in self.metrics] + ['_accuracy']

        # Reorder: TopoGATs by accuracy, GAT at bottom
        df_topogats = df[~is_baseline].sort_values(by='_accuracy', ascending=False)
        df_gat = df[is_baseline]
        df_display = pd.concat([df_topogats.drop(columns=drop_cols), df_gat.drop(columns=drop_cols)])

        # Icon headers are applied to the rendered copy only, so the returned
        # frame keeps the plain metric column names.
        df_shown = df_display.rename(
            columns={m: f"{self.metric_icons[m]} {m}" for m in self.metrics}
        )

        # Show performance table
        legend = """
        <b>Legend:</b> 🟢 TopoGAT better than GAT, 🔴 TopoGAT worse than GAT<br>
        <i>🌟 indicates statistically significant improvement (p < 0.05 and Cohen's d > 0.5)</i><br>
        <i>GAT scores are averaged over 4 random seeds</i><br><br>
        """
        display(HTML("<h3>TopoGAT Variant Performance vs GAT (Baseline)</h3>" + legend +
                     df_shown.to_html(escape=False, index=False)))

        # Show statistical significance
        df_ttest = pd.DataFrame(ttest_rows)
        display(HTML("<h4>Statistical Significance (Paired t-tests)</h4>"))
        display(HTML(df_ttest.to_html(index=False)))

        # Show verdict
        top_variant = df_topogats.iloc[0]['Variant']
        top_acc = df_topogats.iloc[0]['_accuracy']
        verdict = f"<b>Verdict:</b> <b>{top_variant}</b> is the top-performing TopoGAT variant on this dataset with accuracy {top_acc:.4f}."
        display(HTML("<br>" + verdict))

        return df_display, df_ttest


In [115]:
import os
import re
import pandas as pd
from IPython.display import display, HTML

class DatasetVariantComparerGIN:
    """Build a TopoGAT-versus-baseline report where the folder names the baseline.

    ``dataset_path`` is scanned for sub-folders named ``<variant>_vs_<baseline>``.
    From each folder the class reads ``summary_topogat.csv`` and
    ``summary_<baseline>.csv`` (first column used as the index, a ``mean``
    column holding the metric values) and, when present,
    ``paired_ttest_results.txt``.
    """

    # Plain or scientific-notation float, e.g. "-3.2", "1e-05", "1.2e+01".
    _FLOAT = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    _TTEST_RE = re.compile(
        "T=(" + _FLOAT + "), p-value=(" + _FLOAT + "), Cohen's d=(" + _FLOAT + ")"
    )

    def __init__(self, dataset_path):
        """Store the dataset folder and the presentation settings.

        Args:
            dataset_path: Directory containing one sub-folder per variant.
        """
        self.dataset_path = dataset_path
        self.metrics = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc', 'log_loss']
        self.metric_icons = {
            "accuracy": "🎯",
            "precision": "📏",
            "recall": "📡",
            "f1": "📊",
            "roc_auc": "🚦",
            "log_loss": "📉"
        }
        self.icon_up = '🟢'
        self.icon_down = '🔴'
        self.icon_significant = '🌟'

    @staticmethod
    def _is_missing(value):
        """Return True when a metric value is absent (``None`` or NaN)."""
        return value is None or pd.isna(value)

    @staticmethod
    def _with_rank(label, rank, is_baseline):
        """Append the rank badge to a formatted metric cell when one applies."""
        # Baseline cells, empty cells and unranked ("N/A") cells stay untouched.
        if is_baseline or pd.isna(label) or pd.isna(rank):
            return label
        return f"{label} <span style='color:gray;font-size:smaller;'>({rank})</span>"

    def load_csv(self, folder, filename):
        """Read ``folder/filename`` with its first column as index.

        Returns:
            The DataFrame, or ``None`` when the file does not exist.
        """
        path = os.path.join(folder, filename)
        return pd.read_csv(path, index_col=0) if os.path.exists(path) else None

    def parse_ttest(self, folder, baseline_name):
        """Parse ``paired_ttest_results.txt`` found in ``folder``.

        A line starting with ``=====`` and containing
        ``TopoGAT vs <BASELINE_NAME>`` (upper-cased) selects the metric; the
        following ``T=..., p-value=..., Cohen's d=...`` line supplies its
        statistics.

        Args:
            folder: Variant folder to read from.
            baseline_name: Baseline identifier; upper-cased before matching.

        Returns:
            ``{metric: {'T': float, 'p': float, 'd': float}}``; empty when the
            file is missing or nothing could be parsed.
        """
        path = os.path.join(folder, "paired_ttest_results.txt")
        results = {}
        if not os.path.exists(path):
            return results
        with open(path, "r") as file:
            lines = file.readlines()

        header_tag = f"TopoGAT vs {baseline_name.upper()}"
        current_metric = None
        for line in lines:
            line = line.strip()
            if line.startswith("=====") and header_tag in line:
                # Reset on every header: a header naming an unknown metric must
                # not let its statistics overwrite the previous metric's entry.
                current_metric = next((m for m in self.metrics if m in line), None)
            elif line.startswith("T=") and current_metric:
                match = self._TTEST_RE.search(line)
                if match:
                    t, p, d = map(float, match.groups())
                    results[current_metric] = {'T': t, 'p': p, 'd': d}
        return results

    def compare_variant(self, variant_folder):
        """Compare one ``<variant>_vs_<baseline>`` folder against its baseline.

        Returns:
            ``(row, ttest_row, base_metrics, baseline_name)``. Every element is
            ``None`` when the folder name has no ``_vs_`` part or either
            summary CSV is missing, so callers can always unpack four values.
        """
        # normpath so a trailing separator does not yield an empty name.
        folder_name = os.path.basename(os.path.normpath(variant_folder))
        if '_vs_' not in folder_name:
            return None, None, None, None  # can't determine baseline

        # Split once from the right: works whatever the case of the suffix.
        variant_name, raw_baseline = folder_name.rsplit('_vs_', 1)
        baseline_name = raw_baseline.lower()
        topo_df = self.load_csv(variant_folder, 'summary_topogat.csv')
        base_df = self.load_csv(variant_folder, f'summary_{baseline_name}.csv')
        if topo_df is None or base_df is None:
            return None, None, None, None

        topo_metrics = topo_df['mean'].to_dict()
        base_metrics = base_df['mean'].to_dict()
        ttest_result = self.parse_ttest(variant_folder, baseline_name)

        row = {'Variant': variant_name}
        ttest_row = {'Variant': variant_name}

        for metric in self.metrics:
            topo_val = topo_metrics.get(metric)
            base_val = base_metrics.get(metric)
            stat = ttest_result.get(metric)

            if self._is_missing(topo_val) or self._is_missing(base_val):
                row[f'{metric}_val'] = None
                row[f'{metric}'] = 'N/A'
                ttest_row[f'{metric}'] = "Not Available"
                continue

            # log_loss is the only metric where lower is better.
            if metric == 'log_loss':
                improved = topo_val < base_val
            else:
                improved = topo_val > base_val
            icon = self.icon_up if improved else self.icon_down

            row[f'{metric}_val'] = topo_val
            row[f'{metric}'] = f"{topo_val:.4f} {icon}"

            if stat:
                line = f"T={stat['T']:.4f}, p-value={stat['p']:.4f}, Cohen's d={stat['d']:.4f}"
                # NOTE(review): this rule can never star log_loss, where an
                # improvement presumably yields a negative d; confirm intent.
                if stat['p'] < 0.05 and stat['d'] > 0.5:
                    line += f" {self.icon_significant}"
                ttest_row[f'{metric}'] = line
            else:
                ttest_row[f'{metric}'] = "Not Available"

        row['_accuracy'] = topo_metrics.get('accuracy', -1)
        return row, ttest_row, base_metrics, baseline_name

    def generate_table(self):
        """Render the performance and significance tables for every variant.

        Displays (via IPython) the ranked performance table, the paired t-test
        table and a one-line verdict naming the most accurate variant.

        Returns:
            ``(df_display, df_ttest)``: the performance table (variants sorted
            by accuracy, baseline last) and the t-test table. Two empty
            DataFrames when no variant folder could be loaded.
        """
        variant_folders = sorted(
            os.path.join(self.dataset_path, name)
            for name in os.listdir(self.dataset_path)
            if not name.startswith(".")
            and os.path.isdir(os.path.join(self.dataset_path, name))
        )

        rows, ttest_rows, baseline_metrics, baseline_name = [], [], None, None
        for folder in variant_folders:
            row, t_row, base_metrics, base_name = self.compare_variant(folder)
            if row is None:
                continue
            rows.append(row)
            ttest_rows.append(t_row)
            # Baseline row and title come from the first variant that loaded.
            if baseline_metrics is None:
                baseline_metrics = base_metrics
                baseline_name = base_name

        if not rows:
            # Nothing to rank; previously this path crashed further down.
            display(HTML("<b>No TopoGAT variant results found.</b>"))
            return pd.DataFrame(), pd.DataFrame()

        n_variants = len(rows)
        baseline_label = baseline_name.upper()

        # Add baseline row once
        has_baseline_row = bool(baseline_metrics)
        if has_baseline_row:
            base_row = {'Variant': f'{baseline_label} (baseline)'}
            for metric in self.metrics:
                val = baseline_metrics.get(metric)
                if not self._is_missing(val):
                    base_row[f'{metric}_val'] = val
                    base_row[f'{metric}'] = f"{val:.4f}"
            base_row['_accuracy'] = baseline_metrics.get('accuracy', -1)
            rows.append(base_row)

        df = pd.DataFrame(rows)
        # Positional mask instead of a substring test on 'baseline', so a
        # variant whose name contains that word is still treated as a variant.
        is_baseline = pd.Series(
            [False] * n_variants + [True] * (len(rows) - n_variants),
            index=df.index,
        )

        # Ranking (baseline included) with the rank embedded in each cell.
        for metric in self.metrics:
            values = pd.to_numeric(df[f"{metric}_val"], errors='coerce')
            ranks = values.rank(
                ascending=(metric == 'log_loss'),
                method='min'
            ).astype('Int64')
            df[metric] = [
                self._with_rank(label, rank, baseline)
                for label, rank, baseline in zip(df[metric], ranks, is_baseline)
            ]

        drop_cols = [f"{m}_val" for m in self.metrics] + ['_accuracy']

        # Reorder display: TopoGATs first, baseline last
        df_topogats = df[~is_baseline].sort_values(by='_accuracy', ascending=False)
        df_base = df[is_baseline]
        df_display = pd.concat([df_topogats.drop(columns=drop_cols), df_base.drop(columns=drop_cols)])

        # Icon headers are applied to the rendered copy only, so the returned
        # frame keeps the plain metric column names.
        df_shown = df_display.rename(
            columns={m: f"{self.metric_icons[m]} {m}" for m in self.metrics}
        )

        # Display results
        legend = f"""
        <b>Legend:</b> 🟢 TopoGAT better, 🔴 TopoGAT worse than {baseline_label}<br>
        <i>🌟 = statistically significant (p < 0.05 and d > 0.5)</i><br>
        <i>{baseline_label} scores averaged over 4 random seeds</i><br><br>
        """
        display(HTML(f"<h3>TopoGAT Variants vs {baseline_label}</h3>" + legend +
                     df_shown.to_html(escape=False, index=False)))

        df_ttest = pd.DataFrame(ttest_rows)
        display(HTML("<h4>Statistical Significance (Paired t-tests)</h4>"))
        display(HTML(df_ttest.to_html(index=False)))

        top_variant = df_topogats.iloc[0]['Variant']
        top_acc = df_topogats.iloc[0]['_accuracy']
        verdict = f"<b>Verdict:</b> <b>{top_variant}</b> performs best with accuracy {top_acc:.4f}."
        display(HTML("<br>" + verdict))

        return df_display, df_ttest


# Execution

In [124]:
# Render the TopoGAT-vs-GAT report for the ENZYMES results stored on Drive.
gat_results_dir = "/content/drive/MyDrive/topogat_vs_gat/ENZYMES"
reporter = DatasetVariantComparerGAT(gat_results_dir)
report_tables = reporter.generate_table()
df_ranked, df_ttest = report_tables

Variant,accuracy,precision,recall,f1,roc_auc,log_loss
basic,0.2737 🟢 (1),0.2873 🟢 (1),0.2787 🟢 (1),0.2486 🟢 (1),0.6293 🟢 (1),1.7470 🟢 (1)
gated,0.2687 🟢 (2),0.2754 🟢 (2),0.2682 🟢 (2),0.2344 🟢 (2),0.6204 🟢 (2),1.7562 🟢 (3)
node_aware,0.2400 🟢 (3),0.2649 🟢 (3),0.2474 🟢 (3),0.2025 🟢 (3),0.6034 🟢 (3),1.7720 🟢 (4)
attn,0.2358 🟢 (4),0.2059 🟢 (4),0.2420 🟢 (4),0.1886 🟢 (4),0.6000 🟢 (4),1.7550 🟢 (2)
GAT (baseline),0.2004,0.1485,0.2140,0.1456,0.5623,1.7856


Variant,accuracy,precision,recall,f1,roc_auc,log_loss
attn,"T=3.8808, p-value=0.0010, Cohen's d=0.8860 🌟","T=4.4635, p-value=0.0003, Cohen's d=0.9568 🌟","T=3.3444, p-value=0.0034, Cohen's d=0.8008 🌟","T=5.1307, p-value=0.0001, Cohen's d=1.0246 🌟",Not Available,Not Available
basic,"T=5.9154, p-value=0.0000, Cohen's d=1.9935 🌟","T=7.1552, p-value=0.0000, Cohen's d=2.6488 🌟","T=5.4002, p-value=0.0000, Cohen's d=1.7955 🌟","T=10.5846, p-value=0.0000, Cohen's d=3.0204 🌟",Not Available,Not Available
gated,"T=7.0347, p-value=0.0000, Cohen's d=2.2740 🌟","T=5.7499, p-value=0.0000, Cohen's d=2.3084 🌟","T=6.4882, p-value=0.0000, Cohen's d=1.8282 🌟","T=8.5288, p-value=0.0000, Cohen's d=2.5352 🌟",Not Available,Not Available
node_aware,"T=2.7559, p-value=0.0126, Cohen's d=1.0057 🌟","T=4.4483, p-value=0.0003, Cohen's d=1.3621 🌟","T=2.9217, p-value=0.0088, Cohen's d=0.8758 🌟","T=4.3265, p-value=0.0004, Cohen's d=1.4231 🌟",Not Available,Not Available


In [125]:
# Render the TopoGAT-vs-GIN report for the ENZYMES results stored on Drive.
gin_results_dir = "/content/drive/MyDrive/topogat_vs_gin/ENZYMES"
reporter = DatasetVariantComparerGIN(gin_results_dir)
report_tables = reporter.generate_table()
df_ranked, df_ttest = report_tables


Variant,accuracy,precision,recall,f1,roc_auc,log_loss
basic,0.2737 🔴 (2),0.2873 🔴 (2),0.2787 🔴 (2),0.2486 🔴 (2),0.6293 🔴 (2),1.7470 🟢 (1)
gated,0.2592 🔴 (3),0.2664 🔴 (3),0.2620 🔴 (3),0.2278 🔴 (3),0.6161 🔴 (3),1.7704 🟢 (4)
node_aware,0.2362 🔴 (4),0.2405 🔴 (4),0.2432 🔴 (4),0.1969 🔴 (4),0.6077 🔴 (4),1.7589 🟢 (2)
attn,0.2288 🔴 (5),0.1995 🔴 (5),0.2378 🔴 (5),0.1815 🔴 (5),0.5885 🔴 (5),1.7649 🟢 (3)
GIN (baseline),0.3358,0.3409,0.3361,0.3215,0.6821,1.7892


Variant,accuracy,precision,recall,f1,roc_auc,log_loss
attn,"T=-11.7491, p-value=0.0000, Cohen's d=-2.7877","T=-9.8125, p-value=0.0000, Cohen's d=-2.8472","T=-11.4478, p-value=0.0000, Cohen's d=-3.4942","T=-13.3383, p-value=0.0000, Cohen's d=-3.6264",Not Available,Not Available
basic,"T=-6.5296, p-value=0.0000, Cohen's d=-1.7515","T=-4.7419, p-value=0.0001, Cohen's d=-1.3072","T=-6.0865, p-value=0.0000, Cohen's d=-1.7953","T=-7.2051, p-value=0.0000, Cohen's d=-2.2938",Not Available,Not Available
gated,"T=-8.7431, p-value=0.0000, Cohen's d=-1.6822","T=-3.9127, p-value=0.0009, Cohen's d=-0.9759","T=-6.9764, p-value=0.0000, Cohen's d=-1.6074","T=-8.8958, p-value=0.0000, Cohen's d=-1.9390",Not Available,Not Available
node_aware,"T=-6.2405, p-value=0.0000, Cohen's d=-1.9336","T=-3.4059, p-value=0.0030, Cohen's d=-0.8391","T=-5.5415, p-value=0.0000, Cohen's d=-1.8228","T=-7.7870, p-value=0.0000, Cohen's d=-2.2015",Not Available,Not Available
