<a href="https://colab.research.google.com/github/maverick98/TopoGAT/blob/master/extract_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import nbformat

def extract_log_block(notebook_path, dataset, variant):
    """Print the log lines of one experiment run recorded in a notebook.

    The notebook is read with nbformat (as a v4 notebook). For markdown and
    code cells the cell source is scanned; for code cells the text of
    ``stream`` outputs and the ``text/plain`` payload of ``execute_result``
    outputs is scanned as well, in output order.

    Capturing starts on a line containing
    ``Running experiment on <dataset> with variant '<variant>'`` and stops
    after a line containing ``All analysis completed and saved.``. Every
    matching span in the notebook is captured, one after another.

    Args:
        notebook_path: Path of the ``.ipynb`` file to read.
        dataset: Dataset name as it appears in the header line.
        variant: Variant name as it appears (single-quoted) in the header line.

    Returns:
        None. The captured lines, or a "could not extract" message, are printed.
    """
    with open(notebook_path, 'r', encoding='utf-8') as handle:
        notebook = nbformat.read(handle, as_version=4)

    header = f"Running experiment on {dataset} with variant '{variant}'"
    footer = "All analysis completed and saved."

    def cell_lines(cell):
        # Source lines first, then (code cells only) the textual outputs.
        kind = cell.cell_type
        if kind not in ('markdown', 'code'):
            return []
        lines = cell.source.splitlines()
        if kind == 'code':
            for out in cell.get("outputs", []):
                if out.output_type == "stream":
                    lines.extend(out.text.splitlines())
                elif out.output_type == "execute_result" and isinstance(out.data, dict):
                    lines.extend(out.data.get('text/plain', '').splitlines())
        return lines

    captured = []
    inside_block = False
    for cell in notebook.cells:
        for text in cell_lines(cell):
            if header in text:
                # The header line is always kept and (re)opens the capture.
                inside_block = True
                captured.append(text)
            elif inside_block:
                captured.append(text)
                if footer in text:
                    inside_block = False

    if not captured:
        print(f"\n❌ Could not extract log block for {dataset} with variant '{variant}'.")
        return

    print(f"\n📋 Log Block for {dataset.upper()} - {variant}:\n")
    print("\n".join(captured))

# Example usage: print the logged run of the 'basic' variant on MUTAG
# as recorded in the results notebook.
extract_log_block(
    notebook_path="TopoGAT_vs_GAT.ipynb",
    dataset="MUTAG",
    variant="basic",
)


In [22]:
import nbformat

def extract_summary_block(full_block: str) -> str:
    """Return the summary section of a full experiment log block.

    The section starts at the first line containing ``Saved summary to`` and
    ends at the first line, at or after that start, containing
    ``All analysis completed and saved``. Both boundary lines are included.

    An end marker that appears *before* any start marker is ignored, so a
    stray completion line earlier in the text does not hide a complete
    summary section that follows it.

    Args:
        full_block: Newline-separated log text of one experiment.

    Returns:
        The summary section joined with ``\\n``, or the string
        ``'❌ Summary section not found.'`` when no start marker followed by an
        end marker exists.
    """
    start_marker = 'Saved summary to'
    end_marker = 'All analysis completed and saved'

    lines = full_block.splitlines()
    start_index = None

    for i, line in enumerate(lines):
        if start_index is None:
            if start_marker not in line:
                # Still before the summary: end markers here are irrelevant.
                continue
            start_index = i
        # The start line itself may also carry the end marker.
        if end_marker in line:
            return '\n'.join(lines[start_index:i + 1])

    return '❌ Summary section not found.'

def extract_log_block(notebook_path, dataset, variant):
    """Print the summary section of one experiment run recorded in a notebook.

    The notebook is read with nbformat (as a v4 notebook). Source lines of
    markdown and code cells are scanned, and for code cells also the text of
    ``stream`` outputs and the ``text/plain`` payload of ``execute_result``
    outputs. Lines from the header
    ``Running experiment on <dataset> with variant '<variant>'`` through the
    line containing ``All analysis completed and saved.`` are gathered, joined
    with newlines, and passed to ``extract_summary_block``; its result is
    printed under a heading.

    Args:
        notebook_path: Path of the ``.ipynb`` file to read.
        dataset: Dataset name as it appears in the header line.
        variant: Variant name as it appears (single-quoted) in the header line.

    Returns:
        None. The summary, or a "could not extract" message, is printed.
    """
    with open(notebook_path, 'r', encoding='utf-8') as fh:
        notebook = nbformat.read(fh, as_version=4)

    opening = f"Running experiment on {dataset} with variant '{variant}'"
    closing = "All analysis completed and saved."

    def iter_lines():
        # Yield every scannable line of the notebook in document order.
        for cell in notebook.cells:
            if cell.cell_type not in ('markdown', 'code'):
                continue
            yield from cell.source.splitlines()
            if cell.cell_type != 'code':
                continue
            for out in cell.get("outputs", []):
                if out.output_type == "stream":
                    yield from out.text.splitlines()
                elif out.output_type == "execute_result" and isinstance(out.data, dict):
                    yield from out.data.get('text/plain', '').splitlines()

    kept = []
    active = False
    for text in iter_lines():
        is_header = opening in text
        if is_header:
            active = True
        if active:
            kept.append(text)
            # A header line never closes the block, even if it also
            # happens to contain the closing phrase.
            if not is_header and closing in text:
                active = False

    if not kept:
        print(f"\n❌ Could not extract log block for {dataset} with variant '{variant}'.")
        return

    summary = extract_summary_block("\n".join(kept))
    print(f"\n📋 Summary for {dataset.upper()} - {variant}:\n")
    print(summary)

# Example usage: print the summary section of the 'attn' variant run on
# PROTEINS as recorded in the results notebook.
extract_log_block(
    notebook_path="TopoGAT_vs_GAT.ipynb",
    dataset="PROTEINS",
    variant="attn",
)



📋 Summary for PROTEINS - attn:

 Saved summary to /content/drive/MyDrive/topogat/PROTEINS/attn/summary_topogat.csv
 Saved summary to /content/drive/MyDrive/topogat/PROTEINS/attn/summary_gat.csv
 Exported LaTeX table to /content/drive/MyDrive/topogat/PROTEINS/attn/topogat_table.tex
 Exported LaTeX table to /content/drive/MyDrive/topogat/PROTEINS/attn/gat_table.tex
 Exported comparison summary to /content/drive/MyDrive/topogat/PROTEINS/attn/comparison_summary.csv
 Representative Seed: 0 (Closest accuracy to mean 0.7101)
 Representative Metrics: {'accuracy': 0.7085201793721974, 'precision': 0.6971766848816029, 'recall': 0.6653793922933288, 'f1': 0.6702159124519373, 'roc_auc': np.float64(0.6829910032252589), 'log_loss': 0.6294615844584237}
 Representative Seed: 14 (Closest accuracy to mean 0.6435)
 Representative Metrics: {'accuracy': 0.6457399103139013, 'precision': 0.6318011257035647, 'recall': 0.5827861952861954, 'f1': 0.5691927713789646, 'roc_auc': np.float64(0.4738215488215488), 'log

In [28]:
import nbformat
import os

def extract_summary_block(notebook_path, dataset, variant, save_dir):
    """Save the analysis summary of one experiment run to a ``.log`` file.

    The notebook is read with nbformat (as a v4 notebook). For markdown and
    code cells the source lines are scanned, followed by the text of the
    cell's ``stream`` outputs and the ``text/plain`` payload of its
    ``execute_result`` outputs.

    Inside the run announced by
    ``Running experiment on <dataset> with variant '<variant>'`` every line
    from the first one containing ``Saved summary to`` through the one
    containing ``All analysis completed and saved.`` is collected.

    A header line announcing a *different* dataset/variant ends the current
    run. Without that reset, a run that never printed its own summary would
    pick up the summary of whichever experiment was logged next and store it
    under this run's file name.

    Args:
        notebook_path: Path of the ``.ipynb`` file to read.
        dataset: Dataset name as it appears in the header line.
        variant: Variant name as it appears (single-quoted) in the header line.
        save_dir: Directory for the output file; created if missing.

    Returns:
        None. On success ``analysis_<dataset>_<variant>.log`` is written to
        ``save_dir`` and its path printed; otherwise a "could not find"
        message is printed and nothing is written.
    """
    with open(notebook_path, 'r', encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    header_prefix = "Running experiment on "
    header_infix = " with variant '"
    experiment_header = f"{header_prefix}{dataset}{header_infix}{variant}'"
    start_phrase = "Saved summary to"
    end_phrase = "All analysis completed and saved."

    in_experiment = False
    collecting = False
    summary_lines = []

    for cell in nb.cells:
        if cell.cell_type not in ('markdown', 'code'):
            continue

        lines = cell.source.splitlines()
        # Markdown cells carry no "outputs" key, so this loop is a no-op for them.
        for output in cell.get("outputs", []):
            if output.output_type == "stream":
                lines.extend(output.text.splitlines())
            elif output.output_type == "execute_result" and isinstance(output.data, dict):
                lines.extend(output.data.get('text/plain', '').splitlines())

        for line in lines:
            if header_prefix in line and header_infix in line:
                # Any experiment header starts a new run: track it only if it
                # is the requested one, and drop an unfinished collection.
                in_experiment = experiment_header in line
                collecting = False

            if not in_experiment:
                continue

            if start_phrase in line:
                collecting = True
            if collecting:
                summary_lines.append(line)
                if end_phrase in line:
                    in_experiment = False
                    collecting = False

    if not summary_lines:
        print(f"❌ Could not find summary block for {dataset} with variant '{variant}'.")
        return

    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, f"analysis_{dataset}_{variant}.log")

    with open(save_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(summary_lines))

    print(f"✅ Extracted summary saved to:\n{save_path}")




✅ Extracted summary saved to:
/content/drive/MyDrive/topogat_vs_gat/MUTAG/analysis_MUTAG_basic.log


In [None]:
# Example usage: write the summary of the 'basic' variant run on MUTAG to
# analysis_MUTAG_basic.log inside the MUTAG results folder.
extract_summary_block(
    "TopoGAT_vs_GAT.ipynb",
    "MUTAG",
    "basic",
    "/content/drive/MyDrive/topogat_vs_gat/MUTAG",
)

In [31]:
import os
import nbformat

def extract_summary_block(notebook_path, dataset, variant, base_dataset_dir):
    """Save the analysis summary of one experiment run to a ``.log`` file.

    The notebook is read with nbformat (as a v4 notebook). Source lines of
    markdown and code cells are scanned; for code cells the text of
    ``stream`` outputs and the ``text/plain`` payload of ``execute_result``
    outputs is scanned as well.

    Inside the run announced by
    ``Running experiment on <dataset> with variant '<variant>'`` every line
    from the first one containing ``Saved summary to`` through the one
    containing ``All analysis completed and saved.`` is collected.

    A header line announcing a *different* dataset/variant ends the current
    run. Without that reset, a run that never printed its own summary would
    pick up the summary of whichever experiment was logged next and store it
    under this run's file name.

    Args:
        notebook_path: Path of the ``.ipynb`` file to read.
        dataset: Dataset name as it appears in the header line.
        variant: Variant name as it appears (single-quoted) in the header line.
        base_dataset_dir: Directory for the output file; created if missing.

    Returns:
        None. On success ``analysis_<dataset>_<variant>.log`` is written to
        ``base_dataset_dir`` and its path printed; otherwise a "no summary
        block found" message is printed and nothing is written.
    """
    with open(notebook_path, 'r', encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    header_prefix = "Running experiment on "
    header_infix = " with variant '"
    start_phrase = f"{header_prefix}{dataset}{header_infix}{variant}'"
    collect_from = "Saved summary to"
    end_phrase = "All analysis completed and saved."

    collecting = False
    found_section = False
    log_lines = []

    for cell in nb.cells:
        if cell.cell_type not in ('markdown', 'code'):
            continue

        sources = cell.source.splitlines()
        if cell.cell_type == 'code':
            for output in cell.get("outputs", []):
                if output.output_type == "stream":
                    sources.extend(output.text.splitlines())
                elif output.output_type == "execute_result" and isinstance(output.data, dict):
                    sources.extend(output.data.get('text/plain', '').splitlines())

        for line in sources:
            if header_prefix in line and header_infix in line:
                # Any experiment header starts a new run: track it only if it
                # is the requested one, and drop an unfinished collection.
                found_section = start_phrase in line
                collecting = False

            if found_section and collect_from in line:
                collecting = True

            if collecting:
                log_lines.append(line)
                if end_phrase in line:
                    collecting = False
                    found_section = False

    if not log_lines:
        print(f"❌ No summary block found for {dataset} with variant '{variant}'.")
        return

    os.makedirs(base_dataset_dir, exist_ok=True)
    filename = f"analysis_{dataset}_{variant}.log"
    save_path = os.path.join(base_dataset_dir, filename)
    with open(save_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(log_lines))
    print(f"✅ Saved: {save_path}")

def extract_all_variants(notebook_path, dataset, base_dir, variants=None):
    """Extract and save the summary of every variant of one dataset.

    Calls ``extract_summary_block`` once per variant, writing into the
    ``<base_dir>/<dataset>`` directory.

    Args:
        notebook_path: Path of the ``.ipynb`` file holding the experiment logs.
        dataset: Dataset name as it appears in the logged header lines.
        base_dir: Root output directory; the dataset name is appended to it.
        variants: Optional iterable of variant names to process. When omitted,
            the four variants ``basic``, ``node_aware``, ``gated`` and ``attn``
            are processed, in that order.

    Returns:
        None.
    """
    if variants is None:
        variants = ["basic", "node_aware", "gated", "attn"]

    dataset_dir = os.path.join(base_dir, dataset)
    for variant in variants:
        extract_summary_block(notebook_path, dataset, variant, dataset_dir)

# Example usage: for every dataset, save one summary log per variant under
# <base_dir>/<dataset>/.
notebook_path = "TopoGAT_vs_GAT.ipynb"  # notebook holding the experiment logs
base_dir = "/content/drive/MyDrive/topogat_vs_gat"  # root folder for the extracted logs
datasets = ["MUTAG", "PTC_MR", "PROTEINS", "ENZYMES"]

for dataset in datasets:
    extract_all_variants(
        notebook_path=notebook_path,
        dataset=dataset,
        base_dir=base_dir,
    )


✅ Saved: /content/drive/MyDrive/topogat_vs_gat/MUTAG/analysis_MUTAG_basic.log
✅ Saved: /content/drive/MyDrive/topogat_vs_gat/MUTAG/analysis_MUTAG_node_aware.log
✅ Saved: /content/drive/MyDrive/topogat_vs_gat/MUTAG/analysis_MUTAG_gated.log
✅ Saved: /content/drive/MyDrive/topogat_vs_gat/MUTAG/analysis_MUTAG_attn.log
✅ Saved: /content/drive/MyDrive/topogat_vs_gat/PTC_MR/analysis_PTC_MR_basic.log
✅ Saved: /content/drive/MyDrive/topogat_vs_gat/PTC_MR/analysis_PTC_MR_node_aware.log
✅ Saved: /content/drive/MyDrive/topogat_vs_gat/PTC_MR/analysis_PTC_MR_gated.log
✅ Saved: /content/drive/MyDrive/topogat_vs_gat/PTC_MR/analysis_PTC_MR_attn.log
✅ Saved: /content/drive/MyDrive/topogat_vs_gat/PROTEINS/analysis_PROTEINS_basic.log
✅ Saved: /content/drive/MyDrive/topogat_vs_gat/PROTEINS/analysis_PROTEINS_node_aware.log
✅ Saved: /content/drive/MyDrive/topogat_vs_gat/PROTEINS/analysis_PROTEINS_gated.log
✅ Saved: /content/drive/MyDrive/topogat_vs_gat/PROTEINS/analysis_PROTEINS_attn.log
✅ Saved: /content/dr