# GraphPulse Benchmark Runner - Google Colab

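This notebook runs the GraphPulse benchmarks on Google Colab using a CUDA GPU. Select a GPU runtime before starting (Runtime → Change runtime type), and run the cells in order: later cells rely on the working directory set by earlier cells and on files that earlier cells patch.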

**Target Dataset:** `dgd`

**Models to Run:**
- HTGN (Hyperbolic Temporal Graph Network)
- EvolveGCN
- GRUGCN
- GraphPulse (RNN)
- GIN (Static Baseline - Raw Graphs)
- TDA-GIN (Static Baseline - TDA Graphs)

## Step 1: Setup & Verification

In [None]:
# Check GPU availability.
# If this command fails, no NVIDIA GPU is attached to the runtime; the benchmark
# cells further down pass `--device cuda` and therefore expect one.
!nvidia-smi

In [None]:
# Mount Google Drive (Optional - for saving results)
# Uncomment the three lines below to mount Drive and define RESULTS_PATH.
# NOTE(review): no other cell visible in this notebook reads RESULTS_PATH, so
# results still have to be copied into that folder manually after mounting.
# from google.colab import drive
# drive.mount('/content/drive')
# RESULTS_PATH = '/content/drive/MyDrive/GraphPulse_Results'

In [None]:
# Clone the repository
!git clone https://github.com/trangiahuy8444/GraphPulse.git

import os
os.chdir('/content/GraphPulse')
print(f"Current directory: {os.getcwd()}")

In [None]:
# Report the installed PyTorch build and whether it can see a CUDA device
import torch

cuda_ready = torch.cuda.is_available()

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    # Only meaningful when a device is visible
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Install PyTorch Geometric and dependencies
import torch
torch_version = torch.__version__.split('+')[0]  # Remove +cu118 suffix if present

!pip install torch-geometric
!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv -f https://data.pyg.org/whl/torch-{torch_version}.html

In [None]:
# Install other dependencies
!pip install kmapper geoopt networkx scikit-learn pandas numpy matplotlib tqdm pyyaml tensorflow

print("✓ All dependencies installed")

## Step 2: Data Preprocessing

In [None]:
# Verify that the dgd network file is in place before preprocessing
import os

os.chdir('/content/GraphPulse')

dataset_found = os.path.exists('data/all_network/networkdgd.txt')
if dataset_found:
    print("✓ Dataset file found")
else:
    print("WARNING: networkdgd.txt not found!")
    print("Please upload the file to data/all_network/networkdgd.txt")

In [None]:
# Build the graph features and the time-series graphs for the dgd network
import sys

# Put the repository root on the import path so `analyzer` can be imported
sys.path.insert(0, '/content/GraphPulse')

from analyzer.network_parser import NetworkParser

network_name = "networkdgd.txt"

parser = NetworkParser()
# Both paths are relative and are resolved against the current working directory
parser.file_path = "./data/all_network/"
parser.timeseries_file_path = "./data/all_network/TimeSeries/"

print("Starting data preprocessing...")
# Run the two preprocessing stages in order on the same network file
for preprocessing_step in (parser.create_graph_features, parser.create_time_series_graphs):
    preprocessing_step(network_name)
print("✓ Data preprocessing completed!")

## Step 3: Run Benchmarks

### Model 1: HTGN

In [None]:
# Run HTGN
import os
os.chdir('/content/GraphPulse/models/temporal_gnn/script')

!python main.py --dataset dgd --model HTGN --device cuda --device_id 0 --seed 1024 --max_epoch 500 --patience 50 --lr 0.01 --nfeat 128 --nhid 16 --nout 16

### Model 2: EvolveGCN

In [None]:
# Run EvolveGCN
import os
os.chdir('/content/GraphPulse/models/temporal_gnn/script')

!python main.py --dataset dgd --model EvolveGCN --egcn_type EGCNH --device cuda --device_id 0 --seed 1024 --max_epoch 500 --patience 50 --lr 0.01 --nfeat 128 --nhid 16 --nout 16

### Model 3: GRUGCN

In [None]:
# Run GRUGCN
import os
os.chdir('/content/GraphPulse/models/temporal_gnn/script')

!python main.py --dataset dgd --model GRUGCN --device cuda --device_id 0 --seed 1024 --max_epoch 500 --patience 50 --lr 0.01 --nfeat 128 --nhid 16 --nout 16

### Model 4: GraphPulse (RNN)

In [None]:
# Run GraphPulse RNN
import os
os.chdir('/content/GraphPulse')

!python models/rnn/rnn_methods.py

### Model 5: GIN (Static Baseline - Raw Graphs)

In [None]:
%%writefile /content/GraphPulse/models/static_gnn/config_GIN.yml
# GIN baseline configuration (written by the Colab benchmark notebook)
hidden_units:
  - [64, 64, 64, 64]
dropout:
  - 0.5
train_eps:
  - true
aggregation:
  - mean

In [None]:
# Create output directories
import os

# Anchor on the repository root: the paths below are relative and would otherwise
# be created under whatever directory an earlier cell last changed into.
os.chdir('/content/GraphPulse')

for results_dir in ('models/static_gnn/GnnResults', 'GnnResults'):
    os.makedirs(results_dir, exist_ok=True)
print("✓ Output directories created")

In [None]:
# Modify static_graph_methods.py to run only for the dgd dataset.
# A backup of the unmodified script is kept next to it so later cells can restore it.
import os
import re
import shutil

os.chdir('/content/GraphPulse')

script_path = 'models/static_gnn/static_graph_methods.py'
backup_path = script_path + '.backup'

# Create the backup only once. Copying unconditionally would, on a re-run,
# overwrite the pristine backup with an already-patched script.
if not os.path.exists(backup_path):
    shutil.copyfile(script_path, backup_path)

# Always patch starting from the backup so re-running this cell yields the same file.
with open(backup_path, 'r') as f:
    content = f.read()

# Replace networkList with dgd only (DOTALL lets the list literal span several lines)
content, n_network_lists = re.subn(
    r'networkList = \[.*?\]',
    'networkList = ["networkdgd.txt"]',
    content,
    flags=re.DOTALL
)

# Update TDA variable to match actual folder (Overlap_0.3_Ncube_2)
content, n_tda_calls = re.subn(
    r'read_torch_time_series_data\(network, "Overlap_xx_Ncube_x"\)',
    'read_torch_time_series_data(network, "Overlap_0.3_Ncube_2")',
    content
)

with open(script_path, 'w') as f:
    f.write(content)

# Report substitutions that matched nothing instead of claiming success silently
if n_network_lists == 0:
    print("WARNING: no 'networkList = [...]' assignment found; dataset list was not changed")
if n_tda_calls == 0:
    print("WARNING: placeholder 'Overlap_xx_Ncube_x' not found; TDA folder was not changed")
if n_network_lists and n_tda_calls:
    print("✓ Modified static_graph_methods.py for dgd dataset")

In [None]:
# Run GIN (Raw) - using TemporalVectorizedGraph_Tuned data
# First, create a helper function version for raw graphs
import os
os.chdir('/content/GraphPulse')

# Modify script to use raw graphs for GIN
import re
with open('models/static_gnn/static_graph_methods.py', 'r') as f:
    content = f.read()

# Add helper function before main block
helper_func = '''
def read_torch_time_series_data_raw(network):
    """Read raw graph data (without TDA features) for GIN baseline"""
    file_path_temporal = "PygGraphs/TimeSeries/{}/TemporalVectorizedGraph_Tuned/".format(network)
    GraphDataList = []
    import os
    import pickle
    
    if os.path.exists(file_path_temporal):
        files = sorted([f for f in os.listdir(file_path_temporal) if f.endswith(('.txt', '.pkl'))])
        for file in files:
            with open(file_path_temporal + file, 'rb') as f:
                data = pickle.load(f)
                GraphDataList.append(data)
    else:
        raise FileNotFoundError(f"TemporalVectorizedGraph_Tuned not found for {network}")
    return GraphDataList

'''

# Insert helper function
content = content.replace('if __name__ == "__main__":', helper_func + '\nif __name__ == "__main__":')

# Change to use raw data function
content = re.sub(
    r'data = read_torch_time_series_data\(network, "Overlap_0\.3_Ncube_2"\)',
    'data = read_torch_time_series_data_raw(network)',
    content
)

with open('models/static_gnn/static_graph_methods.py', 'w') as f:
    f.write(content)

# Run GIN (Raw)
!cd /content/GraphPulse && python models/static_gnn/static_graph_methods.py

print("✓ GIN (Raw) training completed")

### Model 6: TDA-GIN (Static Baseline - TDA Graphs)

In [None]:
# Restore original and modify for TDA-GIN
import os
os.chdir('/content/GraphPulse')

# Restore from backup
!cp models/static_gnn/static_graph_methods.py.backup models/static_gnn/static_graph_methods.py

# Modify for TDA-GIN (dgd only, correct TDA folder)
import re
with open('models/static_gnn/static_graph_methods.py', 'r') as f:
    content = f.read()

# Replace networkList
content = re.sub(
    r'networkList = \[.*?\]',
    'networkList = ["networkdgd.txt"]',
    content,
    flags=re.DOTALL
)

# Update TDA variable
content = re.sub(
    r'read_torch_time_series_data\(network, "Overlap_xx_Ncube_x"\)',
    'read_torch_time_series_data(network, "Overlap_0.3_Ncube_2")',
    content
)

with open('models/static_gnn/static_graph_methods.py', 'w') as f:
    f.write(content)

# Run TDA-GIN
!cd /content/GraphPulse && python models/static_gnn/static_graph_methods.py

print("✓ TDA-GIN training completed")

## Step 4: Result Extraction

In [None]:
# Zip results: collect every file under the known output directories into one
# timestamped archive in the repository root.
import os
import zipfile
from datetime import datetime

os.chdir('/content/GraphPulse')

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
zip_filename = f'GraphPulse_dgd_results_{timestamp}.zip'

dirs_to_zip = [
    'models/temporal_gnn/saved_models',
    'models/temporal_gnn/data/output',
    'models/rnn/RnnResults',
    'models/static_gnn/GnnResults',
    'GnnResults'
]

archived_count = 0
missing_dirs = []

with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for dir_path in dirs_to_zip:
        if not os.path.isdir(dir_path):
            # Remember skipped directories so the omission is visible below
            missing_dirs.append(dir_path)
            continue
        for root, _subdirs, filenames in os.walk(dir_path):
            for filename in filenames:
                file_path = os.path.join(root, filename)
                # Store paths relative to the repository root inside the archive
                arcname = os.path.relpath(file_path, '/content/GraphPulse')
                zipf.write(file_path, arcname)
                archived_count += 1

for dir_path in missing_dirs:
    print(f"WARNING: result directory not found, skipped: {dir_path}")

if archived_count == 0:
    print(f"WARNING: no result files were found; {zip_filename} is empty")
else:
    print(f"✓ Zip file created: {zip_filename} ({archived_count} files)")

In [None]:
# Send the archive named by `zip_filename` to the browser as a download
from google.colab import files as colab_files

colab_files.download(zip_filename)