# Download Dataset

In [None]:
import os
import urllib.request

url = "https://www.dropbox.com/scl/fo/5w0a14icfv4o7t0azrqda/AHECd04T6OAUwcvxwiZXw-4/data/tmall?rlkey=qhx7csgahlcbuppjx4ewa3l0o&subfolder_nav_tracking=1&st=kqkod5je&dl=1"
output_path = "tmall.zip"

# Re-running the cell should not fetch the archive again.
# NOTE: an archive left behind by an earlier, interrupted run of the old
# version of this cell must be deleted by hand before re-running.
if os.path.exists(output_path):
    print(f"{output_path} already exists, skipping download.")
else:
    # Download into a temporary ".part" file and rename it only once the
    # transfer has finished, so `output_path` never points at a truncated
    # archive.
    partial_path = output_path + ".part"
    print("Downloading...")
    try:
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, output_path)
    finally:
        # After a successful rename the partial file is gone; after a
        # failure (including Ctrl-C) this removes the leftover.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Download complete!")

In [None]:
# Import under its real name: aliasing the module to `zip` would rebind the
# builtin zip() for every later cell in the notebook.
import zipfile
import os

# `output_path` is the archive path defined by the download cell.
with zipfile.ZipFile(output_path, 'r') as zip_ref:
    zip_ref.extractall("tmall_data")
print("Extraction complete!")

# Preprocessing

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torch_geometric.utils import dropout_edge
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedShuffleSplit
import random

# ============================================
# Set Seeds for Reproducibility
# ============================================

def set_seed(seed=42):
    """
    Seed every random number generator used in this file and switch cuDNN
    to its deterministic configuration.

    Seeds, in order: torch (CPU), torch (all CUDA devices), numpy, and the
    stdlib `random` module.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Deterministic kernels on, autotuner off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_seed(42)


# ============================================
# Model Components (from previous artifact)
# ============================================

class LIFNeuron(nn.Module):
    """
    Leaky integrate-and-fire unit with a soft (sigmoid) spike.

    Each call decays the previous membrane potential, adds the input, emits
    sigmoid(5 * (membrane - threshold)) as the spike, and subtracts the
    threshold from the membrane wherever the spike exceeds 0.5.
    """

    def __init__(self, decay=0.95, threshold=1.0):
        super().__init__()
        self.decay = decay
        self.threshold = threshold

    def forward(self, x, membrane=None):
        """
        Advance the neuron by one time step.

        Args:
            x: input current for this step.
            membrane: membrane potential returned by the previous step, or
                None to start from zeros shaped like `x`.

        Returns:
            (spike, membrane): the soft spike and the post-reset membrane.
        """
        previous = torch.zeros_like(x) if membrane is None else membrane

        potential = self.decay * previous + x
        spike = torch.sigmoid(5 * (potential - self.threshold))

        # The reset is computed without gradient tracking, so the membrane
        # handed to the next step carries no autograd history.
        with torch.no_grad():
            fired = (spike > 0.5).float()
            potential = potential - fired * self.threshold

        return spike, potential


class TemporalAttention(nn.Module):
    """
    Learned softmax weighting over time steps.

    Holds one learnable score per time step; the forward pass returns the
    softmax-weighted sum of the per-step tensors.
    """

    def __init__(self, time_steps):
        super().__init__()
        self.attention_weights = nn.Parameter(torch.randn(time_steps))

    def forward(self, spikes_list):
        """
        Combine per-step tensors into one tensor.

        The steps are accumulated one at a time rather than stacked, so no
        (T, N, C) intermediate tensor is materialised. An empty list
        yields the integer 0.
        """
        step_weights = F.softmax(self.attention_weights, dim=0)
        return sum(
            (step_weights[t] * spike for t, spike in enumerate(spikes_list)),
            0,
        )


class SpikingGCN(nn.Module):
    """
    Graph convolution followed by a spiking read-out.

    The GCN output is fed, unchanged, into a LIF neuron for `time_steps`
    steps; the resulting spikes are merged by temporal attention.
    """

    def __init__(self, in_channels, out_channels, time_steps):
        super().__init__()
        self.conv = GCNConv(in_channels, out_channels)
        self.lif = LIFNeuron(decay=0.95, threshold=1.0)
        self.time_steps = time_steps
        self.temp_attn = TemporalAttention(time_steps)

    def forward(self, x, edge_index):
        """Return the attention-weighted spike response for every node."""
        current = self.conv(x, edge_index)

        # Same input current at every step; only the membrane state evolves.
        spike_train = []
        state = None
        for _ in range(self.time_steps):
            spike, state = self.lif(current, state)
            spike_train.append(spike)

        # The attention module consumes the list directly (no stacking).
        return self.temp_attn(spike_train)


class ImprovedContrastiveLoss(nn.Module):
    """
    Cross-entropy contrastive loss between two aligned sets of embeddings.

    Row i of `z1` and row i of `z2` form a positive pair; every other row
    of either input acts as a negative.
    """

    def __init__(self, temperature=0.5):
        super().__init__()
        self.temperature = temperature

    def forward(self, z1, z2, batch_size=None):
        """
        Compute the loss.

        Args:
            z1, z2: 2-D embeddings with matching rows (normalised along
                dim 1 here).
            batch_size: if given and smaller than the number of rows, a
                random subset of that many row pairs is used.

        Returns:
            Scalar loss tensor.
        """
        view_a = F.normalize(z1, dim=1)
        view_b = F.normalize(z2, dim=1)
        num_pairs = view_a.size(0)

        if batch_size is not None and batch_size < num_pairs:
            keep = torch.randperm(num_pairs)[:batch_size]
            view_a, view_b = view_a[keep], view_b[keep]
            num_pairs = batch_size

        device = view_a.device

        # Temperature-scaled similarities between all 2N rows.
        stacked = torch.cat([view_a, view_b], dim=0)
        scores = torch.mm(stacked, stacked.T) / self.temperature

        # A row must never pick itself as its match.
        self_pairs = torch.eye(2 * num_pairs, dtype=torch.bool, device=device)
        scores.masked_fill_(self_pairs, -1e9)

        # Row i (first view) matches row i + N (second view) and vice versa.
        first_half = torch.arange(num_pairs, device=device)
        targets = torch.cat([first_half + num_pairs, first_half])

        return F.cross_entropy(scores, targets)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class DyC_DGNN(nn.Module):
    """
    Two-layer spiking GCN encoder with a classification head and a
    projection head for contrastive learning.

    The module owns a learnable node-embedding table (`node_emb`), but the
    forward pass does not read it: callers pass the node features in
    explicitly (usually `model.node_emb.weight`).
    """

    def __init__(self, num_nodes, in_channels, hidden_channels, out_channels, time_steps=10):
        super().__init__()

        # Learnable node embeddings - CRITICAL for datasets with random/no features
        self.node_emb = nn.Embedding(num_nodes, in_channels)
        nn.init.xavier_uniform_(self.node_emb.weight)

        self.spike_gcn1 = SpikingGCN(in_channels, hidden_channels, time_steps)
        self.spike_gcn2 = SpikingGCN(hidden_channels, hidden_channels, time_steps)

        # Projection head used only by the contrastive objective.
        projection_dim = hidden_channels // 2
        self.projector = nn.Sequential(
            nn.Linear(hidden_channels, hidden_channels),
            nn.ReLU(),
            nn.Linear(hidden_channels, projection_dim),
        )

        self.classifier = nn.Linear(hidden_channels, out_channels)
        self.contrastive_loss_fn = ImprovedContrastiveLoss(temperature=0.5)
        # Raw mixing coefficient; squashed with a sigmoid in full_loss.
        self.alpha = nn.Parameter(torch.tensor(0.5))

    def encode(self, x, edge_index):
        """Return node embeddings from the two spiking GCN layers."""
        hidden = F.relu(self.spike_gcn1(x, edge_index))
        hidden = F.dropout(hidden, p=0.3, training=self.training)
        return self.spike_gcn2(hidden, edge_index)

    def forward(self, x, edge_index):
        """
        Return (logits, projections, embeddings) for every node.

        `x` is supplied by the caller (usually `self.node_emb.weight`).
        """
        embeddings = self.encode(x, edge_index)
        return (
            self.classifier(embeddings),
            self.projector(embeddings),
            embeddings,
        )

    def augment_graph(self, x, edge_index, drop_edge_p=0.3, drop_feat_p=0.2):
        """
        Build one random view of the graph.

        Edges are dropped with probability `drop_edge_p` (undirected) and
        individual feature entries are zeroed with probability
        `drop_feat_p`. Returns (augmented features, augmented edge index).
        """
        sparse_edges, _ = dropout_edge(
            edge_index, p=drop_edge_p, force_undirected=True
        )

        keep = torch.rand(x.size(), device=x.device) > drop_feat_p
        return x * keep.float(), sparse_edges

    def contrastive_loss(self, x, edge_index, batch_size=512):
        """
        Contrastive loss between the projections of two random views.

        When the graph has more than `batch_size` nodes, the loss is
        computed on a random subset of `batch_size` nodes.
        """
        first_view = self.augment_graph(x, edge_index)
        second_view = self.augment_graph(x, edge_index)

        _, proj1, _ = self.forward(*first_view)
        _, proj2, _ = self.forward(*second_view)

        num_nodes = x.size(0)
        if num_nodes > batch_size:
            subset = torch.randperm(num_nodes)[:batch_size]
            proj1, proj2 = proj1[subset], proj2[subset]

        return self.contrastive_loss_fn(proj1, proj2)

    def full_loss(self, logits, y, x, edge_index, mask, use_contrastive=True, class_weight=None):
        """
        Training objective.

        Returns a tuple (loss tensor, classification loss as float,
        contrastive loss as float). The contrastive term is included only
        when `use_contrastive` is set and the module is in training mode;
        otherwise the loss is the (optionally class-weighted)
        cross-entropy on the masked nodes and the third element is 0.0.
        """
        cls_loss = F.cross_entropy(logits[mask], y[mask], weight=class_weight)

        if not (use_contrastive and self.training):
            return cls_loss, cls_loss.item(), 0.0

        cont_loss = self.contrastive_loss(x, edge_index, batch_size=512)
        mix = torch.sigmoid(self.alpha)
        combined = mix * cls_loss + (1 - mix) * cont_loss
        return combined, cls_loss.item(), cont_loss.item()


# ============================================
# FIXED: Data Loading for Tmall
# ============================================

def load_tmall_data(edges_file, nodes_file):
    """
    Load the Tmall graph into a torch_geometric `Data` object.

    Node ids are remapped to contiguous indices: nodes listed in
    `nodes_file` come first (in file order, first occurrence wins), then
    any node that only appears in `edges_file`, which gets the label -1
    (unlabeled).

    Args:
        edges_file: text file with one edge per line; the first two
            whitespace-separated integers are source and destination ids,
            any further columns are ignored.
        nodes_file: text file with one "<node_id> <label>" pair per line.

    Returns:
        Data with `x` (random 16-dim placeholder features), `edge_index`
        of shape (2, num_edges) and `y` (labels, -1 for unlabeled nodes).

    Raises:
        ValueError: if a non-blank line cannot be parsed, or if an edge
            refers to an index outside the node range.
    """
    node_id_to_idx = {}
    labels = []  # labels[i] belongs to node index i, so len(labels) is the next free index

    # Load labeled nodes
    with open(nodes_file, "r", encoding="utf-8") as f:
        for lineno, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                continue  # tolerate blank / trailing empty lines
            try:
                node_id, label = map(int, fields)
            except ValueError as err:
                raise ValueError(
                    f"{nodes_file}:{lineno}: expected '<node_id> <label>', "
                    f"got {line.strip()!r}"
                ) from err
            if node_id not in node_id_to_idx:
                node_id_to_idx[node_id] = len(labels)
                labels.append(label)

    # Load edges and add unlabeled nodes
    edges = []
    with open(edges_file, "r", encoding="utf-8") as f:
        for lineno, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                continue
            try:
                src, dst = map(int, fields[:2])
            except ValueError as err:
                raise ValueError(
                    f"{edges_file}:{lineno}: expected '<src> <dst> ...', "
                    f"got {line.strip()!r}"
                ) from err

            for node in (src, dst):
                if node not in node_id_to_idx:
                    node_id_to_idx[node] = len(labels)
                    labels.append(-1)  # Unlabeled

            edges.append((node_id_to_idx[src], node_id_to_idx[dst]))

    # Create tensors. reshape(-1, 2) keeps the (2, E) layout even when the
    # edge file is empty (E == 0).
    edge_index = (
        torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous()
    )
    # We still create x but it will be ignored by the model in favor of embeddings
    x = torch.randn(len(node_id_to_idx), 16)
    y = torch.tensor(labels, dtype=torch.long)

    data = Data(x=x, edge_index=edge_index, y=y)

    # Validation (explicit raise: an `assert` would vanish under `python -O`)
    if edge_index.numel() > 0 and int(edge_index.max()) >= data.num_nodes:
        raise ValueError("Invalid edge indices!")

    print("Loaded Tmall dataset:")
    print(f"  Nodes: {data.num_nodes}")
    print(f"  Edges: {data.num_edges}")
    print(f"  Labeled nodes: {(y != -1).sum().item()}")
    print(f"  Features: {data.x.shape}")

    return data

In [3]:
def create_stratified_split(data, train_ratio, run_seed):
    """
    Stratified train/val/test split over the labeled nodes only.

    Nodes with label -1 are excluded from every mask. `train_ratio` of the
    labeled nodes go to training; the remainder is split 50/50 into
    validation and test. Both splits are stratified by label and seeded
    with `run_seed`.

    Args:
        data: graph object exposing `y` (label tensor) and `num_nodes`.
        train_ratio: fraction of labeled nodes used for training.
        run_seed: random state passed to both shuffle splits.

    Returns:
        (train_mask, val_mask, test_mask): boolean tensors of length
        `data.num_nodes`, placed on the same device as `data.y`.
    """
    # Get labeled nodes only. `.cpu()` is a no-op for CPU tensors and makes
    # the NumPy conversion work when the graph lives on an accelerator.
    labeled_mask = data.y != -1
    labeled_indices = torch.where(labeled_mask)[0].cpu().numpy()
    labeled_y = data.y[labeled_mask].cpu().numpy()

    # Stratified split: train vs. everything else
    sss = StratifiedShuffleSplit(n_splits=1, test_size=1-train_ratio,
                                 random_state=run_seed)
    train_pos, holdout_pos = next(sss.split(labeled_indices, labeled_y))

    # Second split for val/test
    holdout_indices = labeled_indices[holdout_pos]
    holdout_y = labeled_y[holdout_pos]

    sss_temp = StratifiedShuffleSplit(n_splits=1, test_size=0.5,
                                      random_state=run_seed)
    val_pos, test_pos = next(sss_temp.split(holdout_indices, holdout_y))

    def build_mask(node_indices):
        # Fill on CPU (the indices are NumPy arrays), then follow data.y.
        mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        mask[node_indices] = True
        return mask.to(data.y.device)

    # Map split positions back to original node indices
    train_mask = build_mask(labeled_indices[train_pos])
    val_mask = build_mask(holdout_indices[val_pos])
    test_mask = build_mask(holdout_indices[test_pos])

    print("Split created:")
    print(f"  Train: {train_mask.sum().item()}")
    print(f"  Val: {val_mask.sum().item()}")
    print(f"  Test: {test_mask.sum().item()}")

    return train_mask, val_mask, test_mask


def train_model(data, train_mask, val_mask, test_mask, 
                hidden_channels=64, time_steps=5, epochs=100, lr=0.001):
    """
    Train a DyC_DGNN with class-weighted loss and learnable node embeddings.

    The model is validated every 5 epochs; the weights with the best
    validation macro-F1 are restored before the final test evaluation.
    Early stopping triggers after 20 consecutive validation checks without
    improvement (patience is counted in checks, not epochs).

    Args:
        data: graph with `y`, `edge_index` and `num_nodes`; label -1 marks
            unlabeled nodes.
        train_mask, val_mask, test_mask: boolean node masks.
        hidden_channels: hidden width of the model.
        time_steps: number of spiking time steps per layer.
        epochs: maximum number of training epochs.
        lr: Adam learning rate.

    Returns:
        (test_f1_macro, test_f1_micro) of the restored best model.
    """
    # Initialize model
    num_classes = int(data.y[data.y != -1].max()) + 1

    # Calculate class weights. `minlength` pads classes that are missing
    # from a small training split, on the same device as the labels.
    class_counts = torch.bincount(data.y[train_mask], minlength=num_classes)

    # Inverse frequency weights
    class_weights = 1.0 / (class_counts.float() + 1e-5)
    class_weights = class_weights / class_weights.sum() * num_classes  # Normalize

    print(f"Class weights: {class_weights}")

    # Use embedding size of 32
    embedding_dim = 32

    model = DyC_DGNN(
        num_nodes=data.num_nodes,
        in_channels=embedding_dim,
        hidden_channels=hidden_channels,
        out_channels=num_classes,
        time_steps=time_steps
    )

    # FORCE CPU usage to avoid CUDA version mismatch
    device = torch.device('cpu')
    print(f"Using device: {device}")

    model = model.to(device)
    data = data.to(device)
    class_weights = class_weights.to(device)
    # Keep the masks on the same device as the tensors they index.
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    # Ground-truth labels never change; slice them once for the metrics.
    val_true = data.y[val_mask].cpu()
    test_true = data.y[test_mask].cpu()

    def predict():
        """Switch to eval mode and return the predicted class per node."""
        model.eval()
        with torch.no_grad():
            eval_logits, _, _ = model(model.node_emb.weight, data.edge_index)
        return eval_logits.argmax(dim=1)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)

    best_val_f1 = 0
    best_model_state = None
    patience = 20
    patience_counter = 0

    for epoch in range(epochs):
        # Training
        model.train()
        optimizer.zero_grad()

        # Use learnable embeddings
        x_input = model.node_emb.weight

        logits, _, _ = model(x_input, data.edge_index)

        total_loss, cls_loss, cont_loss = model.full_loss(
            logits, data.y, x_input, data.edge_index,
            train_mask, use_contrastive=True, class_weight=class_weights
        )

        total_loss.backward()
        optimizer.step()

        # Validation every 5 epochs
        if epoch % 5 != 0:
            continue

        pred = predict()
        val_f1 = f1_score(val_true, pred[val_mask].cpu(),
                          average='macro', zero_division=0)
        # Test F1 is logged for monitoring only; selection uses val F1.
        test_f1 = f1_score(test_true, pred[test_mask].cpu(),
                           average='macro', zero_division=0)

        print(f"Epoch {epoch:3d} | "
              f"Loss: {total_loss.item():.4f} "
              f"(Cls: {cls_loss:.4f}, Cont: {cont_loss:.4f}) | "
              f"Val F1: {val_f1:.4f} | Test F1: {test_f1:.4f}")

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            best_model_state = {k: v.cpu().clone()
                                for k, v in model.state_dict().items()}
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch}")
            break

    # Load best model and evaluate (if validation never improved on 0, the
    # final weights are evaluated as they are)
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    pred = predict()
    test_pred = pred[test_mask].cpu()
    test_f1_macro = f1_score(test_true, test_pred,
                             average='macro', zero_division=0)
    test_f1_micro = f1_score(test_true, test_pred,
                             average='micro', zero_division=0)

    return test_f1_macro, test_f1_micro

In [4]:
def _print_f1_summary(title, results, ratios):
    """Print mean ± std (in percent) of the scores collected per ratio."""
    print(f"\n{title} (Mean ± Std):")
    for ratio in ratios:
        scores = results[ratio]
        if scores:
            mean = np.mean(scores) * 100
            std = np.std(scores) * 100
        else:
            # No runs were recorded; report nan without numpy's
            # empty-slice warning.
            mean = std = float("nan")
        print(f"  {ratio*100:.0f}% Training: {mean:.2f} ± {std:.2f}")


def run_experiments(data, training_ratios=(0.4, 0.6, 0.8), num_runs=5):
    """
    Run complete experiments with multiple runs and ratios.

    For every training ratio, `num_runs` models are trained on fresh
    stratified splits; run `i` uses seed 42 + i for both the split and the
    global RNGs. Per-run and aggregated F1 scores are printed.

    Args:
        data: graph passed through to the split and training functions.
        training_ratios: iterable of training fractions to evaluate.
        num_runs: number of seeded repetitions per ratio.

    Returns:
        (results_macro, results_micro): dicts mapping each ratio to the
        list of test macro-/micro-F1 scores, one entry per run.
    """
    # Materialise once so any iterable (including a generator) can be
    # traversed several times below.
    ratios = tuple(training_ratios)
    results_macro = {ratio: [] for ratio in ratios}
    results_micro = {ratio: [] for ratio in ratios}

    for ratio in ratios:
        print(f"\n{'='*60}")
        print(f"Training Ratio: {ratio*100}%")
        print(f"{'='*60}")

        for run in range(num_runs):
            print(f"\n--- Run {run+1}/{num_runs} ---")
            run_seed = 42 + run
            set_seed(run_seed)

            # Create split
            train_mask, val_mask, test_mask = create_stratified_split(
                data, ratio, run_seed=run_seed
            )

            # Train model
            # Reduced hidden_channels and time_steps to prevent OOM
            test_f1_macro, test_f1_micro = train_model(
                data, train_mask, val_mask, test_mask,
                hidden_channels=32,  # Reduced from 64
                time_steps=3,        # Reduced from 5
                epochs=40,
                lr=0.001
            )

            results_macro[ratio].append(test_f1_macro)
            results_micro[ratio].append(test_f1_micro)

            print(f"Run {run+1} Results:")
            print(f"  Macro-F1: {test_f1_macro:.4f}")
            print(f"  Micro-F1: {test_f1_micro:.4f}")

    # Print final results
    print(f"\n{'='*60}")
    print("FINAL RESULTS")
    print(f"{'='*60}")

    _print_f1_summary("Macro-F1", results_macro, ratios)
    _print_f1_summary("Micro-F1", results_micro, ratios)

    return results_macro, results_micro

In [None]:
if __name__ == "__main__":
    # Build the graph from the extracted edge list and node-label files.
    data = load_tmall_data("tmall_data/tmall.txt", "tmall_data/node2label.txt")

    # Five seeded runs at each of the three training ratios.
    run_experiments(data, [0.4, 0.6, 0.8], num_runs=5)