In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
from scipy.stats import norm
import matplotlib.pyplot as plt
import json
import torch.optim as optim
import copy
from torch.nn.functional import binary_cross_entropy
from sklearn.metrics import precision_recall_fscore_support
from torch.optim import lr_scheduler
import networkx as nx

# Seed value handed to set_seed() below so that random draws are repeatable.
SEED = 16
# Dataset locations, currently commented out:
#TRAIN_PATH = "data/q2_train.json"
#VAL_PATH = "data/q2_val.json"

def set_seed(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: value passed to every random number generator (default 42).
    """
    # Same seed for the three CPU-side generators, in a fixed order.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # CUDA generators are only seeded when a CUDA device is usable.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # Request deterministic cuDNN kernels and turn off the auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
    

# Seed all random number generators with the file-wide SEED.
set_seed(SEED)

In [None]:

def load_data_from_json(json_path):
    """
    Loads a JSON describing a heterogeneous graph and converts it to tensors.

    The file must contain a top-level "nodes" list. Every node entry provides
    "global_index", "node_type", "features" and "connected_nodes"; "label" is
    optional. Nodes are ordered by "global_index" and the position in that
    ordering is the node's row in the adjacency matrix. The entries of
    "connected_nodes" are used directly as row/column indices, so global
    indices are expected to be 0...N-1.

    Args:
      json_path : path of the JSON file to read (decoded as UTF-8).

    Returns:
      A : torch.FloatTensor, shape (N, N)
          Symmetric 0/1 adjacency matrix for all nodes (0...N-1).
      X1 : torch.FloatTensor, shape (N1, d1)
          Features of the "type1" nodes, in adjacency order. An empty
          (0, 0) tensor when there are no such nodes.
      X2 : torch.FloatTensor, shape (N2, d2)
          Features of every other node (treated as "type2"), in adjacency
          order. An empty (0, 0) tensor when there are no such nodes.
      y : torch.LongTensor, shape (N,)
          Labels for each node (0 or 1 for binary). A missing or null label
          is stored as 0.
      mapping : dict
          mapping[i] = {
             "node_type": str ("type1" or "type2"),
             "feat_index": int index into the relevant feature array
          }
        This tells us how adjacency row i corresponds to a row in either
        X1 or X2.

    Raises:
      KeyError: if "nodes" or a required per-node key is missing.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Order nodes so adjacency row i matches global_index = i.
    nodes_info = sorted(data["nodes"], key=lambda node: node["global_index"])
    N = len(nodes_info)

    # Edge endpoints are collected first and written to A in one shot below,
    # instead of issuing two tiny tensor writes per edge.
    edge_rows = []
    edge_cols = []
    type1_list = []
    type2_list = []
    labels_list = []
    mapping = {}

    for i, node in enumerate(nodes_info):
        neighbours = node["connected_nodes"]
        edge_rows.extend([i] * len(neighbours))
        edge_cols.extend(neighbours)

        # An absent label is handled exactly like an explicit null one.
        label = node.get("label")
        labels_list.append(0 if label is None else label)

        # The current list length is the row this node will occupy in X1/X2.
        if node["node_type"] == "type1":
            mapping[i] = {"node_type": "type1", "feat_index": len(type1_list)}
            type1_list.append(node["features"])
        else:
            mapping[i] = {"node_type": "type2", "feat_index": len(type2_list)}
            type2_list.append(node["features"])

    A = torch.zeros((N, N), dtype=torch.float)
    if edge_rows:
        rows = torch.tensor(edge_rows, dtype=torch.long)
        cols = torch.tensor(edge_cols, dtype=torch.long)
        A[rows, cols] = 1.0
        A[cols, rows] = 1.0  # undirected

    # Convert type1 and type2 lists into float tensors
    X1 = torch.tensor(type1_list, dtype=torch.float) if type1_list else torch.empty((0, 0))
    X2 = torch.tensor(type2_list, dtype=torch.float) if type2_list else torch.empty((0, 0))

    # Convert labels
    y = torch.tensor(labels_list, dtype=torch.long)  # shape (N,)

    return A, X1, X2, y, mapping
