In [2]:
from scipy.io import loadmat
# Read data/texas.mat; loadmat returns a dict keyed by MATLAB variable name
# (plus '__header__', '__version__' and '__globals__' metadata entries).
texas = loadmat('data/texas.mat')

In [4]:
# Echo the loaded dict to inspect its keys and contents.
# NOTE(review): this prints every array in full; showing only the keys and
# array shapes would keep the cell output readable.
texas

{'__header__': b'MATLAB 5.0 MAT-file Platform: posix, Created on: Thu Feb  4 00:14:18 2021',
 '__version__': '1.0',
 '__globals__': [],
 'edge_index': array([[  0,   0,   0,   1,   1,   2,   2,   3,   4,   4,   4,   4,   5,
           6,   7,   7,   7,   8,   9,  10,  10,  11,  11,  11,  12,  13,
          13,  14,  15,  15,  15,  15,  15,  16,  17,  17,  18,  18,  19,
          20,  20,  20,  20,  20,  21,  22,  23,  23,  23,  24,  25,  25,
          25,  26,  27,  28,  28,  28,  29,  29,  29,  29,  30,  30,  30,
          31,  32,  33,  34,  34,  34,  35,  36,  36,  36,  37,  38,  39,
          39,  40,  40,  41,  41,  41,  41,  41,  42,  42,  42,  43,  44,
          44,  45,  45,  46,  46,  47,  47,  47,  48,  49,  50,  50,  50,
          50,  51,  51,  52,  53,  53,  54,  54,  55,  55,  56,  56,  56,
          56,  56,  56,  56,  56,  56,  56,  56,  56,  56,  56,  56,  56,
          56,  56,  56,  56,  56,  56,  56,  56,  56,  56,  56,  56,  56,
          56,  56,  56,  56,  56,  5

In [28]:
import os
import shutil
import scipy.io
import numpy as np

# Directory that holds the source .mat files; every dataset gets its own
# sub-folder inside it.
data_dir = "data"

# Collect all .mat files. os.listdir returns entries in arbitrary order, so
# sort them to make the conversion order (and the log below) reproducible.
mat_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".mat"))

# Convert each <name>.mat into <data_dir>/<name>/<name>.npz
for mat_file in mat_files:
    dataset_name = os.path.splitext(mat_file)[0]  # strip the .mat suffix
    dataset_folder = os.path.join(data_dir, dataset_name)

    # Read the .mat file
    mat_path = os.path.join(data_dir, mat_file)
    mat_data = scipy.io.loadmat(mat_path)

    # Pull out the fields we need (None when a field is absent)
    edge_index = mat_data.get("edge_index", None)
    feats = mat_data.get("node_feat", None)
    labels = mat_data.get("label", None)

    # Skip files that lack any required field
    if edge_index is None or feats is None or labels is None:
        print(f"Warning: {mat_file} is missing required fields.")
        continue

    # Drop singleton axes so labels have shape (n,). atleast_1d keeps a
    # dataset with a single label as shape (1,) instead of a 0-d array.
    labels = np.atleast_1d(np.squeeze(labels))

    # Create the output folder only once the file is known to be convertible,
    # so skipped files do not leave empty directories behind.
    os.makedirs(dataset_folder, exist_ok=True)

    # Save as an .npz archive
    npz_path = os.path.join(dataset_folder, f"{dataset_name}.npz")
    np.savez(npz_path, edge_index=edge_index, feats=feats, labels=labels)

    print(f"Converted {mat_file} to {npz_path}")
    print(f"Shapes - edge_index: {edge_index.shape}, feats: {feats.shape}, labels: {labels.shape}")
    

Converted cornell.mat to data/cornell/cornell.npz
Shapes - edge_index: (2, 478), feats: (183, 1703), labels: (183,)
Converted genius.mat to data/genius/genius.npz
Shapes - edge_index: (2, 984979), feats: (421961, 12), labels: (421961,)
Converted texas.mat to data/texas/texas.npz
Shapes - edge_index: (2, 492), feats: (183, 1703), labels: (183,)
Converted wisconsin.mat to data/wisconsin/wisconsin.npz
Shapes - edge_index: (2, 750), feats: (251, 1703), labels: (251,)
Converted film.mat to data/film/film.npz
Shapes - edge_index: (2, 37526), feats: (7600, 932), labels: (7600,)
Converted chameleon.mat to data/chameleon/chameleon.npz
Shapes - edge_index: (2, 38328), feats: (2277, 2325), labels: (2277,)
Converted squirrel.mat to data/squirrel/squirrel.npz
Shapes - edge_index: (2, 222134), feats: (5201, 2089), labels: (5201,)


In [32]:
# Load the converted genius dataset. np.load on an .npz returns a lazily-read
# NpzFile that keeps the file open, so read it inside a `with` block to release
# the handle as soon as the arrays are in memory. `data` stays available as a
# plain dict of the loaded arrays.
with np.load('data/genius/genius.npz') as npz_file:
    data = {key: npz_file[key] for key in npz_file.files}
edge_index, feat, labels = data['edge_index'], data['feats'], data['labels']
num_nodes = labels.shape[0]  # labels holds one entry per node
# NOTE(review): GraphConstruct must already be defined when this cell runs; in
# this notebook its definition sits in a later cell, so a fresh
# "Restart & Run All" fails here with NameError unless that cell is moved up.
graph = GraphConstruct(edge_index, num_nodes)

In [34]:
def GraphConstruct(edge_index, n):
    """Build a directed adjacency list from an edge list.

    Args:
        edge_index: Array-like of integer node ids whose row 0 holds the
            source and row 1 the target of every edge.
        n: Number of nodes; one (possibly empty) neighbour list is created
            per node.

    Returns:
        A list of ``n`` lists of Python ints. ``graph[u]`` contains the target
        of every edge whose source is ``u``, in the order the edges appear in
        ``edge_index``. Only the ``u -> v`` direction is recorded.

    Raises:
        IndexError: If any source id lies outside ``[0, n)``. Negative ids are
            rejected explicitly because Python list indexing would otherwise
            wrap them around to the end of the list without any error.
    """
    edges = np.asarray(edge_index)  # also accepts plain nested lists
    sources, targets = edges[0], edges[1]

    if sources.size and (sources.min() < 0 or sources.max() >= n):
        raise IndexError(
            f"edge_index contains source ids outside the valid range [0, {n})"
        )

    graph = [[] for _ in range(n)]
    # tolist() converts to Python ints once up front, which is much faster
    # than indexing the array element by element and keeps numpy scalars out
    # of the result. Target ids are stored as given and are not range-checked.
    for u, v in zip(sources.tolist(), targets.tolist()):
        graph[u].append(v)
    return graph

In [36]:
# Echo the adjacency list built above.
# NOTE(review): this prints one entry per node, so the output is very long;
# a slice such as graph[:5] would be enough for a sanity check.
graph

[[],
 [17960,
  20144,
  37441,
  38449,
  65554,
  80135,
  107585,
  141455,
  143522,
  180059,
  191279,
  219129,
  233282,
  241216,
  256549,
  289142,
  293700,
  298471,
  327076,
  330202,
  362675,
  364541,
  409196],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [83620, 153001, 168885, 234845],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [17154, 70605, 84901, 151994, 192142, 346977, 370534],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [38998],
 [],
 [],
 [],
 [],
 [],
 [],
 [246845],
 [],
 [],
 [],
 [],
 [],
 [],
 [1264,
  2640,
  8105,
  16225,
  16696,
  17355,
  19440,
  22060,
  29136,
  31908,
  37755,
  38090,
  39520,
  59447,
  64271,
  66500,
  74430,
  76845,
  79950,
  86533,
  87670,
  89245,
  93518,
  93995,
  94676,
  102404,
  107144,
  108780,
  108800,
  111621,
  111713,
  113915,
  119103,
  119986,
  131223,
  136772,
  139338,
  140371,
  140481,
  141843,
  142450,
  145420,
  145928,
  147163,
  149990,
  161997,
  166488,
  168765,
  170100,
  187195,
  