In [1]:
import os
import sys
from importlib import reload
from pathlib import Path

import numpy as np
from torch.nn import ReLU, Linear
from torch_geometric.nn import GCNConv

In [None]:
# NOTE: Boilerplate setup for Jupyter imports.
#
# The project modules are imported from `<root>/src`, where `root` is the parent
# of the current working directory. That directory has to be on `sys.path`
# before the imports below can resolve, which is why they live in this cell.

root = Path.cwd().parent
src_dir = (root / "src").as_posix()

# This cell is meant to be re-run (see the reloads below); guard the append so
# repeated runs do not pile up duplicate entries on `sys.path`.
if src_dir not in sys.path:
    sys.path.append(src_dir)

import configs as config_module
import enhancer as enhancer_module
import strategies as encoders_module
import schema.edges as edges_module
import schema.network as network_module
import schema.data as data_module
import utils.metrics as utils_module

# Re-execute the project modules so edits made on disk are picked up without
# restarting the kernel.
# NOTE(review): `reload` only refreshes the module it is given. If a module
# reloaded early (e.g. `enhancer`) imports names from one reloaded later, it
# may keep the pre-reload objects — confirm the order against the real
# dependency graph, or consider `%autoreload 2` instead.
reload(edges_module)
reload(config_module)
reload(enhancer_module)
reload(encoders_module)
reload(network_module)
reload(data_module)
reload(utils_module)

# Short aliases, bound after the reloads so they point at the freshly
# re-executed module attributes.
PathConfig = config_module.PathConfig
TrainConfig = config_module.TrainConfig
Enhancer = enhancer_module.Enhancer
get_default_encoders = encoders_module.get_default_encoders
GNNConfig = network_module.NetworkConfig
EnhancerData = data_module.EnhancerData
GraphSetup = edges_module.GraphSetup
euclid_metric = utils_module.euclid_dist

In [3]:
path_config = PathConfig(data_root=(root / "data").as_posix())

# Location of the `cora.npz` archive, relative to the configured data root.
cora_npz_path = path_config.data_root / "processed/np/cora.npz"

with open(cora_npz_path, "rb") as npz_file:
    # `np.load` on an .npz archive reads its members lazily, so every array is
    # pulled out while the underlying file handle is still open.
    archive = np.load(npz_file)

    # NOTE: the "target" array is flattened to 1-D before being handed to
    # `EnhancerData`; "data" and "spatial" are passed through unchanged.
    data = EnhancerData(
        archive["data"],
        archive["target"].reshape(-1),
        archive["spatial"],
    )

In [4]:
# Layer widths that depend on the loaded data: the encoder input width is the
# second axis of the feature matrix, and the final linear layer has one output
# per distinct value found in the target array.
num_features = data.features.shape[1]
num_classes = len(np.unique(data.target))

# Two graph-convolution layers feed a three-layer MLP with ReLU activations.
gnn_setup = GNNConfig(
    encoder=[
        GCNConv(num_features, 256),
        GCNConv(256, 256),
    ],
    estimator=[
        Linear(256, 128),
        ReLU(),
        Linear(128, 128),
        ReLU(),
        Linear(128, num_classes),
    ],
)

In [None]:
# Edge-construction strategies to compare. Only the KNN entry is enabled; the
# commented-out entries are alternative configurations kept for reference.
# NOTE(review): the output saved beneath this cell is a ValueError about input
# arrays with differing numbers of dimensions. The root cause is not visible
# from this notebook — investigate before relying on `result`.
strategy_options = [
    # encoders_module.ThresholdStrategy(
    #     max_dist=5,
    #     subsample_rate=0.5,
    #     cache_dir=path_config.edge_cache,
    #     cache_id="cora_dist",
    # ),
    # encoders_module.AnchorStrategy(
    #     cluster_sample_rate=0.7,
    #     cache_dir=path_config.edge_cache,
    #     cache_id="cora_repr",
    # ),
    # encoders_module.GridStrategy(
    #     intra_edge_ratio=0.3,
    #     source_inter_ratio=0.01,
    #     k_connectivity=3,
    #     bins=4,
    #     cache_dir=path_config.edge_cache,
    #     cache_id="cora_grid",
    # ),
    encoders_module.KNNStrategy(
        K=5,
        dist_metric=euclid_metric,
        cache_dir=path_config.edge_cache,
        cache_id="cora_knn",
    ),
]

# Wrap every strategy together with the loaded dataset in a `GraphSetup`.
strategies = [GraphSetup(option, data) for option in strategy_options]

result = Enhancer.compare_strategies(gnn_setup, strategies)

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)

In [None]:
# Print the value returned by `Enhancer.compare_strategies`.
# NOTE(review): the table saved below lists `cora_dist` and `cora_repr`, not the
# `cora_knn` strategy that is currently enabled — the output looks stale and
# should be regenerated with a fresh run.
print(result)

Option       accuracy_score    f1_score
---------  ----------------  ----------
cora_dist          0.372694    0.155546
cora_repr          0.416974    0.319438


In [None]:
# Strategy for the single-strategy run. The cache id spells out the threshold,
# so update both together if `max_dist` is changed.
threshold_strategy = encoders_module.ThresholdStrategy(
    max_dist=5,
    cache_dir=path_config.edge_cache,
    cache_id="cora_dist [max dist = 5]",
)

# Combine the network configuration defined earlier with that strategy.
enhancer = Enhancer(net_config=gnn_setup, strategy=threshold_strategy)

In [None]:
# Fit the enhancer on `data`. `fit` yields two values: the first is kept as
# `gnn`, the second is discarded.
# NOTE(review): binding `_` shadows IPython's "last output" variable for the
# rest of the session — consider a descriptive throwaway name instead.
gnn, _ = enhancer.fit(data)

In [None]:
# Display the shape of the transformed output for the same dataset.
# NOTE(review): the saved output's second dimension (256) matches the encoder
# width configured in `gnn_setup` — presumably these are encoder embeddings; confirm.
enhancer.transform(data).shape

(2708, 256)