In [1]:
import os
import sys
from importlib import reload
from pathlib import Path

import numpy as np
from torch.nn import ReLU, Linear
from torch_geometric.nn import GCNConv

In [2]:
# NOTE: Boilerplate setup for Jupyter imports.
# Puts the project's `src` directory on sys.path, imports the local modules,
# force-reloads them so on-disk edits are picked up without a kernel restart,
# and re-binds the names used by the rest of the notebook.

root = Path(os.getcwd()).parent
src_dir = (root / "src").as_posix()
# Guarded so that re-running this cell does not keep appending the same entry.
if src_dir not in sys.path:
    sys.path.append(src_dir)

import configs as config_module
import enhancer as enhancer_module
import strategies as encoders_module
import schema.task as edges_module
import schema.network as network_module
import schema.data as data_module
import utils.metrics as utils_module

# NOTE(review): reload order matters when one module does `from other import X`
# (the importer keeps the pre-reload object) -- confirm this order matches the
# dependency order between these modules.
reload(edges_module)
reload(config_module)
reload(enhancer_module)
reload(encoders_module)
reload(network_module)
reload(data_module)
reload(utils_module)

# Bind names from the freshly reloaded modules; always re-run this cell before
# any cell that uses them so they do not point at stale pre-reload objects.
PathConfig, TrainConfig = config_module.PathConfig, config_module.TrainConfig
Enhancer = enhancer_module.Enhancer
get_default_encoders = encoders_module.get_default_encoders
NetworkConfig, EnhancerData = network_module.NetworkConfig, data_module.EnhancerData
Task = edges_module.Task
euclid_metric, haversine_metric = utils_module.euclid_dist, utils_module.haversine_dist

In [4]:
from torch import from_numpy
from torch_geometric.nn import SAGEConv
from utils.metrics import euclid_dist
from sklearn.metrics import mean_squared_error

# Point the path config at the `processed/np/melbourne.npz` archive under the
# data root and load its arrays.
path_config = PathConfig(data_root="../data")
path_config.target_data = path_config.data_root / "processed/np/melbourne.npz"

with open(path_config.target_data, "rb") as f:
    unpacked = np.load(f)

    # Each array is cast to float32 and wrapped as a tensor. The key order
    # ("data", "target", "spatial") is the positional order EnhancerData receives.
    data = EnhancerData(
        *(
            from_numpy(unpacked[key].astype(np.float32))
            for key in ("data", "target", "spatial")
        )
    )

In [5]:
train_config = TrainConfig()

# Layer widths are named so the encoder's output width and the estimator's
# input width cannot drift apart when one of them is tuned.
ENCODER_DIM = 256
ESTIMATOR_DIM = 128

# Network definition: two SAGEConv layers as the encoder, followed by a
# Linear/ReLU stack that ends in a single output value.
gnn_setup = NetworkConfig(
    encoder=[
        # Input width is taken from the feature matrix's second dimension.
        SAGEConv(data.features.shape[1], ENCODER_DIM),
        SAGEConv(ENCODER_DIM, ENCODER_DIM),
    ],
    estimator=[
        Linear(ENCODER_DIM, ESTIMATOR_DIM),
        ReLU(),
        Linear(ESTIMATOR_DIM, ESTIMATOR_DIM),
        ReLU(),
        Linear(ESTIMATOR_DIM, 1),
    ],
)

In [6]:
# Edge-building strategies. Both distance metrics come from the aliases bound
# in the reload/setup cell, so they always refer to the reloaded `utils.metrics`.
knn_strategy = encoders_module.KNNStrategy(
    K=5,
    dist_metric=euclid_metric,
    cache_dir=path_config.edge_cache,
    cache_id="melbourne_knn",
)

threshold_strategy = encoders_module.ThresholdStrategy(
    dist_metric=haversine_metric,
    # NOTE(review): the unit of `max_dist` is not visible here (presumably the
    # unit returned by the haversine metric) -- confirm.
    max_dist=5,
    cache_dir=path_config.edge_cache,
    cache_id="melbourne_threshold",
)

# One task per strategy, all over the same dataset.
input_strategies = [
    Task(knn_strategy, data),
    Task(threshold_strategy, data),
]

In [8]:
# Run every task in `input_strategies` with the shared network and training
# config; `result` is consumed by the comparison cell that follows.
# NOTE(review): the saved output of this cell is
# `ValueError: Signature for first module required`, and execution counts in
# this notebook are out of order -- confirm the cell succeeds on a fresh
# kernel with Restart & Run All.
result = Enhancer.process_tasks(gnn_setup, train_config, input_strategies)

ValueError: Signature for first module required

In [7]:
# Build the comparison table scored with mean squared error, then print it.
comparison_table = result.get_comparison([mean_squared_error])
print(comparison_table)

Option           mean_squared_error      density    average degree    n connected components    largest component
-------------  --------------------  -----------  ----------------  ------------------------  -------------------
melbourne_knn           1.12104e+12  0.000322345           6.21031                        25                17602


In [8]:
# Stand-alone enhancer built from the network config, the training config and
# the KNN edge strategy only.
enhancer = Enhancer(gnn_setup, train_config, knn_strategy)

In [None]:
# Fit the enhancer on the dataset and keep the trained network.
# The second returned value is unused here. It is bound to a named throwaway
# rather than `_`, because assigning `_` in a notebook shadows IPython's
# "last output" variable for the rest of the session.
gnn, _fit_extra = enhancer.fit(data)

# Transform the same dataset with the fitted enhancer.
transformed_data = enhancer.transform(data)

GNN training: 100%|██████████| 10/10 [00:11<00:00,  1.14s/epoch, loss=1.87e+11]


In [10]:
# Call the KNN strategy directly on the dataset's spatial tensor to obtain the
# edge index passed to `gnn.predict` below.
edge_index = knn_strategy(data.spatial)

In [11]:
# Run the fitted network on the feature tensor and the KNN edge index.
# The saved output is a single-column tensor (presumably one prediction per row).
gnn.predict(data.features, edge_index)

tensor([[ 960410.3750],
        [ 930960.6875],
        [1099514.3750],
        ...,
        [ 789871.6250],
        [ 759577.0000],
        [ 669541.6875]])

In [12]:
# Shape check on the enhancer output. The saved output is (19267, 256); the
# second dimension equals the 256-wide encoder output configured in `gnn_setup`.
transformed_data.shape

(19267, 256)