In [1]:
import sys

# Make the local "./src" directory importable from this notebook.
# The membership check keeps the cell idempotent: re-running it does not
# pile up duplicate entries in sys.path.
if "./src" not in sys.path:
    sys.path.append("./src")

In [None]:
import compute_graph_vectorize.engines.torch as torch_engine
from compute_graph_vectorize.engines.torch.settings import TorchModuleSettings
from compute_graph_vectorize.sources.base import Network
from compute_graph_vectorize.sources.builders import from_neuralogic
from compute_graph_vectorize.sources.neuralogic_settings import NeuralogicSettings
from compute_graph_vectorize.vectorize.pipeline.pipeline import create_vectorized_network_compiler
from compute_graph_vectorize.vectorize.settings import VectorizeSettings, OptimizeSingleUseGathersSettings
from neuralogic.core import Aggregation, R, Template, Transformation, V
from neuralogic.dataset import TensorDataset
from neuralogic.dataset.tensor import Data
from torch_geometric.datasets import TUDataset

### Prepare Dataset and Template (Standard NeuraLogic Stuff)

NeuraLogic backend settings:

In [3]:
# Backend settings object; it is later passed both to `template.build(...)`
# and to `from_neuralogic(...)`.
n_settings = NeuralogicSettings()

# MANDATORY: both options below must be switched off for the vectorizer to work properly!
n_settings.iso_value_compression = False
n_settings.chain_pruning = False

In [4]:
def build_template(num_features: int, output_size: int, dim: int = 10):
    """Assemble the NeuraLogic template used throughout this notebook.

    The template consists of two stacked embedding layers (``l1_embed`` and
    ``l2_embed``), each summing weighted inputs over ``_edge(Y, X)``, followed
    by an averaged, weighted ``predict`` readout.

    Args:
        num_features: Second dimension of the weight attached to ``node_feature``.
        output_size: First dimension of the weight attached to ``predict``.
        dim: Size shared by the ``l1_embed``/``l2_embed`` weights.

    Returns:
        The populated ``Template`` instance.
    """
    template = Template()

    # Disabled alternative: an explicit weighted embedding of the raw node features.
    # template += (R.atom_embed(V.X)[dim, num_features] <= R.node_feature(V.X)) | [Transformation.IDENTITY]
    # template += R.atom_embed / 1 | [Transformation.IDENTITY]

    # Layer 1: l1_embed(X) sums weighted node_feature(Y) over _edge(Y, X); ReLU on the atom.
    l1_head = R.l1_embed(V.X)
    l1_body = (R.node_feature(V.Y)[dim, num_features], R._edge(V.Y, V.X))
    template += (l1_head <= l1_body) | [Aggregation.SUM, Transformation.IDENTITY]
    template += (R.l1_embed / 1) | [Transformation.RELU]

    # Layer 2: same shape as layer 1, fed by l1_embed; identity on the atom.
    l2_head = R.l2_embed(V.X)
    l2_body = (R.l1_embed(V.Y)[dim, dim], R._edge(V.Y, V.X))
    template += (l2_head <= l2_body) | [Aggregation.SUM, Transformation.IDENTITY]
    template += (R.l2_embed / 1) | [Transformation.IDENTITY]

    # Readout: average l2_embed over X into the weighted, zero-arity predict atom; sigmoid output.
    readout_head = R.predict[output_size, dim]
    readout_body = R.l2_embed(V.X)
    template += (readout_head <= readout_body) | [Aggregation.AVG, Transformation.IDENTITY]
    template += (R.predict / 0) | [Transformation.SIGMOID]

    return template

In [5]:
# Load the MUTAG dataset through PyTorch Geometric, using ./datasets as its root directory.
pyg_dataset = TUDataset(root="./datasets", name="MUTAG")

num_node_features = pyg_dataset.num_node_features

# Convert each PyG graph with `Data.from_pyg`, keeping the first element of what it returns.
# NOTE(review): `number_of_classes` receives the node-feature count rather than
# a class count — confirm this is intended.
dataset = TensorDataset(
    data=[Data.from_pyg(data)[0] for data in pyg_dataset],
    number_of_classes=num_node_features,
)

# Template sized to the dataset's node features, with dim=10 and a single output value.
template = build_template(num_features=num_node_features, output_size=1, dim=10)

# build the dataset in neuralogic
built_dataset = template.build(n_settings).build_dataset(dataset)


### Build a Network representation

This is essentially an API that accesses the internal neural network of the NeuraLogic Java backend. Other implementations of this API can be provided to support backends other than NeuraLogic. An implementation for Python dictionaries also exists (in case you want to write out the whole neural network in Python).

Please see the Network API documentation in `compute_graph_vectorize/sources/base.py`.

The NeuraLogic implementation already performs basic vectorization (grouping into layers). It also ensures that the layers are topologically ordered: if layer2 takes layer1 as input, then layer1 comes before layer2 in `network.layers`.

Build the Network container:

In [6]:
# Copy of the full sample list of the built dataset.
samples = built_dataset.samples[:] # you can filter out samples here

# Wrap the samples in the Network API, using the same backend settings as the template build.
network: Network = from_neuralogic(samples, n_settings)
# Bare last expression: displays the network's repr as the cell output.
network

NetworkImpl((node_feature__f: FactLayer -> (length: 3371)), (l1_embed__wr: WeightedRuleLayer -> (length: 7442)), (l1_embed__ag: AggregationLayer -> (length: 3371)), (l1_embed__a: AtomLayer -> (length: 3371)), (l2_embed__wr: WeightedRuleLayer -> (length: 7442)), (l2_embed__ag: AggregationLayer -> (length: 3371)), (l2_embed__a: AtomLayer -> (length: 3371)), (predict__r: RuleLayer -> (length: 3371)), (predict__ag: AggregationLayer -> (length: 188)), (predict__wa: WeightedAtomLayer -> (length: 188)))

Below are some examples of how the Network API can be used:

In [7]:
# Display the layer definitions of the network.
network.layers

LayerDefinitionsImpl(node_feature__f, l1_embed__wr, l1_embed__ag, l1_embed__a, l2_embed__wr, l2_embed__ag, l2_embed__a, predict__r, predict__ag, predict__wa)

In [8]:
# Fact layer: its neurons, their inputs, and its ordinals (printed as (layer name, index) pairs).
print(network['node_feature__f'])
print(network['node_feature__f'].inputs)
print(network['node_feature__f'].ordinals)
# Weighted-rule layer: its neurons, the neurons feeding it, and the ordinals of those inputs.
print(network['l1_embed__wr'])
print(network['l1_embed__wr'].inputs)
print(network['l1_embed__wr'].inputs.ordinals)
# One level further: the inputs of the input neurons, and their ordinals.
print(network['l1_embed__wr'].inputs.inputs)
print(network['l1_embed__wr'].inputs.inputs.ordinals)

_LayerNeurons(length: 3371)
_Neurons(length: 0)
_Ordinals((node_feature__f, 0), (node_feature__f, 1), (node_feature__f, 2), (node_feature__f, 3), (node_feature__f, 4), ... (length: 3371))
_LayerNeurons(length: 7442)
_Neurons(length: 7442)
_Ordinals((node_feature__f, 0), (node_feature__f, 1), (node_feature__f, 0), (node_feature__f, 2), (node_feature__f, 3), ... (length: 7442))
_Neurons(length: 0)
_Ordinals( (length: 0))


### Compile The Network Into Vectorized/Optimized Equivalent

We will not be using the Network API directly; we will be using the Compiler.

Vectorizer/compiler settings:

In [9]:
# You may try some of the different presets for the optimize_single_use_gathers optimization
# by uncommenting the argument below. (Other optimizations can be left at their defaults.)
v_settings = VectorizeSettings(
    # optimize_single_use_gathers=OptimizeSingleUseGathersSettings.preset("agg_true_unlimited")
)

In [10]:
# WARNING: Very verbose when True! Prints the whole network at every optimization step.
# The flag is passed to create_vectorized_network_compiler(...).
debug_prints = False

Build the compiler:

In [11]:
# Create the compiler from the vectorizer settings; it is called on a Network to compile it.
compiler = create_vectorized_network_compiler(
    v_settings,
    forward_pass_runner=torch_engine.torch_simple_forward_pass_runner,  # needed for ISO compression
    debug_prints=debug_prints,
)

Compile the network:

In [12]:
# Run the compiler on the Network container built earlier.
vectorized_network = compiler(network)
# Bare last expression: displays the compiled network's repr as the cell output.
vectorized_network

VectorizedOpSeqNetwork(
  (fact_layers): {
    (f_node_feature__f__0): FactLayer([1, 7, 1], [1, 7, 1], [1, 7, 1], ... (size: 7), count=7, shape=[7, 1])
  }
  (weights): {
    (w_000): LearnableWeight(value=array((1, 10, 7)))
    (w_001): LearnableWeight(value=array((1, 10, 10)))
    (w_002): LearnableWeight(value=array((1, 1, 10)))
  }
  (batches): {
    (0): OpSeqBatch(
      (layers): {
        (l_predict__wa): OperationSeq(
          expected_count=188
          (layer_refs): LayerRefs(<f|f_node_feature__f__0>)
          (0): Linear(
            (weight_ops): OperationSeq(
              expected_count=None
              (layer_refs): LayerRefs(<w|w_000>)
            )
          )
          (1): GenericGather(ordinals=[0, 0, 0, ... (size: 75)])
          (2): UnevenReduce(counts=[2, 3, 3, ... (size: 28)], total=75, reduce='sum')
          (3): Transform(transform='relu')
          (4): Linear(
            (weight_ops): OperationSeq(
              expected_count=None
              (la

We may look into some internals:

In [13]:
# Print the flattened value of the first seven facts (indices 0-6) of the node-feature fact layer.
node_feature_facts = vectorized_network.fact_layers['f_node_feature__f__0'].facts
for fact_index in range(7):
    print(node_feature_facts[fact_index].value.flatten())

[1. 0. 0. 0. 0. 0. 0.]
[0. 1. 0. 0. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 0.]
[0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 0. 0. 0. 1.]


In [14]:
# Ordinals of the operation at index 1 of the `l_predict__wa` sequence in batch 0
# (a GenericGather in the network shown above).
print(vectorized_network.batches[0].layers['l_predict__wa'][1].ordinals)

[0, 0, 0, 0, 0, 0, 1, 0, 1, 2, 0, 2, 2, 2, 0, 1, 0, 0, 0, 1, 3, 0, 0, 0, 0, 2, 0, 1, 1, 0, 0, 4, 0, 0, 2, 5, 0, 0, 0, 0, 3, 3, 3, 0, 3, 1, 0, 2, 2, 0, 0, 6, 0, 0, 5, 0, 2, 1, 2, 1, 0, 2, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 6, 0, 0]


#### Optional Exercise

Go back and try the different presets of the `optimize_single_use_gathers` optimization.
You will see how it affects the different gather/reduce operations in the network above.

You can also try changing its individual configuration options manually, or changing other configuration options, to see how they affect the resulting vectorized network.

For example, you may see what happens when you disable some optimizations.

### Train in PyTorch (on any PyTorch-supported hardware)

PyTorch engine settings:

In [15]:
# PyTorch engine settings, constructed without overrides; passed to build_torch_model(...).
t_settings = TorchModuleSettings()

In [16]:
# Enable debug for easier understanding of exceptions during the forward pass.
# The flag is passed to build_torch_model(...) as `debug`.
debug = False

In [17]:
# If True: each forward pass returns only the final output tensor.
# If False: each forward pass returns a dict of layer name -> tensor.
final_layer_only = True

Build the PyTorch model:

In [18]:
# Build the PyTorch model from the vectorized network, then display its structure.
torch_model = torch_engine.build_torch_model(
    vectorized_network,
    t_settings,
    debug=debug,
    final_layer_only=final_layer_only,
)
torch_model

NetworkModule(
  (params_module): NetworkParams(
    (params): ParameterDict(
        (f_node_feature__f__0): Parameter containing: [torch.FloatTensor of size 7x7x1]
        (w_000): Parameter containing: [torch.FloatTensor of size 1x10x7]
        (w_001): Parameter containing: [torch.FloatTensor of size 1x10x10]
        (w_002): Parameter containing: [torch.FloatTensor of size 1x1x10]
    )
  )
  (batch_modules): ModuleList(
    (0): Sequential(
      (0): LayerModule(
        out_key: l_predict__wa,
        expected_count: 188,
        (the_modules): ModuleList(
          (0): RetrieveRefModule(f_node_feature__f__0)
          (1): LinearModule(
            (retrieve_weights): Sequential(
              (0): RetrieveRefModule(w_000)
            )
          )
          (2): GenericGatherModule([0, 0, 0, ... (size: 75)])
          (3): SegmentCSR(reduce=sum, count=28)
          (4): ReLU()
          (5): LinearModule(
            (retrieve_weights): Sequential(
              (0): Retriev

Run a forward pass on it:

In [19]:
# Assumes final_layer_only is True, so a single tensor comes back and `.shape` is available;
# with final_layer_only=False the result is a dict of layer name -> tensor instead.
output = torch_model(batch=0) # Note: Currently batching is not supported, but the data model is ready for it.
print(output.shape)

torch.Size([188, 1, 1])


You can now run your typical PyTorch training pipeline on top of `torch_model`.

If you are having problems on your hardware, try setting `t_settings.reduce_method = "scatter"` (before building the torch model).
If you are still having problems on your hardware, it may be because gather/scatter operations are not supported on it in PyTorch.

### Extract Weights Back

In [20]:
# Pull the weights back out of the torch model as a mapping of weight key -> tensor.
weights = torch_engine.extract_weights_from_torch_model(torch_model)
# Show only the shapes to keep the output short.
print({k: v.shape for k, v in weights.items()})

{'000': torch.Size([1, 10, 7]), '001': torch.Size([1, 10, 10]), '002': torch.Size([1, 1, 10])}
