# Relational Concept Bottleneck Models

First of all we need to create a dataset. For this we will use a standard dataset used for graph neural networks, the Cora dataset. The Cora dataset consists of 2708 scientific publications classified into one of seven classes. The citation network consists of 5429 links. Each publication in the dataset is described by a 0/1-valued word vector indicating the absence/presence of the corresponding word from the dictionary. The dictionary consists of 1433 unique words.

We first pre-train a graph neural network model on the Cora dataset. We will use the PyTorch Geometric library to load the dataset and create the model. The model is a Graph Convolutional Network (GCN) trained using the standard cross-entropy loss.

In [22]:
import sys
sys.path.append('..')

In [23]:
import torch
from datasets.gnn_benchmarks import gnn_benchmark_dataset
_, (train_data, queries_train), _, (test_data, queries_test), num_classes, manifold_ids = gnn_benchmark_dataset('CORA', perc_train=1.0, pretrain_seed = 0)
train_dl = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, pin_memory=True)

We inspect the data:

In [24]:
# these are input features
print(train_data.tensors[0].shape)
train_data.tensors[0]

torch.Size([1, 2708, 16])


tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 2.6401],
         [3.2565, 0.0000, 0.0000,  ..., 0.9476, 3.2784, 2.2935],
         [2.0837, 0.0000, 0.0000,  ..., 0.4678, 2.2164, 2.2745],
         ...,
         [1.7258, 0.0000, 2.4617,  ..., 0.7127, 1.5019, 0.6909],
         [0.0000, 0.2022, 0.0000,  ..., 0.0000, 0.0000, 2.0309],
         [0.0000, 0.2505, 0.0000,  ..., 0.0309, 0.0000, 1.7998]]],
       grad_fn=<UnsqueezeBackward0>)

In [25]:
# these are concept labels to predict
print(train_data.tensors[1].shape)
train_data.tensors[1][0, :10]

torch.Size([1, 980])


tensor([0, 0, 0, 1, 0, 0, 0, 0, 0, 0])

In [26]:
# these are class labels to predict (same as concept labels!)
print(train_data.tensors[2].shape)
train_data.tensors[1][0, :10]

torch.Size([1, 980])


tensor([0, 0, 0, 1, 0, 0, 0, 0, 0, 0])

In [27]:
# these are connections in the citation graph (e.g., "paper 0 cites paper 633")
manifold_ids

array([['0', '633'],
       ['0', '1862'],
       ['0', '2582'],
       ...,
       ['2707', '598'],
       ['2707', '1473'],
       ['2707', '2706']], dtype='<U21')

Create a domain of documents:

In [92]:
from rcbm.logic.commons import Domain
n_samples = train_data.tensors[0].shape[1]
n_features = train_data.tensors[0].shape[2]
documents = Domain("documents", [f'{i}' for i in torch.arange(n_features).tolist()])
print(f'Number of constants: {len(documents.constants)}')
documents.constants[:10]

Number of constants: 16


['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [93]:
from rcbm.logic.commons import Rule
body = ['class%d(X)' % c for c in range(num_classes)]
head = ['class%d(Y)' % c for c in range(num_classes)]
rule = Rule("phi", body=body+head, head=head, var2domain={"X": "documents", "Y": "documents"})
rule.body

[('class0', 'X'),
 ('class1', 'X'),
 ('class2', 'X'),
 ('class3', 'X'),
 ('class4', 'X'),
 ('class5', 'X'),
 ('class6', 'X'),
 ('class0', 'Y'),
 ('class1', 'Y'),
 ('class2', 'Y'),
 ('class3', 'Y'),
 ('class4', 'Y'),
 ('class5', 'Y'),
 ('class6', 'Y')]

In [94]:
rule.head

[('class0', 'Y'),
 ('class1', 'Y'),
 ('class2', 'Y'),
 ('class3', 'Y'),
 ('class4', 'Y'),
 ('class5', 'Y'),
 ('class6', 'Y')]

In [95]:
rule2 = Rule("mutex", body=body, head=[], var2domain={"X": "documents"})
rule2

mutex: ('class0', 'X'),('class1', 'X'),('class2', 'X'),('class3', 'X'),('class4', 'X'),('class5', 'X'),('class6', 'X') -> 

In [96]:
from rcbm.logic.grounding import DomainGrounder
grounder = DomainGrounder({"documents": documents.constants}, [rule, rule2], manifolds_per_rule={"phi": manifold_ids})
groundings = grounder.ground()
groundings['phi'][0][0]

((('class0', '65'),
  ('class1', '65'),
  ('class2', '65'),
  ('class3', '65'),
  ('class4', '65'),
  ('class5', '65'),
  ('class6', '65')),
 (('class0', '239'),
  ('class1', '239'),
  ('class2', '239'),
  ('class3', '239'),
  ('class4', '239'),
  ('class5', '239'),
  ('class6', '239'),
  ('class0', '65'),
  ('class1', '65'),
  ('class2', '65'),
  ('class3', '65'),
  ('class4', '65'),
  ('class5', '65'),
  ('class6', '65')))

In [97]:
groundings['mutex'][0][0]

((),
 (('class0', '0'),
  ('class1', '0'),
  ('class2', '0'),
  ('class3', '0'),
  ('class4', '0'),
  ('class5', '0'),
  ('class6', '0')))

In [98]:
from rcbm.logic.indexing import DictBasedIndexer
from rcbm.logic.semantics import ProductTNorm
logic = ProductTNorm()
indexer = DictBasedIndexer(groundings, {"tasks": queries_train, "concepts": queries_train}, logic=logic)

In [120]:
emb_size = 16
n_concepts = len(rule.body)
n_classes = len(rule.head)

encoder = torch.nn.Sequential(
    torch.nn.Linear(n_features, emb_size),
    torch.nn.LeakyReLU(),
)
relation_classifiers = {}
for relation_name, relation_arity in indexer.relations_arity.items():
    relation_classifiers[relation_name] = torch.nn.Sequential(
        torch.nn.Linear(emb_size, emb_size),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(emb_size, 1),
        torch.nn.Sigmoid()
    )
reasoner = torch.nn.Sequential(
    torch.nn.Linear(n_concepts, emb_size),
    torch.nn.LeakyReLU(),
    torch.nn.Linear(emb_size, n_classes),
    torch.nn.Sigmoid()
)
model = torch.nn.Sequential(encoder, *relation_classifiers.values(), reasoner)
model

Sequential(
  (0): Sequential(
    (0): Linear(in_features=16, out_features=16, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (1): Sequential(
    (0): Linear(in_features=16, out_features=16, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=16, out_features=1, bias=True)
    (3): Sigmoid()
  )
  (2): Sequential(
    (0): Linear(in_features=16, out_features=16, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=16, out_features=1, bias=True)
    (3): Sigmoid()
  )
  (3): Sequential(
    (0): Linear(in_features=16, out_features=16, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=16, out_features=1, bias=True)
    (3): Sigmoid()
  )
  (4): Sequential(
    (0): Linear(in_features=16, out_features=16, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=16, out_features=1, bias=True)
    (3): Sigmoid()
  )
  (5): Sequential(
    (0): Linear(in_features=16, out_featur

## Training

In [121]:
X = train_data.tensors[0].squeeze(0)
embeddings = encoder(X)
embeddings.shape

torch.Size([2708, 16])

In [125]:
# relation/concept predictions
concept_predictions = indexer.predict_relations(encoders=relation_classifiers, embeddings=embeddings)
concept_predictions.shape, concept_predictions

(torch.Size([18956, 1]),
 tensor([[0.5043],
         [0.4720],
         [0.4785],
         ...,
         [0.4782],
         [0.4772],
         [0.4793]], grad_fn=<CatBackward0>))

In [130]:
c_preds = indexer.gather_and_concatenate(concept_predictions, indexer.indexed_queries["concepts"], 0)
c_preds.shape, c_preds[:10]

(torch.Size([980, 1]),
 tensor([[0.5043],
         [0.4648],
         [0.5644],
         [0.4980],
         [0.5405],
         [0.5061],
         [0.4710],
         [0.4720],
         [0.4271],
         [0.5483]], grad_fn=<SliceBackward0>))

In [132]:
y_preds = indexer.gather_and_concatenate(concept_predictions, indexer.indexed_queries["tasks"], 0)
y_preds.shape, y_preds[:10]

(torch.Size([980, 1]),
 tensor([[0.5043],
         [0.4648],
         [0.5644],
         [0.4980],
         [0.5405],
         [0.5061],
         [0.4710],
         [0.4720],
         [0.4271],
         [0.5483]], grad_fn=<SliceBackward0>))