In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from tqdm import tqdm
from collections import Counter
import kgbench as kg
import fire, sys
import math

from kgbench import load, tic, toc, d


import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as mcolors


#
from torch_geometric.utils import to_networkx
import networkx as nx

  from .autonotebook import tqdm as notebook_tqdm


### EDGE INDEXES instead of adjacency matrix
- In particular get edge index where indexed are relation-specific
- In order to capture the different relations: for each different relation - edge position + id relation*num_relation


In [2]:
data = kg.load('aifb', torch=True) 
print(f'Number of entities: {data.num_entities}') #data.i2e
print(f'Number of classes: {data.num_classes}')
print(f'Types of relations: {data.num_relations}') #data.i2r
data.triples.shape

loaded data aifb (0.1717s).
Number of entities: 8285
Number of classes: 4
Types of relations: 45


torch.Size([29043, 3])

In [3]:

data.y = np.concatenate((np.array(data.training), np.array(data.withheld)), axis=0)
data.y.shape
print(dir(data))

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_datatypes', '_dt_g2l', '_dt_l2g', 'datatype_g2l', 'datatype_l2g', 'datatypes', 'dgl', 'e2i', 'final', 'get_images', 'get_images_batch', 'get_strings', 'get_strings_batch', 'i2e', 'i2r', 'name', 'num_classes', 'num_entities', 'num_relations', 'pyg', 'r2i', 'torch', 'training', 'triples', 'withheld', 'y']


In [4]:
def num_entities(triples):
    a = list(np.unique((triples[:,0])))
    b = list(np.unique((triples[:,2])))
    c = a + b
    return len(np.unique(c))


num_entities(data.triples)

8285

Get indexes (as if we were in the GNN case - namely only one type of relation)

In [5]:
def edge_index_oneadj(triples):
    edge_index = torch.stack((triples[:, 0], triples[:, 2]),dim=0)
    return edge_index

edge_index = edge_index_oneadj(data.triples)


Function to find n-hop neighborhood starting from edge indexes for a specific node

In [70]:
def find_n_hop_neighbors(edge_index, n, node=None):
    # create dictionary of node neighborhoods
    neighborhoods = {}
    for i in range(edge_index.max().item() + 1):
        neighborhoods[i] = set()

    # find 1-hop neighbors and corresponding edges
    edges = []
    for j in range(edge_index.shape[1]):
        src, dst = edge_index[0, j].item(), edge_index[1, j].item()
        neighborhoods[src].add(dst)
        neighborhoods[dst].add(src)
        edges.append((src, dst))

    # find n-hop neighbors for the specified node or all nodes

    for k in range(2, n+1):
        new_neighbors = set()
        for neighbor in neighborhoods[node]:
            new_neighbors.update(neighborhoods[neighbor])
        neighborhoods[node].update(new_neighbors)
    sub_edges = []
    for edge in edges:
        src, dst = edge
        if src in neighborhoods[node] and dst in neighborhoods[node] or src == node or dst == node:
            sub_edges.append(edge)
            
    sub_edges_tensor = torch.tensor([sub_edges[i] for i in range(len(sub_edges))]).t()        

    #return {node: sub_edges}, {node: neighborhoods[node]}, sub_edges_tensor
    return sub_edges, neighborhoods[node], sub_edges_tensor


# test with n=2
node = 5744
n = 2
sub_edges, neighborhoods, sub_edges_tensor = find_n_hop_neighbors(edge_index, n=2, node=2)
print(f'neighborhood nodes at {n} hop for node {node}: \n',neighborhoods)


neighborhood nodes at 2 hop for node 5744: 
 {2, 395, 7084, 1740, 2732, 3119, 4176, 530, 5206, 1847, 5238, 5372, 5502, 5759}


I define a function to map the neighbors to the triples

In [7]:
def match_to_triples(tensor1, tensor2):
    """
    tensor1: sub_edge tensor: edges of the neighborhood - transpose!!
    tensor2: data.triples: all edges
    """
    matching = []
    for i,i2 in zip(tensor1[:,0],tensor1[:,1]):
        for j,j1,j2, index in zip(tensor2[:,0],tensor2[:,1],  tensor2[:,2], range(len(tensor2[:,0]))):
            if i == j and i2 == j2:
                matching.append(tensor2[index])

    result = torch.stack(matching)
    return result


print(f'neighborhood edges original triples at {n} hop for node {node}: \n ',match_to_triples(sub_edges_tensor.t(), data.triples))



neighborhood edges original triples at 2 hop for node 2: 
  tensor([[5206,   41, 5238],
        [5372,   18, 5759],
        [5502,   31, 7084],
        [5759,   30, 7084],
        [7084,    0, 1740],
        [7084,    1, 3119],
        [7084,    2, 5759],
        [7084,    8,    2],
        [7084,   17, 5372],
        [7084,   19,  530],
        [7084,   22, 4176],
        [7084,   24, 1847],
        [7084,   33, 2732],
        [7084,   37,  395],
        [7084,   39, 5206],
        [7084,   39, 5238]])


## DATA PREPARATION  FROM RGCN
- enrich: add inverse rel and self loops
- adj: get hor_indexes, ver_indexes
- sum sparse
- I add function to directly make: hor_graph, ver_graph

In [8]:
def enrich(triples : torch.Tensor, n : int, r: int):
    """
    Enriches the given triples with self-loops and inverse relations.

    """
    cuda = triples.is_cuda

    inverses = torch.cat([
        triples[:, 2:],
        triples[:, 1:2] + r,
        triples[:, :1]
    ], dim=1)

    selfloops = torch.cat([
        torch.arange(n, dtype=torch.long,  device=d(cuda))[:, None],
        torch.full((n, 1), fill_value=2*r),
        torch.arange(n, dtype=torch.long, device=d(cuda))[:, None],
    ], dim=1)

    return torch.cat([triples, inverses, selfloops], dim=0)

def sum_sparse(indices, values, size, row=True):
    """
    Sum the rows or columns of a sparse matrix, and redistribute the
    results back to the non-sparse row/column entries

    :return:
    """

    ST = torch.cuda.sparse.FloatTensor if indices.is_cuda else torch.sparse.FloatTensor

    assert len(indices.size()) == 2

    k, r = indices.size()

    if not row:
        # transpose the matrix
        indices = torch.cat([indices[:, 1:2], indices[:, 0:1]], dim=1)
        size = size[1], size[0]

    ones = torch.ones((size[1], 1), device=d(indices))

    smatrix = ST(indices.t(), values, size=size)
    sums = torch.mm(smatrix, ones) # row/column sums

    sums = sums[indices[:, 0]]

    assert sums.size() == (k, 1)

    return sums.view(k)

def adj(triples, num_nodes, num_rels, cuda=False, vertical=True):
    """
     Computes a sparse adjacency matrix for the given graph (the adjacency matrices of all
     relations are stacked vertically).

     :param edges: List representing the triples
     :param i2r: list of relations
     :param i2n: list of nodes
     :return: sparse tensor
    """
    r, n = num_rels, num_nodes
    size = (r * n, n) if vertical else (n, r * n)

    from_indices = []
    upto_indices = []

    for s, p, o in triples:

        offset = p.item() * n
        print(offset)

        if vertical:
            s = offset + s.item()
        else:
            o = offset + o.item()

        from_indices.append(s)
        upto_indices.append(o)
        

    indices = torch.tensor([from_indices, upto_indices], dtype=torch.long, device=d(cuda))


    assert indices.size(1) == len(triples)
    assert indices[0, :].max() < size[0], f'{indices[0, :].max()}, {size}, {r}'
    assert indices[1, :].max() < size[1], f'{indices[1, :].max()}, {size}, {r}'

    return indices.t(), size





triples = enrich(data.triples, data.num_entities, data.num_relations)
indices, size = adj(triples, data.num_entities, triples.shape[0], cuda=False, vertical=True)



323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970

Here is where the magic happens: 

Instead of having one adjacency matrix we have 2 sparse tensor with indexes where indexes have a semantic meaning 

In [9]:
def hor_ver_graph(triples, n, r):
    #triples = enrich(triples_small, n, r)

    hor_ind, hor_size = adj(triples, n, 2*r+1, vertical=False)
    ver_ind, ver_size = adj(triples, n, 2*r+1, vertical=True)
    #number of relations is 2*r+1 because we added the inverse and self loop

    _, rn = hor_size #horizontally stacked adjacency matrix size
    print(hor_size)
    r = rn // n #number of relations enriched divided by number of nodes

    vals = torch.ones(ver_ind.size(0), dtype=torch.float) #number of enriched triples
    vals = vals / sum_sparse(ver_ind, vals, ver_size) #normalize the values by the number of edges

    hor_graph = torch.sparse.FloatTensor(indices=hor_ind.t(), values=vals, size=hor_size) #size: n,r, emb


    ver_graph = torch.sparse.FloatTensor(indices=ver_ind.t(), values=vals, size=ver_size)

    return hor_graph, ver_graph

hor_graph, ver_graph = hor_ver_graph(data.triples, data.num_entities, data.num_relations)
hor_graph

323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970
364540
323115
347970

tensor(indices=tensor([[  4938,   4938,   4938,  ...,   7628,   7628,   8052],
                       [331296, 356145, 369832,  ..., 306895, 328335, 331293]]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]),
       size=(8285, 753935), nnz=29043, layout=torch.sparse_coo)

In [10]:
print(data.num_entities,triples.shape[0])
data.num_relations
print(hor_graph.shape[1]%91 )#remainder is indeed 0 becasue we have 91 relations
print(hor_graph.shape[1]//91) #quotient is indeed 8285 because we have 8285 nodes
91*8285==hor_graph.shape[1]

8285 66371
0
8285


True

## OK
SO for the forward of the training instead of workiong with the adjacency we indeed work with hor and ver graph

question:
- Do the training with them and then map back to who the neighbors are 
- so for the sake of experimenting ; lets imagine we want to explain prediction at node 2 with 2 hops - what do we do then?
- It means that we want to extract the neighborhood of node 2 and then using the neighbor info to build the sub_adj which in this case must be a sub_hor and ver graph

In [11]:
#we need: triples, num entities, num relations - all enriched -- of the neighborhood
triples = enrich(data.triples, data.num_entities, data.num_relations)
edge_index = edge_index_oneadj(data.triples)
sub_edges, neighborhoods, sub_edges_tensor = find_n_hop_neighbors(edge_index, n=2, node=2)
sub_triples = match_to_triples(sub_edges_tensor.t(), triples)
sub_num_entities = data.num_entities#num_entities(sub_triples)

sub_num_relations = data.num_relations#len(np.unique(sub_triples[:,1]))

In [12]:
sub_hor_graph, sub_ver_graph = hor_ver_graph(sub_triples, sub_num_entities, sub_num_relations)

339685
149130
256835
248550
389395
0
8285
16570
621375
66280
140845
157415
182270
198840
273405
306545
323115
323115
339685
149130
256835
248550
389395
0
8285
16570
621375
66280
140845
157415
182270
198840
273405
306545
323115
323115
(8285, 753935)


In [13]:
print(sub_hor_graph)
print(sub_ver_graph)

tensor(indices=tensor([[  5206,   5372,   5502,   5759,   5759,   7084,   7084,
                          7084,   7084,   7084,   7084,   7084,   7084,   7084,
                          7084,   7084,   7084,   7084],
                       [344923, 154889, 263919, 255634, 396479,   1740,  11404,
                         22329, 627134,  66282, 146217, 157945, 186446, 200687,
                        276137, 306940, 328321, 328353]]),
       values=tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
                      1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
                      1.0000, 1.0000, 0.5000, 0.5000]),
       size=(8285, 753935), nnz=18, layout=torch.sparse_coo)
tensor(indices=tensor([[344891, 154502, 262337, 254309, 395154,   7084,  15369,
                         23654, 628459,  73364, 147929, 164499, 189354, 205924,
                        280489, 313629, 330199, 330199],
                       [  5238,   5759,   7084,   7084,   7084,   1740,   

Ok so now we created the 'sub_adj' in terms of hor and ver graph - we need to find a way to apply a mask to this!

In [14]:
num_nodes = sub_hor_graph.coalesce().values().shape[0] #len(neighborhoods)
diag_mask = torch.ones(num_nodes, num_nodes) - torch.eye(num_nodes)

In [29]:
def construct_edge_mask( num_nodes, init_strategy="normal", const_val=1.0):
    """
    Construct edge mask
    input;
        num_nodes: number of nodes in the neighborhood
        init_strategy: initialization strategy for the mask
        const_val: constant value for the mask
    output:
        mask: edge mask    
    """
    mask = nn.Parameter(torch.FloatTensor(num_nodes))  #initialize the mask
    if init_strategy == "normal":
        std = nn.init.calculate_gain("relu") * math.sqrt(
            2.0 / (num_nodes + num_nodes)
        )
        with torch.no_grad():
            mask.normal_(1.0, std)
    elif init_strategy == "const":
        nn.init.constant_(mask, const_val)
    return torch.tensor(mask)
mask = construct_edge_mask(num_nodes, init_strategy="normal", const_val=1.0)
print('mask is:\n', mask)
mask.shape



mask is:
 tensor([0.8058, 0.9847, 0.4180, 0.8984, 1.1969, 0.9595, 1.4275, 1.0989, 1.6875,
        0.6012, 0.8823, 1.0217, 0.6042, 0.8319, 0.7804, 0.9532, 0.9079, 0.2953])


  return torch.tensor(mask)


torch.Size([18])

In [30]:
def _masked_adj(mask,adj, diag_mask, graph):
    """ Masked adjacency matrix 
    input: edge_mask, sub_adj, diag_mask
    output: masked_adj
    """
    sym_mask = mask
    sym_mask = torch.sigmoid(mask)
    
    sym_mask = (sym_mask + sym_mask.t()) / 2
    adj = torch.tensor(adj)
    masked_adj = adj * sym_mask

    #return masked_adj #* diag_mask
    return torch.sparse.FloatTensor(indices=graph.coalesce().indices(), values= masked_adj, size=graph.coalesce().size())


masked_hor = _masked_adj(mask, sub_hor_graph.coalesce().values(), diag_mask, sub_hor_graph)
masked_ver = _masked_adj(mask, sub_ver_graph.coalesce().values(), diag_mask, sub_ver_graph)

print('masked_hor is:\n', masked_hor)
print('masked_ver is:\n', masked_ver)

masked_hor is:
 tensor(indices=tensor([[  5206,   5372,   5502,   5759,   5759,   7084,   7084,
                          7084,   7084,   7084,   7084,   7084,   7084,   7084,
                          7084,   7084,   7084,   7084],
                       [344923, 154889, 263919, 255634, 396479,   1740,  11404,
                         22329,  66282, 146217, 157945, 186446, 200687, 276137,
                        306940, 328321, 328353, 627134]]),
       values=tensor([0.6912, 0.7280, 0.6030, 0.7106, 0.7680, 0.7230, 0.8065,
                      0.7500, 0.8439, 0.6459, 0.7073, 0.7353, 0.6466, 0.6967,
                      0.6858, 0.3609, 0.3563, 0.5733]),
       size=(8285, 753935), nnz=18, layout=torch.sparse_coo)
masked_ver is:
 tensor(indices=tensor([[  7084,  15369,  23654,  73364, 147929, 154502, 164499,
                        189354, 205924, 254309, 262337, 280489, 313629, 330199,
                        330199, 344891, 395154, 628459],
                       [  1740,   3119,   

  adj = torch.tensor(adj)


In [17]:
class RGCN(nn.Module):
    """
    Classic RGCN
    """

    def __init__(self, triples, n, r, numcls, emb=16, bases=None):

        super().__init__()

        self.emb = emb
        self.bases = bases
        self.numcls = numcls

        self.triples = enrich(triples, n, r)

        # horizontally and vertically stacked versions of the adjacency graph
        hor_ind, hor_size = adj(self.triples, n, 2*r+1, vertical=False)
        ver_ind, ver_size = adj(self.triples, n, 2*r+1, vertical=True)
        #number of relations is 2*r+1 because we added the inverse and self loop

        _, rn = hor_size #horizontally stacked adjacency matrix size
        r = rn // n #number of relations enriched divided by number of nodes

        vals = torch.ones(ver_ind.size(0), dtype=torch.float) #number of enriched triples
        vals = vals / sum_sparse(ver_ind, vals, ver_size) #normalize the values by the number of edges

        hor_graph = torch.sparse.FloatTensor(indices=hor_ind.t(), values=vals, size=hor_size) #size: n,r, emb
        
        
        self.register_buffer('hor_graph', hor_graph)

        ver_graph = torch.sparse.FloatTensor(indices=ver_ind.t(), values=vals, size=ver_size)
        self.register_buffer('ver_graph', ver_graph)

        # layer 1 weights
        if bases is None:
            self.weights1 = nn.Parameter(torch.FloatTensor(r, n, emb))
            nn.init.xavier_uniform_(self.weights1, gain=nn.init.calculate_gain('relu'))

            self.bases1 = None
        else:
            self.comps1 = nn.Parameter(torch.FloatTensor(r, bases))
            nn.init.xavier_uniform_(self.comps1, gain=nn.init.calculate_gain('relu'))

            self.bases1 = nn.Parameter(torch.FloatTensor(bases, n, emb))
            nn.init.xavier_uniform_(self.bases1, gain=nn.init.calculate_gain('relu'))

        # layer 2 weights
        if bases is None:

            self.weights2 = nn.Parameter(torch.FloatTensor(r, emb, numcls) )
            nn.init.xavier_uniform_(self.weights2, gain=nn.init.calculate_gain('relu'))

            self.bases2 = None
        else:
            self.comps2 = nn.Parameter(torch.FloatTensor(r, bases))
            nn.init.xavier_uniform_(self.comps2, gain=nn.init.calculate_gain('relu'))

            self.bases2 = nn.Parameter(torch.FloatTensor(bases, emb, numcls))
            nn.init.xavier_uniform_(self.bases2, gain=nn.init.calculate_gain('relu'))

        self.bias1 = nn.Parameter(torch.FloatTensor(emb).zero_())
        self.bias2 = nn.Parameter(torch.FloatTensor(numcls).zero_())

    def forward2(self, hor_graph, ver_graph):


        ## Layer 1

        n, rn = hor_graph.size() #horizontally stacked adjacency matrix size
        r = rn // n
        e = self.emb
        b, c = self.bases, self.numcls

        if self.bases1 is not None:
            # weights = torch.einsum('rb, bij -> rij', self.comps1, self.bases1)
            weights = torch.mm(self.comps1, self.bases1.view(b, n*e)).view(r, n, e)
        else:
            weights = self.weights1

        assert weights.size() == (r, n, e) #r relations, n nodes, e embedding size

        # Apply weights and sum over relations
        #hidden layer
        h = torch.mm(hor_graph, weights.view(r*n, e))  #matmul with horizontally stacked adjacency matrix and initialized weights
        assert h.size() == (n, e)

        h = F.relu(h + self.bias1) #apply non linearity and add bias

        ## Layer 2

        # Multiply adjacencies by hidden
        h = torch.mm(ver_graph, h) # sparse mm
        h = h.view(r, n, e) # new dim for the relations

        if self.bases2 is not None:
            # weights = torch.einsum('rb, bij -> rij', self.comps2, self.bases2)
            weights = torch.mm(self.comps2, self.bases2.view(b, e * c)).view(r, e, c)
        else:
            weights = self.weights2

        # Apply weights, sum over relations
        # h = torch.einsum('rhc, rnh -> nc', weights, h)
        h = torch.bmm(h, weights).sum(dim=0)

        assert h.size() == (n, c)

        return h + self.bias2 # -- softmax is applied in the loss
    



    def forward(self):


        ## Layer 1

        n, rn = self.hor_graph.size() #horizontally stacked adjacency matrix size
        r = rn // n
        e = self.emb
        b, c = self.bases, self.numcls

        if self.bases1 is not None:
            # weights = torch.einsum('rb, bij -> rij', self.comps1, self.bases1)
            weights = torch.mm(self.comps1, self.bases1.view(b, n*e)).view(r, n, e)
        else:
            weights = self.weights1

        assert weights.size() == (r, n, e) #r relations, n nodes, e embedding size

        # Apply weights and sum over relations
        #hidden layer
        h = torch.mm(self.hor_graph, weights.view(r*n, e))  #matmul with horizontally stacked adjacency matrix and initialized weights
        assert h.size() == (n, e)

        h = F.relu(h + self.bias1) #apply non linearity and add bias

        ## Layer 2

        # Multiply adjacencies by hidden
        h = torch.mm(self.ver_graph, h) # sparse mm
        h = h.view(r, n, e) # new dim for the relations

        if self.bases2 is not None:
            # weights = torch.einsum('rb, bij -> rij', self.comps2, self.bases2)
            weights = torch.mm(self.comps2, self.bases2.view(b, e * c)).view(r, e, c)
        else:
            weights = self.weights2

        # Apply weights, sum over relations
        # h = torch.einsum('rhc, rnh -> nc', weights, h)
        h = torch.bmm(h, weights).sum(dim=0)

        assert h.size() == (n, c)

        return h + self.bias2 # -- softmax is applied in the loss

    def penalty(self, p=2):
        """
        L2 penalty on the weights
        """
        assert p==2

        if self.bases is None:
            return self.weights1.pow(2).sum()

        return self.comps1.pow(p).sum() + self.bases1.pow(p).sum()
model = torch.load('aifb_chk/model_aifb')
pred = model.forward2(sub_hor_graph, sub_ver_graph)

In [89]:
pred_label = torch.load('aifb_chk/prediction_aifb')

def new_index(self):
    idxw, clsw = self.data.withheld[:, 0], self.data.withheld[:, 1]
    idxw, clsw = idxw.long(), clsw.long()
    idxw_list = list(idxw)
    self.new_node_idx = idxw_list.index(self.node_idx)
    return self.new_node_idx 

1

In [88]:
data.withheld

tensor([[5757,    2],
        [5797,    2],
        [5678,    0],
        [5900,    2],
        [5677,    2],
        [5731,    1],
        [5724,    0],
        [5791,    2],
        [5699,    0],
        [5857,    3],
        [5752,    3],
        [5688,    0],
        [5702,    0],
        [5714,    0],
        [5905,    1],
        [5795,    3],
        [5811,    2],
        [5708,    0],
        [5843,    0],
        [5873,    0],
        [5697,    0],
        [5753,    3],
        [5831,    2],
        [5839,    2],
        [5783,    0],
        [5755,    2],
        [5808,    1],
        [5844,    2],
        [5798,    3],
        [5701,    0],
        [5845,    0],
        [5861,    2],
        [5778,    0],
        [5854,    3],
        [5785,    1]], dtype=torch.int32)

Prediction with the masked adjacency matrix 

In [63]:
pred_mask = model.forward2(masked_hor, masked_ver)

def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0) # only difference
np.argmax(softmax(pred_mask[5759].detach().numpy()))
softmax(pred_mask[4176].detach().numpy())


array([0.28584126, 0.23059443, 0.25147924, 0.23208514], dtype=float32)

In [65]:
def softmax(pred, index):
    """Compute softmax values for each sets of scores in x."""
    x_index = pred[index].detach().numpy()
    e_x = np.exp(x_index - np.max(x_index))
    return e_x / e_x.sum(axis=0),  np.argmax(e_x / e_x.sum(axis=0))

softmax(pred_mask, 2)

(array([0.28584126, 0.23059443, 0.25147924, 0.23208514], dtype=float32), 0)

In [69]:
data.training

tensor([[5744,    2],
        [5867,    1],
        [5860,    1],
        [5686,    0],
        [5712,    0],
        [5729,    0],
        [5723,    0],
        [5743,    0],
        [5762,    2],
        [5765,    0],
        [5759,    0],
        [5883,    2],
        [5707,    0],
        [5748,    0],
        [5703,    0],
        [5717,    0],
        [5874,    2],
        [5889,    1],
        [5902,    2],
        [5777,    1],
        [5815,    1],
        [5758,    2],
        [5769,    0],
        [5775,    2],
        [5691,    0],
        [5834,    2],
        [5835,    2],
        [5732,    0],
        [5904,    2],
        [5745,    0],
        [5859,    0],
        [5884,    2],
        [5746,    0],
        [5790,    2],
        [5882,    3],
        [5742,    1],
        [5771,    1],
        [5827,    2],
        [5888,    3],
        [5700,    0],
        [5694,    0],
        [5821,    2],
        [5751,    1],
        [5710,    0],
        [5911,    0],
        [5

Prediction with the subgraph only  but not masked

In [40]:
softmax(pred[5759].detach().numpy())

array([0.9462887 , 0.02213145, 0.00236647, 0.02921332], dtype=float32)

Prediction with original all graph

In [41]:
pred_original = model.forward2(hor_graph, ver_graph)
softmax(pred_original[5759].detach().numpy())

array([0.521083  , 0.38938808, 0.04961503, 0.03991386], dtype=float32)

like wtf why does it look like that if we consider the whole graph the probability of being correct is lower??????

In this function we map the indices of the subgraph aka the nodes of interest to their class (for the ones we have information about as its not for all)

In [None]:
def match_to_classes(tensor1, tensor2):
    """
    tensor1: sub graph indices
    tensor2: data.y labelsss
    """
    matching = []
    for i in (tensor1[:,0]):
        for j, index in zip(tensor2[:,0],range(len(tensor2[:,0]))):
            if i == j:
                matching.append(tensor2[index])
    return matching
    # result = torch.stack(matching)
    # return result

match_to_classes(sub_hor_graph.coalesce().indices().t(), data.y)

[array([5759,    0], dtype=int32), array([5759,    0], dtype=int32)]

Ok so we have the masked edges for the GNN Explanation - lets simplify the loss to work with edges only (no interest in the features for now - but ye I guess we need to figure out the labels)

In [None]:
def loss_fc(edge_mask, masked_adj,adj, pred, pred_label,label, node_idx, epoch, print=False):
    """
    Args:
        pred: y_e :  prediction made by current model
        pred_label: y_hat : the label predicted by the original model.
    """
    #PRED LOSS
    pred_label_node = pred_label[node_idx] #pred label is the prediction made by the original model
    gt_label_node = label[node_idx]

    logit = pred[gt_label_node] #pred is the prediction made by the current model

    pred_loss = -torch.log(logit) #this is basically taking the cross entropy loss

    # MASK SIZE EDGE LOSS
    
    mask = edge_mask
    mask = torch.sigmoid(mask)

    size_loss = 0.005 * torch.sum(mask)

    
    #MASK SIZE FEATURE LOSS
    feat_mask = (torch.sigmoid(feat_mask))
    feat_size_loss = 1.0 * torch.mean(feat_mask)

    # EDGE MASK ENTROPY LOSS
    mask_ent = -mask * torch.log(mask) - (1 - mask) * torch.log(1 - mask)
    mask_ent_loss = 1.0 * torch.mean(mask_ent)
    
    # FEATURE MASK ENTROPY LOSS
    feat_mask_ent = - feat_mask * torch.log(feat_mask) - (1 - feat_mask) * torch.log(1 - feat_mask)

    feat_mask_ent_loss = 0.1  * torch.mean(feat_mask_ent)

    # LAPLACIAN LOSS
    D = torch.diag(torch.sum(masked_adj, 0))
    m_adj = masked_adj 
    L = D - m_adj

    pred_label_t = torch.tensor(pred_label, dtype=torch.float)


    lap_loss = ( 1.0
        * (pred_label_t @ L @ pred_label_t)
        / torch.Tensor(adj).numel())


    loss = pred_loss + size_loss  + mask_ent_loss + feat_size_loss + lap_loss
    if print== True:
        print("optimization/size_loss", size_loss, epoch)
        print("optimization/feat_size_loss", feat_size_loss, epoch)
        print("optimization/mask_ent_loss", mask_ent_loss, epoch)
        print(
            "optimization/feat_mask_ent_loss", mask_ent_loss, epoch
        )

        print("optimization/pred_loss", pred_loss, epoch)
        print("optimization/lap_loss", lap_loss, epoch)
        print("optimization/overall_loss", loss, epoch)
    return loss

In [None]:
mask.shape
sub_edges_tensor%45


def get_adjacency(indices):
    #data.edge_index = edge_index_oneadj(data)
    adj = torch.zeros(45, 45)
    for edge in indices.t():
        adj[edge[0]][edge[1]] = 1
        adj[edge[1]][edge[0]] = 1
        #adj[edge[0]][edge[0]] = 1

    return adj

adj_matrix = get_adjacency(sub_edges_tensor%45)
adj_matrix

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])