In [1]:
import torch
import torch_geometric
from torch_geometric.datasets import MoleculeNet


In [2]:
def unsqueeze_y(data):
    """Drop dimension 1 of ``data.y`` and return the same ``data`` object.

    Despite the name, this function *squeezes* the label tensor: it calls
    ``squeeze(1)`` on ``data.y``, which removes dimension 1 when that
    dimension has size 1 and leaves the tensor unchanged otherwise.

    Args:
        data: An object exposing a ``y`` attribute that supports
            ``squeeze(1)`` (presumably a torch_geometric ``Data`` sample,
            since this is passed as ``pre_transform`` to ``MoleculeNet``).

    Returns:
        The input ``data`` object, mutated in place.
    """
    labels = data.y
    data.y = labels.squeeze(1)
    return data

In [3]:
# Build the MoleculeNet 'HIV' dataset rooted at ./dataset/MoleculeNety1,
# handing unsqueeze_y to the loader as its pre_transform.
dataset = MoleculeNet(
    root="./dataset/MoleculeNety1",
    name='HIV',
    pre_transform=unsqueeze_y,
)

In [4]:
# Dataset-level summary. The leading '' reproduces the blank line a bare
# print() would emit; sep='\n' places every entry on its own line.
print(
    '',
    f'Dataset: {dataset}:',
    '======================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

data = dataset[0]  # First graph object in the dataset.

# Per-graph summary: the graph's repr, a separator, then basic statistics.
print(
    '',
    data,
    '===========================================================================================================',
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Has isolated nodes: {data.has_isolated_nodes()}',
    f'Has self-loops: {data.has_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    sep='\n',
)


Dataset: HIV(41127):
Number of graphs: 41127
Number of features: 9
Number of classes: 2

Data(x=[19, 9], edge_index=[2, 40], edge_attr=[40, 3], smiles='CCC1=[O+][Cu-3]2([O+]=C(CC)C1)[O+]=C(CC)CC(CC)=[O+]2', y=[1])
Number of nodes: 19
Number of edges: 40
Average node degree: 2.11
Has isolated nodes: False
Has self-loops: False
Is undirected: True


In [5]:
import os
import sys

# Directory added to the import search path; a later cell imports the local
# `data` package (data.transformation, data.concepts), presumably from here.
# NOTE(review): the default is a machine-specific absolute path. Set the
# SUPERNODE_ROOT environment variable to run this notebook on another machine
# without editing the cell; when it is unset the original path is used.
SUPERNODE_ROOT = os.environ.get(
    'SUPERNODE_ROOT',
    '/home/sam/Documents/network/supernode/HIV_test/',
)

# Insert at index 1 so the directory is searched right after the first
# sys.path entry and ahead of everything that follows it.
sys.path.insert(1, SUPERNODE_ROOT)

In [6]:
from data.transformation import AddSupernodes
from data.concepts import *

In [7]:
# Re-create the MoleculeNet 'HIV' dataset with the same root, name and
# pre_transform as the earlier construction in this notebook.
dataset = MoleculeNet(
    root="./dataset/MoleculeNety1",
    name='HIV',
    pre_transform=unsqueeze_y,
)

In [8]:
# Concept specifications handed to AddSupernodes. Each entry names a concept,
# the function that extracts it, and that function's extra arguments.
# NOTE(review): the meaning of the optional "features" key is defined by
# AddSupernodes (data.transformation), which is not visible here — confirm.
concepts_list = [
    {
        "name": "GCB",
        "fun": cycle_basis,
        "args": [],
        "features": [2],
    },
    {
        "name": "GMC",
        "fun": max_cliques,
        "args": [],
    },
]

# Build the transform and apply it to the first graph of the dataset.
data = AddSupernodes(concepts_list)(dataset[0])

# Bare last expression so the notebook displays the transformed graph.
data

Data(x=[21], edge_index=[2, 64], y=[1], ntype=[21], S=[21], edge_S=[64, 1])