In [1]:
import torch 
from torch import nn
from torch.nn import init
import dgl
from tqdm import tqdm
EPS = 1e-15

Using backend: pytorch


In [2]:
from dgl import DGLGraph
from dgl.data import citation_graph as citegrh
from dgl.nn import GraphConv

In [3]:
class Encoder(nn.Module): # layer
    def __init__(self, in_channels, hidden_channels):
        super(Encoder, self).__init__()
        self.conv = GraphConv(in_channels, hidden_channels)
        self.prelu = nn.PReLU(hidden_channels)

    def forward(self, g,feat):
        with g.local_scope():
            h = self.conv(g, feat)
            h = self.prelu(h)
        return h

In [4]:
def corruption(x):
    return x[torch.randperm(x.size(0))]

In [5]:
from dgl.data import citation_graph as citegrh
def load_cora_data():
    data = citegrh.load_cora()
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.BoolTensor(data.train_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    g = DGLGraph(data.graph)
    return g, features, labels, train_mask, test_mask
g, features, labels, train_mask, test_mask = load_cora_data()

Finished data loading and preprocessing.
  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000


In [6]:
from models import DeepGraphInfomax

In [10]:
model = DeepGraphInfomax(hidden_feats=512,encoder=Encoder(features.shape[1],512),
                        summary = lambda z, *args,**kwargs:torch.sigmoid(
                            z.mean(dim=0)),
                        corruption=corruption)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
model

DeepGraphInfomax(
  (encoder): Encoder(
    (conv): GraphConv(in=1433, out=512, normalization=both, activation=None)
    (prelu): PReLU(num_parameters=512)
  )
)

In [11]:
def train():
    model.train()
    optimizer.zero_grad()
    pos_z, neg_z, summary = model(g, features)
    loss = model.loss(pos_z, neg_z, summary)
    loss.backward()
    optimizer.step()
    return loss.item()

In [12]:
for epoch in tqdm(range( 1,250)):
    loss = train()
    if epoch % 50 ==0:
        print('Epoch: {:03d}, Loss: {:.4f}'.format(epoch, loss))

 20%|██        | 50/249 [00:13<00:50,  3.91it/s]

Epoch: 050, Loss: 0.7017


 40%|████      | 100/249 [00:26<00:38,  3.88it/s]

Epoch: 100, Loss: 0.3570


 60%|██████    | 150/249 [00:39<00:28,  3.44it/s]

Epoch: 150, Loss: 0.2089


 80%|████████  | 200/249 [00:54<00:14,  3.47it/s]

Epoch: 200, Loss: 0.1347


100%|██████████| 249/249 [01:08<00:00,  3.65it/s]


In [13]:
from sklearn.linear_model import LogisticRegression
def test( train_z, train_y, test_z, test_y, solver='lbfgs',
         multi_class='auto', *args, **kwargs):
    r"""Evaluates latent space quality via a logistic regression downstream
    task."""
    clf = LogisticRegression(solver=solver, multi_class=multi_class, *args,
                             **kwargs).fit(train_z.detach().cpu().numpy(),
                                           train_y.detach().cpu().numpy())
    return clf.score(test_z.detach().cpu().numpy(),
                     test_y.detach().cpu().numpy())

In [14]:
def test_latent():
    model.eval()
    z, _, _ = model(g, features)
    acc = test(z[train_mask], labels[train_mask],
                     z[test_mask], labels[test_mask], max_iter=150)
    return acc

In [15]:
test_latent()

0.718