In [42]:
from load_dataset import load_dataset

In [43]:
import numpy as np
import dgl
from dgl.data import DGLDataset
import torch
from sklearn.model_selection import train_test_split

In [44]:
f, r = load_dataset('data/nx_network_data','gossipcop')

In [45]:
target_labels = np.concatenate([np.ones(len(f)), np.zeros(len(r))], axis=0)

In [46]:
len(target_labels)

10629

In [47]:
f_arr = np.array(f,dtype=object)

In [48]:
len(f_arr)

3684

In [49]:
r_arr = np.array(r,dtype=object)

In [50]:
len(r_arr)

6945

In [51]:
dataset = np.concatenate((f_arr, r_arr), axis=0)

In [52]:
dataset

array([<networkx.classes.digraph.DiGraph object at 0x0000020C1738F250>,
       <networkx.classes.digraph.DiGraph object at 0x0000020C1738FC40>,
       <networkx.classes.digraph.DiGraph object at 0x0000020C17FAEE80>,
       ...,
       <networkx.classes.digraph.DiGraph object at 0x0000020D8F211AC0>,
       <networkx.classes.digraph.DiGraph object at 0x0000020D8F211EE0>,
       <networkx.classes.digraph.DiGraph object at 0x0000020D8F229BE0>],
      dtype=object)

In [53]:
X_train, X_test, y_train, y_test = train_test_split(dataset, target_labels, stratify=target_labels,
                                                        test_size=0.2, random_state=42)

In [54]:

class FakeNewsDataset(DGLDataset):
    def __init__(self,data,targets):
        self.data = data
        self.targets = targets
        super().__init__(name='fakenews')


    def process(self):
        #edges = pd.read_csv('./graph_edges.csv')
        #properties = pd.read_csv('./graph_properties.csv')
        self.graphs = []
        self.labels = []

        for n,target in zip(self.data,self.targets):
        
            g = dgl.from_networkx(n)
            g = dgl.add_self_loop(g)
            self.graphs.append(g)
            self.labels.append(target)
            

        # Convert the label list to tensor for saving.
        self.labels = torch.LongTensor(self.labels)

    def __getitem__(self, i):
        return self.graphs[i], self.labels[i]

    def __len__(self):
        return len(self.graphs)


In [55]:
X_train_dataset = FakeNewsDataset(X_train,y_train)

  self.labels = torch.LongTensor(self.labels)


In [56]:
graph, label = X_train_dataset[0]
print(graph, label)

Graph(num_nodes=91, num_edges=181,
      ndata_schemes={}
      edata_schemes={}) tensor(0)


In [57]:
from dgl.nn.pytorch import GraphConv

In [58]:
import torch.nn as nn
import torch.nn.functional as F

class Classifier(nn.Module):
    def __init__(self, in_dim, hidden_dim, n_classes):
        super(Classifier, self).__init__()
        self.conv1 = GraphConv(in_dim, hidden_dim)
        self.conv2 = GraphConv(hidden_dim, hidden_dim)
        self.classify = nn.Linear(hidden_dim, n_classes)

    def forward(self, g):
        # Use node degree as the initial node feature. For undirected graphs, the in-degree
        # is the same as the out_degree.
        h = torch.tensor([1]).view(-1, 1)##g.in_degrees().view(-1, 1).float()
        # Perform graph convolution and activation function.
        h = F.relu(self.conv1(g, h))
        h = F.relu(self.conv2(g, h))
        g.ndata['h'] = h
        # Calculate graph representation by averaging all the node representations.
        hg = dgl.mean_nodes(g, 'h')
        return self.classify(hg)

In [59]:
import torch.optim as optim
from dgl.dataloading import GraphDataLoader
data_loader = GraphDataLoader(X_train_dataset, batch_size=64, shuffle=True)

In [60]:
model = Classifier(1, 64, 2)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
model.train()

Classifier(
  (conv1): GraphConv(in=1, out=64, normalization=both, activation=None)
  (conv2): GraphConv(in=64, out=64, normalization=both, activation=None)
  (classify): Linear(in_features=64, out_features=2, bias=True)
)

In [61]:
epoch_losses = []
for epoch in range(100):
    epoch_loss = 0
    for iter, (bg, label) in enumerate(data_loader):
        prediction = model(bg)
        loss = loss_func(prediction, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.detach().item()
    epoch_loss /= (iter + 1)
    print('Epoch {}, loss {:.4f}'.format(epoch, epoch_loss))
    epoch_losses.append(epoch_loss)

Epoch 0, loss 0.6570
Epoch 1, loss 0.6472
Epoch 2, loss 0.6224
Epoch 3, loss 0.5681
Epoch 4, loss 0.5100
Epoch 5, loss 0.4739
Epoch 6, loss 0.4582
Epoch 7, loss 0.4496
Epoch 8, loss 0.4475
Epoch 9, loss 0.4445
Epoch 10, loss 0.4424
Epoch 11, loss 0.4434
Epoch 12, loss 0.4426
Epoch 13, loss 0.4435
Epoch 14, loss 0.4415
Epoch 15, loss 0.4406
Epoch 16, loss 0.4398
Epoch 17, loss 0.4403
Epoch 18, loss 0.4402
Epoch 19, loss 0.4402
Epoch 20, loss 0.4399
Epoch 21, loss 0.4398
Epoch 22, loss 0.4390
Epoch 23, loss 0.4386
Epoch 24, loss 0.4385
Epoch 25, loss 0.4389
Epoch 26, loss 0.4391
Epoch 27, loss 0.4385
Epoch 28, loss 0.4392
Epoch 29, loss 0.4375
Epoch 30, loss 0.4378
Epoch 31, loss 0.4371
Epoch 32, loss 0.4387
Epoch 33, loss 0.4371
Epoch 34, loss 0.4362
Epoch 35, loss 0.4358
Epoch 36, loss 0.4367
Epoch 37, loss 0.4365
Epoch 38, loss 0.4356
Epoch 39, loss 0.4360
Epoch 40, loss 0.4354
Epoch 41, loss 0.4371
Epoch 42, loss 0.4354
Epoch 43, loss 0.4357
Epoch 44, loss 0.4338
Epoch 45, loss 0.434

In [62]:
X_test_dataset = FakeNewsDataset(X_test,y_test)

  self.labels = torch.LongTensor(self.labels)


In [63]:
model.eval()
# Convert a list of tuples to two lists
test_X, test_Y = map(list, zip(*X_test_dataset))
test_bg = dgl.batch(test_X)
test_Y = torch.tensor(test_Y).float().view(-1, 1)
probs_Y = torch.softmax(model(test_bg), 1)
sampled_Y = torch.multinomial(probs_Y, 1)
argmax_Y = torch.max(probs_Y, 1)[1].view(-1, 1)
print('Accuracy of sampled predictions on the test set: {:.4f}%'.format(
    (test_Y == sampled_Y.float()).sum().item() / len(test_Y) * 100))
print('Accuracy of argmax predictions on the test set: {:4f}%'.format(
    (test_Y == argmax_Y.float()).sum().item() / len(test_Y) * 100))

Accuracy of sampled predictions on the test set: 74.7413%
Accuracy of argmax predictions on the test set: 80.103481%
