In [17]:
from load_dataset import load_dataset

In [18]:
import numpy as np
import dgl
from dgl.data import DGLDataset
import torch
from sklearn.model_selection import train_test_split

In [19]:
f, r = load_dataset('data/nx_network_data','politifact')

In [20]:
target_labels = np.concatenate([np.ones(len(f)), np.zeros(len(r))], axis=0)

In [21]:
len(target_labels)

628

In [22]:
f_arr = np.array(f,dtype=object)

In [23]:
len(f_arr)

351

In [24]:
r_arr = np.array(r,dtype=object)

In [25]:
len(r_arr)

277

In [26]:
dataset = np.concatenate((f_arr, r_arr), axis=0)

In [27]:
dataset

array([<networkx.classes.digraph.DiGraph object at 0x0000013E702A8AC0>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E702A8820>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E2424AEE0>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E702A8850>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E2423E790>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E6FFC4C40>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E72112CD0>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E72112DC0>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E72112F10>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E705D2040>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E705D2130>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E705D2160>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E705D2250>,
       <networkx.classes.digraph.DiGraph object at 0x0000013E705

In [28]:
X_train, X_test, y_train, y_test = train_test_split(dataset, target_labels, stratify=target_labels,
                                                        test_size=0.2, random_state=42)

In [29]:

class FakeNewsDataset(DGLDataset):
    def __init__(self,data,targets):
        self.data = data
        self.targets = targets
        super().__init__(name='fakenews')


    def process(self):
        #edges = pd.read_csv('./graph_edges.csv')
        #properties = pd.read_csv('./graph_properties.csv')
        self.graphs = []
        self.labels = []

        for n,target in zip(self.data,self.targets):
        
            g = dgl.from_networkx(n)
            g = dgl.add_self_loop(g)
            self.graphs.append(g)
            self.labels.append(target)
            

        # Convert the label list to tensor for saving.
        self.labels = torch.LongTensor(self.labels)

    def __getitem__(self, i):
        return self.graphs[i], self.labels[i]

    def __len__(self):
        return len(self.graphs)


In [30]:
X_train_dataset = FakeNewsDataset(X_train,y_train)

  self.labels = torch.LongTensor(self.labels)


In [31]:
graph, label = X_train_dataset[0]
print(graph, label)

Graph(num_nodes=28, num_edges=55,
      ndata_schemes={}
      edata_schemes={}) tensor(1)


In [32]:
from dgl.nn.pytorch import GraphConv

In [33]:
import torch.nn as nn
import torch.nn.functional as F

class Classifier(nn.Module):
    def __init__(self, in_dim, hidden_dim, n_classes):
        super(Classifier, self).__init__()
        self.conv1 = GraphConv(in_dim, hidden_dim)
        self.conv2 = GraphConv(hidden_dim, hidden_dim)
        self.classify = nn.Linear(hidden_dim, n_classes)

    def forward(self, g):
        # Use node degree as the initial node feature. For undirected graphs, the in-degree
        # is the same as the out_degree.
        h = torch.tensor([1]).view(-1, 1)##g.in_degrees().view(-1, 1).float()
        # Perform graph convolution and activation function.
        h = F.relu(self.conv1(g, h))
        h = F.relu(self.conv2(g, h))
        g.ndata['h'] = h
        # Calculate graph representation by averaging all the node representations.
        hg = dgl.mean_nodes(g, 'h')
        return self.classify(hg)

In [34]:
import torch.optim as optim
from dgl.dataloading import GraphDataLoader
data_loader = GraphDataLoader(X_train_dataset, batch_size=8, shuffle=True)

In [35]:
model = Classifier(1, 64, 2)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
model.train()

Classifier(
  (conv1): GraphConv(in=1, out=64, normalization=both, activation=None)
  (conv2): GraphConv(in=64, out=64, normalization=both, activation=None)
  (classify): Linear(in_features=64, out_features=2, bias=True)
)

In [36]:
epoch_losses = []
for epoch in range(100):
    epoch_loss = 0
    for iter, (bg, label) in enumerate(data_loader):
        prediction = model(bg)
        loss = loss_func(prediction, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.detach().item()
    epoch_loss /= (iter + 1)
    print('Epoch {}, loss {:.4f}'.format(epoch, epoch_loss))
    epoch_losses.append(epoch_loss)

  Variable._execution_engine.run_backward(


Epoch 0, loss 0.6898
Epoch 1, loss 0.6878
Epoch 2, loss 0.6879
Epoch 3, loss 0.6868
Epoch 4, loss 0.6871
Epoch 5, loss 0.6869
Epoch 6, loss 0.6871
Epoch 7, loss 0.6865
Epoch 8, loss 0.6869
Epoch 9, loss 0.6868
Epoch 10, loss 0.6864
Epoch 11, loss 0.6866
Epoch 12, loss 0.6865
Epoch 13, loss 0.6868
Epoch 14, loss 0.6866
Epoch 15, loss 0.6867
Epoch 16, loss 0.6863
Epoch 17, loss 0.6863
Epoch 18, loss 0.6863
Epoch 19, loss 0.6863
Epoch 20, loss 0.6869
Epoch 21, loss 0.6862
Epoch 22, loss 0.6864
Epoch 23, loss 0.6867
Epoch 24, loss 0.6870
Epoch 25, loss 0.6866
Epoch 26, loss 0.6861
Epoch 27, loss 0.6861
Epoch 28, loss 0.6864
Epoch 29, loss 0.6862
Epoch 30, loss 0.6863
Epoch 31, loss 0.6867
Epoch 32, loss 0.6861
Epoch 33, loss 0.6861
Epoch 34, loss 0.6862
Epoch 35, loss 0.6862
Epoch 36, loss 0.6864
Epoch 37, loss 0.6863
Epoch 38, loss 0.6864
Epoch 39, loss 0.6864
Epoch 40, loss 0.6866
Epoch 41, loss 0.6863
Epoch 42, loss 0.6865
Epoch 43, loss 0.6860
Epoch 44, loss 0.6866
Epoch 45, loss 0.686

In [37]:
X_test_dataset = FakeNewsDataset(X_test,y_test)

  self.labels = torch.LongTensor(self.labels)


In [38]:
model.eval()
# Convert a list of tuples to two lists
test_X, test_Y = map(list, zip(*X_test_dataset))
test_bg = dgl.batch(test_X)
test_Y = torch.tensor(test_Y).float().view(-1, 1)
probs_Y = torch.softmax(model(test_bg), 1)
sampled_Y = torch.multinomial(probs_Y, 1)
argmax_Y = torch.max(probs_Y, 1)[1].view(-1, 1)
print('Accuracy of sampled predictions on the test set: {:.4f}%'.format(
    (test_Y == sampled_Y.float()).sum().item() / len(test_Y) * 100))
print('Accuracy of argmax predictions on the test set: {:4f}%'.format(
    (test_Y == argmax_Y.float()).sum().item() / len(test_Y) * 100))

Accuracy of sampled predictions on the test set: 39.6825%
Accuracy of argmax predictions on the test set: 55.555556%
