In [1]:
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
import os
import os.path as osp
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import networkx as nx

import random

import yaml
from datetime import datetime

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Sequential as Seq, Linear, ReLU
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_networkx

from multiprocessing import Process


In [6]:
RESULT_DIR = '/home/simay/GNN-exp-pipeline/result/'
MODEL_DIR = '/home/simay/GNN-exp-pipeline/experiments/model/'
DATA_DIR = '/home/simay/GNN-exp-pipeline/data/'

config_file = '/home/simay/GNN-exp-pipeline/config/test_config.yml'

In [7]:
#function that takes in data and transforms/gets features from networkx graphs
data_list = []
first_part = []

def avg(pr):
    values = [v for _, v in pr.items()]
    avg = sum(values)/len(values)
    return avg

def stnd_dev(pr):
    values = [v for _, v in pr.items()]
    stnd_dev = np.std(values)
    return stnd_dev

def count_triangles(graph):
    undirected = graph.to_undirected()
    
    triangles = len(nx.triangles(undirected))
    all_cliques= nx.enumerate_all_cliques(undirected)
    triad_cycles= [x for x in all_cliques if len(x)==3]
    
    if triangles == 0:
        return 0
    if len(triad_cycles) == 0:
        return 0
    
    ratio = len(triad_cycles)/triangles
    return(ratio)

def runInParallel(*fns):
    proc = []
    for fn in fns:
        p = Process(target=fn)
        p.start()
        proc.append(p)
    for p in proc:
        p.join()
    
def data_manip(wico):  
    
    for data in wico:
        graph = to_networkx(data)
    
        #source nodes
        source = [x for x in graph.nodes() if graph.out_degree(x)==1 and graph.in_degree(x)==0]
        #target nodes
        sink = [x for x in graph.nodes() if graph.out_degree(x)==0 and graph.in_degree(x)==1]
        #connected components
        largest = max(nx.strongly_connected_components(graph), key=len)
        
    
        #page rank features
        pr = nx.pagerank(graph, alpha=0.85)
        avg_pr = avg(pr)
        stnd_pr = stnd_dev(pr)
        #triangles = count_triangles(graph)
        
        #graph feature list
        data_list.append([torch.Tensor(((nx.number_of_nodes(graph), nx.number_of_edges(graph), nx.average_clustering(graph), 
                                         len(largest), len(source), len(sink), max(pr), min(pr), avg_pr, stnd_pr))), torch.Tensor((data.y, 1 ,1))]) 
    return(data_list)
    

In [8]:
# full_wico.pt is the list of Torch Data objects representing each graph in the wico dataset

# Example list of data objects with component shapes:
#    [Data(x=[90, 3], edge_index=[2, 165], y=1),
#     Data(x=[47, 3], edge_index=[2, 107], y=1), 
#     Data(x=[92, 3], edge_index=[2, 289], y=1)]

full_wico_pyg = 'full_wico.pt'
wico = torch.load(DATA_DIR + full_wico_pyg)

data_manip(wico)

train_loader = DataLoader(data_list[:int(len(data_list)*0.9)], batch_size = 32, shuffle = True)
test_loader = DataLoader(data_list[int(len(data_list)*0.9):], batch_size = 32, shuffle = True)

In [9]:
#skeleton
class Network(nn.Module):
    
    def __init__(self):
        super(Network, self).__init__()
        self.fc1 = nn.Linear(10, 32)
        self.b1 = nn.BatchNorm1d(32)
        self.fc2 = nn.Linear(32, 16)
        self.b2 = nn.BatchNorm1d(16)
        self.fc3 = nn.Linear(16, 8)
        self.b3 = nn.BatchNorm1d(8)
        self.fc4 = nn.Linear(8, 3)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.b1(x)
        x = F.relu(self.fc2(x))
        x = self.b2(x)
        x = F.relu(self.fc3(x))
        x = self.b3(x)
        x = F.relu(self.fc4(x))
        return x
   
net = Network()

In [10]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(net.parameters(), lr = 0.001, momentum = 0.9)

In [11]:
for epoch in range(8):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):

        #get the inputs
        inputs, label = data
         
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 16 == 15:   
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 16))
            running_loss = 0.0

print('Finished Training')

[1,    16] loss: 0.663
[1,    32] loss: 0.657
[1,    48] loss: 0.653
[1,    64] loss: 0.648
[1,    80] loss: 0.646
[1,    96] loss: 0.641
[2,    16] loss: 0.635
[2,    32] loss: 0.627
[2,    48] loss: 0.619
[2,    64] loss: 0.610
[2,    80] loss: 0.605
[2,    96] loss: 0.599
[3,    16] loss: 0.594
[3,    32] loss: 0.592
[3,    48] loss: 0.586
[3,    64] loss: 0.581
[3,    80] loss: 0.579
[3,    96] loss: 0.575
[4,    16] loss: 0.568
[4,    32] loss: 0.563
[4,    48] loss: 0.561
[4,    64] loss: 0.557
[4,    80] loss: 0.553
[4,    96] loss: 0.551
[5,    16] loss: 0.545
[5,    32] loss: 0.544
[5,    48] loss: 0.539
[5,    64] loss: 0.536
[5,    80] loss: 0.534
[5,    96] loss: 0.531
[6,    16] loss: 0.526
[6,    32] loss: 0.522
[6,    48] loss: 0.518
[6,    64] loss: 0.515
[6,    80] loss: 0.513
[6,    96] loss: 0.514
[7,    16] loss: 0.509
[7,    32] loss: 0.505
[7,    48] loss: 0.500
[7,    64] loss: 0.496
[7,    80] loss: 0.496
[7,    96] loss: 0.488
[8,    16] loss: 0.486
[8,    32] 

In [12]:
correct = 0
total = 0

with torch.no_grad():
    for data in test_loader:
        inputs, label = data
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += label.size(0)
        for i in range (32):
            correct += (predicted[i] == label[i][0]).sum().item()
            i+=1

print('Accuracy of the network: %d %%' % (
    100 * correct / total))

Accuracy of the network: 47 %


In [None]:
def count_triangles(graph, i):
    #turns graph into an adjacency dictionary
    adj_dict = [(n, nbrdict) for n, nbrdict in graph.adjacency()]
    nodes = len(adj_dict)
    matrix = np.zeros((nodes, nodes))
    key_list = []
    
    #turns dictionary into an adjacency list
    for a in range(nodes):
        if len(adj_dict[a][1]) != 0:
            key_list = []
            for key in adj_dict[a][1].keys():
                key_list.append(key)
            for b in range(len(key_list)):
                matrix[a][key_list[b]] = 1 

    #counts triangles in adjacency list
    count = 0 
    for i in range(nodes):
        for j in range(nodes):
            for k in range(nodes):
                if(i != j and i != k and j != k and matrix[i][j] and matrix[j][k] and matrix[k][i]):
                    count+= 1
                    
    count = count // 3
    data_list[i][0].append(count)
    return count

In [39]:
#function that takes in data and transforms/gets features from networkx graphs
data_list = []
first_part = []

def avg(pr):
    values = [v for _, v in pr.items()]
    avg = sum(values)/len(values)
    first_part.append(avg)
    return avg

def stnd_dev(pr):
    values = [v for _, v in pr.items()]
    stnd_dev = np.std(values)
    first_part.append(stnd_dev)
    return stnd_dev

def count_triangles(graph):
    #turn graph to undirected
    undirected = graph.to_undirected()
    
    triangles = len(nx.triangles(undirected))
    all_cliques= nx.enumerate_all_cliques(undirected)
    triad_cycles= [x for x in all_cliques if len(x)==3]
    
    if triangles == 0:
        return 0
    if len(triad_cycles) == 0:
        return 0
    
    ratio = len(triad_cycles)/triangles
    return(ratio)

def runInParallel(*fns):
    proc = []
    for fn in fns:
        p = Process(target=fn)
        p.start()
        proc.append(p)
    for p in proc:
        p.join()
    
def data_manip(wico):  
    i = 0
    for data in wico:
        graph = to_networkx(data)
    
        #source nodes
        source = [x for x in graph.nodes() if graph.out_degree(x)==1 and graph.in_degree(x)==0]
        #target nodes
        sink = [x for x in graph.nodes() if graph.out_degree(x)==0 and graph.in_degree(x)==1]
        #connected components
        largest = max(nx.strongly_connected_components(graph), key=len)
        
    
        #page rank features
        pr = nx.pagerank(graph, alpha=0.85)
        first_part.append(nx.number_of_nodes(graph))
        first_part.append(nx.number_of_nodes(graph))
        first_part.append(nx.number_of_edges(graph))
        first_part.append(nx.average_clustering(graph))
        first_part.append(len(largest))
        first_part.append(len(source))
        first_part.append(len(sink))
        first_part.append(max(pr))
        first_part.append(min(pr))
        runInParallel(avg(pr), stnd_dev(pr), count_triangles(graph))
        print(first_part)
        #avg_pr = avg(pr)
        #stnd_pr = stnd_dev(pr)
        #triangles = count_triangles(graph)
        
        #graph feature list
        another = torch.tensor(((first_part)))
        data_list.append([another, torch.tensor((data.y, 1 ,1))])
        #data_list.append([torch.Tensor(((nx.number_of_nodes(graph), nx.number_of_edges(graph), nx.average_clustering(graph),  
                                       #  len(largest), len(source), len(sink), max(pr), min(pr)))), 
    return(data_list)

In [None]:
nx.is_directed_acyclic_graph(graph)

In [40]:
dag=0
non_dag=0
for data in wico:
    graph = to_networkx(data)
    count_triangles(graph)
    #if nx.is_directed_acyclic_graph(graph):
     #   dag+=1
    #else:
    #    non_dag+=1
        
#print(dag)
#print(non_dag)

0.4888888888888889
0.8936170212765957
1.6195652173913044
2.9565217391304346
2.5416666666666665
no cycle
0.05319148936170213
0.15384615384615385
0.7391304347826086
0.21212121212121213
0.30851063829787234
1.511111111111111
1.0277777777777777
0.011111111111111112
0.034482758620689655
0.9736842105263158
0.2
no cycle
1.5730337078651686
1.3076923076923077
0.8484848484848485
0.5769230769230769
0.10989010989010989
0.14285714285714285
0.4
no cycle
0.25
no cycle
0.34782608695652173
0.16666666666666666
no cycle
no cycle
1.2
0.40625
1.1219512195121952
3.9381443298969074
0.09090909090909091
no cycle
0.21568627450980393
no cycle
0.5353535353535354
no cycle
0.07317073170731707
0.17391304347826086
0.41818181818181815
2.4545454545454546
0.38461538461538464
no cycle
22.59259259259259
0.05
1.6292134831460674
no cycle
0.011235955056179775
0.14285714285714285
1.5757575757575757
0.2631578947368421
no cycle
0.7157894736842105
no cycle
2.9166666666666665
no cycle
30.16216216216216
0.27906976744186046
no cycle

KeyboardInterrupt: 