In [1]:
import pyTigerGraph as tg 
import cfg
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import pyTigerGraph as tg
import dgl
import networkx as nx
from heapq import nlargest, nsmallest

from gcn import GCN

In [2]:
# Connect to the hosted TigerGraph instance that holds the CrunchBase graph.
# The password and API token are read from the local `cfg` module rather than
# being written into the notebook.
conn = tg.TigerGraphConnection(
    ipAddress="https://crunchml.i.tgcloud.io",
    graphname="CrunchBasePre_2013",
    password=cfg.password,
    apiToken=cfg.token,
)

In [3]:
# Fetch the company-to-company link records produced by the installed
# "companyLinks" query; each record is a dict with 'src' and 'dest' keys.
results = conn.runInstalledQuery("companyLinks", {}, sizeLimit=300000000)["results"][0]["@@tupleRecords"]
print(results[:3])

# Draw up to 5000 links WITHOUT replacement. random.choices samples with
# replacement, which can return the same edge several times (shrinking the
# effective sample) and raises IndexError on an empty result list.
sample = random.sample(results, k=min(5000, len(results)))


[{'src': 'footballunited', 'dest': 'phuser'}, {'src': 'morningpapers', 'dest': 'phuser'}, {'src': 'phuser', 'dest': 'footballunited'}]


In [4]:
compToNum = {} # translation dictionary for company name to number (for dgl)
numToComp = {} # translation dictionary for number to company name


def _companyToNum(name):
    """Return the integer id for `name`, registering the company on first sight.

    The next free id is taken from the current size of `compToNum`, so ids are
    always consecutive (0, 1, 2, ...) and cannot be corrupted by unrelated code
    that rebinds a shared module-level counter.
    """
    if name not in compToNum:
        newId = len(compToNum)
        compToNum[name] = newId
        numToComp[newId] = name
    return compToNum[name]


def createEdgeList(result): # returns tuple of number version of edge
    """Translate one link record into a pair of integer node ids.

    Parameters:
        result: mapping with "src" and "dest" keys holding company names.

    Returns:
        (fromKey, toKey): the ids of the source and destination companies.
        Companies not seen before are added to `compToNum` / `numToComp`;
        the source is registered before the destination.
    """
    fromKey = _companyToNum(result["src"])
    toKey = _companyToNum(result["dest"])
    return (fromKey, toKey)

# Convert every sampled link record into a (source id, destination id) pair.
edges = list(map(createEdgeList, sample))
print(edges[:5])

[(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)]


In [15]:
# Training hyperparameters: the optimizer step size and the number of
# passes of the training loop.
learningRate = 1e-2
numEpochs = 100

In [5]:
# Build an undirected NetworkX graph straight from the integer edge list
# (nodes are created implicitly from the edge endpoints).
g = nx.Graph(edges)

# Wrap the NetworkX graph as a DGL graph for use with the GCN.
G = dgl.DGLGraph(g)

In [6]:
# Give every node a one-hot feature vector: row k of the identity matrix
# becomes the "feat" vector of node k.
nodeCount = G.number_of_nodes()
G.ndata["feat"] = torch.eye(nodeCount)

# Spot-check a single node's feature row.
print(G.nodes[2].data['feat'])


tensor([[0., 0., 1.,  ..., 0., 0., 0.]])


In [8]:
# Find one company known to have IPO'd and one known not to have, to serve as
# the two labelled nodes for training.
#
# None (rather than 0) marks "not found yet": node id 0 is a valid node, so a
# 0 sentinel cannot be distinguished from a genuine match on the first node,
# and a failed search would silently label node 0.
compIPO = None
compNonIPO = None
# Dedicated index name so this scan does not rebind other module-level counters.
nodeIdx = 0
while (compIPO is None or compNonIPO is None) and nodeIdx < G.number_of_nodes():
    result = conn.runInstalledQuery("checkIPO", {"norm_name":numToComp[nodeIdx]})["results"][0]["result"]
    if result == True:
        compIPO = nodeIdx
    else:
        compNonIPO = nodeIdx
    nodeIdx += 1

# Fail loudly instead of training on a missing or duplicated label.
if compIPO is None or compNonIPO is None:
    raise RuntimeError(
        "Could not find both an IPO and a non-IPO company among the %d graph nodes"
        % G.number_of_nodes()
    )

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
True


In [11]:
# Node features fed to the network (the "feat" tensor stored on the graph).
inputs = G.ndata["feat"]

# GCN from the local `gcn` module (a two-layer GCN, per the original note).
# NOTE(review): the arguments look like (input size, hidden size, number of
# classes) -- confirm against gcn.GCN.
net = GCN(G.number_of_nodes(), 20, 2)

# Semi-supervised setup: only two nodes carry a label -- the known non-IPO
# company (class 0) and the known IPO company (class 1).
labels = torch.tensor([0, 1])
labeled_nodes = torch.tensor([compNonIPO, compIPO])

optimizer = torch.optim.Adam(net.parameters(), lr=learningRate)

In [16]:
# Train the GCN, keeping a detached copy of the logits from every epoch.
all_logits = []
for epoch in range(numEpochs):
    # Clear gradients left over from the previous step before the new pass.
    optimizer.zero_grad()

    logits = net(G, inputs)
    # Snapshot of this epoch's raw scores, kept for later inspection.
    all_logits.append(logits.detach())

    # Log-probabilities per node; the loss only looks at the labelled nodes.
    logp = logits.log_softmax(dim=1)
    loss = F.nll_loss(logp[labeled_nodes], labels)

    loss.backward()
    optimizer.step()

    print('Epoch %d | Loss: %6.3e' % (epoch, loss.item()))

Epoch 0 | Loss: 1.827e-01
Epoch 1 | Loss: 1.632e-01
Epoch 2 | Loss: 1.453e-01
Epoch 3 | Loss: 1.289e-01
Epoch 4 | Loss: 1.141e-01
Epoch 5 | Loss: 1.009e-01
Epoch 6 | Loss: 8.925e-02
Epoch 7 | Loss: 7.887e-02
Epoch 8 | Loss: 6.967e-02
Epoch 9 | Loss: 6.160e-02
Epoch 10 | Loss: 5.461e-02
Epoch 11 | Loss: 4.848e-02
Epoch 12 | Loss: 4.311e-02
Epoch 13 | Loss: 3.842e-02
Epoch 14 | Loss: 3.437e-02
Epoch 15 | Loss: 3.085e-02
Epoch 16 | Loss: 2.776e-02
Epoch 17 | Loss: 2.506e-02
Epoch 18 | Loss: 2.271e-02
Epoch 19 | Loss: 2.067e-02
Epoch 20 | Loss: 1.888e-02
Epoch 21 | Loss: 1.730e-02
Epoch 22 | Loss: 1.591e-02
Epoch 23 | Loss: 1.468e-02
Epoch 24 | Loss: 1.360e-02
Epoch 25 | Loss: 1.264e-02
Epoch 26 | Loss: 1.179e-02
Epoch 27 | Loss: 1.104e-02
Epoch 28 | Loss: 1.036e-02
Epoch 29 | Loss: 9.747e-03
Epoch 30 | Loss: 9.200e-03
Epoch 31 | Loss: 8.710e-03
Epoch 32 | Loss: 8.266e-03
Epoch 33 | Loss: 7.864e-03
Epoch 34 | Loss: 7.496e-03
Epoch 35 | Loss: 7.161e-03
Epoch 36 | Loss: 6.856e-03
Epoch 37 | 

In [24]:
# Evaluate the model: split companies by predicted class using the logits
# recorded in the final training epoch, then check each prediction against the
# "checkIPO" query.
#
# Index -1 (not numEpochs-1) so the newest snapshot is used even if numEpochs
# was changed after the training cell ran.
predictions = list(all_logits[-1])
predictIPO = []
predictNonIPO = []

# Row a of the logits belongs to node a; a tie between the two scores counts as IPO.
for a, company in enumerate(predictions):
    if company[1] >= company[0]:
        predictIPO.append(numToComp[a])
    else:
        predictNonIPO.append(numToComp[a])


def _countMatches(companies, expected):
    """Return how many of `companies` have a checkIPO result equal to `expected`.

    Issues one "checkIPO" query per company name.
    """
    matches = 0
    for name in companies:
        result = conn.runInstalledQuery("checkIPO", {"norm_name":name})["results"][0]["result"]
        if result == expected:
            matches += 1
    return matches


print(len(predictIPO))
trueIPO = _countMatches(predictIPO, True)
falseIPO = len(predictIPO) - trueIPO

print("True IPO: ", trueIPO)
print("False IPO: ", falseIPO)

print(len(predictNonIPO))
trueNonIPO = _countMatches(predictNonIPO, False)
falseNonIPO = len(predictNonIPO) - trueNonIPO
print("True Non-IPO: ", trueNonIPO)
print("False Non-IPO: ", falseNonIPO)

888
True IPO:  17
False IPO:  871
3939
True Non-IPO:  43
False Non-IPO:  3896


In [28]:
# Overall accuracy: correctly classified companies (both classes) divided by
# the total number of companies that received a prediction.
correctCount = trueNonIPO + trueIPO
accuracy = correctCount / len(predictions)
print(accuracy)

0.8106484358814999
