In [1]:
import pyTigerGraph as tg 
import cfg
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import pyTigerGraph as tg
import dgl
import networkx as nx
from heapq import nlargest, nsmallest

from gcn import GCN

In [2]:
# Connection to the TigerGraph instance that serves the CrunchBase graph.
# The password and API token are read from the local cfg module rather than
# being written into the notebook.
conn = tg.TigerGraphConnection(
    ipAddress="https://crunchml.i.tgcloud.io",
    graphname="CrunchBasePre_2013",
    password=cfg.password,
    apiToken=cfg.token,
)

In [4]:
# Smoke test: run the installed helpCheckIPO query for a single company and
# print the raw response.
ipoCheckResponse = conn.runInstalledQuery("helpCheckIPO", {"norm_name": "linkedin"})
print(ipoCheckResponse)

# Pull the company-to-company link records from the installed companyLinks query.
# The size limit and timeout are raised explicitly for this call.
linksResponse = conn.runInstalledQuery("companyLinks", {}, sizeLimit=512000000, timeout=320000)
results = linksResponse["results"][0]["@@tupleRecords"]
print(results[:3])
   



{'version': {'edition': 'enterprise', 'api': 'v2', 'schema': 0}, 'error': False, 'message': '', 'results': [{'result': True}]}
[{'src': 'FootballUnited', 'dest': 'Phuser'}, {'src': 'MorningPapers.com', 'dest': 'Phuser'}, {'src': 'Phuser', 'dest': 'FootballUnited'}]


In [5]:
# Draw 200 link records to keep the graph small. random.choices samples WITH
# replacement, so the same record can be drawn more than once.
sample = random.choices(population=results, k=200)

compToNum = {} # translation dictionary for company name to number (for dgl)
numToComp = {} # translation dictionary for number to company name

def _companyToNodeId(name):
    """Return the integer node id for company `name`.

    The first time a name is seen it is assigned the next unused id and
    recorded in both translation dictionaries; later calls return that same id.
    """
    if name not in compToNum:
        # Ids are handed out as 0, 1, 2, ... so the current dictionary size is
        # always the next free id. This avoids a separate global counter that
        # other cells could overwrite.
        nodeId = len(compToNum)
        compToNum[name] = nodeId
        numToComp[nodeId] = name
    return compToNum[name]

def createEdgeList(result): # returns tuple of number version of edge
    """Translate one link record into an edge between integer node ids.

    Parameters:
        result: mapping with "src" and "dest" keys holding company names.

    Returns:
        (fromKey, toKey) tuple of node ids. The source is resolved before the
        destination, so an unseen source always gets the lower id.

    Side effects:
        Adds any company not seen before to compToNum and numToComp.
    """
    return (_companyToNodeId(result["src"]), _companyToNodeId(result["dest"]))

# Convert every sampled record to an (id, id) pair, in sample order, and peek
# at the first few pairs.
edges = list(map(createEdgeList, sample))
print(edges[:5])

[(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)]


In [6]:
# Training hyperparameters.
numEpochs = 200      # number of optimisation steps run by the training loop
learningRate = 1e-2  # learning rate passed to the optimizer

In [7]:
# Build an undirected NetworkX graph directly from the integer edge list
# (repeated edges collapse into one), then wrap it as a DGL graph.
g = nx.Graph(edges)
G = dgl.DGLGraph(g)

In [8]:
# One-hot node features: an identity matrix with one row per node.
nodeCount = G.number_of_nodes()
G.ndata["feat"] = torch.eye(nodeCount)

# Show node 2's feature row as a quick check.
print(G.nodes[2].data['feat'])


tensor([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0.,

In [9]:
# Find one node whose company IPO'd and one whose company did not.
#
# None means "not found yet". 0 must not be used as the sentinel: 0 is a valid
# node id and is falsy, so "no example found" would be indistinguishable from
# "node 0 is the example", and node 0 could silently end up labelled with a
# class it does not belong to.
compIPO = None
compNonIPO = None
for nodeId in range(G.number_of_nodes()):
    if compIPO is not None and compNonIPO is not None:
        break  # one example of each class is enough; stop querying
    result = conn.runInstalledQuery("checkIPO", {"norm_name":numToComp[nodeId]})["results"][0]["result"]
    if result == True:
        if compIPO is None:
            compIPO = nodeId
    elif compNonIPO is None:
        compNonIPO = nodeId

# Fail loudly here instead of continuing with a missing class.
if compIPO is None or compNonIPO is None:
    raise RuntimeError(
        "Could not find both an IPO and a non-IPO company among the graph nodes "
        "(compIPO=%r, compNonIPO=%r); draw a new sample and rebuild the graph."
        % (compIPO, compNonIPO)
    )

In [10]:
# Features fed to the network: the "feat" tensor stored on the graph nodes.
inputs = G.ndata["feat"]

# Only two nodes carry a label: one known non-IPO company and one known IPO company.
labeled_nodes = torch.tensor([compNonIPO, compIPO])
labels = torch.tensor([0, 1])  # class 0 for compNonIPO, class 1 for compIPO (same order as labeled_nodes)

# Two layer GCN. The arguments are presumably (input size, hidden size,
# number of classes) -- confirm against gcn.py.
net = GCN(G.number_of_nodes(), 16, 2)
optimizer = torch.optim.Adam(net.parameters(), lr=learningRate)

In [16]:
all_logits = []  # one snapshot of the network output per epoch
for epoch in range(numEpochs):
    # Clear the gradients accumulated by the previous step.
    optimizer.zero_grad()

    logits = net(G, inputs)
    # Keep a snapshot detached from autograd for later inspection.
    all_logits.append(logits.detach())

    # Negative log-likelihood, computed on the labelled nodes only.
    log_probs = F.log_softmax(logits, dim=1)
    loss = F.nll_loss(log_probs[labeled_nodes], labels)

    loss.backward()
    optimizer.step()

    print('Epoch %d | Loss: %6.3e' % (epoch, loss.item()))

Epoch 0 | Loss: 1.250e-04
Epoch 1 | Loss: 1.246e-04
Epoch 2 | Loss: 1.243e-04
Epoch 3 | Loss: 1.239e-04
Epoch 4 | Loss: 1.236e-04
Epoch 5 | Loss: 1.231e-04
Epoch 6 | Loss: 1.228e-04
Epoch 7 | Loss: 1.225e-04
Epoch 8 | Loss: 1.221e-04
Epoch 9 | Loss: 1.218e-04
Epoch 10 | Loss: 1.215e-04
Epoch 11 | Loss: 1.210e-04
Epoch 12 | Loss: 1.208e-04
Epoch 13 | Loss: 1.204e-04
Epoch 14 | Loss: 1.200e-04
Epoch 15 | Loss: 1.197e-04
Epoch 16 | Loss: 1.194e-04
Epoch 17 | Loss: 1.191e-04
Epoch 18 | Loss: 1.187e-04
Epoch 19 | Loss: 1.184e-04
Epoch 20 | Loss: 1.181e-04
Epoch 21 | Loss: 1.177e-04
Epoch 22 | Loss: 1.174e-04
Epoch 23 | Loss: 1.171e-04
Epoch 24 | Loss: 1.167e-04
Epoch 25 | Loss: 1.164e-04
Epoch 26 | Loss: 1.161e-04
Epoch 27 | Loss: 1.157e-04
Epoch 28 | Loss: 1.154e-04
Epoch 29 | Loss: 1.151e-04
Epoch 30 | Loss: 1.148e-04
Epoch 31 | Loss: 1.145e-04
Epoch 32 | Loss: 1.142e-04
Epoch 33 | Loss: 1.139e-04
Epoch 34 | Loss: 1.135e-04
Epoch 35 | Loss: 1.132e-04
Epoch 36 | Loss: 1.129e-04
Epoch 37 | 

In [17]:
def _countMatching(companies, expected):
    """Count how many of `companies` have a checkIPO result equal to `expected`.

    Parameters:
        companies: list of company names, each sent to the checkIPO query.
        expected: the query result that counts as a match (True or False).

    Returns:
        (matching, notMatching) tuple of counts; the two always sum to
        len(companies).
    """
    matching = 0
    for name in companies:
        result = conn.runInstalledQuery("checkIPO", {"norm_name":name})["results"][0]["result"]
        if result == expected:
            matching += 1
    return matching, len(companies) - matching

# Use the most recent snapshot. Indexing with -1 (rather than numEpochs-1)
# stays correct if numEpochs is edited after training or the training loop
# was stopped early.
predictions = list(all_logits[-1])
predictIPO = []
predictNonIPO = []

# Row k of the logits belongs to node k. A node is predicted as IPO when its
# class-1 score is at least its class-0 score.
for nodeId, company in enumerate(predictions):
    if company[1] >= company[0]:
        predictIPO.append(numToComp[nodeId])
    else:
        predictNonIPO.append(numToComp[nodeId])

# Compare each predicted group with the checkIPO query result.
print(len(predictIPO))
trueIPO, falseIPO = _countMatching(predictIPO, True)
print("True IPO: ", trueIPO)
print("False IPO: ", falseIPO)

print(len(predictNonIPO))
trueNonIPO, falseNonIPO = _countMatching(predictNonIPO, False)
print("True Non-IPO: ", trueNonIPO)
print("False Non-IPO: ", falseNonIPO)

267
True IPO:  1
False IPO:  266
128
True Non-IPO:  128
False Non-IPO:  0


In [18]:
# Share of nodes whose predicted class matched the checkIPO result.
correctCount = trueIPO + trueNonIPO
accuracy = correctCount / len(predictions)
print(accuracy)

0.3265822784810127
