In [1]:
from pyTigerGraph import TigerGraphConnection
import pyTigerGraph as tg
import json
import torch
import numpy as np
import pandas as pd
import warnings
import torch
import torch.nn.functional as F
from pyTigerGraph.gds.metrics import Accumulator, Accuracy
import matplotlib.pyplot as plt

In [23]:
def connection():
    with open("../../configs/tigergraph_config_1.json", "r") as config:
        args = json.load(config)
    
    conn = tg.TigerGraphConnection(
        host=args["host"],
        graphname=args["graphname"],
        username=args["username"],
        password=args["password"],
        gsqlSecret=args["gsqlSecret"],
        certPath=args["certPath"]
    )

    if not connected(conn):
        raise ConnectionError("Failed to connect to GSQL")
    return conn

def connected(conn):
    if not conn.echo() == "Hello GSQL":
        return False
    return True

In [24]:
conn = connection()

In [25]:
# https://dev.tigergraph.com/forum/t/tigergraph-python-connection-issue/2776
# echo | openssl s_client  -connect 140ae89e8e714a18a7a51df2beca1811.i.tgcloud.io:443 |  sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' > 'C:\Users\ryand\DSC180B\dsc180b-project\my-cert.txt'
# have to put my-cert.txt in C:\User\ryand\.gsql\my-cert.txt
print(conn.gsql("LS"))

---- Global vertices, edges, and all graphs
Vertex Types:
- VERTEX Specialty(PRIMARY_ID id STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="false"
- VERTEX SubSpecialty(PRIMARY_ID id STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="false"
- VERTEX Prescriber(PRIMARY_ID Prescriber_id STRING, pageRank FLOAT, communityId INT, articleRank FLOAT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="false"
- VERTEX Claim(PRIMARY_ID Claim_id STRING, rx_fill_date DATETIME, ICD10Code STRING, ICD10CodeDescription STRING, CodeGroupTitle STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="false"
- VERTEX Patient(PRIMARY_ID Patient_id STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="false"
Edge Types:
- DIRECTED EDGE submitted_by(FROM Claim, TO Prescriber) WITH REVERSE_EDGE="reverse_submitted_by"
- DIRECTED EDGE reverse_submitted_by(FROM Prescriber, TO Claim) WITH REVERSE_EDGE="submitted_by"
- DIRECTED EDGE associa

In [28]:
with open("../../configs/tigergraph_config_1.json", "r") as config:
        args = json.load(config)

conn.getToken(args['gsqlSecret'])

('ribbnnl33rik6d729koph9u4u2l7aq8h', 1678218479, '2023-03-07 19:47:59')

In [29]:
# Number of vertices for every vertex type
conn.getVertexCount('*')

{'Paper': 2708}

In [30]:
# Number of edges for every type
conn.getEdgeCount()

{'Cite': 10556}

## Feature Engineering

In [31]:
f = conn.gds.featurizer()
f.installAlgorithm("tg_pagerank")
f.installAlgorithm("tg_closeness_cent")
f.installAlgorithm("tg_betweenness_cent")
# f.installAlgorithm("tg_label_prop")
# f.installAlgorithm("tg_maximal_indep_set")

Installing and optimizing the queries, it might take a minute
Installing and optimizing the queries, it might take a minute
Installing and optimizing the queries, it might take a minute


'tg_betweenness_cent'

In [17]:
# only combination that works on our current data
# how influential a comment is
tg_pagerank_params = {
  "v_type": "Comment",
  "e_type": "replied_to",
  "result_attr": "pagerank",
}
results = pd.json_normalize(f.runAlgorithm("tg_pagerank",tg_pagerank_params)[0]['@@top_scores_heap'])
results

Unnamed: 0,Vertex_ID,score
0,c1920o8,41.80026
1,c19z9dl,41.12798
2,c18dyjj,34.89922
3,c18dqrr,32.40517
4,c18gp4r,30.50136
...,...,...
95,c19zapt,10.75795
96,c17jgqc,10.70455
97,c1a2kjk,10.69053
98,c193cb5,10.58636


In [42]:
tg_pagerank_params = {
  "v_type": "Paper",
  "e_type": "Cite",
  "result_attr": "pagerank",
}
results = pd.json_normalize(f.runAlgorithm("tg_pagerank",tg_pagerank_params)[0]['@@top_scores_heap'])
results

Unnamed: 0,Vertex_ID,score
0,1358,33.06401
1,1701,16.89220
2,1986,14.46646
3,306,13.72520
4,1810,9.81973
...,...,...
95,2238,2.28008
96,210,2.27165
97,702,2.26007
98,364,2.25291


In [22]:
tg_closeness_params = {
    "v_type": "User",
    "e_type": ["replied_to", "reverse_wrote"],
    "reverse_e_type": ["reverse_replied_to"],
    "result_attr": "closeness"
}
results = pd.json_normalize(f.runAlgorithm("tg_closeness_cent", tg_closeness_params))
results
f.runAlgorithm("tg_closeness_cent", tg_closeness_params)

HTTPError: 422 Client Error: query is disabled for url: https://scott-ucds-hc.i.tgcloud.io:443/restpp/query/Reddit/tg_closeness_cent

In [39]:
tg_closeness_params = {
    "v_type": "Paper",
    "e_type": "Cite",
    "reverse_e_type": "Cite",
    "result_attr": "closeness",
    "top_k": 100
}
# results = pd.json_normalize(f.runAlgorithm("tg_closeness_cent", tg_closeness_params))
# results
f.runAlgorithm("tg_closeness_cent", tg_closeness_params)

[{'top_scores': []}]

In [20]:
tg_betweenness_params = {
    "v_type": "Comment",
    "e_type": ["replied_to"],
    "reverse_e_type": ["reverse_replied_to"],
    "result_attr": "betweenness"
}
results = pd.json_normalize(f.runAlgorithm("tg_betweenness_cent", tg_betweenness_params))
results

HTTPError: 422 Client Error: query is disabled for url: https://scott-ucds-hc.i.tgcloud.io:443/restpp/query/Reddit/tg_betweenness_cent

In [44]:
tg_betweenness_params = {
    "v_type": "Paper",
    "e_type": "Cite",
    "reverse_e_type": "Cite",
    "result_attr": "betweenness"
}
# results = pd.json_normalize(f.runAlgorithm("tg_betweenness_cent", tg_betweenness_params))
# results
f.runAlgorithm("tg_betweenness_cent", tg_betweenness_params)

  and should_run_async(code)


[{'top_scores': []}]

## Data Loader

In [44]:
graph_loader = conn.gds.graphLoader(
    num_batches=1,
    v_extra_feats=["x"],
    e_extra_feats=[],
    output_format = "PyG")

Installing and optimizing queries. It might take a minute if this is the first time you use this loader.
Query installation finished.
