# Sample queries exploring Graph structure in GEMD database


In [111]:
import SciServer.CasJobs as cj
from pprint import pprint
import json
import pandas

In [112]:
DATABASE='GEMD'

count for each node how many distinct nodes can be reached from it

In [113]:
%%time
sql="""
with gr as (
select c.uid as root_uid
,      c.gemd_type as root_type
,      0 as level
,      cast(NULL as varchar(64)) as endpoint_uid
,      c.uid as from_uid, cast(NULL as bigint) as edge_id, cast(NULL as varchar(64)) as gemd_ref
,      cast(gemd_type+c.uid as varchar(max)) as [path]
  from GEMDObject c
 union all
select gr.root_uid, gr.root_type, gr.level+1, e.to_uid
,      e.to_uid, e.id, e.gemd_ref
,      gr.path+'==>'+e.gemd_ref+':'+e.to_uid
  from gr
  join GEMDEdge e on e.from_uid=gr.from_uid
where gr.level < 16
)
select root_uid, root_type, count(distinct endpoint_uid) as num_out_nodes
  from gr
group by root_type, root_uid
 order by num_out_nodes desc
"""
nodes_out=cj.executeQuery(sql, DATABASE)

CPU times: user 967 ms, sys: 140 ms, total: 1.11 s
Wall time: 17.3 s


In [114]:
nodes_out.head(10)

Unnamed: 0,root_uid,root_type,num_out_nodes
0,adaa778f-62d8-476f-a774-4a3877a53d05,ingredient_run,13
1,e190aa30-a7d1-44c1-89a6-3262939803c4,ingredient_run,13
2,daea63f1-9ca9-4027-9c03-a1ac6c178342,ingredient_run,13
3,b8610aac-5b37-4b22-b17b-00262286fce7,ingredient_run,13
4,7d62501f-7a29-4c85-9fab-a869f6923bb2,ingredient_run,13
5,9cab251d-ff37-428c-97e2-77c725e825d6,ingredient_run,13
6,3ccc45be-c8b0-455c-a8e6-1f07472ff40c,ingredient_run,13
7,0b6a2404-634d-4868-be85-5fa90597b7c5,ingredient_run,13
8,300dc1dc-0fb0-40c5-a690-3a8c6545404d,ingredient_run,13
9,04b9166d-5cb6-4a0d-9e83-383a3fbfd6ab,ingredient_run,13


count for each node how many distinct nodes can reach it

In [115]:
%%time
sql="""
with gr as (
select c.uid as root_uid
,      c.gemd_type as root_type
,      0 as level
,      cast(NULL as varchar(64)) as endpoint_uid
,      c.uid as from_uid, cast(NULL as bigint) as edge_id, cast(NULL as varchar(64)) as gemd_ref
,      cast(gemd_type+c.uid as varchar(max)) as [path]
  from GEMDObject c
 union all
select gr.root_uid, gr.root_type, gr.level+1, e.to_uid
,      e.to_uid, e.id, e.gemd_ref
,      gr.path+'==>'+e.gemd_ref+':'+e.to_uid
  from gr
  join GEMDEdge e on e.from_uid=gr.from_uid
where gr.level < 16
)
select endpoint_uid, count(distinct root_uid) as num_in_nodes
  from gr
group by endpoint_uid
 order by num_in_nodes desc
"""
nodes_in=cj.executeQuery(sql, DATABASE)

CPU times: user 128 ms, sys: 38.7 ms, total: 166 ms
Wall time: 33.7 s


## visualize
Goal is to get a visualization vapability ala that supported by 
<a href="http://www.thebrain.com" target="_blank">http://www.thebrain.com</a>. \
This allows one to trace edges from a starting node, restricting views to only that node and nodes 1 or 2 steps away. \
Needs code development!

In [116]:
import networkx as nx
G = nx.Graph()

take a node from nodes_out and find the graph reachable from it

In [117]:
UID = nodes_out.iloc[0].root_uid
UID
# UID = '01b44457-6ebe-43bc-b316-66fcff35957e'

'adaa778f-62d8-476f-a774-4a3877a53d05'

In [118]:
sql=f"""
with gr as (
select c.uid as node_uid
,      c.gemd_type as node_type
,      c.context as node_context
,      cast(c.gemd_type+' ['+c.uid+']' as varchar(128)) as Target
,      cast(-1 as bigint) as edge_id
,      cast(NULL as varchar(64)) as from_uid
,      cast(NULL as varchar(128)) as Source
,      0 as level
  from GEMDObject c
  where uid='{UID}'
 union all
select c.uid as node_uid
,      c.gemd_type as node_type
,      c.context as node_context
,      cast(c.gemd_type+' ['+c.uid+']' as varchar(128)) as Target
,      e.id as edge_id
,      gr.node_uid as from_uid
,      gr.Target as Source
,      gr.level+1 as level
  from gr
  join GEMDEdge e on e.from_uid=gr.node_uid
  join GEMDObject c on c.uid=e.to_uid
where gr.level < 16
)
select Source,Target
--,node_type,node_context
  from gr
 where Source is not null
"""
gr=cj.executeQuery(sql,DATABASE)
gr

Unnamed: 0,Source,Target
0,ingredient_run [adaa778f-62d8-476f-a774-4a3877...,material_run [5c6e208a-7c91-49d5-a6ba-023911c3...
1,ingredient_run [adaa778f-62d8-476f-a774-4a3877...,ingredient_spec [276a2071-594e-4c36-bf81-a867c...
2,ingredient_run [adaa778f-62d8-476f-a774-4a3877...,process_run [58d4bf30-de0d-4972-8600-7dd33e71b...
3,process_run [58d4bf30-de0d-4972-8600-7dd33e71b...,process_spec [5c649b97-6770-487e-834d-c228a314...
4,process_spec [5c649b97-6770-487e-834d-c228a314...,process_template [ee2196b4-b0eb-4c47-b2ff-4ba3...
5,ingredient_spec [276a2071-594e-4c36-bf81-a867c...,material_spec [7595a295-533f-406a-8b0a-eccd4b1...
6,ingredient_spec [276a2071-594e-4c36-bf81-a867c...,process_spec [5c649b97-6770-487e-834d-c228a314...
7,process_spec [5c649b97-6770-487e-834d-c228a314...,process_template [ee2196b4-b0eb-4c47-b2ff-4ba3...
8,material_spec [7595a295-533f-406a-8b0a-eccd4b1...,process_spec [ed82e185-d722-4b3a-a72a-5c80d42a...
9,material_spec [7595a295-533f-406a-8b0a-eccd4b1...,material_template [44b8ff23-00dd-4551-8196-68c...


In [119]:
UID = 'adaa778f-62d8-476f-a774-4a3877a53d05'
sql=f"""
declare @uid varchar(40) = '{UID}'
;
with gr as (
select cast(NULL as varchar(64)) as Source_uid
,      cast(NULL as varchar(32)) as source_type
,      c.uid Target_uid
,      c.gemd_type as Target_type
,      cast(NULL as bigint) as edge_id
,      cast(NULL as varchar(64)) as gemd_ref 
,      0 as level
  from GEMDObject c
  where uid=@UID
 union all
select gr.Target_uid as Source_uid
,      gr.target_type as source_type
,      c.uid as Target_uid
,      c.gemd_type as target_type
,      e.id as edge_id
,      e.gemd_ref
,      gr.level+1 as level
  from gr
  join GEMDEdge e on e.from_uid=gr.Target_uid
  join GEMDObject c on c.uid=e.to_uid
where gr.level < 16
), edges as (
select Source_uid,Target_uid, source_type, target_type,gemd_ref, level
  from gr
 where Source_uid is not null
)
, nodes as (
select source_uid as node_uid, source_type as node_type
  from edges 
union
select Target_uid , target_type
from edges
)
select n.node_uid, n.node_type, max(a.attributes ) as attributes
,     (select e.target_uid , e.gemd_ref
          from edges e
         where e.source_uid=n.node_uid
           for json path) as out_edges
  from nodes n
  join GEMDObjectAttributes a on a.uid=n.node_uid
group by node_uid,node_type
"""
df = cj.executeQuery(sql,"GEMD",format='pandas')

In [120]:
nodes = []
edges = {}
nodes_d = {}
for t in df.itertuples():
    atts = {"node_type":t.node_type}
    if t.attributes is not None and len(t.attributes.strip()) > 0:
        tatts = json.loads(t.attributes)
        atts.update(tatts)
    LABEL = t.node_type+':'+t.node_uid
    node = (LABEL,atts)
    nodes.append(node)
    nodes_d[t.node_uid] = node
    edges[t.node_uid] = t.out_edges

In [121]:
all_edges = []
for k,node in nodes_d.items():
    try:
        out_edges = json.loads(edges[k])
        oe = [(node[0],nodes_d[e['target_uid']][0]) for e in out_edges]
        all_edges+=oe
    except:
        pass

In [122]:
G = nx.DiGraph()
G.add_nodes_from(nodes)
G.add_edges_from(all_edges)

In [123]:
GraphWidget(graph=G)

GraphWidget(layout=Layout(height='500px', width='100%'))

In [18]:
G=nx.from_pandas_edgelist(gr,'Source','Target',create_using=nx.DiGraph)#,'node_type')

# yfiles_jupyter_graphs

In [None]:
%pip install yfiles_jupyter_graphs

In [1]:
%pip install igraph

Note: you may need to restart the kernel to use updated packages.


In [2]:
from yfiles_jupyter_graphs import GraphWidget

In [12]:
from datetime import datetime
import string
import random

In [9]:
datetime.now()

datetime.datetime(2023, 6, 22, 10, 19, 14, 350645)

In [3]:
import networkx  as nx #import Graph, DiGraph, MultiGraph, MultiDiGraph, path_graph
import igraph

In [51]:
# def make_graph(graph_class):
G = nx.DiGraph()

In [43]:
for i in range(1,10):
    G.add_node(f"foo-{i}", time=datetime.now(), name=''.join(random.choices(string.ascii_uppercase + string.digits, k=10)))
    # G.add_node((f"foo-{i}", {"time":datetime.now(), "name":''.join(random.choices(string.ascii_uppercase + string.digits, k=10))}))
for n in G.nodes:
    G.add_edges_from([(n,f"foo-{o}") for o in random.sample(range(1,10),3)])

In [50]:
nodes = [(f"foo-{i}", {"time":datetime.now(), 
                       ''.join(random.choices(string.ascii_uppercase + string.digits, k=10)):i}) for i in range(1,10)]

In [52]:
G.add_nodes_from(nodes)

In [20]:
for n in G.nodes:
    G.add_edges_from([(n,f"foo-{o}") for o in random.sample(range(1,10),3)])

In [53]:
GraphWidget(graph = G)

GraphWidget(layout=Layout(height='500px', width='100%'))

In [4]:
        
    G.add_nodes_from([2, 3])
    H = nx.path_graph(10)
    G.add_nodes_from(H)
    G.add_edge(1, 2)
    G.add_edges_from([(1, 2), (1, 3)])
    G.add_edges_from(H.edges)
    G.add_node(1, time="5pm")
    G.add_nodes_from([3], time="2pm")
    for n in G.nodes:
        G.add_edge("origin", n)
    G.nodes[1]["room"] = 714
    G.add_edge(1, 2, weight=4.7)
    G.add_edges_from([(3, 4), (4, 5)], color="red")
    G.add_edges_from([(1, 2, {"color": "blue"}), (2, 3, {"weight": 8})])
    G[1][2]["weight"] = 4.7
    G.edges[1, 2]["weight"] = 4
    return G

In [5]:
nxG = make_graph(nx.Graph)

In [6]:
GraphWidget(graph = nxG)

GraphWidget(layout=Layout(height='500px', width='100%'))

# neo4j graph

In [20]:
import neo4j
import neo4j.graph as ng

In [29]:
nG = ng.Graph()

In [30]:
n1 = ng.Node(nG,"1","1")
n2 = ng.Node(nG,"2","2")

In [31]:
for n in nG.nodes:
    print(n)
   

In [19]:
nG.__dict__#nodes["1"] = ng.Node(nG,"1","1")

{'_nodes': {},
 '_legacy_nodes': {},
 '_relationships': {},
 '_legacy_relationships': {},
 '_relationship_types': {},
 '_node_set_view': <neo4j.graph.EntitySetView at 0x7f83fb998550>,
 '_relationship_set_view': <neo4j.graph.EntitySetView at 0x7f83fb998460>}

In [22]:
neo4j.Graph()

AttributeError: module neo4j has no attribute Graph

# igraph

In [32]:
import igraph

In [37]:
f = igraph.Graph([(0,1), (0,2), (2,3), (3,4), (4,2), (2,5), (5,0), (6,3), (5,6)])
f.vs["id"] = ["Alice", "Bob", "Claire", "Dennis", "Esther", "Frank", "George"]
f.vs["name"] = ["Alice", "Bob", "Claire", "Dennis", "Esther", "Frank", "George"]
f.vs["age"] = [25, 31, 18, 47, 22, 23, 50]
f.vs["gender"] = ["f", "m", "f", "m", "f", "m", "m"]
f.es["is_formal"] = [False, False, True, True, True, False, True, False, False]

In [38]:
GraphWidget(graph = f)

GraphWidget(layout=Layout(height='500px', width='100%'))