In [None]:
# default_exp utils

# Utils

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
import json
import pandas as pd
import ipycytoscape
from collections import OrderedDict

def parse_result_row_graph(jsonobj):
    '''
    Parse one row of a Neo4j HTTP API graph result.

    input: element of the result list; it must carry a 'graph' entry holding
           'nodes' and 'relationships' lists
    output: tuple of nodes and edges (two lists of plain dicts)

    Each node dict starts with 'identifier', 'name' and 'label' (the node's
    first label, or None when the node has no labels), followed by the
    remaining properties in their original order. Each edge dict starts with
    'type' and the identifier/name of its start and end nodes, followed by the
    relationship's own properties.

    The input object is not modified.

    Raises KeyError when a node has no 'identifier' or 'name' property, or
    when a relationship references a node id that is absent from the row.
    '''
    headcols=['identifier','name','label']

    def parse_nodes(gobj):
        # Maps the node id used by relationships -> flattened node record.
        node_dict={}
        for n in gobj['nodes']:
            labels=n['labels']
            # Work on a copy so the caller's JSON object is left untouched.
            props=dict(n['properties'])
            # Only the first label is kept; unlabeled nodes get None.
            props['label']=labels[0] if labels else None

            # Head columns first, then the remaining keys in source order.
            # (A set difference here would make the key order vary between runs.)
            orderedkeys=headcols+[k for k in props if k not in headcols]
            node_dict[n['id']]={k: props[k] for k in orderedkeys}

        return node_dict

    def parse_edges(gobj, node_dict):
        edges=[]
        for r in gobj['relationships']:
            start_node=node_dict[r['startNode']]
            end_node=node_dict[r['endNode']]

            edge_dict={
                'type': r['type'],
                'start_identifier': start_node['identifier'],
                'start_name': start_node['name'],
                'end_identifier': end_node['identifier'],
                'end_name': end_node['name']
            }

            # Append the relationship's own properties; a property that shares
            # a name with one of the keys above replaces that value.
            edge_dict.update(r['properties'])

            edges.append(edge_dict)

        return edges

    gobj=jsonobj['graph']

    node_dict=parse_nodes(gobj)
    nodes=list(node_dict.values())

    edges=parse_edges(gobj, node_dict)

    return nodes, edges

def drop_duplicates(items):
    '''
    Remove repeated records from a list of dicts.

    The records are loaded into a pandas DataFrame, duplicate rows are dropped
    with DataFrame.drop_duplicates, and the surviving rows are converted back
    into a list of dicts.
    '''
    return pd.DataFrame(items).drop_duplicates().to_dict(orient='records')

def parse_result_graph(data):
    '''
    Parse a Neo4j HTTP API graph result (the list of result rows).

    Every row is parsed with parse_result_row_graph, the per-row nodes and
    edges are pooled, and each pool is passed through drop_duplicates.

    output: tuple of nodes and edges
    '''
    all_nodes, all_edges = [], []
    for row in data:
        row_nodes, row_edges = parse_result_row_graph(row)
        all_nodes.extend(row_nodes)
        all_edges.extend(row_edges)

    return drop_duplicates(all_nodes), drop_duplicates(all_edges)

def graph_renderer(neo4j_output):
    '''
    Turn a Neo4j HTTP API response into a dict with 'nodes' and 'edges'.

    Only the first entry of neo4j_output['results'] is read; its 'data' list
    is handed to parse_result_graph.
    '''
    first_result = neo4j_output['results'][0]
    nodes, edges = parse_result_graph(first_result['data'])

    return {'nodes': nodes, 'edges': edges}

def row_renderer(neo4j_output):
    '''
    Convert the first result of a Neo4j HTTP API response into row records.

    input: response dict; neo4j_output['results'][0] must provide 'columns'
           and 'data', where every element of 'data' has a 'row' list
    output: list of dicts, one per row, mapping column name -> value

    Records are built directly from the JSON values instead of going through
    a DataFrame, so values keep their original types (a null stays None and
    an integer column that contains a null is not turned into floats/NaN).

    Raises ValueError when a row's length differs from the number of columns.
    '''
    output=neo4j_output['results'][0]
    cols=output['columns']

    records=[]
    for e in output['data']:
        row=e['row']
        # zip() would silently truncate, so reject mismatched rows explicitly.
        if len(row) != len(cols):
            raise ValueError(
                '%d columns passed, row has %d values' % (len(cols), len(row))
            )
        records.append(dict(zip(cols, row)))

    return records

def get_graph_for_ipycytoscape(neo4j_graph_dict):
    '''
    Convert a {'nodes': [...], 'edges': [...]} dict into the
    {'nodes': [{'data': ...}], 'edges': [{'data': ...}]} layout that draw()
    passes to ipycytoscape.

    Node data carries 'id' (from 'identifier') and 'name'. Edge data carries
    'source' and 'target' (from 'start_identifier' / 'end_identifier') and,
    when the edge has one, its 'dwpc' value.
    '''
    def node_data(n):
        return {
            'id':n['identifier'],
            'name':n['name']
        }

    def edge_data(e):
        data={
            'source':e['start_identifier'],
            'target':e['end_identifier']
        }
        # draw() labels edges with data(dwpc); without forwarding the value
        # that label can never be rendered.
        if 'dwpc' in e:
            data['dwpc']=e['dwpc']
        return data

    g={}
    g['nodes']=[{'data': node_data(n)} for n in neo4j_graph_dict['nodes']]
    g['edges']=[{'data': edge_data(e)} for e in neo4j_graph_dict['edges']]

    return g

def draw(graphs):
    '''
    Render one or more graph dicts in a single ipycytoscape widget.

    input: iterable of {'nodes': [...], 'edges': [...]} dicts
    output: ipycytoscape.CytoscapeWidget with every graph added as a directed
            graph and the node/edge style applied
    '''
    node_rule = {
        'selector': 'node',
        'style': {
            'label': 'data(name)',
            'background-color': 'rgb(241, 102, 103)'
        }
    }
    edge_rule = {
        'selector': 'edge',
        'style': {
            'target-arrow-shape': 'triangle',
            'font-size': '8px',
            'curve-style': 'bezier',
            'label': 'data(dwpc)',
        }
    }

    # Convert every input first, then feed the widget.
    converted = list(map(get_graph_for_ipycytoscape, graphs))

    widget = ipycytoscape.CytoscapeWidget()
    for cyto_graph in converted:
        widget.graph.add_graph_from_json(cyto_graph, directed=True)
    widget.set_style([node_rule, edge_rule])

    return widget

SyntaxError: invalid syntax (<ipython-input-31-c3b3ceb1c6a3>, line 114)

In [None]:
sample_output=json.loads("""[
{"row": [[{"identifier": "10013", "license": "CCO 1.0", "chromosome": "X", "name": "HDAC6", "description": "histone deacetylase 6", "source": "Entrez Gene:210321", "url": "http://identifiers.org/ncbigene/10013"}, {"action_score": 943.0, "homology": 0.0, "experiments": 483.0, "textmining_transferred": 642.0, "combined_score": 923.0, "source": "String", "version": "v11.5", "license": "CC BY 4.0", "experiments_transferred": 86.0, "database": 600.0, "unbiased": "False", "action": "catalysis;reaction", "database_transferred": 0.0, "textmining": 0.0}, {"identifier": "3326", "license": "CCO 1.0", "chromosome": "6", "name": "HSP90AB1", "description": "heat shock protein 90 alpha family class B member 1", "source": "Entrez Gene:210321", "url": "http://identifiers.org/ncbigene/3326"}]], "meta": [[{"id": 6967, "type": "node", "deleted": false}, {"id": 7460908, "type": "relationship", "deleted": false}, {"id": 73199, "type": "node", "deleted": false}]], "graph": {"nodes": [{"id": "6967", "labels": ["Gene"], "properties": {"identifier": "10013", "license": "CCO 1.0", "chromosome": "X", "name": "HDAC6", "description": "histone deacetylase 6", "source": "Entrez Gene:210321", "url": "http://identifiers.org/ncbigene/10013"}}, {"id": "73199", "labels": ["Gene"], "properties": {"identifier": "3326", "license": "CCO 1.0", "chromosome": "6", "name": "HSP90AB1", "description": "heat shock protein 90 alpha family class B member 1", "source": "Entrez Gene:210321", "url": "http://identifiers.org/ncbigene/3326"}}], "relationships": [{"id": "7460908", "type": "INTERACTS_GiG2", "startNode": "73199", "endNode": "6967", "properties": {"action_score": 943.0, "homology": 0.0, "experiments": 483.0, "textmining_transferred": 642.0, "combined_score": 923.0, "source": "String", "version": "v11.5", "license": "CC BY 4.0", "experiments_transferred": 86.0, "database": 600.0, "unbiased": "False", "action": "catalysis;reaction", "database_transferred": 0.0, "textmining": 0.0}}]}}
,{"row": [[{"identifier": "10013", "license": "CCO 1.0", "chromosome": "X", "name": "HDAC6", "description": "histone deacetylase 6", "source": "Entrez Gene:210321", "url": "http://identifiers.org/ncbigene/10013"}, {"action_score": 911.0, "homology": 0.0, "experiments": 457.0, "textmining_transferred": 358.0, "combined_score": 987.0, "source": "String", "version": "v11.5", "license": "CC BY 4.0", "experiments_transferred": 260.0, "database": 800.0, "unbiased": "False", "action": "catalysis;reaction", "database_transferred": 0.0, "textmining": 795.0}, {"identifier": "7415", "license": "CCO 1.0", "chromosome": "9", "name": "VCP", "description": "valosin containing protein", "source": "Entrez Gene:210321", "url": "http://identifiers.org/ncbigene/7415"}]], "meta": [[{"id": 6967, "type": "node", "deleted": false}, {"id": 7452444, "type": "relationship", "deleted": false}, {"id": 83549, "type": "node", "deleted": false}]], "graph": {"nodes": [{"id": "6967", "labels": ["Gene"], "properties": {"identifier": "10013", "license": "CCO 1.0", "chromosome": "X", "name": "HDAC6", "description": "histone deacetylase 6", "source": "Entrez Gene:210321", "url": "http://identifiers.org/ncbigene/10013"}}, {"id": "83549", "labels": ["Gene"], "properties": {"identifier": "7415", "license": "CCO 1.0", "chromosome": "9", "name": "VCP", "description": "valosin containing protein", "source": "Entrez Gene:210321", "url": "http://identifiers.org/ncbigene/7415"}}], "relationships": [{"id": "7452444", "type": "INTERACTS_GiG2", "startNode": "83549", "endNode": "6967", "properties": {"action_score": 911.0, "homology": 0.0, "experiments": 457.0, "textmining_transferred": 358.0, "combined_score": 987.0, "source": "String", "version": "v11.5", "license": "CC BY 4.0", "experiments_transferred": 260.0, "database": 800.0, "unbiased": "False", "action": "catalysis;reaction", "database_transferred": 0.0, "textmining": 795.0}}]}}
,{"row": [[{"identifier": "10013", "license": "CCO 1.0", "chromosome": "X", "name": "HDAC6", "description": "histone deacetylase 6", "source": "Entrez Gene:210321", "url": "http://identifiers.org/ncbigene/10013"}, {"action_score": 900.0, "homology": 0.0, "experiments": 483.0, "textmining_transferred": 127.0, "combined_score": 955.0, "source": "String", "version": "v11.5", "license": "CC BY 4.0", "experiments_transferred": 136.0, "database": 900.0, "unbiased": "False", "action": "binding", "database_transferred": 0.0, "textmining": 0.0}, {"identifier": "10376", "license": "CCO 1.0", "chromosome": "12", "name": "TUBA1B", "description": "tubulin alpha 1b", "source": "Entrez Gene:210321", "url": "http://identifiers.org/ncbigene/10376"}]], "meta": [[{"id": 6967, "type": "node", "deleted": false}, {"id": 7402346, "type": "relationship", "deleted": false}, {"id": 50348, "type": "node", "deleted": false}]], "graph": {"nodes": [{"id": "6967", "labels": ["Gene"], "properties": {"identifier": "10013", "license": "CCO 1.0", "chromosome": "X", "name": "HDAC6", "description": "histone deacetylase 6", "source": "Entrez Gene:210321", "url": "http://identifiers.org/ncbigene/10013"}}, {"id": "50348", "labels": ["Gene"], "properties": {"identifier": "10376", "license": "CCO 1.0", "chromosome": "12", "name": "TUBA1B", "description": "tubulin alpha 1b", "source": "Entrez Gene:210321", "url": "http://identifiers.org/ncbigene/10376"}}], "relationships": [{"id": "7402346", "type": "INTERACTS_GiG2", "startNode": "50348", "endNode": "6967", "properties": {"action_score": 900.0, "homology": 0.0, "experiments": 483.0, "textmining_transferred": 127.0, "combined_score": 955.0, "source": "String", "version": "v11.5", "license": "CC BY 4.0", "experiments_transferred": 136.0, "database": 900.0, "unbiased": "False", "action": "binding", "database_transferred": 0.0, "textmining": 0.0}}]}}
]""")

In [None]:
# sample_output is a list of result rows (not a full API response dict),
# so index it by position to inspect the first row.
sample_output[0]

{'columns': ['source_name',
  'target_name',
  'dwpc',
  'pattern',
  'type_pattern',
  'pubmed_ids'],
 'data': [{'row': ['ELL2',
    'LMNA',
    1.586439689767536e-06,
    "(:Gene{identifier:'22936'})-[:INTERACTS_GiG3]->(:Gene{identifier:'5430'})-[:INTERACTS_GiG3]->(:Gene{identifier:'1111'})-[:INTERACTS_GiG3]->(:Gene{identifier:'4000'})",
    'GiG3GiG3GiG3',
    '22137580;22231121;24997600;2944599;9407024;;22012619;22988298;24554720;24662513;25429106;25561469;25712099;21712546;21851590;23911959;24275569;24719451;25850435;27251275;21712546;21851590;23911959;24275569;24719451;25850435;27251275'],
   'meta': [None, None, None, None, None, None],
   'graph': {'nodes': [], 'relationships': []}},
  {'row': ['ELL2',
    'IL7R',
    9.206391529650128e-07,
    "(:Gene{identifier:'22936'})-[:INTERACTS_GiG3]->(:Gene{identifier:'5430'})-[:INTERACTS_GiG3]->(:Gene{identifier:'672'})-[:INTERACTS_GiG3]->(:Gene{identifier:'3575'})",
    'GiG3GiG3GiG3',
    '22137580;22231121;24997600;2944599;9407024;;

## `parse_result_row_graph` Examples

In [None]:
# Each element of sample_output is one result row, which is the input
# parse_result_row_graph expects.
nodes, edges = parse_result_row_graph(sample_output[0])

TypeError: list indices must be integers or slices, not str

In [None]:
# Node records returned by parse_result_row_graph.
nodes

[{'identifier': '10013',
  'name': 'HDAC6',
  'label': 'Gene',
  'description': 'histone deacetylase 6',
  'source': 'Entrez Gene:210321',
  'license': 'CCO 1.0',
  'chromosome': 'X',
  'url': 'http://identifiers.org/ncbigene/10013'},
 {'identifier': '3326',
  'name': 'HSP90AB1',
  'label': 'Gene',
  'description': 'heat shock protein 90 alpha family class B member 1',
  'source': 'Entrez Gene:210321',
  'license': 'CCO 1.0',
  'chromosome': '6',
  'url': 'http://identifiers.org/ncbigene/3326'}]

In [None]:
# Edge records returned by parse_result_row_graph.
edges

[{'type': 'INTERACTS_GiG2',
  'start_identifier': '3326',
  'start_name': 'HSP90AB1',
  'end_identifier': '10013',
  'end_name': 'HDAC6',
  'action_score': 943.0,
  'homology': 0.0,
  'experiments': 483.0,
  'textmining_transferred': 642.0,
  'combined_score': 923.0,
  'source': 'String',
  'version': 'v11.5',
  'license': 'CC BY 4.0',
  'experiments_transferred': 86.0,
  'database': 600.0,
  'unbiased': 'False',
  'action': 'catalysis;reaction',
  'database_transferred': 0.0,
  'textmining': 0.0}]

## `parse_result_row_graph` Tests

In [None]:
# A single sample row holds two Gene nodes.
assert len(nodes) == 2

In [None]:
# A single sample row holds one relationship.
assert len(edges) == 1

In [None]:
# Every node record must expose the three head columns.
for node in nodes:
    for required in ('identifier', 'name', 'label'):
        assert required in node, required + ' must be provided.'

In [None]:
# Every edge record must expose its endpoints and its relationship type.
for edge in edges:
    for required in ('start_identifier', 'start_name', 'end_identifier', 'end_name', 'type'):
        assert required in edge, required + ' must be provided.'

## `parse_result_graph` Example

In [None]:
# parse_result_graph takes the whole list of rows and returns the pooled,
# de-duplicated nodes and edges.
nodes, edges=parse_result_graph(sample_output)

In [None]:
# Node records returned by parse_result_graph.
nodes

[{'identifier': '10013',
  'name': 'HDAC6',
  'label': 'Gene',
  'description': 'histone deacetylase 6',
  'source': 'Entrez Gene:210321',
  'license': 'CCO 1.0',
  'chromosome': 'X',
  'url': 'http://identifiers.org/ncbigene/10013'},
 {'identifier': '3326',
  'name': 'HSP90AB1',
  'label': 'Gene',
  'description': 'heat shock protein 90 alpha family class B member 1',
  'source': 'Entrez Gene:210321',
  'license': 'CCO 1.0',
  'chromosome': '6',
  'url': 'http://identifiers.org/ncbigene/3326'},
 {'identifier': '7415',
  'name': 'VCP',
  'label': 'Gene',
  'description': 'valosin containing protein',
  'source': 'Entrez Gene:210321',
  'license': 'CCO 1.0',
  'chromosome': '9',
  'url': 'http://identifiers.org/ncbigene/7415'},
 {'identifier': '10376',
  'name': 'TUBA1B',
  'label': 'Gene',
  'description': 'tubulin alpha 1b',
  'source': 'Entrez Gene:210321',
  'license': 'CCO 1.0',
  'chromosome': '12',
  'url': 'http://identifiers.org/ncbigene/10376'}]

In [None]:
# Edge records returned by parse_result_graph.
edges

[{'type': 'INTERACTS_GiG2',
  'start_identifier': '3326',
  'start_name': 'HSP90AB1',
  'end_identifier': '10013',
  'end_name': 'HDAC6',
  'action_score': 943.0,
  'homology': 0.0,
  'experiments': 483.0,
  'textmining_transferred': 642.0,
  'combined_score': 923.0,
  'source': 'String',
  'version': 'v11.5',
  'license': 'CC BY 4.0',
  'experiments_transferred': 86.0,
  'database': 600.0,
  'unbiased': 'False',
  'action': 'catalysis;reaction',
  'database_transferred': 0.0,
  'textmining': 0.0},
 {'type': 'INTERACTS_GiG2',
  'start_identifier': '7415',
  'start_name': 'VCP',
  'end_identifier': '10013',
  'end_name': 'HDAC6',
  'action_score': 911.0,
  'homology': 0.0,
  'experiments': 457.0,
  'textmining_transferred': 358.0,
  'combined_score': 987.0,
  'source': 'String',
  'version': 'v11.5',
  'license': 'CC BY 4.0',
  'experiments_transferred': 260.0,
  'database': 800.0,
  'unbiased': 'False',
  'action': 'catalysis;reaction',
  'database_transferred': 0.0,
  'textmining': 79

In [None]:
# Tabular view of the node records (one row per node).
df=pd.DataFrame(nodes)
df

Unnamed: 0,identifier,name,label,description,source,license,chromosome,url
0,10013,HDAC6,Gene,histone deacetylase 6,Entrez Gene:210321,CCO 1.0,X,http://identifiers.org/ncbigene/10013
1,3326,HSP90AB1,Gene,heat shock protein 90 alpha family class B mem...,Entrez Gene:210321,CCO 1.0,6,http://identifiers.org/ncbigene/3326
2,7415,VCP,Gene,valosin containing protein,Entrez Gene:210321,CCO 1.0,9,http://identifiers.org/ncbigene/7415
3,10376,TUBA1B,Gene,tubulin alpha 1b,Entrez Gene:210321,CCO 1.0,12,http://identifiers.org/ncbigene/10376


In [None]:
# Tabular view of the edge records (one row per edge).
pd.DataFrame(edges)

Unnamed: 0,type,start_identifier,start_name,end_identifier,end_name,action_score,homology,experiments,textmining_transferred,combined_score,source,version,license,experiments_transferred,database,unbiased,action,database_transferred,textmining
0,INTERACTS_GiG2,3326,HSP90AB1,10013,HDAC6,943.0,0.0,483.0,642.0,923.0,String,v11.5,CC BY 4.0,86.0,600.0,False,catalysis;reaction,0.0,0.0
1,INTERACTS_GiG2,7415,VCP,10013,HDAC6,911.0,0.0,457.0,358.0,987.0,String,v11.5,CC BY 4.0,260.0,800.0,False,catalysis;reaction,0.0,795.0
2,INTERACTS_GiG2,10376,TUBA1B,10013,HDAC6,900.0,0.0,483.0,127.0,955.0,String,v11.5,CC BY 4.0,136.0,900.0,False,binding,0.0,0.0


## `parse_result_graph` Test

In [None]:
# The three sample rows all contain the HDAC6 node plus one other gene, so
# de-duplication leaves 4 nodes; each row contributes one distinct edge.
assert len(nodes) == 4 
assert len(edges) == 3

## `graph_renderer` Test

## `row_renderer` Test