## NCI-PID Node Identifier Reorganization

This notebook modifies the nodes in one or more networks (specified individually or as a network set) from the NCI-PID pathway networks, as those networks were structured in November 2017.

This task was motivated by the needs of the CRAVAT/MuPIT application and their copy of the NCI-PID pathways.

(It is, however, a general improvement for these networks, and probably for other PathwayCommons EBS-derived networks as well.)

The changes for each node representing a gene:

* Node name = HUGO gene symbol
* `represents` attribute = NCBI gene id
* The former node name becomes an alias


<hr>
Imports, username, password

In [15]:
import ndex2
import json
import requests
# Server that networks are loaded from, plus the account that the
# (currently commented-out) upload step would use.
# NOTE(review): the credentials are hard-coded in the notebook; consider
# loading them from the environment before sharing this file.
my_server = "public.ndexbio.org"
my_username = "drh"
my_password = "drh"

Network or NetworkSet UUID

In [2]:
# Networks to process. Either list the network UUIDs explicitly in `uuids`,
# or (eventually) name a network set in `set_uuid` to take them from.
set_uuid = None  # UUID of a network set to get uuids from; not used yet
uuids = [
    "6e798e11-6186-11e5-8ac5-06603eb7f303",
]  # one or more individually specified networks
# TODO: when set_uuid is given, fill `uuids` from that set (not implemented yet)

    

mygene.info access function

In [45]:
def query_mygene_x(q, tax_id='9606', entrezonly=True):
    """Run a single mygene.info GET query and return its first hit.

    Args:
        q: Query term, sent as the ``q`` request parameter.
        tax_id: Value sent as the ``species`` request parameter.
        entrezonly: When true, ``entrezonly=true`` is added to the request.

    Returns:
        The first element of the response's ``hits`` list, or ``False`` when
        the response contains no hits.
    """
    # Let requests build the query string so that characters such as spaces,
    # '&' or '#' inside q are percent-encoded instead of corrupting the URL
    # (the previous string concatenation sent them through unescaped).
    params = {'q': q, 'species': tax_id}
    if entrezonly:
        params['entrezonly'] = 'true'
    r = requests.get('http://mygene.info/v3/query', params=params)
    hits = r.json().get("hits")
    if hits:
        return hits[0]
    return False

def query_batch(query_string, tax_id='9606', scopes="symbol, entrezgene, alias, uniprot", fields="symbol, entrezgene"):
    """POST a query to the mygene.info ``/v3/query`` endpoint.

    Args:
        query_string: Sent as the ``q`` form field.
        tax_id: Sent as the ``species`` form field.
        scopes: Sent as the ``scopes`` form field.
        fields: Sent as the ``fields`` form field.

    Returns:
        The decoded JSON body of the response.
    """
    form = dict(species=tax_id, scopes=scopes, fields=fields, q=query_string)
    response = requests.post('http://mygene.info/v3/query', form)
    return response.json()

def query_mygene(q):
    """Resolve a query term to a ``(symbol, entrezgene)`` pair.

    Args:
        q: Query term, passed unchanged to ``query_batch``.

    Returns:
        ``(symbol, entrezgene)`` taken from the first hit that carries both
        fields, or ``None`` when no hit qualifies.
    """
    hits = query_batch(q)
    # The loop below expects a list of hit dicts. If the decoded JSON has
    # another shape (presumably possible for an error response - confirm
    # against the service), iterating it would yield e.g. dict keys and
    # crash on ``.get``; treat that as "no hit" instead.
    if not isinstance(hits, list):
        return None
    for hit in hits:
        if not isinstance(hit, dict):
            continue
        symbol = hit.get('symbol')
        gene_id = hit.get('entrezgene')
        if symbol and gene_id:
            return (symbol, gene_id)
    return None

# per node update method
def update_node(node, nicecx):
    """Look up the gene for a node via ``query_mygene`` and print the outcome.

    The node's name is queried first. If that gives no hit, each value of the
    node's "alias" attribute is tried in turn, using the text after the last
    ':' as the query term. The node and network are not modified here; the
    result is only printed.

    Args:
        node: Node object exposing ``get_name()``.
        nicecx: Network object exposing ``get_node_attribute(node, name)``.
    """
    name = node.get_name()
    print("\nnode %s" % name)

    aliases = nicecx.get_node_attribute(node, "alias")
    # A node may have no alias attribute at all, and a single alias may come
    # back as a bare string; normalise both so the fallback loop neither
    # raises on None nor iterates a string character by character.
    if not aliases:
        aliases = []
    elif isinstance(aliases, str):
        aliases = [aliases]
    # TODO: once the node is actually rewritten, add its former name to the
    # aliases.

    hit = query_mygene(name)
    if hit:
        print("hit: %s" % json.dumps(hit, indent=4))
        return

    for alias in aliases:
        # assume uniprot: keep only the accession after the prefix
        term = alias.split(':')[-1]
        hit = query_mygene(term)
        if hit:
            print("hit: %s" % json.dumps(hit, indent=4))
            return

    print("no gene hit for node %s " % name)

In [46]:
# TBD: create output network set

# iteration over networks
for network_uuid in uuids:
    # load network in NiceCX
    ncx = ndex2.create_nice_cx_from_server(server=my_server, uuid=network_uuid)
    # The node id is not used. Binding it to ``id`` at notebook top level
    # would replace the builtin ``id`` for every later cell, so use a
    # throwaway name instead.
    for _node_id, node in ncx.get_nodes():
        update_node(node, ncx)
    # output network (TBD: in output set)
    #print("writing %s " % ncx.get_name())
    #ncx.upload_to(my_server, my_username, my_password)
    


node GTP
hit: [
    "MTG1",
    92170
]

node RHOA
hit: [
    "RHOA",
    387
]

node VCAM1
hit: [
    "VCAM1",
    7412
]

node FAK1
hit: [
    "PTK2",
    5747
]

node ITB1
hit: [
    "ITGB1",
    3688
]

node MADCA
hit: [
    "MADCAM1",
    8174
]

node ITB7
hit: [
    "ITGB7",
    3695
]

node ITA4
hit: [
    "ITGA4",
    3676
]

node PAXI
hit: [
    "PXN",
    5829
]
