# SIGNOR Pipeline

This notebook creates a Network Set of SIGNOR pathway networks using the latest data downloaded via the SIGNOR REST service. Because SIGNOR is updated regularly, the notebook also demonstrates how to update the networks in the set.

## Import the Python Packages Needed in the Pipeline

In [88]:
import ndex2 # The ndex2 Python client
import itertools # convenient iteration utilities 
import requests
import json
import pandas as pd
import io
from datetime import datetime
import networkx as nx
print("hello")

hello


## NDEx Access

Check access to NDEx via your account and password. 

In [48]:
from os.path import isfile, expanduser

# Connection settings. Credentials are read from a JSON config file in the
# user's home directory so that they never appear in the notebook itself.
my_server = "public.ndexbio.org"
my_username = None
my_password = None
my_ndex = None
config_file = expanduser("~/ndex_tutorial_config.json")
save_tutorial_networks_to_my_account = True

if isfile(config_file):
    # `with` guarantees the file handle is closed even if json.load raises.
    with open(config_file, "r") as file:
        data = json.load(file)
    if data.get("password") and data.get("username"):
        my_username = data.get("username")
        my_password = data.get("password")
    else:
        print("Error: " + config_file + " does not define username and password")
else:
    print("Error: " + config_file + " was not found")

try:
    # Test the connection by fetching the server status counters.
    my_ndex = ndex2.client.Ndex2(my_server, my_username, my_password)
    my_ndex.update_status()
    networks = my_ndex.status.get("networkCount")
    users = my_ndex.status.get("userCount")
    groups = my_ndex.status.get("groupCount")
    print("my_ndex client: %s networks, %s users, %s groups" % (networks, users, groups))
    # TODO - check that user can access private account details

except Exception as inst:
    # Report the username only: the original handler referenced an undefined
    # name (raising NameError inside the handler) and echoed the password
    # into the saved notebook output.
    print("Could not access account %s on %s" % (my_username, my_server))
    print(inst.args)
    


my_ndex client: 17663 networks, 906 users, 90 groups


## Get the Pathway Ids from the Mapping File

In [6]:
def get_signor_network_ids(path="signor-path_mapping_file.txt"):
    """Load the SIGNOR pathway mapping table.

    Parameters
    ----------
    path : str, optional
        Location of the tab-separated mapping file. Defaults to
        "signor-path_mapping_file.txt" in the current working directory.

    Returns
    -------
    pandas.DataFrame
        The parsed table, exactly as returned by ``pd.read_csv(path, sep="\\t")``.
    """
    return pd.read_csv(path, sep="\t")

# Load the pathway mapping table once; the processing loop later iterates
# over its 'pathway_id' column.
network_id_dataframe = get_signor_network_ids()
# Bare expression: the notebook renders the table as this cell's output.
network_id_dataframe

Unnamed: 0,pathway_id,pathway_name
0,SIGNOR-AC,Adipogenesis
1,SIGNOR-Adipogenesis: BMP,Adipogenesis: BMP
2,SIGNOR-AMPK,AMPK Signaling
3,SIGNOR-Autophagy,Autophagy
4,SIGNOR-DR,Death Receptor Signaling
5,SIGNOR-EcmSynthesis,ECM: synthesis
6,SIGNOR-EGF,EGFR
7,SIGNOR-EosCCL11,Eosinophil: CCL11
8,SIGNOR-EosCCR1,Eosinophil: CCR1
9,SIGNOR-EosIL5,Eosinophil: IL5


## Function to Get SIGNOR Pathway Data and Create a Network

In [156]:
# human_tax_id = "9606"

def get_signor_network(pathway_id, timeout=60):
    """Download the relations of one SIGNOR pathway and build a NiceCX network.

    The relations table is fetched from the SIGNOR ``getPathwayData.php``
    endpoint with ``relations=only``, restricted to rows whose ``tax_id`` is
    "9606", and converted with ``ndex2.create_nice_cx_from_pandas`` using
    ENTITYA/ENTITYB as source/target and EFFECT as the edge interaction.

    Parameters
    ----------
    pathway_id : str
        SIGNOR pathway identifier, e.g. "SIGNOR-MM". Ids may contain spaces
        and colons (e.g. "SIGNOR-Adipogenesis: BMP"), so the id is sent as a
        properly encoded query parameter.
    timeout : float, optional
        Seconds to wait for the SIGNOR server before giving up (default 60).

    Returns
    -------
    The network object produced by ``ndex2.create_nice_cx_from_pandas``, with
    a "SIGNOR_ID" network attribute set to ``pathway_id``.

    Raises
    ------
    requests.HTTPError
        If the SIGNOR server answers with an error status.
    """
    # TODO - add an @context mapping the identifier namespaces used by SIGNOR
    # (ncbigene, hgnc.symbol, uniprot, cas) to their identifiers.org prefixes.

    usecols = ["entitya", "typea", "ida", "entityb", "typeb", "idb", "effect",
               "mechanism", "residue", "sequence", "tax_id", "cell_data",
               "tissue_data", "pmid", "direct", "notes", "annotator", "sentence"]

    # Let requests build the query string so special characters in the
    # pathway id are percent-encoded instead of being pasted into the URL.
    response = requests.get(
        "http://signor.uniroma2.it/getPathwayData.php",
        params={"pathway": pathway_id, "relations": "only"},
        timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    # Read every used column as a string so that ids, PMIDs and tax ids are
    # not coerced to numbers.
    dataframe = pd.read_csv(io.StringIO(response.text),
                            sep='\t',
                            converters={col: str for col in usecols},
                            usecols=usecols)

    # Keep only rows with tax_id 9606, then upper-case the column names,
    # which become the attribute names in the network.
    human_dataframe = dataframe.loc[dataframe["tax_id"] == "9606"]
    human_dataframe = human_dataframe.rename(columns=str.upper)

    network = ndex2.create_nice_cx_from_pandas(
        human_dataframe,
        source_field="ENTITYA",
        target_field="ENTITYB",
        source_node_attr=["TYPEA", "IDA"],
        target_node_attr=["TYPEB", "IDB"],
        edge_attr=[
            "MECHANISM",
            "RESIDUE",
            "SEQUENCE",
            "TAX_ID",
            "CELL_DATA",
            "TISSUE_DATA",
            "PMID",
            "DIRECT",
            "NOTES",
            "ANNOTATOR",
            "SENTENCE"
        ],
        edge_interaction="EFFECT")
    network.set_network_attribute("SIGNOR_ID", values=pathway_id)

    # Fix values for "DIRECT": "t" becomes "YES", any other non-empty value "NO".
    # NOTE(review): the attribute is read by edge id but written via the edge
    # object, exactly as before - confirm that the ndex2 version in use accepts
    # both forms and replaces (rather than appends) the attribute.
    for edge_id, edge in network.get_edges():
        direct = network.get_edge_attribute(edge_id, "DIRECT")
        if direct:
            network.set_edge_attribute(edge, "DIRECT", "YES" if direct == "t" else "NO")

    return network
    
# Build one sample pathway network as a quick check of the function above.
network = get_signor_network("SIGNOR-MM")
#network.get_summary()
# Bare expression: the notebook shows the object's repr as the cell output.
network

0


<nicecxModel.NiceCXNetwork.NiceCXNetwork at 0x1136bb198>

## Function to Get the Description and Other Info About the Pathway

In [157]:
def add_pathway_info(network, timeout=60):
    """Fetch the pathway description from SIGNOR and copy it onto ``network``.

    The pathway id is read from the network's "SIGNOR_ID" attribute. From the
    first row of the returned tab-separated table, column 1 becomes the
    network name, column 2 the "description" attribute and column 3 the
    "author" attribute (positions are 0-based).

    Parameters
    ----------
    network
        Network object carrying a "SIGNOR_ID" network attribute; it is
        modified in place.
    timeout : float, optional
        Seconds to wait for the SIGNOR server before giving up (default 60).

    Raises
    ------
    requests.HTTPError
        If the SIGNOR server answers with an error status.
    ValueError
        If the response contains no data rows.
    """
    signor_id = str(network.get_network_attribute("SIGNOR_ID"))
    # Pass the id through `params` so ids containing spaces or colons are
    # percent-encoded correctly.
    response = requests.get(
        "http://signor.uniroma2.it/getPathwayData.php",
        params={"pathway": signor_id},
        timeout=timeout)
    print(response.url)
    response.raise_for_status()

    dataframe = pd.read_csv(io.StringIO(response.text), sep='\t')
    if dataframe.empty:
        # Without this guard the positional lookups below fail with an
        # IndexError that does not say which pathway was at fault.
        raise ValueError("SIGNOR returned no pathway info for " + signor_id)

    network.set_name(dataframe.iat[0, 1])
    network.set_network_attribute("author", dataframe.iat[0, 3])
    network.set_network_attribute("description", dataframe.iat[0, 2])
    
# Annotate the sample network in place, then print the description that was
# stored on it to confirm the lookup worked.
add_pathway_info(network)
print(network.get_network_attribute("description"))


http://signor.uniroma2.it/getPathwayData.php?pathway=SIGNOR-MM
Melanoma is a skin cancer. It might exist as distinct subtypes associated with the activation of the MAPK and the PI3K pathways even if there is an association between distinct melanoma subtypes and molecular somatic events. Mucosal, acral, and to a lesser extent, lentigo malignant melanomas, can have increased copies of CDK4, and CCND1 (cyclinD), as well as mutations in KIT receptor. NRAS is mutated in about 18% of melanomas, and seems to be more frequently activated in nodular melanomas and melanomas due to chronic sun damage. BRAF has a recurrent V600E mutation (Gain of function) in about 50–70% of melanomas, however, this mutational event is frequently reported in benign pigmented naevi, and is not fully sufficient to induce a malignant transformation. MEK1 and MEK2 are downstream from RAS and RAF, on the same MAPK pathway. Activating mutations of MEK1 and MEK2 are found in 8% of melanomas. The PI3K pathway is activated

## Create the Merge Plan for SIGNOR Networks

The SIGNOR table has two columns that specify node type - one for the source and one for the target of the edge. These type attributes are therefore redundant and should be merged into one attribute. We define a plan for the attributes to merge.

In [158]:
# merge "TYPEA" and "TYPEB" to "TYPE"
# merge "TYPEA" and "TYPEB" to "TYPE"
# Each entry names two node attributes ("attribute1", "attribute2") and the
# attribute that receives the merged value ("new_attribute"). The entries are
# passed, in order, to network.merge_node_attributes by process_signor_id.
merge_plan = [
    {
        "attribute1": "TYPEA",
        "attribute2": "TYPEB",
        "new_attribute": "TYPE"
    }
]

## Specify the Network Containing the Visual Style to Apply to the SIGNOR Networks 

In [159]:
# UUID of the NDEx network whose visual properties are used to style each
# output network; process_signor_id passes it to network.apply_template.
# Template network:
# http://www.ndexbio.org/#/network/d3c5ca09-bb42-11e7-94d3-0ac135e8bacf
cytoscape_visual_properties_template_id = "d3c5ca09-bb42-11e7-94d3-0ac135e8bacf"

## Function to Apply a Layout to the Network

In [160]:
def cartesian(G):
    """Convert the positions stored on ``G.pos`` into a cartesian-layout aspect.

    ``G.pos`` is expected to map each node key to a sequence whose first two
    items are the x and y coordinates.

    Returns a one-element list holding a dict with the single key
    'cartesianLayout', whose value is a list of
    ``{'node': key, 'x': float, 'y': float}`` records in ``G.pos`` order.
    """
    layout_records = []
    for node_key, coordinates in G.pos.items():
        layout_records.append({
            'node': node_key,
            'x': float(coordinates[0]),
            'y': float(coordinates[1]),
        })
    return [{'cartesianLayout': layout_records}]

def apply_spring_layout(network):
    """Compute node positions for ``network`` and store them as an opaque aspect.

    NOTE: despite its name, this function applies networkx's *circular*
    layout, not a spring layout.

    The network is converted with ``network.to_networkx()``, the positions are
    attached to the graph as ``pos``, and the output of ``cartesian`` is
    stored on ``network`` under the aspect name "cartesianCoordinates".
    """
    graph = network.to_networkx()
    graph.pos = nx.drawing.circular_layout(graph)
    network.set_opaque_aspect("cartesianCoordinates", cartesian(graph))

## Function to Print a Sample of Nodes in the Network
#### (For a Spot-Check when Debugging)

In [161]:
def spot_check_nodes(network, attribute_name, limit=5):
    """Print one attribute for the first few nodes of ``network``.

    Parameters
    ----------
    network
        Object exposing ``get_nodes()`` (an iterable of ``(id, node)`` pairs)
        and ``get_node_attribute(node, attribute_name)``.
    attribute_name : str
        Name of the node attribute to display.
    limit : int, optional
        Maximum number of nodes to print (default 5).
    """
    # Look the attribute up on the `network` argument; the previous version
    # used an undefined global (`my_network`) and failed with NameError.
    for _node_id, node in itertools.islice(network.get_nodes(), limit):
        attribute_value = network.get_node_attribute(node, attribute_name)
        print("%s: %s = %s" % (node.get_name(), attribute_name, attribute_value))

## Function to Upload the Network

In [162]:
def upload_signor_network(network, server, username, password, update_uuid=False):
    """Send ``network`` to the server and return the server's message.

    When ``update_uuid`` is truthy, ``network.update_to`` is called with that
    UUID first; otherwise ``network.upload_to`` is called. Whatever the chosen
    method returns is passed back unchanged.
    """
    if not update_uuid:
        return network.upload_to(server, username, password)
    return network.update_to(update_uuid, server, username, password)

## Function to Process One SIGNOR Pathway ID

In [163]:
def process_signor_id(id, cytoscape_visual_properties_template_id, merge_plan, server, username, password):
    """Build, annotate, style, lay out and upload the network for one pathway.

    Parameters
    ----------
    id : str
        SIGNOR pathway identifier passed to ``get_signor_network``.
    cytoscape_visual_properties_template_id : str
        UUID of the network whose visual properties are applied via
        ``network.apply_template``.
    merge_plan : list of dict
        Each dict provides "attribute1", "attribute2" and "new_attribute",
        which are passed to ``network.merge_node_attributes``.
    server, username, password : str
        Credentials used both for fetching the template and for the upload.

    Returns
    -------
    The value returned by ``upload_signor_network``.
    """
    network = get_signor_network(id)
    add_pathway_info(network)
    for merge_spec in merge_plan:
        network.merge_node_attributes(
            merge_spec.get("attribute1"),
            merge_spec.get("attribute2"),
            merge_spec.get("new_attribute"))
    network.apply_template(
        username=username,
        password=password,
        server=server,
        uuid=cytoscape_visual_properties_template_id)
    # The notebook defines no `apply_layout`; calling it only worked while a
    # stale definition lingered in the kernel. Use the layout function that
    # is actually defined in this notebook.
    apply_spring_layout(network)
    return upload_signor_network(network, server, username, password)

# Run the full pipeline once for a single pathway id; the value returned by
# upload_signor_network is shown as the cell output.
process_signor_id("SIGNOR-MM", cytoscape_visual_properties_template_id, merge_plan, my_server, my_username, my_password)

0
http://signor.uniroma2.it/getPathwayData.php?pathway=SIGNOR-MM


'http://public.ndexbio.org/v2/network/64536ccf-e6b4-11e7-adc1-0ac135e8bacf'

## Create a Network Set to Collect the Output Networks

In [133]:
# Create a fresh, timestamped network set. The title and timestamp are now
# separated by a space; they used to be fused ("Signor Networks2017-...").
net_set_name = 'Signor Networks ' + str(datetime.now())
net_set_url = my_ndex.create_networkset(
    net_set_name,
    'Networks from Signor using data obtained by SIGNOR REST API')
# The UUID is taken from the last path segment of the returned URL.
net_set_uuid = net_set_url.split('/')[-1]
print('Network set uuid: ' + net_set_uuid)

Network set uuid: 4916735f-e6b0-11e7-adc1-0ac135e8bacf


## Process the SIGNOR Pathway IDs

In [136]:
# Maximum number of pathways to process; a falsy value (None or 0) processes
# every pathway id in the mapping table.
limit = 5
signor_uuids = []
for processed, pathway_id in enumerate(network_id_dataframe['pathway_id'], start=1):
    upload_message = process_signor_id(
        pathway_id,
        cytoscape_visual_properties_template_id,
        merge_plan,
        my_server,
        my_username,
        my_password)
    print(upload_message)
    # The network UUID is the last path segment of the upload message.
    signor_uuids.append(upload_message.rsplit('/', 1)[-1])
    if limit and processed >= limit:
        break

#spot_check_nodes(signor_uuids[0], "TYPE")
print('Adding networks to network set')
my_ndex.add_networks_to_networkset(net_set_uuid, signor_uuids)
print('Done')

0
http://signor.uniroma2.it/getPathwayData.php?pathway=SIGNOR-AC
http://public.ndexbio.org/v2/network/81ba448e-e6b0-11e7-adc1-0ac135e8bacf
0
http://signor.uniroma2.it/getPathwayData.php?pathway=SIGNOR-Adipogenesis: BMP
http://public.ndexbio.org/v2/network/827927c1-e6b0-11e7-adc1-0ac135e8bacf
0
http://signor.uniroma2.it/getPathwayData.php?pathway=SIGNOR-AMPK
http://public.ndexbio.org/v2/network/83446704-e6b0-11e7-adc1-0ac135e8bacf
0
http://signor.uniroma2.it/getPathwayData.php?pathway=SIGNOR-Autophagy
http://public.ndexbio.org/v2/network/84012757-e6b0-11e7-adc1-0ac135e8bacf
0
http://signor.uniroma2.it/getPathwayData.php?pathway=SIGNOR-DR
http://public.ndexbio.org/v2/network/84c7aba9-e6b0-11e7-adc1-0ac135e8bacf
Adding networks to network set
Done
