# SIGNOR Pipeline

This notebook creates a Network Set of SIGNOR pathway networks using the latest data downloaded via the SIGNOR REST service. SIGNOR is updated regularly and so it also demonstrates how to update the networks in the set.

## Imports Python Packages Needed in the Pipeline

In [None]:
#!pip uninstall ndexutil -y
#!pip install ndexutil --no-cache-dir

In [40]:
import ndex2 # The ndex2 Python client
import itertools # convenient iteration utilities 
import requests
import json
import pandas as pd
import io
import sys
import jsonschema
import os
import nicecxModel
#from nicecxModel.cx.aspects import ATTRIBUTE_DATA_TYPE
from datetime import datetime
import networkx as nx
sys.path.append('../../resources')
from tutorial_utils import load_tutorial_config


In [2]:
import ndexutil.tsv.tsv2nicecx as t2n

## NDEx Access

Configure your NDEx connection based on server, account and password. 

See ["Using NDEx Tutorial Config Files.ipynb"] for more information.

In [65]:
# get the connection parameters from the ndex_tutorial_config.json file in your home directory.
# edit the line below to specify a different connection in the config file
my_server, my_username, my_password = load_tutorial_config("main")

# alternatively, edit and uncomment these lines to set the connection parameters manually 
# my_server = "public.ndexbio.org"
# my_username = None
# my_password = None

my_ndex=ndex2.client.Ndex2(my_server, my_username, my_password)

## Get the Pathway Ids from the Mapping File:

In [66]:
def get_signor_network_ids():
    path = "signor-path_mapping_file.txt"
    return pd.read_csv(path, sep="\t")

network_id_dataframe = get_signor_network_ids()
# network_id_dataframe

## Get the Load Plan:

(TODO: validate load plan with jsonschema)

In [67]:
try:
    path_to_load_plan = 'signor_load_plan.json'
    load_plan = None
    with open(path_to_load_plan, 'r') as lp:
        load_plan = json.load(lp)
        
except jsonschema.ValidationError as e1:
    print("Failed to parse the loading plan: " + e1.message)
    print('at path: ' + str(e1.absolute_path))
    print("in block: ")
    print(e1.instance)

## Function to Create NiceCX from SIGNOR Pathway Data

In [68]:
# human_tax_id = "9606"

def get_signor_network(pathway_id, load_plan):
    # TODO - add context (normalize?)
    signor_context = [{
        'ncbigene': 'http://identifiers.org/ncbigene/',
        'hgnc.symbol': 'http://identifiers.org/hgnc.symbol/',
        'uniprot': 'http://identifiers.org/uniprot/',
        'cas': 'http://identifiers.org/cas/'}]
    # ncx.set_context(context)

    # parameters = human_tax_id + "organism=" + organism_id + "&id=" + pathway_id
    # pathway_data = requests.get("http://signor.uniroma2.it/getData.php?" + parameters)
    url = "http://signor.uniroma2.it/getPathwayData.php?pathway=" + pathway_id + "&relations=only"
    # print(url)
    response = requests.get(url)
    pathway_data = response.text
    
    #header = [h.strip() for h in pathway_data.readline().split('\t')]
    #print(pathway_data)
    # converters={'CUSTOMER': str, 'ORDER NO': str}
    #converters = {}
    usecols = ["entitya", "typea", "ida", "entityb", "typeb", "idb", "effect", "mechanism", "residue", "sequence", "tax_id", "cell_data", "tissue_data", "pmid", "direct", "notes", "annotator", "sentence"]
    #usecols = ["ENTITYA", "TYPEA", "IDA", "ENTITYB", "TYPEB", "IDB", "EFFECT", "MECHANISM", "RESIDUR", 
    #           "SEQUENCE", "TAX_ID", "CELL_DATA", "TISSUE_DATA", "PMID", "DIRECT", "NOTES", "ANNOTATOR", "SETENCE"]
    #for col in usecols:
    #    converters[col] = str
    # dataframe = pd.read_csv(io.StringIO(pathway_data), sep='\t',converters = converters,usecols = usecols)
    dataframe = pd.read_csv(io.StringIO(pathway_data), 
                dtype=str, 
                na_filter=False, 
                delimiter='\t', 
                engine='python')
                #names=usecols)
    
    #print(dataframe)
    # filter dataframe to remove rows that are not human
    human_dataframe = dataframe.loc[dataframe["tax_id"] == "9606"]

    #print(human_dataframe)
    # upcase column names
    rename = {}
    for column_name in human_dataframe.columns:
        rename[column_name]= column_name.upper()
    
    human_dataframe = human_dataframe.rename(columns=rename)

    
    # df = df.rename(columns={'oldName1': 'newName1', 'oldName2': 'newName2'})
    #return human_dataframe
    
    network = t2n.convert_pandas_to_nice_cx_with_load_plan(human_dataframe, load_plan)

    #network.set_network_attribute("SIGNOR_ID", values=pathway_id)

    # Fix values for "DIRECT"
    for edge_id, edge in network.get_edges():
        direct = network.get_edge_attribute(edge_id, "DIRECT")
        # print(direct)
        if direct:
            if direct == "t":
                network.set_edge_attribute(edge, "DIRECT", "YES")
            else:
                network.set_edge_attribute(edge, "DIRECT", "NO")
                
    # Set prefixes for represents based on the "DATABASE" attribute
    #
    #   Note that this is a good example of a situation that calls
    #   for custom code and does not justify an extension to the load_plan
    #   Cases of this type are too variable. Custom code is easier.
    #
    for node_id, node in network.get_nodes():
        database = network.get_node_attribute(node_id, "DATABASE")
        represents = node.get_node_represents()
        if database == "UNIPROT":
            represents = "uniprot:" + represents
            node.set_node_represents(represents)
        if database == "SIGNOR":
            represents = "signor" + represents
            node.set_node_represents(represents)
        # in all other cases, the identifier is already prefixed
        
    print(network.get_summary())
    return network
    
signor_network = get_signor_network("SIGNOR-MM", load_plan)

Name: Untitled
Nodes: 30
Edges: 109
Node Attributes: 60
Edge Attributes: 783



## Function to Get the Description and Other Info About the Pathway

In [70]:
def add_pathway_info(network):
    url = "http://signor.uniroma2.it/getPathwayData.php?pathway=" + str(pathway_id)#network.get_network_attribute("SIGNOR_ID"))
    print(url)
    response = requests.get(url)
    pathway_info = response.text
    dataframe = pd.read_csv(io.StringIO(pathway_info), sep='\t')
    network.set_name(dataframe.iat[0,1])
    network.set_network_attribute("labels", [dataframe.iat[0, 0]], type='list_of_string')
    network.set_network_attribute("author", dataframe.iat[0, 3])
    network.set_network_attribute("description", dataframe.iat[0, 2])
    network.set_network_attribute("version", "0.0.1")
    network.set_network_attribute("networkType", "Signalling Pathway")
    # TODO: set “networkType” property depending on network
    #    a. Signalling Pathway
    #    b. Disease Pathway
    #    c. Cancer Pathway
    
add_pathway_info(signor_network)
#print(network.get_network_attribute("description"))
print(signor_network.get_network_attribute("Labels"))


http://signor.uniroma2.it/getPathwayData.php?pathway=SIGNOR-AMPK
['SIGNOR-AMPK']


## Create the Merge Plan for SIGNOR Networks

The SIGNOR table has two columns that specify node type - one for the source and one for the target of the edge. These type attributes are therefore redundant and should be merged into one attribute. We define a plan for the attributes to merge.

In [71]:
# merge "TYPEA" and "TYPEB" to "TYPE"
merge_plan = [
    {
        "attribute1": "TYPEA",
        "attribute2": "TYPEB",
        "new_attribute": "TYPE"
    },
    {
        "attribute1": "IDA",
        "attribute2": "IDB",
        "new_attribute": "SIGNOR_NODE_ID"
    }
]

## Specify the Network Containing the Visual Style to Apply to the SIGNOR Networks 

In [72]:
# Use the visual properties of network ... to style each output network
# http://www.ndexbio.org/#/network/d3c5ca09-bb42-11e7-94d3-0ac135e8bacf
cytoscape_visual_properties_template_id = "f54eaef9-013c-11e8-81c8-06832d634f41"

## Function to Apply a Layout to the Network

In [73]:
def cartesian(G):
    return [{'cartesianLayout': [
        {'node': n, 'x': float(G.pos[n][0]), 'y': float(G.pos[n][1])}
        for n in G.pos
        ]}]

def apply_spring_layout(network):
    my_networkx = network.to_networkx()
    my_networkx.pos = nx.drawing.spring_layout(my_networkx)
    #my_networkx.pos = nx.drawing.circular_layout(my_networkx)
    cartesian_aspect = cartesian(my_networkx)
    network.set_opaque_aspect("cartesianCoordinates", cartesian_aspect)

## Function to Print a Sample of Nodes in the Network
#### (For a Spot-Check when Debugging)

In [74]:
def spot_check_nodes(network, attribute_name):
    for id, node in itertools.islice(network.get_nodes(), 5):
        attribute_value = my_network.get_node_attribute(node, attribute_name)
        print("%s: %s = %s" % (node.get_name(), attribute_name, attribute_value))

## Function to Upload the Network

In [75]:
def upload_signor_network(network, server, username, password, update_uuid=False):
    if update_uuid:
        message = network.update_to(update_uuid, server, username, password)      
    else:
        message = network.upload_to(server, username, password)
    return(message)

## Function to Process One Signor Pathway ID

In [76]:
def process_signor_id(signor_id, 
                      cytoscape_visual_properties_template_id, 
                      load_plan, 
                      server, 
                      username, 
                      password):
    network = get_signor_network(signor_id, load_plan)
    add_pathway_info(network)
    # add context:
    # network.set_context(signor_context)
    # for merge_spec in merge_plan:
    #    network.merge_node_attributes(
    #        merge_spec.get("attribute1"), 
    #        merge_spec.get("attribute2"), 
    #        merge_spec.get("new_attribute"))
    network.apply_template(
        username=username, 
        password=password, 
        server=server, 
        uuid=cytoscape_visual_properties_template_id)
    apply_spring_layout(network)
    return upload_signor_network(network, server, username, password)

process_signor_id("SIGNOR-MM", cytoscape_visual_properties_template_id, load_plan, my_server, my_username, my_password)

Name: Untitled
Nodes: 30
Edges: 109
Node Attributes: 60
Edge Attributes: 783

http://signor.uniroma2.it/getPathwayData.php?pathway=SIGNOR-AMPK


'http://dev.ndexbio.org/v2/network/b2906661-06e7-11e8-81c8-06832d634f41'

## Create a Network Set to Collect the Output Networks

In [77]:
net_set_url = my_ndex.create_networkset('Signor Networks' + str(datetime.now()), 
                                              'Networks from Signor using data obtained by SIGNOR REST API')
net_set_uuid = net_set_url.split('/')[-1]
print('Network set uuid: ' + net_set_uuid)

Network set uuid: b54dd8b4-06e7-11e8-81c8-06832d634f41


## Process the SIGNOR Pathway IDs

In [78]:
count = 0
limit = 3
signor_uuids = []
for pathway_id in network_id_dataframe['pathway_id']:
    upload_message = process_signor_id(
        pathway_id, 
        cytoscape_visual_properties_template_id, 
        load_plan, 
        my_server, 
        my_username, 
        my_password)
    print(upload_message)
    network_uuid = upload_message.split('/')[-1]
    signor_uuids.append(network_uuid)
    if limit:
        count += 1
        if count >= limit:
            break

#spot_check_nodes(signor_uuids[0], "TYPE")
print('Adding networks to network set')
my_ndex.add_networks_to_networkset(net_set_uuid, signor_uuids)        
print('Done')

Name: Untitled
Nodes: 27
Edges: 73
Node Attributes: 54
Edge Attributes: 556

http://signor.uniroma2.it/getPathwayData.php?pathway=SIGNOR-AC
http://dev.ndexbio.org/v2/network/b6f7b825-06e7-11e8-81c8-06832d634f41
Name: Untitled
Nodes: 4
Edges: 4
Node Attributes: 8
Edge Attributes: 25

http://signor.uniroma2.it/getPathwayData.php?pathway=SIGNOR-Adipogenesis: BMP
http://dev.ndexbio.org/v2/network/b7aab478-06e7-11e8-81c8-06832d634f41
Name: Untitled
Nodes: 26
Edges: 59
Node Attributes: 52
Edge Attributes: 416

http://signor.uniroma2.it/getPathwayData.php?pathway=SIGNOR-AMPK
http://dev.ndexbio.org/v2/network/b87c354b-06e7-11e8-81c8-06832d634f41
Adding networks to network set
Done
