Upstream signaling network reconstruction
===============================

The following code uses the [pypath library](https://github.com/saezlab/pypath) to reconstruct an upstream signaling network from a list of biological entities (CSV format) by querying [OmnipathDB](http://omnipathdb.org/).

## 1. Load function

Import modules:

In [1]:
#!/usr/bin/env python
import csv
import os
import time
import pypath
from pypath.share import curl
from pypath.resources import data_formats


	=== d i s c l a i m e r ===

	All data accessed through this module,
	either as redistributed copy or downloaded using the
	programmatic interfaces included in the present module,
	are free to use at least for academic research or
	education purposes.
	Please be aware of the licenses of all the datasets
	you use in your analysis, and please give appropriate
	credits for the original sources when you publish your
	results. To find out more about data sources please
	look at `pypath.resources.descriptions` or
	http://omnipathdb.org/info and 
	`pypath.resources.urls.urls`.

[2020-05-06 16:15:24] [pypath] 
	- session ID: `dzwlm`
	- working directory: `/home/marie/Project/pyBravo/pypath-evaluation`
	- logfile: `/home/marie/Project/pyBravo/pypath-evaluation/pypath_log/pypath-dzwlm.log`
	- pypath version: 0.10.6


In [2]:
def _upstream_signaling(pa, max_depth, to_be_explore, already_explored=None, current_depth=0, network_sif=None):
    """
    Reconstruct, level by level, the network of regulators upstream of a list of entities.

    Param:
    pa: pypath env; must provide gs_affects(gene) and get_edge(source, target),
    max_depth: maximum level of reconstruction (exploration stops once current_depth reaches it),
    to_be_explore: list of entities to start from,
    already_explored=None: list of entities already visited; extended in place when provided,
        a fresh list is created for each call otherwise,
    current_depth=0: depth at which the exploration starts,
    network_sif=None: list of edges collected so far; extended in place when provided,
        a fresh list is created for each call otherwise

    Return: the list of edges, each a dict with the keys
    source_id, source_name, provenance, target_name and sign.
    """
    # Fresh containers per call: mutable default arguments would be shared
    # between calls and leak edges / visited entities from one run into the next.
    if already_explored is None:
        already_explored = []
    if network_sif is None:
        network_sif = []
    # Set mirror of already_explored for constant-time membership tests
    explored_lookup = set(already_explored)

    # (is_stimulation, is_inhibition) -> label; any other combination is 'unknown'
    sign_labels = {
        (True, False): 'stimulation',
        (False, True): 'inhibition',
        (True, True): 'stimulation_and_inhibition',
    }

    while True:
        # Stopping criteria 1
        if current_depth >= max_depth:
            print("Exploring alted due to maximum depth")
            return network_sif
        print('Exploration depth ' + str(current_depth))
        # Stopping criteria 2
        if not to_be_explore:
            print("Exploring done")
            return network_sif

        # Start exploring
        next_level = []
        for gene in to_be_explore:
            # Entities returned by gs_affects are treated as the regulators of `gene`
            # NOTE(review): confirm the direction of gs_affects against the pypath docs
            regulators = list(pa.gs_affects(gene))
            already_explored.append(gene)
            explored_lookup.add(gene)
            for regulator in regulators:
                # direction and sign of the interaction
                edge = pa.get_edge(regulator["name"], gene)
                dirs = edge['dirs']
                # get_sign yields a pair of booleans: is it a stimulation, is it an inhibition
                # NOTE(review): assumes dirs.reverse is the regulator -> gene direction; verify
                sign_check = dirs.get_sign(dirs.reverse)
                sign = sign_labels.get((sign_check[0], sign_check[1]), 'unknown')
                if regulator["label"] not in explored_lookup:
                    next_level.append(regulator["label"])
                # ID, name, sign and provenance
                network_sif.append({
                    "source_id": regulator["name"],
                    "source_name": regulator["label"],
                    "provenance": list(regulator["sources"]),
                    "target_name": gene,
                    "sign": sign,
                })
        print("Depth explored " + str(current_depth))
        current_depth += 1
        to_be_explore = next_level


def _print_to_csv(network, output_path):
    """
    Write the network to a CSV file, one unique edge per line.

    Param: network (list of edge dicts with the keys source_id, source_name,
    target_name, sign and provenance), path of output file

    The file starts with a header line, followed by the de-duplicated edges in
    sorted order. The provenance entries of an edge are joined with spaces.
    De-duplication and sorting are done in Python (no shell pipeline or
    temporary file), so every row has exactly the columns named in the header
    and output paths containing spaces or shell metacharacters are handled
    safely. Fields containing a comma or a quote are quoted by the csv module.
    """
    # set headers
    headers = ["source_id", "source_name", "target_name", "sign", "provenance"]
    # remove duplicates: a set of row tuples, then sorted for a stable output
    unique_rows = sorted({
        (
            str(e['source_id']),
            e['source_name'],
            e['target_name'],
            e['sign'],
            ' '.join(e['provenance']),
        )
        for e in network
    })
    # write network; newline="" lets the csv writer control the line endings
    with open(output_path, "w", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(headers)
        writer.writerows(unique_rows)
    print("Output file printed.")


## 2. Configuration

In [3]:
# Maximum exploration depth, passed as max_depth to _upstream_signaling
MAX_DEPTH = 8
# Entities to start the reconstruction from; filled from the input file below
INPUT_GENES = []
# CSV file whose first column lists the input entities
inputfile_path = 'input-910.csv'
# CSV file the reconstructed network is written to
outfile_path = 'md08-pypath_omnipathDB.csv'

## 3. Read input file

In [4]:
# Collect the first column of every non-empty row as an input entity.
# newline='' lets the csv module handle line endings itself.
with open(inputfile_path, 'rt', newline='') as csvfile:
    gene_reader = csv.reader(csvfile, delimiter=',', quotechar='"')
    # csv.reader yields an empty list for blank lines; skip those rows
    # instead of failing on row[0]
    INPUT_GENES.extend(row[0] for row in gene_reader if row)

## 4. Main

In [5]:
# Init pypath
# Instantiate the legacy PyPath object; resources are loaded into it in the
# next cell and it is then passed to _upstream_signaling as `pa`.
from pypath.legacy.main import PyPath
pa = PyPath()

In [6]:
# Load the pathway resources into the pypath object
# (presumably cache_off() bypasses pypath's download cache for this block; confirm in the pypath docs)
with curl.cache_off():
    pa.load_resources(data_formats.pathway)

[2020-05-06 16:18:30] [network] `ramilowski_interactions`: Could not find file or dataio function or failed preprocessing.


In [7]:
# Reconstruct the network upstream of INPUT_GENES and time the reconstruction
start_time = time.time()
network = _upstream_signaling(pa, MAX_DEPTH, INPUT_GENES)
elapsed_time = round(time.time() - start_time, 2)
print("--- Upstream signaling network in %s seconds ---" % elapsed_time)
# Export the reconstructed edges to the configured output file
_print_to_csv(network, outfile_path)

Exploration depth 0
Depth explored 0
Exploration depth 1
Depth explored 1
Exploration depth 2
Depth explored 2
Exploration depth 3
Depth explored 3
Exploration depth 4
Depth explored 4
Exploration depth 5
Depth explored 5
Exploration depth 6
Depth explored 6
Exploration depth 7
Depth explored 7
Exploring alted due to maximum depth
--- Upstream signaling network in 59.92 seconds ---
Output file printed.
