In [None]:
# Imports used throughout the notebook.
import os
import igraph
from pypath.share import settings
# Enable progress bars in pypath's settings.
settings.setup(progressbars = True)
import omnipath as op
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
# `legacy` is the 'old' version of pypath; per the original note it is (for now)
# the only one with the graph object implemented.
from pypath.legacy import main as legacy
# The new network class used by pypath: useful for the dataframe visualization,
# but (per the original note) it comes without references -- maybe a bug to
# report -- and the graph is not implemented here.
from pypath.core import network
from pypath.resources import network as netres
# Helper functions used by the cells below (presumably a project-local module).
import pypath_functions as pf

In [None]:
# Set the cache folder used by pypath. Per the original note this is optional
# and the cell can be skipped.
settings.setup(cachedir = 'tmpcache_legacy')

In [None]:
# Create the (new-style) pypath network object; it is filled in a later cell.
pa = network.Network()

In [None]:
# Create the 'old' (legacy) PyPath object; its network is loaded in a later cell.
pw_legacy = legacy.PyPath()

In [None]:
# Load the network into `pa` from a pickle file.
# NOTE(review): pickle files can execute code on load -- only load files you trust.
pa.init_network(pickle_file='mynetwork.pickle')

In [None]:
# Build the dataframe of the network and display it as the cell output.
pa.make_df()

pa.df

In [None]:
# Search for paths between two proteins and print every path found.
# Per the original note this takes a lot of time.
# The original wrote interaction_type as ('post_translational'); without a
# trailing comma that is a plain string, so the same string is passed here.
found_paths = pa.find_paths(
    'O15350',
    'P31749',
    maxlen=4,
    interaction_type='post_translational',
    silent=True,
)

for path in found_paths:
    print(path)

In [None]:
# Check the 'references' column of the dataframe: group the rows by it and
# count the rows per value (the original note states there are no references).
a = pa.df.groupby(['references']).size().reset_index()

# Show the result; without this trailing expression the cell displays nothing.
a

In [None]:
# Print the database names of each resource group in legacy.data_formats.
# Per the original note, these are the databases loaded with the script
# "download_databases.py".
resource_groups = (
    'omnipath',
    'ligand_receptor',
    'tf_mirna',
    'transcription',
    'activity_flow',
)
for group_name in resource_groups:
    print(getattr(legacy.data_formats, group_name).keys())

In [None]:
# Load the network into the legacy PyPath object from a pickle file.
# NOTE(review): pickle files can execute code on load -- only load files you trust.

pw_legacy.init_network(pickle_file='mylegacy.pickle')

In [None]:
# MAIN OBJECT: the graph held by the legacy PyPath object. Most cells below
# work on `graph`; here its igraph summary is printed.

graph = pw_legacy.graph
igraph.summary(graph)

In [None]:
# Show the attributes of one vertex (index 5 is just an arbitrary choice).

graph.vs[5].attributes()

In [None]:
# Fetch a vertex of the network by its index and print its index, its name
# (the UniProt name, per the original note) and its original names.
vertex = pw_legacy.graph.vs[99]

print('index: ', vertex.index, 'name: ', vertex['name'], 'label: ', vertex['original_names'])


In [None]:
# Find the UniProt name for a gene symbol: map 'AKT1' from the 'genesymbol'
# identifier type to the 'uniprot' identifier type.
from pypath.utils import mapping

mapping.map_name('AKT1', 'genesymbol', 'uniprot')

In [None]:
# pf.generate_dict takes a list of genes and, per the original note, returns a
# dictionary with the associated UniProt names.

gene_list = [] # ==> insert here the list of genes you are looking for

gene_dict = pf.generate_dict(gene_list, pw_legacy)

gene_dict

In [None]:
# Alternative for when pf.generate_dict does not work: per the original note,
# this helper uses the OmniPath database directly.
gene_dict2 = pf.get_code_from_annotations(gene_list)

gene_dict2

In [None]:
# Just to be sure, compare the two dictionaries: for every gene present in
# both, print the gene (twice, keeping the original output format) followed by
# True when both dictionaries map it to the same value and False otherwise.
#
# The original compared gene_dict[gene1] with gene_dict[gene2] -- the same
# dictionary and the same key -- so it could only ever print True. The value
# from gene_dict must be compared with the value from gene_dict2.
for gene in gene_dict:
    if gene in gene_dict2:
        print(gene, gene, gene_dict[gene] == gene_dict2[gene])

In [None]:
# Get information about one interaction.
# Some edge attributes are lists holding several pieces of information; index
# into them to reach a single entry, e.g. edge['dirs'][0].
gene_name1 = ""
gene_name2 = ""

# Resolve both gene symbols to vertex names, then look the edge up.
first_name = pw_legacy.gs(gene_name1)['name']
second_name = pw_legacy.gs(gene_name2)['name']
idedge = pw_legacy.edge_exists(first_name, second_name)
print(idedge)
edge = graph.es[idedge]

for attribute in ('dirs', 'sources', 'references'):
    print(edge[attribute])

first_direction = edge['dirs'][0]
print(first_direction.consensus_edges())
# Per the original note, pf.get_consensus_edges returns an average over the
# possible interactions taken from the different databases.
print(pf.get_consensus_edges(first_direction, gene_dict))

In [None]:
# Search for paths between two proteins. No maxlen is passed in this call, so
# find_all_paths falls back to its default maximum length.

AKT1 = graph.vs.find(name='P31749')
Notch = graph.vs.find(name='P46531')

all_paths = pw_legacy.find_all_paths(AKT1.index, Notch.index, mode='ALL')
for path in all_paths:
    print(path)
    # Original names of the second vertex on the path.
    second_vertex = graph.vs[path[1]]
    print(second_vertex['original_names'])

In [None]:
# STARTING BUILDING THE SUBGRAPH

# Get the UniProt names for the gene list; the resulting dictionary is used
# by the following cells to select the vertices of the subgraph.

sources = gene_list


uniprot_dict = pf.generate_dict(sources, pw_legacy)

uniprot_dict

In [None]:
# Build the subgraph induced by the vertices whose name appears as a value
# of uniprot_dict.
selected_vertices = [
    pw_legacy.vs.find(name=uniprot_name)
    for uniprot_name in uniprot_dict.values()
]
subg4 = graph.induced_subgraph(selected_vertices)

# Print every node of the subgraph together with its degree.
print('Node -- Degree')
for vertex in subg4.vs:
    print(vertex['label'] + ' -- ' + str(vertex.degree()), end=' ')

# Print the interactions of the subgraph.
print("Interactions:")

pf.display_directed_edges_labels(subg4, subg4.es, uniprot_dict)

In [None]:
# pf.plot_graph takes a graph/subgraph (from pypath) and a string
# (e.g. my_network.png) -- presumably the output file name; confirm in pypath_functions.
pf.plot_graph(subg4, "example_network.png")

In [None]:
# Visualize the dataframe containing all the information about each edge of the subgraph.
pf.show_edge_dataframe(subg4)

In [None]:
# Not all the nodes of the subgraph are connected; check one pair here.
# Both gene names are empty placeholders: fill them in before running.

gene_name1 = ''
gene_name2 = ''

first_id = uniprot_dict[gene_name1]
second_id = uniprot_dict[gene_name2]

print(subg4.are_connected(first_id, second_id))
path = subg4.get_all_shortest_paths(first_id, second_id)
path

In [None]:
# To proceed, search the full graph for another possible path linking the two
# nodes; the intermediate node can then be added to the subgraph.
# The maximum path length is passed explicitly here (maxlen=2).
node_1 = graph.vs.find(label=gene_name1)
node_2 = graph.vs.find(label=gene_name2)

candidate_paths = pw_legacy.find_all_paths(
    node_1.index, node_2.index, mode='ALL', maxlen=2
)
for path in candidate_paths:
    print(path)
    # Original names of the second vertex on the path.
    print(graph.vs[path[1]]['original_names'])

In [None]:
# Finally, print the information about the new edge: its directions, its
# references and its references grouped by source.
# The edge is looked up once and reused instead of calling get_edge three times.
new_edge = pw_legacy.get_edge('P31749', 'P12931')

print(new_edge['dirs'], new_edge['references'], new_edge['refs_by_source'])

In [None]:
# Re-build the network with the new nodes.

sources = [] # ==> insert here the gene list, including the newly found nodes

uniprot_dict = pf.generate_dict(sources, pw_legacy)

# Build the subgraph induced by the vertices named in uniprot_dict.

subg4_with_new_node = graph.induced_subgraph([pw_legacy.vs.find(name = uniprot_dict[e]) for e in uniprot_dict.keys()])

# Print the node list and the corresponding degree for each node.
print('Node -- Degree')
for e in subg4_with_new_node.vs:
    print(e['label'] + ' -- '+ str(e.degree()), end=' ')

# Print the interactions.
print('\n', "Interactions:")

# Pass the gene -> UniProt dictionary as third argument, consistent with the
# three-argument call of this helper made for subg4 earlier in the notebook;
# the original call here omitted it.
# NOTE(review): the helper's signature is not visible from this notebook --
# confirm the third parameter in pypath_functions.
pf.display_directed_edges_labels(subg4_with_new_node, subg4_with_new_node.es, uniprot_dict)

pf.plot_graph(subg4_with_new_node, "my_network2.png")

In [None]:
# Search for another possible path that could link two nodes, so the
# intermediate node can be added to the subgraph. The maximum path length is
# passed explicitly (maxlen=2).
# Both gene names are empty placeholders: fill them in before running.
gene_name1 = ''
gene_name2 = ''

node_1 = graph.vs.find(label=gene_name1)
node_2 = graph.vs.find(label=gene_name2)

for path in pw_legacy.find_all_paths(node_1.index, node_2.index, mode='ALL', maxlen=2):
    print(path)
    # Print the label of every vertex along the path on one line.
    for vertex_index in path:
        print(graph.vs[vertex_index]['label'], end=' ')
    print('\n')

In [None]:
# Per the original note: for the given depth (2 here), pf.complete_connection
# searches the databases for all possible paths of that length and adds the
# nodes it finds to the graph; this can take some time depending on the depth.
connected_dict = pf.complete_connection(subg4, uniprot_dict, 2, pw_legacy)

# Build the subgraph induced by the vertices named in connected_dict.
connected_vertices = [
    pw_legacy.vs.find(name=uniprot_name)
    for uniprot_name in connected_dict.values()
]
subg5 = graph.induced_subgraph(connected_vertices)

pf.plot_graph(subg5, "my_connected_network.png")

In [None]:
# Same construction as for the connected network, but using
# pf.get_complete_dict (depth 2 here). Per the original note the search can
# take some time depending on the depth.
complete_dict = pf.get_complete_dict(subg4, uniprot_dict, 2, pw_legacy)

# Build the subgraph induced by the vertices named in complete_dict.
complete_vertices = [
    pw_legacy.vs.find(name=uniprot_name)
    for uniprot_name in complete_dict.values()
]
subg6 = graph.induced_subgraph(complete_vertices)

pf.plot_graph(subg6, "my_complete_network.png")

In [None]:
# Select only the interactions that come from certain databases (e.g. SIGNOR).
# In this case networkx, not igraph, is used to draw the network.
# DATABASES TO CHOOSE FROM:
# HPRD, HPRD-phos, KEA, PhosphoSite, ProtMapper, SIGNOR, SPIKE, phosphoELM, BioGRID, IntAct, Macrophage
database = ['SIGNOR'] # ==> insert the name(s) of the database(s)

edge_df = pf.show_edge_dataframe(subg5, connected_dict)

# Expand each row's list of sources into columns and keep the rows where at
# least one source is in `database`.
# The axis is passed by keyword: pandas >= 2.0 no longer accepts it
# positionally, so the original `.any(1)` raises a TypeError there.
source_matches = pd.DataFrame(edge_df.sources.tolist()).isin(database).any(axis=1).values
df_signor = edge_df[source_matches]

# Draw a network starting from the pandas dataframe and NOT from a graph/subgraph.
pf.print_graph(df_signor)

In [None]:
# Display the dataframe of edges filtered by database.
df_signor

In [None]:
# Write a bnet file with the formulae taken from SIGNOR (per the original note).
# As for the other helpers, the call needs a graph object and the gene
# dictionary associated with it.
pf.write_bnet_from_signor(subg5, connected_dict)

In [None]:
gene_name=''
# Find the neighbors of a particular node in the subgraph built above.
# gene_name is an empty placeholder: set it to the gene of interest before running.
pf.search_neigh_interactions(gene_name, connected_dict, subg5)

In [None]:
# Export the subgraph for Cytoscape in graphml, gml and lgl format.
exports = (
    (subg5.write_graphml, 'cytoscape_network.graphml'),
    (subg5.write_gml, 'cytoscape_network.gml'),
    (subg5.write_lgl, 'cytoscape_network.lgl'),
)
for write_graph, filename in exports:
    write_graph(filename)

In [None]:
# Plot the network with node size equal to the total degree (in + out).

degree_threshold = 5 # a node's name is displayed only if its degree is above this
# NOTE(review): visual_style is defined but never passed to igraph.plot below;
# it is kept unchanged in case it is used elsewhere -- confirm or remove.
visual_style = {'bbox': (3000, 3000),
               'margin': 150}

# Compute the degrees once and reuse them for both the labels and the sizes.
node_degrees = subg5.degree()

# Nodes at or below the threshold get a bare newline as label.
label_tmp = [node if d>degree_threshold else '\n' for node, d in zip(subg5.vs['label'], node_degrees)]
igraph.plot(subg5, target = 'network_degree.pdf',
            layout=subg5.layout_auto(), vertex_label=label_tmp,
            vertex_size=node_degrees, edge_width = 0.3, edge_color = 'purple',
            vertex_color = '#97BE73', vertex_frame_width = 0,
            vertex_label_size = 7,
            vertex_label_color = 'red', inline = True, margin = 20)

In [None]:
# NOTE(review): presumably filters subg5 by node degree with 5 as the
# threshold -- confirm against pf.filter_by_node_degree.
pf.filter_by_node_degree(subg5, 5, pw_legacy)

In [None]:
gene_name1=''
gene_name2=''
# Select two nodes of one of your graphs, check which shortest paths link
# them, and plot those paths (per the original note).
# Both gene names are empty placeholders: fill them in before running.
pf.plot_shortest_paths(gene_name1, gene_name2, subg5)

In [None]:
# Author's note: igraph and networkx do not allow showing directed edges (or at
# least the author did not know how), so this helper colors each edge
# depending on the result of the "consensus_edge" function, which returns an
# average of the direction of the edge according to the associated sources.

# TODO (from the original author): add a colorbar or something similar.
pf.plot_with_colored_edges(subg5)