In [1]:
import pandas as pd, networkx as nx

In [2]:
## Reference network preparation

# Load the HIPPIE interactome table. keep_default_na=False stops pandas from
# converting strings such as "NA" or empty fields into NaN while parsing.
HIPPIE_df = pd.read_csv(
    "../Source/Interactomes/HIPPIE_v2_3.tab",
    sep="\t",
    keep_default_na=False,
)
print("The number of interaction in HIPPIE", len(HIPPIE_df))

# Build the reference graph: Gene_1/Gene_2 are the edge endpoints and Score is
# carried along as the edge attribute.
HIPPIE_nx = nx.from_pandas_edgelist(
    HIPPIE_df,
    source="Gene_1",
    target="Gene_2",
    edge_attr="Score",
)

# Snapshot the self-loops first so the graph is not mutated while iterating its view.
self_interactions = list(nx.selfloop_edges(HIPPIE_nx))

# During permutation we have to get rid of self-interactions and nan-nan
# interactions that come with the reference network by default.
# Self-loops whose node label is a float are only printed, not removed.
# NOTE(review): with keep_default_na=False the gene columns are presumably read
# as strings, so the float branch is likely never taken — confirm.
for node_a, node_b in self_interactions:
    if not isinstance(node_a, float):
        HIPPIE_nx.remove_edge(node_a, node_b)
        continue
    print(node_a)

# Rebuild the edge table from the cleaned graph (columns: source, target, plus edge attributes).
HIPPIE_df = nx.to_pandas_edgelist(HIPPIE_nx)
print("The number of edges in HIPPIE network", HIPPIE_nx.number_of_edges())
print("The number of nodes in HIPPIE network", HIPPIE_nx.number_of_nodes())

# Sanity check: every (source, target) row of the rebuilt table must be an edge of the graph.
edges = tuple(zip(HIPPIE_df["source"], HIPPIE_df["target"]))
for candidate in edges:
    if candidate in HIPPIE_nx.edges:
        continue
    print("check the edge", candidate)

The number of interaction in HIPPIE 783182
The number of edges in HIPPIE network 774448
The number of nodes in HIPPIE network 19437


In [3]:
## initial nodes and weights

# Read the tab-separated .nodes file; its `name` column supplies the initial node labels.
initial_nodes_df = pd.read_csv(
    '../Source/Netpath/Sampling_0_5/AndrogenReceptor_05A/AndrogenReceptor_05A_var_0.nodes',
    sep="\t",
)

initial_nodes = initial_nodes_df["name"].tolist()
# Uniform weight of 1 for every initial node.
initial_weights = [1] * len(initial_nodes)

### PageRank Flux Calculations

Without a guide network, we can infer a subnetwork directly from the reference network using the PageRankFlux algorithm.


In [4]:
from Paragon import NetworkInference 

In [5]:
# Set up the inference object on the HIPPIE reference graph only (no guide network),
# using the "Score" edge attribute.
pgrf = NetworkInference(
    network=HIPPIE_nx,
    edge_attribute="Score",
)
# Hand over a copy of the initial node list.
pgrf.load_initial_nodes(list(initial_nodes))


##### max_edge_count => the maximum number of edges with the highest flux scores




##### alpha => Damping parameter for PageRank, i.e. the probability of walking to a neighboring node 


##### threshold => the scaling factor, i.e. the threshold percentage of the summed flux scores;
             the algorithm selects edges from high to low flux score, summing up the scores until the percentage of interest is reached  



##### intermediate_only => True or False: 
                 True --> the algorithm selects only interactions in the graphlet-guided network between initial nodes.  
                 False --> the algorithm selects interactions in the graphlet-guided network regardless of the initial nodes.


In [6]:



# Run the reconstruction with the parameters described in the notes above.
returned_nx = pgrf.reconstruct_subnetwork(
    max_edge_count=2000,
    alpha=0.8,
    threshold=0.8,
    intermediate_only=True,
)




theshold of 0.045000 limits predictions to 2000 edges


In [7]:
# Number of edges in the reconstructed subnetwork (shown as the cell output).
returned_nx.number_of_edges()

1547

In [8]:



# 2000 is the max_edge_count passed to reconstruct_subnetwork; the difference to the
# returned edge count is reported as the number of excluded interactions.
excluded_edge_count = 2000 - returned_nx.number_of_edges()
print(f'we excluded {excluded_edge_count} interactions from inferred network since paths between any pair of initial nodes do not include these interactions.')

we excluded 453 interactions from inferred network since paths between any pair of initial nodes do not include these interactions.


In [9]:
# Flatten the reconstructed subnetwork into an edge-list DataFrame and save it.
# NOTE(review): to_csv is called with its defaults, so the file is comma-separated and
# includes the index column despite the .sif extension, while the GGN .sif in this
# notebook is read with sep="\t" — confirm downstream readers expect this format.
returned_df=nx.to_pandas_edgelist(returned_nx)
returned_df.to_csv('../AndrogenReceptor_reconstructed_pathway_without_GGN.sif')

Network inference with Graphlet-Guided Network

### Graphlet-Guided Network

In [10]:
# Load the graphlet-guided network (tab-separated edge list) and turn it into a graph
# using the default `source` / `target` column names.
GGN_df = pd.read_csv('../AndrogenReceptor_GGN.sif', sep="\t")
GGN_nx = nx.from_pandas_edgelist(GGN_df, source="source", target="target")

In [11]:
# Set up the inference object on the HIPPIE reference graph, this time with the
# graphlet-guided network supplied as the guide network.
pgrf = NetworkInference(
    network=HIPPIE_nx,
    guide_network=GGN_nx,
    edge_attribute="Score",
)
# Hand over a copy of the initial node list.
pgrf.load_initial_nodes(list(initial_nodes))


In [12]:
# Run the reconstruction with the same parameter values as the run without a guide network.
returned_nx = pgrf.reconstruct_subnetwork(
    max_edge_count=2000,
    alpha=0.8,
    threshold=0.8,
    intermediate_only=True,
)

theshold of 0.548000 limits predictions to 2000 edges


In [13]:

# 2000 is the max_edge_count passed to reconstruct_subnetwork; the difference to the
# returned edge count is reported as the number of excluded interactions.
excluded_edge_count = 2000 - returned_nx.number_of_edges()
print(f'we excluded {excluded_edge_count} interactions from inferred network since paths between any pair of initial nodes do not include these interactions.')

we excluded 288 interactions from inferred network since paths between any pair of initial nodes do not include these interactions.


In [14]:
# Flatten the reconstructed subnetwork into an edge-list DataFrame and save it.
# NOTE(review): to_csv is called with its defaults, so the file is comma-separated and
# includes the index column despite the .sif extension, while the GGN .sif in this
# notebook is read with sep="\t" — confirm downstream readers expect this format.
returned_df=nx.to_pandas_edgelist(returned_nx)
returned_df.to_csv('../AndrogenReceptor_reconstructed_pathway_with_GGN.sif')

## Beneficial outputs

#### PageRank scores

In [15]:
# Keep a handle on the pagerank_score attribute of the inference object.
# NOTE(review): presumably the PageRank score of every node in the reference network,
# judging by the variable name — confirm against the Paragon implementation.
PageRank_Scores_for_allnodes_in_ref=pgrf.pagerank_score

##### scored reference interactome

In [17]:
# Edge list of the network held by the inference object; the displayed table carries
# Score, neglog_flux and flux for each edge.
Overall_network_values_df=nx.to_pandas_edgelist(pgrf.network)
Overall_network_values_df

Unnamed: 0,source,target,Score,neglog_flux,flux
0,ALDH1A1,NUPR1,0.63,7.604111,2.488223e-08
1,ALDH1A1,ALDH2,0.63,6.869425,1.350750e-07
2,ALDH1A1,POT1,0.63,7.117359,7.632039e-08
3,ALDH1A1,NOS2,0.63,7.002051,9.952893e-08
4,ALDH1A1,NEDD8,0.63,7.690741,2.038257e-08
...,...,...,...,...,...
774443,VNN1,VNN3,0.63,6.639485,2.293583e-07
774444,KLK14,OR13C3,0.63,7.569017,2.697634e-08
774445,SPDYE7P,SPDYE3,0.63,6.337234,4.600089e-07
774446,GADL1,OR10A4,0.63,6.693221,2.026649e-07


##### scored GGN

In [18]:
# Edge list of pgrf.motif_nx; the displayed table carries neglog_flux and flux for each edge.
# NOTE(review): presumably motif_nx is the scored graphlet-guided network — confirm.
Motif_network_values_df=nx.to_pandas_edgelist(pgrf.motif_nx)
Motif_network_values_df

Unnamed: 0,source,target,neglog_flux,flux
0,PRMT2,PRDM11,5.282746,5.215000e-06
1,PRMT2,GTF2H3,4.377999,4.187948e-05
2,PRMT2,GTF2H2C,5.206151,6.220846e-06
3,PRMT2,CHCHD2P9,5.682046,2.079475e-06
4,PRMT2,GTF2F1,5.854065,1.399376e-06
...,...,...,...,...
21884,TCF4,RAB41,6.003631,9.916735e-07
21885,TCF4,NECTIN2,6.596765,2.530666e-07
21886,TCF4,SPG21,6.781936,1.652206e-07
21887,TCF4,ID4,6.034959,9.226593e-07
