In [1]:
import pandas as pd, networkx as nx

In [2]:
## Reference network preparation

# Load the HIPPIE interactome. keep_default_na=False stops pandas from turning
# strings such as "NA" or empty fields into NaN while parsing.
HIPPIE_df = pd.read_csv(
    "../Source/Interactomes/HIPPIE_v2_3.tab",
    sep="\t",
    keep_default_na=False,
)
print("The number of interaction in HIPPIE", HIPPIE_df.shape[0])

# Build the graph from the two gene columns, carrying "Score" as an edge attribute.
HIPPIE_nx = nx.from_pandas_edgelist(
    HIPPIE_df, source="Gene_1", target="Gene_2", edge_attr="Score"
)

# Self interactions coming from the reference network have to go before permutation.
# The self loops are materialised into a list first, so the graph is not modified
# while it is being iterated.
self_interactions = list(nx.selfloop_edges(HIPPIE_nx))
for node_a, node_b in self_interactions:
    if not isinstance(node_a, float):
        HIPPIE_nx.remove_edge(node_a, node_b)
        continue
    # Float-typed node labels (e.g. NaN) are only printed; such self loops stay in the graph.
    print(node_a)

# Rebuild the edge table from the cleaned graph and report the graph size.
HIPPIE_df = nx.to_pandas_edgelist(HIPPIE_nx)
print("The number of edges in HIPPIE network", HIPPIE_nx.number_of_edges())
print("The number of nodes in HIPPIE network", HIPPIE_nx.number_of_nodes())

# Sanity check: every (source, target) row of the rebuilt table must be an edge of the graph.
edges = tuple(zip(HIPPIE_df["source"], HIPPIE_df["target"]))
for pair in edges:
    if pair in HIPPIE_nx.edges:
        continue
    print("check the edge", pair)

The number of interaction in HIPPIE 783182
The number of edges in HIPPIE network 774448
The number of nodes in HIPPIE network 19437


In [3]:
## initial nodes and weights

# Tab-separated node table read from the AndrogenReceptor_05A sampling folder;
# its "name" column holds the node labels.
initial_nodes_df = pd.read_csv(
    '../Source/Netpath/Sampling_0_5/AndrogenReceptor_05A/AndrogenReceptor_05A_var_0.nodes',
    sep="\t",
)

# Every initial node gets the same unit weight.
initial_nodes = initial_nodes_df["name"].tolist()
initial_weights = [1] * len(initial_nodes)

In [4]:
from Paragon import GraphletFrequency 

## network permutation

In [6]:

# Graphlet-frequency helper built from the reference network and the initial nodes.
GRF = GraphletFrequency(HIPPIE_nx, initial_nodes)

## network permutation once
Permutated_nx = GRF.permutate_network()
# Edge table of the permuted network, kept as a DataFrame for inspection.
Permutated_df = nx.to_pandas_edgelist(Permutated_nx)

# Last expression of the cell, so its return value is what the notebook displays.
GRF.save_permuted_network('../Source/Interactomes/Permuted/HIPPIE_permuted/HIPPIE_perm_network')

'../Source/Interactomes/Permuted/HIPPIE_permuted/HIPPIE_perm_network'

In [6]:
## Permutation of reference network multiple times (n permuted network)

# Saved files share the f_name prefix followed by a running index (see the log output).
# `size` is presumably the number of permuted networks to write — TODO confirm.
GRF.save_permuted_networks(
    f_name='../Source/Interactomes/Permuted/HIPPIE_permuted/HIPPIE_prm',
    size=50,
)

permuted network saved in	 ../Source/Interactomes/Permuted/HIPPIE_permuted/HIPPIE_prm_1
permuted network saved in	 ../Source/Interactomes/Permuted/HIPPIE_permuted/HIPPIE_prm_2


1

# Graphlet Frequencies & Graphlet-Guided Network Construction


We keep the statistical information about the graphlets, such as their interactions and frequencies.


In [7]:
### Graphlet Frequencies

## Frequencies from the reference network
GRF = GraphletFrequency(HIPPIE_nx, initial_nodes)
# get_Graphlet_Frequency() gives the frequencies as a dict; wrapping it in a list
# turns it into a one-row DataFrame.
Own_freq = pd.DataFrame([GRF.get_Graphlet_Frequency()])

### Frequencies from the permuted networks
# Frequency calculations from permuted networks might take a long time depending on the
# network size. Already calculated frequencies can be loaded from a pickle file instead:
# GRF.get_frequencies_from_pickle("../AndrogenReceptor_Pool_Frequencies.pickle")
Random_frequencies = GRF.get_frequencies_from_pool(
    "../Source/Interactomes/Permuted/HIPPIE_permuted/"
)
Random_frequencies



In [8]:
# Display the graphlet frequencies held by GRF; the result is a dict keyed by graphlet name.
GRF.get_Graphlet_Frequency()

{'Graphlets0': 0.000293848607814437,
 'Graphlets1': 0.03475779614926956,
 'Graphlets2': 0.01748433786920349,
 'Graphlets3': 0.523912016886961,
 'Graphlets4': 0.1580608204391412,
 'Graphlets5': 0.09767251160356297,
 'Graphlets6': 0.11204896831482318,
 'Graphlets7': 0.04366452030424739,
 'Graphlets8': 0.012105179824976855}

In [9]:
# Load previously calculated pool frequencies from a pickle file.
# NOTE(review): this file appears to be the one written by save_frequencies_into_pickle
# further down the notebook, so on a fresh run it must already exist on disk — confirm.
GRF.get_frequencies_from_pickle("../AndrogenReceptor_Pool_Frequencies.pickle")

### Motif Selection

We compare the graphlet frequencies in the reference network with those in its permutations.

In [10]:
# Score each graphlet; the result maps a graphlet name to a pair of numbers,
# presumably (z-score, p-value) — TODO confirm against the Paragon implementation.
GRF.get_Z_score()

{'Graphlets0': (2.2971117959096, 0.010806196517093263),
 'Graphlets1': (-7.810968456188324, 2.837510903742159e-15),
 'Graphlets2': (5.858567400218211, 2.3343856337135186e-09),
 'Graphlets3': (-5.455123059080934, 2.446947069483062e-08),
 'Graphlets4': (-2.881547126491014, 0.001978640260971553),
 'Graphlets5': (9.539286646820543, 7.190907625198327e-22),
 'Graphlets6': (8.796022947139594, 7.087530797048371e-19),
 'Graphlets7': (15.475032792624292, 2.557360143000735e-54),
 'Graphlets8': (14.396978660825622, 2.702770841724582e-47)}

#### Select frequent graphlets: Motifs

In [11]:
# Keep the graphlets reported as significant (the motifs); the result is keyed by graphlet name.
Selected_Graphlets=GRF.select_significant_Graphlets()

In [12]:
Selected_Graphlets.keys() # names of the selected significant graphlets

dict_keys(['Graphlets2', 'Graphlets5', 'Graphlets6', 'Graphlets7', 'Graphlets8'])

One by one, we can list the graphlets; each key is the graphlet's node list and each value is its edge list.

In [16]:
# Entries of one selected graphlet type; presumably keyed by node list with edge lists
# as values — TODO confirm.
selected_G2_dict=Selected_Graphlets['Graphlets2']

The Graphlet-Guided Network (GGN) is composed of the frequently seen graphlets, i.e. the motifs. Here, we list the edges in the GGN.

In [17]:
# Edge list taken from the selected graphlets.
GGN_edgelist=GRF.get_selected_edge_list()

Write the GGN to a SIF file.

In [18]:
# Write the guided graphlet network under the given path prefix; the call returns True here.
GRF.write_guided_graphlet_network("../AndrogenReceptor_GGN")

True

Calculating graphlet frequencies may take a long time. We can therefore save the frequencies, or calculate them on an example pool. In the downstream part, we can choose the required graphlets manually.


In [19]:
# Save the calculated frequencies so they do not have to be recomputed.
# The path is given without an extension; presumably ".pickle" is appended — TODO confirm.
GRF.save_frequencies_into_pickle("../AndrogenReceptor_Pool_Frequencies")

In [20]:
# Reload the saved frequencies from the pickle file.
GRF.get_frequencies_from_pickle("../AndrogenReceptor_Pool_Frequencies.pickle")

In [21]:
# Display the graphlet frequencies again after loading the pickle.
GRF.get_Graphlet_Frequency()

{'Graphlets0': 0.000293848607814437,
 'Graphlets1': 0.03475779614926956,
 'Graphlets2': 0.01748433786920349,
 'Graphlets3': 0.523912016886961,
 'Graphlets4': 0.1580608204391412,
 'Graphlets5': 0.09767251160356297,
 'Graphlets6': 0.11204896831482318,
 'Graphlets7': 0.04366452030424739,
 'Graphlets8': 0.012105179824976855}

In [22]:
# Same call as the previous cell; the displayed dict is identical.
GRF.get_Graphlet_Frequency()

{'Graphlets0': 0.000293848607814437,
 'Graphlets1': 0.03475779614926956,
 'Graphlets2': 0.01748433786920349,
 'Graphlets3': 0.523912016886961,
 'Graphlets4': 0.1580608204391412,
 'Graphlets5': 0.09767251160356297,
 'Graphlets6': 0.11204896831482318,
 'Graphlets7': 0.04366452030424739,
 'Graphlets8': 0.012105179824976855}

In [23]:
# Names of the selected graphlets, in the order the dict holds them.
Graphlet_list = list(Selected_Graphlets)
Graphlet_list

['Graphlets2', 'Graphlets5', 'Graphlets6', 'Graphlets7', 'Graphlets8']

#### GGN construction with known graphlet-motifs





In [5]:
from Paragon import GraphletGuidance

In [6]:
# Graphlet types used as motifs when the GGN is built directly from known graphlets.
Graphlet_list = [
    'Graphlets2',
    'Graphlets5',
    'Graphlets6',
    'Graphlets7',
    'Graphlets8',
]

In [7]:
# Graphlet-guidance helper built on the reference network only (no initial nodes passed here).
GRF_lite=GraphletGuidance(HIPPIE_nx)

In [8]:

# Build the graphlet-guided network from the initial nodes and the chosen graphlet types.
# extention: True --> add existing interaction of hidden nodes in reference network
GGN_nx = GRF_lite.construct_GGN(
    initial_nodes,
    Graphlets=Graphlet_list,
    extention=False,
)
## GGN_nx, Graph item in networkx


82 of 82 input nodes have been found in the given network



In [9]:
# Number of nodes in the constructed GGN.
GGN_nx.number_of_nodes()

4685

In [10]:
# Edge table (source, target) of the GGN.
# NOTE(review): the first displayed row has an empty target, so the graph seems to contain
# a node with an empty name — presumably blank gene fields kept as "" because the reference
# network is read with keep_default_na=False; confirm and filter if unintended.
GGN_df=nx.to_pandas_edgelist(GGN_nx)
GGN_df

Unnamed: 0,source,target
0,DDX5,
1,DDX5,BRCA1
2,DDX5,SUMO3
3,DDX5,SMARCA4
4,DDX5,CREBBP
...,...,...
21901,UXT,POLR1B
21902,UXT,ASB7
21903,UXT,TBC1D32
21904,UXT,POLR3A


In [11]:
# Number of initial nodes; matches the "82 of 82 input nodes" message printed by construct_GGN.
len(initial_nodes)

82

In [12]:
# Write the GGN built by GRF_lite under the given path prefix.
GRF_lite.write_guided_graphlet_network('../AndrogenReceptor_GGN_')

True