In [1]:
import pandas as pd, networkx as nx

In [2]:
## Reference network preparation

# Load the HIPPIE interaction table. keep_default_na=False stops pandas from
# converting its default NA markers (e.g. "NA", empty cells) into NaN, so
# every cell keeps the literal text found in the file.
HIPPIE_df = pd.read_csv(
    "../Source/Interactomes/HIPPIE_v2_3.tab",
    sep="\t",
    keep_default_na=False,
)
print("The number of interaction in HIPPIE", HIPPIE_df.shape[0])

# Build a graph with one edge per Gene_1/Gene_2 pair, carrying the Score column.
HIPPIE_nx = nx.from_pandas_edgelist(
    HIPPIE_df,
    source="Gene_1",
    target="Gene_2",
    edge_attr="Score",
)

# Snapshot the self-loops first so the graph can be modified while iterating.
self_interactions = list(nx.selfloop_edges(HIPPIE_nx))

# Self-interactions (and NaN-NaN interactions) come with the reference network
# by default and have to be removed before the permutation step.
for node_a, node_b in self_interactions:
    if not isinstance(node_a, float):
        HIPPIE_nx.remove_edge(node_a, node_b)
        continue
    # NOTE(review): a float node (presumably NaN) is only reported here; its
    # self-loop is NOT removed from the graph. Confirm this is intended, since
    # the comment above says such interactions must go. A blanket
    # HIPPIE_nx.remove_edges_from(self_interactions) was previously disabled.
    print(node_a)

# Replace the raw table with the edge list of the cleaned graph
# (columns "source", "target" plus the edge attributes).
HIPPIE_df = nx.to_pandas_edgelist(HIPPIE_nx)
print("The number of edges in HIPPIE network", HIPPIE_nx.number_of_edges())
print("The number of nodes in HIPPIE network", HIPPIE_nx.number_of_nodes())


# Sanity check: every row of the exported edge list must be an edge of the graph.
edges = tuple(zip(HIPPIE_df["source"], HIPPIE_df["target"]))
unmatched_edges = [pair for pair in edges if pair not in HIPPIE_nx.edges]
for pair in unmatched_edges:
    print("check the edge", pair)

The number of interaction in HIPPIE 783182
The number of edges in HIPPIE network 774448
The number of nodes in HIPPIE network 19437


# Community detection

In [3]:
from Paragon import CommunityAnalysis 

In [4]:
# Load the reconstructed Androgen Receptor pathway (read with pandas' default
# comma separator) and turn it into an undirected graph. The edge list is
# expected to expose the "source" and "target" columns that networkx uses by
# default.
returned_df = pd.read_csv(
    "../AndrogenReceptor_reconstructed_pathway_with_GGN.sif"
)
returned_nx = nx.from_pandas_edgelist(
    returned_df,
    source="source",
    target="target",
)

# Display the number of edges in the reconstructed pathway.
returned_nx.number_of_edges()

1799

In [5]:
# Wrap the reconstructed pathway graph in Paragon's CommunityAnalysis object,
# which the following cells query for communities and enrichment tests.
# NOTE(review): whether community detection runs here or lazily on first use
# is not visible from this notebook — confirm in the Paragon package.
CA=CommunityAnalysis(returned_nx)

In [6]:
# Fetch the communities as two DataFrames and display the per-gene table
# (one row per gene with its community label, as rendered below).
module_df, node_df=CA.get_communities_in_DataFrames()
node_df

Unnamed: 0,Genes,Community
0,PSMC2,Module_1
1,ELAVL1,Module_1
2,XPO1,Module_1
3,RB1,Module_1
4,HSPA8,Module_1
...,...,...
353,PPP1CA,Module_37
354,RNF123,Module_37
355,FBXO6,Module_37
356,PTK2,Module_37


In [7]:
# Display the per-community table: one row per module, with its member genes
# joined by ';' in the Community column (as rendered below).
module_df

Unnamed: 0,Community_name,Community
0,Module_1,PSMC2;ELAVL1;XPO1;RB1;HSPA8;AIP;GTF2F1;HSPD1;A...
1,Module_2,MDM2;FOXO4;SIRT1;MANF;ZNF550
2,Module_3,FHL2;SATB1;ONECUT1;CEBPB;FOXA1;DLX5;FOXO1;SEZ6...
3,Module_4,PNRC2;PTEN;WIPI1;GAPDH;STUB1;CASP8;SMARCA4;AKT...
4,Module_5,RUNX2;MYOCD;SMAD3;SMAD2;NFE4;HOXC11;MYC;MSX2;S...
5,Module_6,JDP2;SRA1;CSNK2A1;HSP90AB1;ATF2;S100A2
6,Module_7,PML;PARP1;SIRT6;GCM1;ARRB2;SENP1;SUMO3
7,Module_8,MCPH1;ARSJ;ZMIZ2;PIN1;SSX2; SSX2B;PIAS1
8,Module_9,TBP;PIAS3;OPN1LW;PRPF40A;TAB2
9,Module_10,UNC119;PRDM16;SLC30A9;NKX3-1;SPDEF;STX16


In [8]:
#### Hypergeometric test for GOA

In [9]:
# Load the prepared GO annotation table (tab-separated). As displayed below it
# maps a GO term ("GO ID") to a gene/protein symbol ("DB Object Symbol").
# The path is a plain string literal: the previous f-prefix had no
# placeholders to interpolate, so it has been dropped.
GOA_bio_proc_df = pd.read_csv(
    "../Source/Annotations/GOA_proteins_isoforms_prepared.tab",
    sep="\t",
)
GOA_bio_proc_df

Unnamed: 0,GO ID,DB Object Symbol
0,GO:0002250,IGKV3-7
1,GO:0002250,IGKV1D-42
2,GO:0002250,IGLV4-69
3,GO:0002250,IGLV8-61
4,GO:0002250,IGLV4-60
...,...,...
210592,GO:0006958,C1QA
210593,GO:0001682,RPP40
210594,GO:0061640,DCTN3
210595,GO:0045892,NELFCD


In [10]:
### hypergeometric test for a community

In [11]:
# Hypergeometric test for a single community ("Module_20"): HIPPIE is passed
# as the reference network and the GOA table as prior knowledge, grouped by
# "GO ID" and matched to genes through "DB Object Symbol".
CA.hypergeometric_test_for_community(
    "Module_20",
    reference_network=HIPPIE_nx,
    prior_knowledge_df=GOA_bio_proc_df,
    prior_knowledge_on="GO ID",
    name_on="DB Object Symbol",
)

Unnamed: 0,GO ID,p-value,Erichment_Score,Genes in Module,Intersecting Genes,The number of intersecting genes,Process_Gene,The number of components of prior_knowledge,Community_name
0,GO:0000723,2.091289e-06,7.205865,"[HNRNPU, RPA1, RPA3, TP53BP1, GTF2H1, RXRB, FG...","[RPA2, RPA1, RPA3]",3,"[TERT, NBN, RAD51D, RECQL4, ACTL6A, CCNE2, PAR...",58,Module_20
1,GO:0000724,9.00937e-06,6.482927,"[HNRNPU, RPA1, RPA3, TP53BP1, GTF2H1, RXRB, FG...","[RPA2, RPA1, RPA3]",3,"[PSMD14, RAD51B, AP5Z1, RAD51C, XRCC3, XRCC2, ...",94,Module_20
2,GO:0006260,1.484991e-05,6.235118,"[HNRNPU, RPA1, RPA3, TP53BP1, GTF2H1, RXRB, FG...","[RPA2, RPA1, RPA3]",3,"[NFIB, CHEK1, ORC5, KIN, RNASEH2A, RECQL4, REC...",111,Module_20
3,GO:0006281,5.233893e-06,5.489667,"[HNRNPU, RPA1, RPA3, TP53BP1, GTF2H1, RXRB, FG...","[RPA1, RPA3, GTF2H3, GTF2H1]",4,"[EYA2, MEN1, CHEK1, BACH1, FANCG, RAD51B, FANC...",283,Module_20
4,GO:0006284,3.382002e-07,8.111176,"[HNRNPU, RPA1, RPA3, TP53BP1, GTF2H1, RXRB, FG...","[RPA2, RPA1, RPA3]",3,"[OGG1, POLQ, POLB, UNG, RPA2, LIG1, XRCC1, RPA...",32,Module_20
5,GO:0006289,4.609253e-12,8.993337,"[HNRNPU, RPA1, RPA3, TP53BP1, GTF2H1, RXRB, FG...","[RPA2, RPA1, GTF2H1, RPA3, GTF2H3]",5,"[TP53, ERCC1, POLA1, RPA2, ERCC2, ERCC3, RPA1,...",42,Module_20
6,GO:0006298,3.382002e-07,8.111176,"[HNRNPU, RPA1, RPA3, TP53BP1, GTF2H1, RXRB, FG...","[RPA2, RPA1, RPA3]",3,"[TP73, MSH4, MSH5, RNASEH2A, ABL1, PCNA, RPA2,...",32,Module_20
7,GO:0006351,0.0002689555,4.778832,"[HNRNPU, RPA1, RPA3, TP53BP1, GTF2H1, RXRB, FG...","[TP53BP1, RXRB, GTF2H1]",3,"[BCLAF3, LINC00473, AIP, TCERG1, POLR3A, BTAF1...",294,Module_20
8,GO:0045893,0.003725271,3.377899,"[HNRNPU, RPA1, RPA3, TP53BP1, GTF2H1, RXRB, FG...","[RXRB, GTF2H1, TP53BP1]",3,"[TAF11L5, TAF11L4, TAF11L13, TAF11L14, TAF11L1...",729,Module_20
9,GO:0045944,0.01488312,2.560954,"[HNRNPU, RPA1, RPA3, TP53BP1, GTF2H1, RXRB, FG...","[RXRB, HNRNPU, TP53BP1]",3,"[E2F8, ZBTB7C, NKX2-6, ZNF840P, HELT, MSGN1, N...",1200,Module_20


In [12]:
# Run the same hypergeometric test for every community, with HIPPIE as the
# reference network and the GOA table as prior knowledge, then display the
# combined result table.
returned_all = CA.hypergeometric_test_for_all_communities(
    reference_network=HIPPIE_nx,
    prior_knowledge_df=GOA_bio_proc_df,
    prior_knowledge_on="GO ID",
    name_on="DB Object Symbol",
)
returned_all

Unnamed: 0,Community_name,GO ID,p-value,Erichment_Score,Genes in Module,Intersecting Genes,The number of intersecting genes,Process_Gene,The number of components of prior_knowledge
0,Module_1,GO:0006355,3.279431e-02,1.974098,"[PSMC2, ELAVL1, XPO1, RB1, HSPA8, AIP, GTF2F1,...","[RB1, AHR, RELA]",3,"[ZNF722, A0A2R8YD15, FOXO3B, NFILZ, A0A7P0TAN4...",1006
1,Module_1,GO:0006357,5.161063e-03,2.174350,"[PSMC2, ELAVL1, XPO1, RB1, HSPA8, AIP, GTF2F1,...","[AHR, ARNT, RELA, NCOA7, RB1]",5,"[MT-RNR1, EPOP, UNCX, ZNF98, ZNF716, ZNF724, Z...",1697
2,Module_1,GO:0006366,6.119262e-04,4.283569,"[PSMC2, ELAVL1, XPO1, RB1, HSPA8, AIP, GTF2F1,...","[RB1, GTF2F1, CDK9]",3,"[NFILZ, FAM170A, HELT, TAF4, WWP2, BCL9, PIR, ...",240
3,Module_1,GO:0045892,6.473379e-04,3.461595,"[PSMC2, ELAVL1, XPO1, RB1, HSPA8, AIP, GTF2F1,...","[RB1, HSPA8, AHR, RELA]",4,"[PRAMEF33, A0A0G2JP20, TLE7, PRAMEF22, PRAMEF2...",587
4,Module_1,GO:0045893,1.403575e-02,2.526941,"[PSMC2, ELAVL1, XPO1, RB1, HSPA8, AIP, GTF2F1,...","[AIP, AHR, RELA]",3,"[TAF11L5, TAF11L4, TAF11L13, TAF11L14, TAF11L1...",729
...,...,...,...,...,...,...,...,...,...
235,Module_37,GO:0007165,6.262722e-03,2.475190,"[GSN, TGFB1I1, PXN, CRKL, PIK3R1, CBLB, NEDD8,...","[PXN, CRKL, CBLB, PIK3R1]",4,"[CCL4L2, A0A2R8Y747, A0A499FJF3, ARHGAP10, PSD...",1293
236,Module_37,GO:0016477,4.813961e-04,4.428596,"[GSN, TGFB1I1, PXN, CRKL, PIK3R1, CBLB, NEDD8,...","[PXN, PTK2, CRKL]",3,"[PRSS37, FOXE1, PODXL, ITGB1BP1, LAMA5, ARPC5,...",261
237,Module_37,GO:0016567,2.144087e-04,3.953249,"[GSN, TGFB1I1, PXN, CRKL, PIK3R1, CBLB, NEDD8,...","[CBLB, RNF123, FBXO6, NEDD8]",4,"[TMEM129, UBA6, ASB14, RFPL4A, MARCHF11, ZSWIM...",522
238,Module_37,GO:0034446,5.442407e-06,6.687988,"[GSN, TGFB1I1, PXN, CRKL, PIK3R1, CBLB, NEDD8,...","[ITGA4, PIK3R1, PXN]",3,"[NRP1, LAMA5, SRGAP2, FZD7, ABL1, FN1, ITGB3, ...",58


## Significant modules in the network

In [14]:
# Number of distinct communities that appear in the all-communities test table.
returned_all["Community_name"].nunique()

32

In [15]:
# Number of distinct GO terms that appear in the all-communities test table.
returned_all["GO ID"].nunique()

97