In [37]:
import omnipath as op
import pandas as pd

from collections import defaultdict
import networkx as nx

In [9]:
opd = op.interactions.AllInteractions().get()

In [12]:
opd.to_csv("../data/omnipath_march_2023.tsv", sep="\t")

In [15]:
hgncURL = "https://www.genenames.org/cgi-bin/download/custom?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=md_prot_id&status=Approved&status=Entry%20Withdrawn&hgnc_dbtag=on&order_by=gd_app_sym_sort&format=text&submit=submit"

hgncTranslation = pd.read_csv(hgncURL, sep="\t")
hgncTranslation.to_csv("../data/omnipath_hgnc_uniprot_march_2023.tsv", sep="\t")
hgncTranslation

Unnamed: 0,HGNC ID,Approved symbol,Approved name,UniProt ID(supplied by UniProt)
0,HGNC:5,A1BG,alpha-1-B glycoprotein,P04217
1,HGNC:37133,A1BG-AS1,A1BG antisense RNA 1,
2,HGNC:24086,A1CF,APOBEC1 complementation factor,Q9NQ94
3,HGNC:6,A1S9T,"symbol withdrawn, see [HGNC:12469](/data/gene-...",
4,HGNC:7,A2M,alpha-2-macroglobulin,P01023
...,...,...,...,...
48778,HGNC:25820,ZYG11B,"zyg-11 family member B, cell cycle regulator",Q9C0D3
48779,HGNC:13200,ZYX,zyxin,Q15942
48780,HGNC:51695,ZYXP1,zyxin pseudogene 1,
48781,HGNC:29027,ZZEF1,zinc finger ZZ-type and EF-hand domain contain...,O43149


In [24]:
uniprot2gene = defaultdict(set)

for ri, row in hgncTranslation.iterrows():
    
    hgncGene = row["Approved symbol"]
    uniprotID = row["UniProt ID(supplied by UniProt)"]
    
    if pd.isna(uniprotID):
        continue
    
    uniprot2gene[uniprotID].add(hgncGene)
    

In [28]:
list(opd.type.unique())

['post_translational',
 'transcriptional',
 'post_transcriptional',
 'mirna_transcriptional',
 'lncrna_post_transcriptional']

In [10]:
opd

Unnamed: 0,source,target,is_directed,is_stimulation,is_inhibition,consensus_direction,consensus_stimulation,consensus_inhibition,curation_effort,references,sources,type,references_stripped,n_references,n_sources,n_primary_sources
0,P0DP25,P48995,True,False,True,True,False,True,3,TRIP:11290752;TRIP:11983166;TRIP:12601176,TRIP,post_translational,11290752;11983166;12601176,3,1,1
1,P0DP24,P48995,True,False,True,True,False,True,3,TRIP:11290752;TRIP:11983166;TRIP:12601176,TRIP,post_translational,11290752;11983166;12601176,3,1,1
2,P0DP23,P48995,True,False,True,True,False,True,3,TRIP:11290752;TRIP:11983166;TRIP:12601176,TRIP,post_translational,11290752;11983166;12601176,3,1,1
3,Q03135,P48995,True,True,False,True,True,False,13,DIP:19897728;HPRD:12732636;IntAct:19897728;Lit...,DIP;HPRD;IntAct;Lit-BM-17;TRIP,post_translational,10980191;12732636;14551243;16822931;18430726;1...,8,5,5
4,P14416,P48995,True,True,False,True,True,False,1,TRIP:18261457,TRIP,post_translational,18261457,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222657,UCA1,P49715,True,False,False,False,False,False,1,ncRDeathDB:24648007,ncRDeathDB,lncrna_post_transcriptional,24648007,1,1,1
222658,UCA1,Q16665,True,False,False,False,False,False,1,ncRDeathDB:24737584,ncRDeathDB,lncrna_post_transcriptional,24737584,1,1,1
222659,URH,Q9NYL2,True,False,False,False,False,False,1,ncRDeathDB:25013376,ncRDeathDB,lncrna_post_transcriptional,25013376,1,1,1
222660,Xist,P26358,True,False,False,False,False,False,1,ncRDeathDB:8769643,ncRDeathDB,lncrna_post_transcriptional,8769643,1,1,1


In [34]:
opd[(opd.is_directed != True)]

Unnamed: 0,source,target,is_directed,is_stimulation,is_inhibition,consensus_direction,consensus_stimulation,consensus_inhibition,curation_effort,references,sources,type,references_stripped,n_references,n_sources,n_primary_sources


In [None]:
kg = nx.DiGraph()

for ri, row in opd.iterrows():
        
    srcUID = row["source"]
    tgtUID = row["target"]
    
    srcGenes = uniprot2gene.get(srcUID, set())
    tgtGenes = uniprot2gene.get(tgtUID, set())
    
    if len(srcGenes) == 0 or len(tgtGenes) == 0:
        continue
        
    for src in srcGenes:
        kg.add_node(src, type="gene")
        
    for tgt in tgtGenes:
        kg.add_node(tgt, type="gene")

In [42]:
interactionTypes = {} # stimulation, inhibition
interactionTypes[(False, False)] = "interacts"
interactionTypes[(False, True)] = "represses"
interactionTypes[(True, False)] = "activates"
interactionTypes[(True, True)] = "interacts"

ignoredCount = 0

kg = nx.DiGraph()

for ri, row in opd.iterrows():
        
    srcUID = row["source"]
    tgtUID = row["target"]
    
    srcGenes = uniprot2gene.get(srcUID, set())
    tgtGenes = uniprot2gene.get(tgtUID, set())
    
    if len(srcGenes) == 0 or len(tgtGenes) == 0:
        continue
    
    # consensus_direction -> all resources have same direction
    # consensus_stimulation -> all resources show this as stimulation
    # consensus_inhibition -> all resources show this as consensus_inhibition
    
    # it must have a consensus direction
    if not (row["consensus_direction"]):
        ignoredCount += 1
        continue
    
    # it must either have consensus stimulation or inhibition
    if not (row["consensus_stimulation"] or row["consensus_inhibition"]):
        ignoredCount += 1
        continue

    interactionType = interactionTypes[ (row["consensus_stimulation"], row["consensus_inhibition"]) ]
    
    omnipathEvidences = str(row["references"]).strip().split(";")
    omnipathType = row["type"]
    
    
    for src in srcGenes:
        
        for tgt in tgtGenes:
            
            attrDict = {
                "type": interactionType, "source": "omnipathdb", "omnipath_evidences": omnipathEvidences, "omnipath_type": omnipathType
            }
            
            kg.add_edge( src, tgt, **attrDict )
            
            if not row["is_directed"]:
                kg.add_edge(tgt, src, **attrDict)
    

In [43]:
print("Ignored Entries", ignoredCount)

Ignored Entries 90206


In [44]:
print(kg)

DiGraph with 8851 nodes and 60388 edges
