In [102]:
from omnipath._core.downloader._downloader import Downloader
import omnipath

In [103]:
import liana as li

In [104]:
import pandas as pd
import numpy as np

In [105]:
# Shared omnipath Downloader instance; `show_homologene` below fetches its
# table through this object's `maybe_download` method.
dnwld = Downloader()

In [106]:
# Raw data files from the oganm/homologene GitHub repository:
#   - taxData.tsv     : taxonomy table (tax_id, name_txt), see the output of
#                       `show_homologene()` below
#   - homologene2.tsv : the homology table read by `_get_homologene_raw`
#                       (columns used in this notebook: HID, Taxonomy, Gene.Symbol)
# NOTE(review): `homlogene_url` is misspelled ("homologene"); the name is kept
# as-is because `_get_homologene_raw` refers to it.
raw_taxa_url = "https://raw.githubusercontent.com/oganm/homologene/master/data-raw/taxData.tsv"
homlogene_url = "https://raw.githubusercontent.com/oganm/homologene/master/data-raw/homologene2.tsv"

In [107]:
def show_homologene():
    """Fetch the taxonomy table located at ``raw_taxa_url``.

    The file is requested through the notebook-level ``dnwld`` downloader and
    parsed with ``pd.read_table`` (passed as the ``callback``). The result of
    that call is returned unchanged; in this notebook it renders as a table
    with ``tax_id`` and ``name_txt`` columns.
    """
    taxa_table = dnwld.maybe_download(raw_taxa_url, callback=pd.read_table)
    return taxa_table

In [108]:
# Display the taxonomy table (tax_id / name_txt) to look up organism IDs.
show_homologene()

Unnamed: 0,tax_id,name_txt
0,10090,Mus musculus
1,10116,Rattus norvegicus
2,28985,Kluyveromyces lactis
3,318829,Magnaporthe oryzae
4,33169,Eremothecium gossypii
5,3702,Arabidopsis thaliana
6,4530,Oryza sativa
7,4896,Schizosaccharomyces pombe
8,4932,Saccharomyces cerevisiae
9,5141,Neurospora crassa


In [109]:
# Uncomment to clear omnipath's cache (e.g. to force a fresh download):
# omnipath.clear_cache()

Homologene Download function

In [77]:
def _get_homologene_raw():
    """Download the raw homologene table and return it with every column as ``str``.

    The file at ``homlogene_url`` is fetched through the notebook-level
    ``dnwld`` downloader and parsed with ``pd.read_table``.

    Returns
    -------
    pandas.DataFrame
        The parsed table cast with ``astype(str)``, so identifier columns such
        as ``Taxonomy`` compare against string IDs (e.g. ``'9606'``).
    """
    # `dnwld` is the Downloader instance created near the top of the notebook.
    raw_table = dnwld.maybe_download(homlogene_url, callback=pd.read_table)
    return raw_table.astype(str)

In [78]:
# Load the raw homologene table into a notebook-level variable.
# NOTE(review): `download_homologene` below calls `_get_homologene_raw()`
# itself, so this variable is not read by the cells shown here; confirm
# whether it is still needed.
homologene = _get_homologene_raw()

In [85]:
def download_homologene(source_organism, target_organism):
    """Build a gene-symbol homology map between two organisms.

    Parameters
    ----------
    source_organism, target_organism : str or int
        Taxonomy IDs as they appear in the ``Taxonomy`` column of the
        homologene table (e.g. ``'9606'`` and ``'10090'``). Integers are
        accepted and converted to ``str``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``source`` and ``target``: one row for every pair of gene
        symbols (source organism, target organism) that share a ``HID``.
        When a ``HID`` has several genes in either organism, every
        combination is returned.
    """
    # The raw table is loaded with astype(str), so the IDs must be strings too;
    # an int ID would otherwise match nothing and silently yield an empty map.
    source_organism = str(source_organism)
    target_organism = str(target_organism)

    homologene = _get_homologene_raw().set_index("HID")

    # HID is not unique, so the masks are passed as plain arrays to select rows
    # by position instead of relying on index alignment.
    is_source = (homologene['Taxonomy'] == source_organism).to_numpy()
    is_target = (homologene['Taxonomy'] == target_organism).to_numpy()
    source_df = homologene.loc[is_source, ['Gene.Symbol']]
    target_df = homologene.loc[is_target, ['Gene.Symbol']]

    # Pair up symbols that belong to the same homology group (shared HID).
    pairs = pd.merge(source_df, target_df,
                     left_index=True, right_index=True,
                     suffixes=('_source', '_target'),
                     how='inner')
    pairs = pairs.reset_index().rename(
        {'Gene.Symbol_source': 'source', 'Gene.Symbol_target': 'target'},
        axis=1,
    )

    return pairs[['source', 'target']]

Homology Conversion

In [86]:
# Build the symbol map from taxonomy ID 9606 (NCBI taxonomy ID of Homo sapiens)
# to 10090 (Mus musculus in the taxonomy table displayed earlier).
map_df = download_homologene('9606', '10090')

In [88]:
# Inspect the mapping: one row per (source symbol, target symbol) pair.
map_df

Unnamed: 0,source,target
0,ETFA,Etfa
1,CLDN4,Cldn4
2,HAUS2,Haus2
3,EFHC1,Efhc1
4,LRRC8D,Lrrc8d
...,...,...
17307,ODAD2,Odad2
17308,SRBD1,Srbd1
17309,WRAP53,Wrap53
17310,ZNF358,Zfp358


In [89]:
# Load a ligand-receptor resource from LIANA. No resource name is passed, so
# this is whatever `select_resource` returns by default.
resource = li.resource.select_resource()

In [90]:
# Expand the resource with LIANA's `explode_complexes` before translating.
# Judging by the table displayed below, the result has per-gene `ligand` /
# `receptor` columns next to `interaction`, `ligand_complex` and
# `receptor_complex`.
resource = li.resource.explode_complexes(resource)
# Unused alternative: turn the mapping into a plain dict (source -> target).
# map_df = dict(zip(map_df['source'], map_df['target']))

In [91]:
def _rebuild_complexes(df, group_cols, target_col):
    df = resource.copy()
    df_grouped = resource.groupby(group_cols)['target'].agg(lambda x: '_'.join(map(str, x))).reset_index()
    df = df.drop('target', axis=1).merge(df_grouped, on=list(np.setdiff1d(group_cols, target_col)), how='inner')
    df = df.drop(f'{target_col}_complex', axis=1).rename({'target':f'{target_col}_complex'}, axis=1)
    
    return df

In [92]:
# Inspect the exploded resource before any symbol translation.
resource

Unnamed: 0,interaction,ligand,receptor,ligand_complex,receptor_complex
0,LGALS9&PTPRC,LGALS9,PTPRC,LGALS9,PTPRC
1,LGALS9&MET,LGALS9,MET,LGALS9,MET
2,LGALS9&CD44,LGALS9,CD44,LGALS9,CD44
3,LGALS9&LRP1,LGALS9,LRP1,LGALS9,LRP1
4,LGALS9&CD47,LGALS9,CD47,LGALS9,CD47
...,...,...,...,...,...
5849,BMP2&ACTR2,BMP2,ACTR2,BMP2,ACTR2
5850,BMP15&ACTR2,BMP15,ACTR2,BMP15,ACTR2
5851,CSF1&CSF3R,CSF1,CSF3R,CSF1,CSF3R
5852,IL36G&IFNAR1,IL36G,IFNAR1,IL36G,IFNAR1


In [93]:
# Translate receptors: attach each receptor's homolog as a `target` column.
# The inner join drops rows whose receptor has no entry in `map_df`.
# NOTE(review): this cell reassigns `resource`, so re-running it on its own
# operates on the already-translated frame.
resource = resource.merge(map_df, left_on='receptor', right_on='source', how='inner').drop(['source'], axis=1)
# Re-assemble `receptor_complex` from the translated symbols of each
# (interaction, ligand) group.
resource = _rebuild_complexes(resource, ['interaction', 'ligand'], 'receptor')

In [94]:
# Inspect the resource after the receptor side has been translated.
resource

Unnamed: 0,interaction,ligand,receptor,ligand_complex,receptor_complex
0,LGALS9&PTPRC,LGALS9,PTPRC,LGALS9,Ptprc
1,CD22&PTPRC,CD22,PTPRC,CD22,Ptprc
2,MRC1&PTPRC,MRC1,PTPRC,MRC1,Ptprc
3,LGALS1&PTPRC,LGALS1,PTPRC,LGALS1,Ptprc
4,LGALS9&MET,LGALS9,MET,LGALS9,Met
...,...,...,...,...,...
5720,GDF1&ACTR2,GDF1,ACTR2,GDF1,Actr2
5721,INHBA&ACTR2,INHBA,ACTR2,INHBA,Actr2
5722,BMP7&ACTR2,BMP7,ACTR2,BMP7,Actr2
5723,BMP2&ACTR2,BMP2,ACTR2,BMP2,Actr2


In [95]:
# Translate ligands: attach each ligand's homolog as a `target` column.
# The inner join drops rows whose ligand has no entry in `map_df`.
# NOTE(review): this cell reassigns `resource`, so re-running it on its own
# operates on the already-translated frame.
resource = resource.merge(map_df, left_on='ligand', right_on='source', how='inner').drop(['source'], axis=1)
# Re-assemble `ligand_complex` from the translated symbols of each
# (interaction, receptor) group.
resource = _rebuild_complexes(resource, ['interaction', 'receptor'], 'ligand')

In [96]:
# Inspect the resource after both sides have been translated.
resource

Unnamed: 0,interaction,ligand,receptor,receptor_complex,ligand_complex
0,CD22&PTPRC,CD22,PTPRC,Ptprc,Cd22
1,MRC1&PTPRC,MRC1,PTPRC,Ptprc,Mrc1
2,LGALS1&PTPRC,LGALS1,PTPRC,Ptprc,Lgals1
3,LGALS1&ITGB1,LGALS1,ITGB1,Itgb1,Lgals1
4,LGALS1&CD69,LGALS1,CD69,Cd69,Lgals1
...,...,...,...,...,...
5427,SERPINA7&SLC16A2,SERPINA7,SLC16A2,Slc16a2,Serpina7
5428,BTN1A1&TARM1,BTN1A1,TARM1,Tarm1,Btn1a1
5429,BTN1A1&NEGR1,BTN1A1,NEGR1,Negr1,Btn1a1
5430,IGFBP7&MPIG6B,IGFBP7,MPIG6B,Mpig6b,Igfbp7


In [97]:
# remove duplicates (1:many mappings)
# remove duplicates (1:many mappings)
# Keeps the first row for each `interaction` value, then drops the
# `interaction` column itself.
resource = resource.drop_duplicates(['interaction']).drop(['interaction'], axis=1)

In [101]:
# Inspect the final, de-duplicated resource.
resource

Unnamed: 0,ligand,receptor,receptor_complex,ligand_complex
0,CD22,PTPRC,Ptprc,Cd22
1,MRC1,PTPRC,Ptprc,Mrc1
2,LGALS1,PTPRC,Ptprc,Lgals1
3,LGALS1,ITGB1,Itgb1,Lgals1
4,LGALS1,CD69,Cd69,Lgals1
...,...,...,...,...
5427,SERPINA7,SLC16A2,Slc16a2,Serpina7
5428,BTN1A1,TARM1,Tarm1,Btn1a1
5429,BTN1A1,NEGR1,Negr1,Btn1a1
5430,IGFBP7,MPIG6B,Mpig6b,Igfbp7
