Need to Generate PK from:
CellCall; SignaLink; 

Read in links from:

CellCall: https://github.com/ShellyCoder/cellcall/blob/master/inst/extdata/new_ligand_receptor_TFs.txt

stMLnet: https://github.com/SunXQlab/stMLnet/blob/main/data/ex_databases.rda

scseqcomm: https://gitlab.com/sysbiobig/scseqcomm/-/tree/master/data

Check rough sizes for these, and make a new database from OmniPath where:

source (receptor), target (TF), pathway, database, shortest_path_length (as from OmniPath)

In [1]:
library(tidyverse)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.3.6     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.8     [32m✔[39m [34mdplyr  [39m 1.0.9
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


### stmlnet

In [7]:
load("~/Downloads/ex_databases.rda")
stmlnet <- ex_databases$RecTF.DB

In [12]:
stmlnet <- stmlnet %>% select(source, target) %>%
    mutate(database='STMLNet')

In [8]:
summary(stmlnet)

    source             target              score          
 Length:872490      Length:872490      Min.   :0.0000000  
 Class :character   Class :character   1st Qu.:0.0000009  
 Mode  :character   Mode  :character   Median :0.0000031  
                                       Mean   :0.0000380  
                                       3rd Qu.:0.0000140  
                                       Max.   :0.3967780  

### CellCall

In [4]:
cellcall <- read.table(url('https://raw.githubusercontent.com/ShellyCoder/cellcall/master/inst/extdata/new_ligand_receptor_TFs.txt'), header = TRUE,  sep = '	',  stringsAsFactors = FALSE)  %>%
    select(source=Receptor_Symbol, target=TF_Symbol, pathway=Pathway) %>% 
    distinct() %>% 
    mutate(source = stringr::str_replace_all(source, ',', '_')) %>%
    mutate(database='cellcall')
head(cellcall)

Unnamed: 0_level_0,source,target,pathway,database
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
1,OCLN,GATA4,"hsa04530_3,hsa04530_5",cellcall
2,OCLN,YBX3,hsa04530_2,cellcall
3,CCR1,FOXO3,hsa04062_5,cellcall
4,CCR1,IKBKB,hsa04062_5,cellcall
5,CCR1,NFKB1,hsa04062_7,cellcall
6,CCR1,NFKBIA,hsa04062_6,cellcall


In [94]:
summary(cellcall)

    source             target            pathway         
 Length:3878        Length:3878        Length:3878       
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  

### scseqcomm

#### KEGG

In [5]:
load("~/Downloads/TF_PPR_KEGG_human.rda")

csc_kegg <- TF_PPR_KEGG_human  %>% 
    select(source=receptor, target=tf, pathway) %>%
    mutate(source = stringr::str_replace_all(source, ',', '_')) %>% 
    distinct() %>%
    mutate(database='scseqcomm_KEGG')
head(csc_kegg)

Unnamed: 0_level_0,source,target,pathway,database
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
1,PKM,ENO1,Glycolysis / Gluconeogenesis,scseqcomm_KEGG
2,ALDOA,ENO1,Glycolysis / Gluconeogenesis,scseqcomm_KEGG
3,GPI,ENO1,Glycolysis / Gluconeogenesis,scseqcomm_KEGG
4,MINPP1,ENO1,Glycolysis / Gluconeogenesis,scseqcomm_KEGG
5,NPR2,NME2,Purine metabolism,scseqcomm_KEGG
6,PKM,NME2,Purine metabolism,scseqcomm_KEGG


In [98]:
summary(csc_kegg)

    source             target            pathway         
 Length:69300       Length:69300       Length:69300      
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  

#### Reactome

In [6]:
load("~/Downloads/TF_PPR_REACTOME_human.rda")
scs_reactome <- TF_PPR_REACTOME_human %>% 
    select(source=receptor, target=tf, pathway) %>%
    mutate(source = stringr::str_replace_all(source, ',', '_')) %>% 
    distinct() %>%
    mutate(database='scseqcomm_REACTOME')
head(csc_kegg)

Unnamed: 0_level_0,source,target,pathway,database
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
1,PKM,ENO1,Glycolysis / Gluconeogenesis,scseqcomm_KEGG
2,ALDOA,ENO1,Glycolysis / Gluconeogenesis,scseqcomm_KEGG
3,GPI,ENO1,Glycolysis / Gluconeogenesis,scseqcomm_KEGG
4,MINPP1,ENO1,Glycolysis / Gluconeogenesis,scseqcomm_KEGG
5,NPR2,NME2,Purine metabolism,scseqcomm_KEGG
6,PKM,NME2,Purine metabolism,scseqcomm_KEGG


In [102]:
summary(scs_reactome)

    source             target            pathway         
 Length:24306       Length:24306       Length:24306      
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  

## Generate Ours

In [28]:
import omnipath
import liana as li
import decoupler as dc

In [2]:
pathways = omnipath.requests.Annotations.get(resources=['SignaLink_pathway', 'KEGG-PC', 'SIGNOR', 'NetPath'])

In [3]:
pathways['genesymbol'] = pathways['genesymbol'].str.replace('COMPLEX:', '')

In [4]:
pathways = pathways[['genesymbol', 'value', 'source']]
pathways.rename(columns={'genesymbol': 'target', 'value': 'pathway', 'source': 'database'}, inplace=True)

In [5]:
pathways

Unnamed: 0,target,pathway,database
0,AGER,Advanced glycation end-products (AGE/RAGE),NetPath
1,AIFM1,Advanced glycation end-products (AGE/RAGE),NetPath
2,AK2,Advanced glycation end-products (AGE/RAGE),NetPath
3,AKT1,Interleukin-2 (IL-2),NetPath
4,AKT1,Thymic stromal lymphopoietin (TSLP),NetPath
...,...,...,...
17204,ECHS1_HADH,Fatty acid degradation,KEGG-PC
17205,ECHS1_HADH,Fatty acid elongation,KEGG-PC
17206,ECHS1_HADH,Metabolic pathways,KEGG-PC
17207,ECHS1_HADH,"Valine, leucine and isoleucine degradation",KEGG-PC


In [6]:
resource = li.resource.select_resource('consensus')

  dot = np.dot(x * w, y)


In [7]:
resource

Unnamed: 0,ligand,receptor
0,LGALS9,PTPRC
1,LGALS9,MET
2,LGALS9,CD44
3,LGALS9,LRP1
4,LGALS9,CD47
...,...,...
4696,BMP2,ACTR2
4697,BMP15,ACTR2
4698,CSF1,CSF3R
4699,IL36G,IFNAR1


In [8]:
pathways

Unnamed: 0,target,pathway,database
0,AGER,Advanced glycation end-products (AGE/RAGE),NetPath
1,AIFM1,Advanced glycation end-products (AGE/RAGE),NetPath
2,AK2,Advanced glycation end-products (AGE/RAGE),NetPath
3,AKT1,Interleukin-2 (IL-2),NetPath
4,AKT1,Thymic stromal lymphopoietin (TSLP),NetPath
...,...,...,...
17204,ECHS1_HADH,Fatty acid degradation,KEGG-PC
17205,ECHS1_HADH,Fatty acid elongation,KEGG-PC
17206,ECHS1_HADH,Metabolic pathways,KEGG-PC
17207,ECHS1_HADH,"Valine, leucine and isoleucine degradation",KEGG-PC


In [32]:
# generate an lr geneset
lr_net = li.fun.generate_lr_geneset(resource, net=pathways, source='pathway', weight=None)
lr_net[['ligand', 'receptor']] = lr_net['interaction'].str.split('^', expand=True)

In [34]:
lr_net

Unnamed: 0,pathway,interaction,weight,ligand,receptor
56,Receptor activator of nuclear factor kappa-B l...,TNFSF11^TNFRSF11A,1.0,TNFSF11,TNFRSF11A
59,TNF-related weak inducer of apoptosis (TWEAK),TNFSF12^CD163,1.0,TNFSF12,CD163
63,TNF-related weak inducer of apoptosis (TWEAK),TNFSF12^TNFRSF12A,1.0,TNFSF12,TNFRSF12A
67,TGF,BMP7^ACVR1B,1.0,BMP7,ACVR1B
71,Receptor tyrosine kinase,EFNA1^EPHA3,1.0,EFNA1,EPHA3
...,...,...,...,...,...
1440,Mitochondrial Control of Apoptosis,FADD^TRADD,1.0,FADD,TRADD
1451,NF-KB Canonical,FADD^TRADD,1.0,FADD,TRADD
1463,SARS-COV APOPTOSIS,FADD^TRADD,1.0,FADD,TRADD
1474,TNF-alpha Signaling,FADD^TRADD,1.0,FADD,TRADD


In [30]:
# import dorothea
dorothea = dc.get_dorothea()

In [35]:
dorothea

Unnamed: 0,source,confidence,target,weight
0,MYC,A,TERT,1.000000
1,ETV4,A,TNF,1.000000
2,ETV4,A,TIMP2,1.000000
3,ETV4,A,BDKRB1,1.000000
4,ETV3,A,NFKB1,-1.000000
...,...,...,...,...
32270,GATA6,C,BHLHE40,0.333333
32271,GATA6,C,BCAS3,0.333333
32272,GATA6,C,BCAR3,0.333333
32273,GFI1B,C,NEDD4L,0.333333


In [37]:
# check for TF links within each pathway for which we have a ligand-receptor interaction

In [42]:
pathway_msk = pathways['pathway'].isin(lr_net['pathway'])
tf_msk = pathways['target'].isin(dorothea['source'])


In [44]:
pathways = pathways[pathway_msk & tf_msk]

In [48]:
receptor_tf = lr_net.merge(pathways, on=['pathway'], how='inner')
receptor_tf.drop(columns=['interaction', 'weight_y', 'weight_x'], inplace=True)

In [54]:
receptor_tf['pathway'].unique()

array(['Receptor activator of nuclear factor kappa-B ligand (RANKL)',
       'TNF-related weak inducer of apoptosis (TWEAK)', 'TGF',
       'Receptor tyrosine kinase',
       'Epidermal growth factor receptor (EGFR)', 'Hedgehog',
       'Alpha6 Beta4 Integrin', 'Multiple sclerosis', 'JAK/STAT',
       'SARS-CoV CYTOKINE STORM', 'Leptin Signaling', 'WNT', 'Wnt',
       'WNT Signaling', 'Thyroid Hormone Metabolism',
       'Thyroid-stimulating hormone (TSH)',
       'Follicle-stimulating hormone (FSH)', 'Gastrin',
       'Interleukin-1 (IL-1)', 'Brain-derived neurotrophic factor (BDNF)',
       'Dopaminergic Synapse', 'Interleukin-2 (IL-2)', 'Sonic Hedgehog',
       'COVID-19 Causal Network', 'Mitochondrial Control of Apoptosis',
       'TNF-alpha Signaling', 'Tumor necrosis factor (TNF) alpha'],
      dtype=object)

In [57]:
receptor_tf.to_csv('../../data/receptor_tf.csv', index=False)