In [1]:
from pathlib import Path
import pandas as pd
import scanpy as sc
import os, warnings 
warnings.filterwarnings('ignore') 

import stan

Create a new environment and install decoupler by following the [decoupler documentation](https://decoupler-py.readthedocs.io/en/latest/).

In [2]:
import decoupler as dc
def run_decoupler(adata, net, use_raw=False):
    """Estimate TF activities with decoupler's ULM and MLM methods.

    Parameters
    ----------
    adata : AnnData
        Raw (un-normalized) count data. It is modified in place: the object
        is assigned to ``adata.raw`` before normalization, then ``adata.X``
        is total-count normalized and log1p-transformed.
    net : pandas.DataFrame
        TF-gene network with ``source``, ``target`` and ``weight`` columns.
    use_raw : bool, default False
        Forwarded to ``dc.run_ulm`` / ``dc.run_mlm``. With ``False`` the
        methods run on the normalized, log-transformed ``adata.X``. With
        ``True`` they read ``adata.raw`` instead, which here holds the
        pre-normalization counts; this reproduces what the function did
        before this argument existed, because decoupler's own default is
        ``use_raw=True``.

    Returns
    -------
    AnnData
        The same ``adata`` object. Callers in this notebook read the results
        from ``adata.obsm['ulm_estimate']`` and ``adata.obsm['mlm_estimate']``.
    """
    # Keep the un-normalized counts reachable through adata.raw.
    adata.raw = adata
    sc.pp.normalize_total(adata)
    sc.pp.log1p(adata)
    # use_raw is passed explicitly: leaving it at decoupler's default (True)
    # would make both methods read adata.raw (the counts stored above) and
    # silently ignore the normalization performed just before.
    dc.run_ulm(mat=adata, net=net, source='source', target='target', weight='weight',
               verbose=True, use_raw=use_raw)
    dc.run_mlm(mat=adata, net=net, source='source', target='target', weight='weight',
               verbose=True, use_raw=use_raw)
    return adata

In [3]:
# Load the hTFtarget TF-gene table; the first CSV column becomes the row index,
# leaving the TF, gene and tissue columns shown in the preview below.
net = pd.read_csv(filepath_or_buffer='resources/hTFtarget.csv', index_col=0)
net.head(n=5)

Unnamed: 0,TF,gene,tissue
0,AEBP2,TMEM53,colon
1,AEBP2,C1orf228,colon
2,AEBP2,FBXO31,colon
3,AEBP2,ADAMTSL5,colon
4,AEBP2,CTB-25B13.9,colon


In [4]:
# Bring the table into the column layout passed to decoupler below
# (source / target / weight), keeping one row per TF-gene pair.
# Columns are renamed by name rather than by position, so the cell does not
# depend on column order and can be re-run safely: on a second run the rename
# is a no-op and the remaining steps leave the frame unchanged.
net = (
    net
    .rename(columns={'TF': 'source', 'gene': 'target'})
    .drop_duplicates(subset=['source', 'target'])
    .assign(weight=1)  # unweighted network: every TF-gene edge gets weight 1
)

In [5]:
# Directory that collects the benchmark CSV files written by the cells below.
path = Path('benchmark_decoupler')
# Create the directory (and any missing parents) in one call. This is a no-op
# when it already exists, has no window between an existence check and the
# creation, and raises immediately if a regular file occupies the path.
path.mkdir(parents=True, exist_ok=True)

### Lymph node

In [6]:
# Lymph node sample: load it, estimate TF activities, and write one CSV per method.
# NOTE(review): min_counts is 5000 here but 500 in the other sample cells — confirm this is intended.
sample_id = "V1_Human_Lymph_Node"
adata = stan.read_visium_sge(
    sample_id=sample_id,
    min_cells=5,
    min_counts=5000,
)
adata = run_decoupler(adata, net)
adata.obsm['ulm_estimate'].to_csv(path / f'ulm_{sample_id}.csv')
adata.obsm['mlm_estimate'].to_csv(path / f'mlm_{sample_id}.csv')

Running ulm on mat with 3991 samples and 21261 targets for 441 sources.


100%|██████████| 1/1 [00:01<00:00,  1.07s/it]


Running mlm on mat with 3991 samples and 21261 targets for 441 sources.


100%|██████████| 1/1 [00:04<00:00,  4.19s/it]


### Glioblastoma

In [7]:
# Glioblastoma sample: load it, estimate TF activities, and write one CSV per method.
sample_id = "Parent_Visium_Human_Glioblastoma"
adata = stan.read_visium_sge(
    sample_id=sample_id,
    min_cells=5,
    min_counts=500,
)
adata = run_decoupler(adata, net)
adata.obsm['ulm_estimate'].to_csv(path / f'ulm_{sample_id}.csv')
adata.obsm['mlm_estimate'].to_csv(path / f'mlm_{sample_id}.csv')

Running ulm on mat with 3462 samples and 20950 targets for 442 sources.


100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


Running mlm on mat with 3462 samples and 20950 targets for 442 sources.


100%|██████████| 1/1 [00:03<00:00,  3.61s/it]


### Breast

In [8]:
# Breast_Wu samples: each one is read from its own .h5ad file under data/Breast_Wu/.
sample_list = [
    "1142243F",
    "1160920F",
    "CID4290",
    "CID4535",
    "CID4465",
    "CID44971",
]
for sample_id in sample_list:
    adata = stan.read_breast_wu(f"data/Breast_Wu/{sample_id}.h5ad")
    adata = run_decoupler(adata, net)
    # One CSV per method, prefixed with the dataset name.
    adata.obsm['ulm_estimate'].to_csv(path / f'ulm_Breast_Wu_{sample_id}.csv')
    adata.obsm['mlm_estimate'].to_csv(path / f'mlm_Breast_Wu_{sample_id}.csv')

Running ulm on mat with 4636 samples and 19372 targets for 442 sources.


100%|██████████| 1/1 [00:01<00:00,  1.10s/it]


Running mlm on mat with 4636 samples and 19372 targets for 442 sources.


100%|██████████| 1/1 [00:04<00:00,  4.50s/it]


Running ulm on mat with 4842 samples and 19982 targets for 440 sources.


100%|██████████| 1/1 [00:01<00:00,  1.29s/it]


Running mlm on mat with 4842 samples and 19982 targets for 440 sources.


100%|██████████| 1/1 [00:04<00:00,  4.89s/it]


Running ulm on mat with 2425 samples and 17506 targets for 440 sources.


100%|██████████| 1/1 [00:00<00:00,  1.86it/s]


Running mlm on mat with 2425 samples and 17506 targets for 440 sources.


100%|██████████| 1/1 [00:02<00:00,  2.17s/it]


Running ulm on mat with 1031 samples and 16765 targets for 439 sources.


100%|██████████| 1/1 [00:00<00:00,  4.52it/s]


Running mlm on mat with 1031 samples and 16765 targets for 439 sources.


100%|██████████| 1/1 [00:00<00:00,  1.17it/s]


Running ulm on mat with 1207 samples and 17362 targets for 439 sources.


100%|██████████| 1/1 [00:00<00:00,  3.86it/s]


Running mlm on mat with 1207 samples and 17362 targets for 439 sources.


100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


Running ulm on mat with 1161 samples and 17479 targets for 440 sources.


100%|██████████| 1/1 [00:00<00:00,  4.87it/s]


Running mlm on mat with 1161 samples and 17479 targets for 440 sources.


100%|██████████| 1/1 [00:01<00:00,  1.11s/it]


### CytAssist

In [9]:
# CytAssist tonsil samples. The reader returns two objects; only the first
# (adata) is passed on to decoupler, pdata is not used in this cell.
sample_list = [
    "CytAssist_FFPE_Protein_Expression_Human_Tonsil",
    "CytAssist_FFPE_Protein_Expression_Human_Tonsil_AddOns",
]
for sample_id in sample_list:
    adata, pdata = stan.read_cytassist(
        sample_id,
        min_cells=5,
        min_counts=500,
        remove_isotype=True,
    )
    adata = run_decoupler(adata, net)
    adata.obsm['ulm_estimate'].to_csv(path / f'ulm_{sample_id}.csv')
    adata.obsm['mlm_estimate'].to_csv(path / f'mlm_{sample_id}.csv')

Running ulm on mat with 4191 samples and 18041 targets for 433 sources.


100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


Running mlm on mat with 4191 samples and 18041 targets for 433 sources.


100%|██████████| 1/1 [00:03<00:00,  3.90s/it]


Running ulm on mat with 4906 samples and 18050 targets for 433 sources.


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]


Running mlm on mat with 4906 samples and 18050 targets for 433 sources.


100%|██████████| 1/1 [00:04<00:00,  4.78s/it]


In [7]:
# CytAssist breast cancer sample, processed with the same reader settings as
# the tonsil samples; pdata is returned by the reader but not used in this cell.
sample_id = 'CytAssist_FFPE_Protein_Expression_Human_Breast_Cancer'
adata, pdata = stan.read_cytassist(
    sample_id,
    min_cells=5,
    min_counts=500,
    remove_isotype=True,
)
adata = run_decoupler(adata, net)
adata.obsm['ulm_estimate'].to_csv(path / f'ulm_{sample_id}.csv')
adata.obsm['mlm_estimate'].to_csv(path / f'mlm_{sample_id}.csv')

Running ulm on mat with 4166 samples and 16911 targets for 433 sources.


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]


Running mlm on mat with 4166 samples and 16911 targets for 433 sources.


100%|██████████| 1/1 [00:06<00:00,  7.00s/it]
