In [None]:
import warnings
warnings.filterwarnings('ignore')

import os
import scanpy as sc
import decoupler as dc
import numpy as np
import pandas as pd

In [None]:
# Directory holding the per-sample inputs, read later as f'{in_dir}/{sample}.h5ad'.
in_dir = '../../results/spatial/filtered_data'
# Root directory for everything this notebook writes; created up front if missing.
out_dir = '../../results/04_spatial/27_decoupler'
os.makedirs(out_dir, exist_ok=True)

In [None]:
# Sample identifiers to process; each one is expected to match a
# '<sample>.h5ad' file inside `in_dir`.
# NOTE(review): the HC / SSc / SSc-HL prefixes presumably encode the cohort — confirm.
sample_list = ['HC01', 'HC02', 'HC03', 'HC05',
               'SSc4733', 'SSc4994', 'SSc5380', 'SSc5722',
               'SSc-HL01', 'SSc-HL05', 'SSc-HL06', 'SSc-HL11',
               'SSc-HL13', 'SSc-HL25', 'SSc-HL33', 'SSc-HL35']

In [None]:
def process_adata(adata):
    """Normalize total counts and log1p-transform ``adata``.

    Runs ``sc.pp.normalize_total`` and then ``sc.pp.log1p`` on the object,
    both with their default arguments. The return values of those calls are
    ignored, so this relies on scanpy modifying ``adata`` in place.
    Highly-variable-gene selection and scaling are not applied here.

    Parameters
    ----------
    adata
        Object handed to the scanpy preprocessing functions (the callers in
        this notebook pass the result of ``sc.read_h5ad``).

    Returns
    -------
    The same ``adata`` object that was passed in.
    """
    # Order matters: count normalization first, then the log transform.
    for preprocess in (sc.pp.normalize_total, sc.pp.log1p):
        preprocess(adata)
    return adata

## 1. Pathway activity

In [6]:
# Per-sample pathway activities inferred with the PROGENy network; outputs
# are written under <out_dir>/pathway_activity as both .h5ad and .csv.
os.makedirs(f"{out_dir}/pathway_activity", exist_ok=True)

# Fetch the network once; the same table is reused for every sample.
progeny = dc.get_progeny(organism='human', top=500)

for sample in sample_list:
    # Load the sample from disk and apply the shared preprocessing.
    adata = process_adata(sc.read_h5ad(f'{in_dir}/{sample}.h5ad'))

    # Fit the model on adata.X (use_raw=False); the estimates are read back
    # from adata.obsm['mlm_estimate'] just below.
    dc.run_mlm(
        mat=adata,
        net=progeny,
        source='source',
        target='target',
        weight='weight',
        verbose=False,
        use_raw=False,
    )

    # Wrap the activity estimates in their own object and persist it.
    acts = dc.get_acts(adata, obsm_key='mlm_estimate')
    acts.write_h5ad(f'{out_dir}/pathway_activity/{sample}.h5ad')

    # Same values as a labelled table: rows = obs_names, columns = var_names.
    df = pd.DataFrame(acts.X, index=acts.obs_names, columns=acts.var_names)
    df.to_csv(f'{out_dir}/pathway_activity/{sample}.csv')

## 2. TF activity

In [None]:
# Per-sample transcription-factor activities inferred with the CollecTRI
# network; outputs are written under <out_dir>/tf_activity as .h5ad and .csv.
os.makedirs(f"{out_dir}/tf_activity", exist_ok=True)

# Fetch the network once; the same table is reused for every sample.
net = dc.get_collectri(organism='human', split_complexes=False)

for sample in sample_list:
    # Load the sample from disk and apply the shared preprocessing.
    adata = process_adata(sc.read_h5ad(f'{in_dir}/{sample}.h5ad'))

    # Fit the model on adata.X (use_raw=False); the estimates are read back
    # from adata.obsm['mlm_estimate'] just below.
    dc.run_mlm(
        mat=adata,
        net=net,
        source='source',
        target='target',
        weight='weight',
        verbose=False,
        use_raw=False,
    )

    # Wrap the activity estimates in their own object and persist it.
    acts = dc.get_acts(adata, obsm_key='mlm_estimate')
    acts.write_h5ad(f'{out_dir}/tf_activity/{sample}.h5ad')

    # Same values as a labelled table: rows = obs_names, columns = var_names.
    df = pd.DataFrame(acts.X, index=acts.obs_names, columns=acts.var_names)
    df.to_csv(f'{out_dir}/tf_activity/{sample}.csv')