# Estimate TF activities with decoupler - Comparing the effect of the number of targets on the estimated activities

## Libraries

In [1]:
import pandas as pd
import numpy as np

import decoupler as dc

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


## Benchmark data

### Load KnockTF data
The benchmark data can be downloaded from Zenodo as shown in scripts/benchmark.ipynb

In [2]:
# Read the two KnockTF benchmark tables (knockTF_expr.csv and knockTF_meta.csv).
# In both files the first CSV column is used as the row index.
mat = pd.read_csv(filepath_or_buffer='../../data/knockTF_expr.csv', index_col=0)
obs = pd.read_csv(filepath_or_buffer='../../data/knockTF_meta.csv', index_col=0)

### Filter knockTF data
Only experiments in which the perturbed transcription factor has a log fold-change below -1 are kept.

In [3]:
# Boolean mask over the rows of `obs`: True where the recorded logFC is below -1.
msk = obs['logFC'].lt(-1)

# Apply the same mask to both tables so their rows stay matched.
# NOTE: this rebinds `mat` and `obs`; the unfiltered tables are no longer
# reachable afterwards without re-running the loading cell.
mat, obs = mat[msk], obs[msk]

## Load GRNs

In [4]:
# Load the three gene regulatory networks as DataFrames, in this order:
# DoRothEA (ABC), RegNetwork, CollecTRI.
doro_ABC, regnet, collecTRI = (
    pd.read_csv(grn_path)
    for grn_path in (
        '../../data/networks/filtered_dorothea_ABC.csv',
        '../../data/networks/filtered_regnetwork.csv',
        '../../output/CollecTRI/CollecTRI_GRN.csv',
    )
)

## Data preparation before activity estimation 

In [5]:
# Options passed to dc.format_benchmark_inputs through its `decouple_kws`
# argument: the network column names plus `min_n`.
# NOTE(review): `min_n` is presumably the minimum number of targets a source
# needs to be kept -- confirm against the decoupler documentation.
decouple_kws = dict(
    source='source',
    target='target',
    weight='weight',
    min_n=5,
)

In [6]:
# Arguments that are identical for all three networks; only `net` differs.
_benchmark_kws = dict(
    mat=mat,
    obs=obs,
    sign=-1,
    by='experiment',
    perturb='TF',
    groupby=None,
    decouple_kws=decouple_kws,
)

# Each call returns a 5-tuple (mat, obs, var, net, groupby) for one network.
# NOTE: the network names (`regnet`, `doro_ABC`, `collecTRI`) are rebound to
# the returned networks, so re-running this cell feeds the already formatted
# networks back in.
(mat_regnet, obs_regnet, var_regnet,
 regnet, groupby_regnet) = dc.format_benchmark_inputs(net=regnet, **_benchmark_kws)
(mat_doro_ABC, obs_doro_ABC, var_doro_ABC,
 doro_ABC, groupby_doro_ABC) = dc.format_benchmark_inputs(net=doro_ABC, **_benchmark_kws)
(mat_collecTRI, obs_collecTRI, var_collecTRI,
 collecTRI, groupby_collecTRI) = dc.format_benchmark_inputs(net=collecTRI, **_benchmark_kws)

In [7]:
# Rebuild labelled dense DataFrames from the formatted benchmark matrices:
# rows are labelled with the index of the matching `obs_*` table and columns
# with the index of the matching `var_*` table.
#
# `.toarray()` is used instead of the `.A` shorthand: `.A` is deprecated on
# SciPy sparse matrices (and missing in recent SciPy releases), while
# `.toarray()` returns the same dense ndarray.
# NOTE(review): assumes `mat_*` are SciPy sparse matrices as returned by
# dc.format_benchmark_inputs -- confirm.
mat_regnet_new = pd.DataFrame(
    mat_regnet.toarray(),
    index=obs_regnet.index,
    columns=var_regnet.index,
)
mat_doro_ABC_new = pd.DataFrame(
    mat_doro_ABC.toarray(),
    index=obs_doro_ABC.index,
    columns=var_doro_ABC.index,
)
mat_collecTRI_new = pd.DataFrame(
    mat_collecTRI.toarray(),
    index=obs_collecTRI.index,
    columns=var_collecTRI.index,
)

## Activity estimation with decoupler

In [8]:
# Settings shared by every dc.run_ulm call below; only the input matrix and
# the network change between calls.
_ulm_kws = dict(
    source='source',
    target='target',
    weight='weight',
    verbose=True,
    min_n=5,
)

# One ULM run per network. Each result is indexed with [0] when saved later,
# so run_ulm returns a sequence whose first element holds the activity scores.
results_regnet = dc.run_ulm(mat_regnet_new, net=regnet, **_ulm_kws)
results_doro = dc.run_ulm(mat_doro_ABC_new, net=doro_ABC, **_ulm_kws)
results_collecTRI = dc.run_ulm(mat_collecTRI_new, net=collecTRI, **_ulm_kws)

54 features of mat are empty, they will be removed.
Running ulm on mat with 232 samples and 21931 targets for 638 sources.
55 features of mat are empty, they will be removed.
Running ulm on mat with 214 samples and 21930 targets for 297 sources.
52 features of mat are empty, they will be removed.
Running ulm on mat with 279 samples and 21933 targets for 766 sources.


### Save results
The estimated activity scores are saved as CSV files.

In [9]:
# Write the first element of each run_ulm result to its own CSV file.
# The bound-method form `frame.to_csv(path)` replaces the unbound
# `pd.DataFrame.to_csv(frame, path)` call; the written files are the same.
# NOTE(review): the '../../output/benchmark/' directory must already exist --
# to_csv does not create missing directories.
results_regnet[0].to_csv('../../output/benchmark/regnet_activity.csv')
results_doro[0].to_csv('../../output/benchmark/dorothea_activity.csv')
results_collecTRI[0].to_csv('../../output/benchmark/collecTRI_activity.csv')