# Yada Deconvolution

---



- Run the following cells for deconvolution using Yada.

## 1 - Import prerequisites

In [1]:
%load_ext autoreload
%autoreload 2
from IPython.display import FileLink, FileLinks
import pandas as pd
from yada import *

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 10000)

!pip install -q tslearn
!pip -q install gseapy
#!git clone https://github.com/zurkin1/Yada.git
#!mv Yada/* .

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


## 2 - Configure Input Files.

Example input files are in the ./data/ folder.


In [2]:
# Mixture expression file. Layout: one column per mixture (mix1, mix2, ...),
# one row per gene name.
mix = "./data/GBM/mix1.csv"

# Reference matrix file. It should be normalized the same way as the mixture data.
pure = "./data/GBM/pure1.csv"

# File holding the true cell type proportions.
# NOTE(review): this path points at the Abbas dataset while mix/pure point at
# GBM - confirm the intended pairing.
labels = "./data/Abbas/labels.csv"

## 3 - Run deconvolution.

In [3]:
# Select the deconvolution method by changing the function called below.
# Available methods: run_dtw_deconv (recommended for Microarray), cibersort, dsection,
# lasso, nnls_deconv_constrained, pxcell (recommended for RNASeq).
# NOTE(review): section 5 calls pxcell with the mixture only (no reference matrix), so
# the methods may not all accept (mix, pure) - confirm the signature before swapping.
result = run_dtw_deconv(mix, pure)
# Last expression of the cell: shown as the cell output.
result

# Download the result (uncomment one of the options below).
# NOTE(review): this cell does not write data/results.csv itself; presumably the
# deconvolution call does - verify before relying on the file.
# Jupyter: render a clickable link to the results file.
#FileLink('data/results.csv')
# Google Colab: download the results file through the browser.
#from google.colab import files
#files.download('data/results.csv')

Deconvolution, num_cells: 5, num_mixes: 16
 99%

Unnamed: 0,Leading_edge,cellular_tumor,Pseudopalisading_cells,Microvascular_prolif,infiltrating_tumor
0,0.009849,0.585305,0.251238,0.053805,0.00389
1,0.014396,0.67548,0.213564,0.049001,0.001111
2,0.53912,0.068831,0.027815,6.4e-05,0.276958
3,0.075561,0.576231,0.20448,0.056004,0.037505
4,0.60763,0.043203,0.025772,0.000528,0.311269
5,0.174794,0.509577,0.234483,0.040256,0.08664
6,0.644983,0.034069,0.031636,0.000961,0.336635
7,0.138335,0.557509,0.217551,0.054198,0.063244
8,0.053982,0.547143,0.235301,0.068798,0.026666
9,0.592636,0.038201,0.022763,0.00081,0.290056


## 4 - Evaluate Results.

Run this cell only if the true proportions are available for the same mixtures that were deconvolved above.

In [3]:
# Compare the estimated proportions in `result` with the true proportions file `labels`.
# NOTE(review): `labels` has to describe the same mixtures and cell types as `result`;
# section 2 pairs GBM mix/pure files with an Abbas labels file - confirm the pairing
# before trusting the reported correlations.
calc_corr(labels, result)

Jurkat, 0.9932029300645067, SpearmanrResult(correlation=0.9716254134469436, pvalue=1.3811738967574605e-07)
IM-9, 0.990164022868434, SpearmanrResult(correlation=0.9716254134469436, pvalue=1.3811738967574605e-07)
Raji, 0.9857187838767321, SpearmanrResult(correlation=0.9716254134469436, pvalue=1.3811738967574605e-07)
THP-1, 0.945942905807847, SpearmanrResult(correlation=0.6909336273400487, pvalue=0.012843447163603845)


## 5 - Another Example With RNASeq.

This example uses xCell: Aran, Hu and Butte, "xCell: digitally portraying the tissue cellular heterogeneity landscape", Genome Biology (2017) 18:220.

In [5]:
# Mixture file for the RNASeq example. Layout: one column per mixture (mix1, mix2, ...),
# one row per gene name.
# NOTE: this rebinds `mix` from section 2; re-run the section 2 cell before re-running
# section 3, otherwise section 3 would pick up this file instead.
mix = './data/Challenge/input/mix-107019.csv'

# True cell type proportions file (left disabled for this example).
#labels = './data/Abbas/labels.csv'

# pxcell is called with the mixture path only (no reference matrix is passed).
# As the last expression of the cell, its return value is shown as the cell output.
pxcell(mix)

Number of samples: 20, number of gene sets: 489


Unnamed: 0_level_0,mix0,mix1,mix2,mix3,mix4,mix5,mix6,mix7,mix8,mix9,mix10,mix11,mix12,mix13,mix14,mix15,mix16,mix17,mix18,mix19
cell_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Adipocytes,0.000000,0.002355,0.000000,0.000104,0.000000,0.000000,0.000003,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.003059,0.002760
Astrocytes,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
B-cells,0.060759,0.045040,0.053219,0.065533,0.000000,0.104229,0.000000,0.133439,0.080371,0.000947,0.111579,0.033880,0.000000,0.133060,0.112339,0.045254,0.058061,0.000000,0.000000,0.022430
Basophils,0.000000,0.000000,0.000000,0.025438,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.030487,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
CD4+ T-cells,0.000000,0.000000,0.064289,0.000000,0.000000,0.044092,0.000000,0.058988,0.047214,0.047096,0.000000,0.031886,0.000000,0.000000,0.008516,0.000024,0.102259,0.081592,0.001961,0.046897
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ly Endothelial cells,0.000524,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000192,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
mv Endothelial cells,0.000188,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
naive B-cells,0.000000,0.002835,0.000000,0.022392,0.004026,0.021188,0.000000,0.000000,0.000000,0.000870,0.000000,0.011545,0.003523,0.012027,0.015073,0.000000,0.017573,0.003951,0.000000,0.006758
pDC,0.000000,0.003228,0.000000,0.003777,0.002855,0.005337,0.002300,0.000907,0.000000,0.000000,0.000857,0.000000,0.002008,0.032735,0.017715,0.000000,0.000000,0.000000,0.000000,0.000000
