In [1]:
import numpy as np
import pandas as pd
from TAPE.utils import CCCscore, L1error

In [2]:
# Cell types that are tagged as 'rare' in each tissue of the rare-cell-type
# benchmark. Every other cell type is tagged 'normal' by the evaluation cells.

# Limb_Muscle: skeletal muscle satellite cell, endothelial cell
rare_celltype_lm = [
    'skeletal muscle satellite cell',
    'endothelial cell',
]

# Lung: T cell, ciliated columnar cell of tracheobronchial tree, natural killer
# NOTE(review): the match against prediction columns is an exact string
# comparison -- confirm the label files really name this type 'natural killer'
# (and not e.g. 'natural killer cell'), otherwise it is silently tagged 'normal'.
rare_celltype_lung = [
    'T cell',
    'ciliated columnar cell of tracheobronchial tree',
    'natural killer',
]

# Marrow: monocyte, hematopoietic precursor cell
rare_celltype_marrow = [
    'monocyte',
    'hematopoietic precursor cell',
]

In [3]:
# Empty results table; each evaluation cell below adds one row per
# (method, tissue, task, cell type) with its CCC and L1 error.
record = pd.DataFrame(
    columns=[
        'CellType',
        'Method',
        'DataType',
        'Tissue',
        'CCC',
        'L1error',
        'abundant',
    ],
)

In [4]:
### rare
# Score DWLS predictions on the rare-cell-type pseudobulk benchmark and add one
# row per (tissue, task, cell type) to `record`.
names = ['Limb_Muscle','Lung', 'Marrow']
tasks = ['umi2counts','counts2umi']
# Tissue -> cell types tagged 'rare'. A dict lookup raises KeyError for a tissue
# without an entry instead of silently reusing the previous tissue's list.
rare_by_tissue = {
    'Limb_Muscle': rare_celltype_lm,
    'Lung': rare_celltype_lung,
    'Marrow': rare_celltype_marrow,
}
rows = []  # collected here and added to `record` once, after the loops
for name in names:
    rare = rare_by_tissue[name]
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating DWLS's prediction of "+name+' ('+task+") ##################")
        print('#################################################################################')
        # Transposed so that cell types become the columns, as in the label table.
        pred = pd.read_csv('./DWLS/rare/'+name+'_'+task+'_pred.csv',index_col=0).T
        # The label file is chosen by the part of the task name after the '2'.
        label = pd.read_csv('./TabularMuris/rare/'+name+'_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # DWLS cell-type names use '.' where the labels use ' '
        # (e.g. 'hematopoietic.precursor.cell').
        # NOTE(review): a name in which '.' stands for something other than a space
        # would not be restored here and would be dropped by the intersection below
        # without any message -- confirm no cell type is lost this way.
        pred.columns = [x.replace(".", " ") for x in pred.columns]
        # Keep only cell types present in both tables, in the same column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics work on .values, so samples are compared by row position,
        # not aligned on the index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)

        for i, celltype in enumerate(pred.columns):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            abundant = 'rare' if celltype in rare else 'normal'
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                         'DataType':task,'Tissue':name,'Method':'DWLS','abundant':abundant})

# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on every
# call); build the new rows once and concatenate. An empty `record` is replaced
# rather than concatenated so its empty columns do not influence the dtypes.
if rows:
    new_rows = pd.DataFrame(rows, columns=record.columns)
    record = new_rows if record.empty else pd.concat([record, new_rows], ignore_index=True)

#################################################################################
################## Evaluating DWLS's prediction of Limb_Muscle (umi2counts) ##################
#################################################################################
Overall ccc score is 0.9639703307681747
Overall L1 error is 0.03637234823692142
skeletal muscle satellite cell's ccc score is 0.20986833343894493 l1 error is 0.011399586239933163
mesenchymal stem cell's ccc score is 0.9736375027521955 l1 error is 0.03967475195916457
endothelial cell's ccc score is 0.10110758373447945 l1 error is 0.03794986785674846
B cell's ccc score is 0.978186765683159 l1 error is 0.031466259654188165
macrophage's ccc score is 0.9718147353970602 l1 error is 0.03498825681261426
T cell's ccc score is 0.892410574932403 l1 error is 0.0627553668988799
#################################################################################
################## Evaluating DWLS's prediction of Limb_Muscle (counts2umi) ###########

In [5]:
# Display the raw (untransposed) DWLS prediction file. `name` and `task` are not
# set in this cell: they hold whatever the last iteration of the preceding loop
# left behind, so this cell only works after that loop has run.
pd.read_csv(''.join(['./DWLS/rare/', name, '_', task, '_pred.csv']))

Unnamed: 0,solDWLS,solDWLS.1,solDWLS.2,solDWLS.3,solDWLS.4,solDWLS.5,solDWLS.6,solDWLS.7,solDWLS.8,solDWLS.9,...,solDWLS.90,solDWLS.91,solDWLS.92,solDWLS.93,solDWLS.94,solDWLS.95,solDWLS.96,solDWLS.97,solDWLS.98,solDWLS.99
hematopoietic.precursor.cell,0.048553,0.080442,0.065969,0.058561,0.046928,0.168983,0.037849,0.003968,0.04361,0.31899,...,0.04496,0.051434,0.031635,0.00088,0.37643,0.070748,0.079038,0.084164,0.375223,3e-06
granulocyte,0.580209,0.298096,0.679533,0.759457,0.679241,0.223596,0.74307,0.109716,0.838498,0.102412,...,0.315632,0.601055,0.65109,0.521821,0.220423,0.533838,0.567403,0.709829,0.25844,0.817777
immature.B.cell,0.066148,0.046502,0.015922,0.065813,0.003703,0.063769,0.03848,0.012738,0.015875,0.018698,...,0.104551,0.01166,0.041829,0.018768,0.00716,0.050688,0.032463,0.044652,0.045324,0.005933
late.pro.B.cell,0.030947,8.4e-05,0.041591,0.025129,0.027027,0.144329,0.101177,0.54057,0.01153,0.01079,...,0.109537,0.184038,0.105455,0.359683,0.131509,0.197644,0.113423,0.032016,0.094774,0.120758
monocyte,0.235345,0.201026,0.010703,0.01716,0.020958,0.118049,0.009122,0.037134,0.009004,0.02126,...,0.218972,0.008677,0.014409,0.008801,0.030226,0.00897,0.005767,0.008909,0.026484,0.006045
macrophage,0.038789,0.286691,0.125256,0.0151,0.151676,0.261265,0.011847,0.232378,0.051527,0.320537,...,0.109236,0.108583,0.144104,0.037217,0.00658,0.13365,0.133137,0.039161,0.054954,0.040088
basophil,9e-06,0.087159,0.061026,0.058781,0.070467,0.02001,0.058455,0.063496,0.029957,0.207313,...,0.097112,0.034553,0.011478,0.05283,0.227672,0.004464,0.06877,0.08127,0.144802,0.009397


In [6]:
### rare
# Score CIBERSORTx (CSx) predictions on the rare-cell-type pseudobulk benchmark
# and add one row per (tissue, task, cell type) to `record`.
names = ['Limb_Muscle','Lung','Marrow']
tasks = ['counts2umi','umi2counts']
# Tissue -> cell types tagged 'rare'. A dict lookup raises KeyError for a tissue
# without an entry instead of silently reusing the previous tissue's list.
rare_by_tissue = {
    'Limb_Muscle': rare_celltype_lm,
    'Lung': rare_celltype_lung,
    'Marrow': rare_celltype_marrow,
}
rows = []  # collected here and added to `record` once, after the loops
for name in names:
    rare = rare_by_tissue[name]
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating CSx's prediction of "+name+' ('+task+") ##################")
        print('#################################################################################')
        pred = pd.read_csv('./CSx/rare/'+name+'_'+task+'_pred.csv',index_col=0)
        # The label file is chosen by the part of the task name after the '2'.
        label = pd.read_csv('./TabularMuris/rare/'+name+'_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # Keep only columns present in both tables, in the same column order;
        # any other prediction column is dropped here.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics work on .values, so samples are compared by row position,
        # not aligned on the index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)

        for i, celltype in enumerate(pred.columns):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            abundant = 'rare' if celltype in rare else 'normal'
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                         'DataType':task,'Tissue':name,'Method':'CIBERSORTx','abundant':abundant})

# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on every
# call); build the new rows once and concatenate. An empty `record` is replaced
# rather than concatenated so its empty columns do not influence the dtypes.
if rows:
    new_rows = pd.DataFrame(rows, columns=record.columns)
    record = new_rows if record.empty else pd.concat([record, new_rows], ignore_index=True)

#################################################################################
################## Evaluating CSx's prediction of Limb_Muscle (counts2umi) ##################
#################################################################################
Overall ccc score is 0.16394824115250162
Overall L1 error is 0.16398322133942456
skeletal muscle satellite cell's ccc score is 0.00017125712426621344 l1 error is 0.34350993072041086
mesenchymal stem cell's ccc score is 0.7928481587954801 l1 error is 0.0789087503885278
endothelial cell's ccc score is 0.013376954396642016 l1 error is 0.10116814691376122
B cell's ccc score is 0.24921159871105988 l1 error is 0.1662581155070458
macrophage's ccc score is 0.4186095824274919 l1 error is 0.14612749314254006
T cell's ccc score is 0.4285769988316785 l1 error is 0.1479268913642616
#################################################################################
################## Evaluating CSx's prediction of Limb_Muscle (umi2counts) #########

In [7]:
### rare
# Score Scaden predictions on the rare-cell-type pseudobulk benchmark and add one
# row per (tissue, task, cell type) to `record`.
names = ['Limb_Muscle','Lung','Marrow']
tasks = ['counts2umi','umi2counts']
# Tissue -> cell types tagged 'rare'. A dict lookup raises KeyError for a tissue
# without an entry instead of silently reusing the previous tissue's list.
rare_by_tissue = {
    'Limb_Muscle': rare_celltype_lm,
    'Lung': rare_celltype_lung,
    'Marrow': rare_celltype_marrow,
}
rows = []  # collected here and added to `record` once, after the loops
for name in names:
    rare = rare_by_tissue[name]
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating Scaden's prediction of "+name+' ('+task+") ##################")
        print('#################################################################################')
        # Scaden predictions are tab-separated text files.
        pred = pd.read_csv('./Scaden/rare/'+name+'_'+task+'_pred.txt',index_col=0,sep='\t')
        # The label file is chosen by the part of the task name after the '2'.
        label = pd.read_csv('./TabularMuris/rare/'+name+'_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # Keep only cell types present in both tables, in the same column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics work on .values, so samples are compared by row position,
        # not aligned on the index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)

        for i, celltype in enumerate(pred.columns):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            abundant = 'rare' if celltype in rare else 'normal'
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                         'DataType':task,'Tissue':name,'Method':'Scaden','abundant':abundant})

# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on every
# call); build the new rows once and concatenate. An empty `record` is replaced
# rather than concatenated so its empty columns do not influence the dtypes.
if rows:
    new_rows = pd.DataFrame(rows, columns=record.columns)
    record = new_rows if record.empty else pd.concat([record, new_rows], ignore_index=True)

#################################################################################
################## Evaluating Scaden's prediction of Limb_Muscle (counts2umi) ##################
#################################################################################
Overall ccc score is 0.6643552652735168
Overall L1 error is 0.09467162300027553
mesenchymal stem cell's ccc score is 0.5976401706998938 l1 error is 0.10240715073076295
skeletal muscle satellite cell's ccc score is 0.008383700762434318 l1 error is 0.0788344138142345
T cell's ccc score is 0.5515606054205789 l1 error is 0.09703507274064627
macrophage's ccc score is 0.6374508106927168 l1 error is 0.10208577211664029
B cell's ccc score is 0.5464776572491933 l1 error is 0.10250276852332726
endothelial cell's ccc score is 0.013106564374480103 l1 error is 0.0851645600760418
#################################################################################
################## Evaluating Scaden's prediction of Limb_Muscle (umi2counts) ######

In [8]:
### rare
# Score MuSiC predictions on the rare-cell-type pseudobulk benchmark and add one
# row per (tissue, task, cell type) to `record`.
names = ['Limb_Muscle','Lung','Marrow']
tasks = ['counts2umi','umi2counts']
# Tissue -> cell types tagged 'rare'. A dict lookup raises KeyError for a tissue
# without an entry instead of silently reusing the previous tissue's list.
rare_by_tissue = {
    'Limb_Muscle': rare_celltype_lm,
    'Lung': rare_celltype_lung,
    'Marrow': rare_celltype_marrow,
}
rows = []  # collected here and added to `record` once, after the loops
for name in names:
    rare = rare_by_tissue[name]
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating MuSiC's prediction of "+name+' ('+task+") ##################")
        print('#################################################################################')
        pred = pd.read_csv('./MuSiC/rare/'+name+'_'+task+'_pred.csv',index_col=0)
        # The label file is chosen by the part of the task name after the '2'.
        label = pd.read_csv('./TabularMuris/rare/'+name+'_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # Keep only cell types present in both tables, in the same column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics work on .values, so samples are compared by row position,
        # not aligned on the index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)

        for i, celltype in enumerate(pred.columns):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            abundant = 'rare' if celltype in rare else 'normal'
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                         'DataType':task,'Tissue':name,'Method':'MuSiC','abundant':abundant})

# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on every
# call); build the new rows once and concatenate. An empty `record` is replaced
# rather than concatenated so its empty columns do not influence the dtypes.
if rows:
    new_rows = pd.DataFrame(rows, columns=record.columns)
    record = new_rows if record.empty else pd.concat([record, new_rows], ignore_index=True)

#################################################################################
################## Evaluating MuSiC's prediction of Limb_Muscle (counts2umi) ##################
#################################################################################
Overall ccc score is 0.716275242759457
Overall L1 error is 0.09245014491597774
skeletal muscle satellite cell's ccc score is 0.028600676190964732 l1 error is 0.014701482144221218
mesenchymal stem cell's ccc score is 0.7761861893289466 l1 error is 0.08656730320208654
endothelial cell's ccc score is 0.2681871630657501 l1 error is 0.015509099398325206
B cell's ccc score is 0.36728657037755424 l1 error is 0.2368313216200278
macrophage's ccc score is 0.5730152019820544 l1 error is 0.12293106365737184
T cell's ccc score is 0.7986351815950237 l1 error is 0.07816059947383394
#################################################################################
################## Evaluating MuSiC's prediction of Limb_Muscle (umi2counts) #######

In [9]:
### rare
# Score TAPE predictions on the rare-cell-type pseudobulk benchmark and add one
# row per (tissue, task, cell type) to `record`.
names = ['Limb_Muscle','Lung','Marrow']
tasks = ['counts2umi','umi2counts']
# Tissue -> cell types tagged 'rare'. A dict lookup raises KeyError for a tissue
# without an entry instead of silently reusing the previous tissue's list.
rare_by_tissue = {
    'Limb_Muscle': rare_celltype_lm,
    'Lung': rare_celltype_lung,
    'Marrow': rare_celltype_marrow,
}
rows = []  # collected here and added to `record` once, after the loops
for name in names:
    rare = rare_by_tissue[name]
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating TAPE's prediction of "+name+' ('+task+") ##################")
        print('#################################################################################')
        pred = pd.read_csv('./TAPE/rare/'+name+'_'+task+'_pred.csv',index_col=0)
        # The label file is chosen by the part of the task name after the '2'.
        label = pd.read_csv('./TabularMuris/rare/'+name+'_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # Keep only cell types present in both tables, in the same column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics work on .values, so samples are compared by row position,
        # not aligned on the index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)

        for i, celltype in enumerate(pred.columns):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            abundant = 'rare' if celltype in rare else 'normal'
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                         'DataType':task,'Tissue':name,'Method':'TAPE','abundant':abundant})

# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on every
# call); build the new rows once and concatenate. An empty `record` is replaced
# rather than concatenated so its empty columns do not influence the dtypes.
if rows:
    new_rows = pd.DataFrame(rows, columns=record.columns)
    record = new_rows if record.empty else pd.concat([record, new_rows], ignore_index=True)

#################################################################################
################## Evaluating TAPE's prediction of Limb_Muscle (counts2umi) ##################
#################################################################################
Overall ccc score is 0.7008579234499881
Overall L1 error is 0.09231424978057655
B cell's ccc score is 0.6771016856740762 l1 error is 0.09019884102876473
T cell's ccc score is 0.7224038273575647 l1 error is 0.08748681156178037
endothelial cell's ccc score is 0.024953831839014763 l1 error is 0.06496484459604181
macrophage's ccc score is 0.6870282347011684 l1 error is 0.09085006147190292
mesenchymal stem cell's ccc score is 0.5446792127745246 l1 error is 0.12675059662073493
skeletal muscle satellite cell's ccc score is 0.0024194382566504243 l1 error is 0.09363434340423452
#################################################################################
################## Evaluating TAPE's prediction of Limb_Muscle (umi2counts) #######

In [10]:
# Score RNAsieve predictions on the rare-cell-type pseudobulk benchmark and add
# one row per (tissue, task, cell type) to `record`.
names = ['Limb_Muscle','Lung','Marrow']
tasks = ['counts2umi','umi2counts']
# Tissue -> cell types tagged 'rare'. A dict lookup raises KeyError for a tissue
# without an entry instead of silently reusing the previous tissue's list.
rare_by_tissue = {
    'Limb_Muscle': rare_celltype_lm,
    'Lung': rare_celltype_lung,
    'Marrow': rare_celltype_marrow,
}
rows = []  # collected here and added to `record` once, after the loops
for name in names:
    rare = rare_by_tissue[name]
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating RNASieve's prediction of "+name+' ('+task+") ##################")
        print('#################################################################################')
        pred = pd.read_csv('./RNAsieve/rare/'+name+'_'+task+'_pred.csv',index_col=0)
        # The label file is chosen by the part of the task name after the '2'.
        label = pd.read_csv('./TabularMuris/rare/'+name+'_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # Turn '_' in the prediction column names into ' ' before matching them
        # against the label columns.
        pred.columns = [x.replace("_", " ") for x in pred.columns]
        # Keep only cell types present in both tables, in the same column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics work on .values, so samples are compared by row position,
        # not aligned on the index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)

        for i, celltype in enumerate(pred.columns):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            abundant = 'rare' if celltype in rare else 'normal'
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                         'DataType':task,'Tissue':name,'Method':'RNAsieve','abundant':abundant})

# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on every
# call); build the new rows once and concatenate. An empty `record` is replaced
# rather than concatenated so its empty columns do not influence the dtypes.
if rows:
    new_rows = pd.DataFrame(rows, columns=record.columns)
    record = new_rows if record.empty else pd.concat([record, new_rows], ignore_index=True)

#################################################################################
################## Evaluating RNASieve's prediction of Limb_Muscle (counts2umi) ##################
#################################################################################
Overall ccc score is 0.045106405558741514
Overall L1 error is 0.16873080009160343
B cell's ccc score is -0.00037588734980622423 l1 error is 0.16133691003836367
T cell's ccc score is -0.056404160581935765 l1 error is 0.16809842928340976
endothelial cell's ccc score is 0.002327231031612719 l1 error is 0.10457177257105915
macrophage's ccc score is 0.00810104333249131 l1 error is 0.18862652109520062
mesenchymal stem cell's ccc score is -0.08457129397600593 l1 error is 0.20932594219522255
skeletal muscle satellite cell's ccc score is -0.0019879381804356606 l1 error is 0.18042522536636463
#################################################################################
################## Evaluating RNASieve's prediction of Limb_Muscl

In [11]:
# Score Bisque predictions on the rare-cell-type pseudobulk benchmark and add one
# row per (tissue, task, cell type) to `record`.
names = ['Limb_Muscle','Lung','Marrow']
tasks = ['counts2umi','umi2counts']
# Tissue -> cell types tagged 'rare'. A dict lookup raises KeyError for a tissue
# without an entry instead of silently reusing the previous tissue's list.
rare_by_tissue = {
    'Limb_Muscle': rare_celltype_lm,
    'Lung': rare_celltype_lung,
    'Marrow': rare_celltype_marrow,
}
rows = []  # collected here and added to `record` once, after the loops
for name in names:
    rare = rare_by_tissue[name]
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating Bisque's prediction of "+name+' ('+task+") ##################")
        print('#################################################################################')
        # Transposed so that cell types become the columns, as in the label table.
        pred = pd.read_csv('./Bisque/rare/'+name+'_'+task+'_pred.csv',index_col=0).T
        # The label file is chosen by the part of the task name after the '2'.
        label = pd.read_csv('./TabularMuris/rare/'+name+'_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # Turn '_' in the prediction column names into ' ' before matching them
        # against the label columns.
        pred.columns = [x.replace("_", " ") for x in pred.columns]
        # Keep only cell types present in both tables, in the same column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics work on .values, so samples are compared by row position,
        # not aligned on the index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)

        for i, celltype in enumerate(pred.columns):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            abundant = 'rare' if celltype in rare else 'normal'
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                         'DataType':task,'Tissue':name,'Method':'Bisque','abundant':abundant})

# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on every
# call); build the new rows once and concatenate. An empty `record` is replaced
# rather than concatenated so its empty columns do not influence the dtypes.
if rows:
    new_rows = pd.DataFrame(rows, columns=record.columns)
    record = new_rows if record.empty else pd.concat([record, new_rows], ignore_index=True)

#################################################################################
################## Evaluating Bisque's prediction of Limb_Muscle (counts2umi) ##################
#################################################################################
Overall ccc score is 0.46339059441980907
Overall L1 error is 0.13657427064228958
B cell's ccc score is 0.3673702345183654 l1 error is 0.17141217180053567
T cell's ccc score is 0.09879098035057535 l1 error is 0.20573876301483085
endothelial cell's ccc score is -0.008684663572000081 l1 error is 0.0904901658461522
macrophage's ccc score is 0.4466007306879527 l1 error is 0.1507767109868971
mesenchymal stem cell's ccc score is 0.6987723070383831 l1 error is 0.10897370991607057
skeletal muscle satellite cell's ccc score is 0.011306953597514803 l1 error is 0.0920541022892511
#################################################################################
################## Evaluating Bisque's prediction of Limb_Muscle (umi2counts) ####

In [12]:
# Write the accumulated per-cell-type metrics to a CSV file in the working
# directory (the row index is written as the first column).
record.to_csv(path_or_buf='pseudobulk_rare_result.csv')