In [1]:
import numpy as np
import pandas as pd
from TAPE.utils import CCCscore, L1error

In [2]:
# Shared results table: one row per (method, data type, held-out cell type,
# evaluated cell type), filled in by the evaluation cells below.
record = pd.DataFrame(
    columns=[
        'CellType',
        'Method',
        'DataType',
        'MissingCell',
        'CCC',
        'L1error',
    ]
)

In [3]:
### missing & similar
# Score DWLS on the Marrow pseudobulk data for every combination of held-out
# B-cell type ('lpb' / 'ib') and conversion task, printing the metrics and
# adding one row per evaluated cell type to `record`.
tasks = ['counts2umi','umi2counts']
missing_celltypes = ['lpb','ib']
# Rows are gathered in a plain list and merged into `record` once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame per row.
new_rows = []
for missing_celltype in missing_celltypes:
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating DWLS's prediction of "+missing_celltype+' missing ('+task+") ##################")
        print('#################################################################################')
        # The prediction table is transposed on load so that cell types become columns.
        pred = pd.read_csv('./DWLS/similar/Marrow_'+task+'_'+missing_celltype+'_missing_pred.csv',index_col=0).T
        # The label file is chosen by the target side of the task (text after the '2').
        label = pd.read_csv('./TabularMuris/common/Marrow_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # DWLS column names carry '.' where a space is expected; note that this
        # also yields 'late pro B cell' (no hyphen), unlike the label's 'late pro-B cell'.
        pred.columns = [x.replace(".", " ") for x in pred.columns]
        # Ground truth for the merged B-cell group is the sum of both B-cell fractions.
        label['Bcell_sum'] = label['late pro-B cell']+label['immature B cell']

        # The single B-cell column present in the prediction is moved to
        # 'Bcell_sum'; pop + assign leaves it as the last column, exactly as the
        # previous assign-then-drop did.
        if missing_celltype == 'lpb':
            pred['Bcell_sum'] = pred.pop('immature B cell')
        elif missing_celltype == 'ib':
            pred['Bcell_sum'] = pred.pop('late pro B cell')

        # Keep only the cell types present in both tables, in the prediction's column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics are computed on raw arrays, so rows are matched by position, not by index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)
        for i in range(pred.shape[1]):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            celltype = pred.columns[i]
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            new_rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                             'DataType':task,'MissingCell':missing_celltype,'Method':'DWLS'})

# Merge this method's rows into the shared table in one step. While `record`
# is still empty the new frame is used directly, which avoids pandas'
# FutureWarning about concatenating empty frames.
new_records = pd.DataFrame(new_rows, columns=record.columns)
record = new_records if record.empty else pd.concat([record, new_records], ignore_index=True)

#################################################################################
################## Evaluating DWLS's prediction of lpb missing (counts2umi) ##################
#################################################################################
Overall ccc score is 0.2519748630206778
Overall L1 error is 0.14909738151949775
hematopoietic precursor cell's ccc score is 0.3107238608331995 l1 error is 0.16322229549626144
granulocyte's ccc score is 0.056566001251268594 l1 error is 0.24652473653230575
monocyte's ccc score is 0.6620086250265291 l1 error is 0.09691570330517152
macrophage's ccc score is 0.3828030290192958 l1 error is 0.09401554699834751
basophil's ccc score is 0.5047764852436746 l1 error is 0.07667533504597586
Bcell_sum's ccc score is 0.2524931155977122 l1 error is 0.2172306717389244
#################################################################################
################## Evaluating DWLS's prediction of lpb missing (umi2counts) ##################
#######

In [4]:
# Inspect `pred` as left behind by the most recently executed evaluation loop;
# when the cells are run top to bottom this is the DWLS frame from the final
# iteration ('ib' missing, 'umi2counts'), already restricted to the shared columns.
pred

Unnamed: 0,hematopoietic precursor cell,granulocyte,monocyte,macrophage,basophil,Bcell_sum
solDWLS,2.206518e-09,0.161949,0.076499,0.396860,0.142997,0.221695
solDWLS.1,2.268868e-01,0.271220,0.193519,0.047709,0.190652,0.070012
solDWLS.2,8.873930e-20,0.187207,0.234310,0.141101,0.267727,0.169655
solDWLS.3,-6.849473e-21,0.056663,0.048269,0.234730,0.390070,0.270268
solDWLS.4,2.253037e-22,0.333693,0.087080,0.135424,0.263421,0.180383
...,...,...,...,...,...,...
solDWLS.95,2.728728e-03,0.111836,0.299687,0.088205,0.244601,0.252943
solDWLS.96,6.047287e-23,0.093581,0.349688,0.344452,0.118788,0.093492
solDWLS.97,-2.225002e-26,0.213949,0.345181,0.292619,0.004893,0.143359
solDWLS.98,3.787794e-20,0.260328,0.308190,0.141034,0.181150,0.109298


In [5]:
### missing & similar
# Score CIBERSORTx (CSx) on the Marrow pseudobulk data for every combination of
# held-out B-cell type ('lpb' / 'ib') and conversion task, printing the metrics
# and adding one row per evaluated cell type to `record`.
tasks = ['counts2umi','umi2counts']
missing_celltypes = ['lpb','ib']
# Rows are gathered in a plain list and merged into `record` once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame per row.
new_rows = []
for missing_celltype in missing_celltypes:
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating CSx's prediction of "+missing_celltype+' missing ('+task+") ##################")
        print('#################################################################################')
        pred = pd.read_csv('./CSx/similar/Marrow_'+task+'_'+missing_celltype+'_missing_pred.csv',index_col=0)
        # The label file is chosen by the target side of the task (text after the '2').
        label = pd.read_csv('./TabularMuris/common/Marrow_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # Ground truth for the merged B-cell group is the sum of both B-cell fractions.
        label['Bcell_sum'] = label['late pro-B cell']+label['immature B cell']

        # The single B-cell column present in the prediction is moved to
        # 'Bcell_sum'; pop + assign leaves it as the last column, exactly as the
        # previous assign-then-drop did.
        if missing_celltype == 'lpb':
            pred['Bcell_sum'] = pred.pop('immature B cell')
        elif missing_celltype == 'ib':
            pred['Bcell_sum'] = pred.pop('late pro-B cell')

        # Keep only the cell types present in both tables, in the prediction's column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics are computed on raw arrays, so rows are matched by position, not by index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)
        for i in range(pred.shape[1]):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            celltype = pred.columns[i]
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            new_rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                             'DataType':task,'MissingCell':missing_celltype,'Method':'CIBERSORTx'})

# Merge this method's rows into the shared table in one step. While `record`
# is still empty the new frame is used directly, which avoids pandas'
# FutureWarning about concatenating empty frames.
new_records = pd.DataFrame(new_rows, columns=record.columns)
record = new_records if record.empty else pd.concat([record, new_records], ignore_index=True)

#################################################################################
################## Evaluating CSx's prediction of lpb missing (counts2umi) ##################
#################################################################################
Overall ccc score is 0.378912036993594
Overall L1 error is 0.1284371186021278
hematopoietic precursor cell's ccc score is 0.3005509695812479 l1 error is 0.13757356530006548
granulocyte's ccc score is 0.3048060793036115 l1 error is 0.2323095043109085
monocyte's ccc score is 0.7157603464314098 l1 error is 0.07699118866561756
macrophage's ccc score is 0.745506575826885 l1 error is 0.056149695676604924
basophil's ccc score is 0.14091639460234834 l1 error is 0.11398527377235514
Bcell_sum's ccc score is 0.29861570237107354 l1 error is 0.15361348388721519
#################################################################################
################## Evaluating CSx's prediction of lpb missing (umi2counts) ##################
###########

In [6]:
### missing & similar
# Score Scaden on the Marrow pseudobulk data for every combination of held-out
# B-cell type ('lpb' / 'ib') and conversion task, printing the metrics and
# adding one row per evaluated cell type to `record`.
tasks = ['counts2umi','umi2counts']
missing_celltypes = ['lpb','ib']
# Rows are gathered in a plain list and merged into `record` once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame per row.
new_rows = []
for missing_celltype in missing_celltypes:
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating Scaden's prediction of "+missing_celltype+' missing ('+task+") ##################")
        print('#################################################################################')
        # Scaden predictions are stored as tab-separated text.
        pred = pd.read_csv('./Scaden/similar/Marrow_'+task+'_'+missing_celltype+'_missing_pred.txt',index_col=0,sep='\t')
        # The label file is chosen by the target side of the task (text after the '2').
        label = pd.read_csv('./TabularMuris/common/Marrow_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # Ground truth for the merged B-cell group is the sum of both B-cell fractions.
        label['Bcell_sum'] = label['late pro-B cell']+label['immature B cell']

        # The single B-cell column present in the prediction is moved to
        # 'Bcell_sum'; pop + assign leaves it as the last column, exactly as the
        # previous assign-then-drop did.
        if missing_celltype == 'lpb':
            pred['Bcell_sum'] = pred.pop('immature B cell')
        elif missing_celltype == 'ib':
            pred['Bcell_sum'] = pred.pop('late pro-B cell')

        # Keep only the cell types present in both tables, in the prediction's column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics are computed on raw arrays, so rows are matched by position, not by index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)
        for i in range(pred.shape[1]):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            celltype = pred.columns[i]
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            new_rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                             'DataType':task,'MissingCell':missing_celltype,'Method':'Scaden'})

# Merge this method's rows into the shared table in one step. While `record`
# is still empty the new frame is used directly, which avoids pandas'
# FutureWarning about concatenating empty frames.
new_records = pd.DataFrame(new_rows, columns=record.columns)
record = new_records if record.empty else pd.concat([record, new_records], ignore_index=True)

#################################################################################
################## Evaluating Scaden's prediction of lpb missing (counts2umi) ##################
#################################################################################
Overall ccc score is 0.6235068284436825
Overall L1 error is 0.07609255520807214
hematopoietic precursor cell's ccc score is 0.6322801286342727 l1 error is 0.050871817351695346
macrophage's ccc score is 0.7321068822026273 l1 error is 0.05406032281177143
granulocyte's ccc score is 0.5643860305498546 l1 error is 0.09010950872572547
basophil's ccc score is 0.5656758750974001 l1 error is 0.07586981273184314
monocyte's ccc score is 0.6198451105772361 l1 error is 0.07201224832141637
Bcell_sum's ccc score is 0.4847337996192537 l1 error is 0.11363162130598108
#################################################################################
################## Evaluating Scaden's prediction of lpb missing (umi2counts) ##################
###

In [7]:
### missing & similar
# Score MuSiC on the Marrow pseudobulk data for every combination of held-out
# B-cell type ('lpb' / 'ib') and conversion task, printing the metrics and
# adding one row per evaluated cell type to `record`.
tasks = ['counts2umi','umi2counts']
missing_celltypes = ['lpb','ib']
# Rows are gathered in a plain list and merged into `record` once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame per row.
new_rows = []
for missing_celltype in missing_celltypes:
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating MuSiC's prediction of "+missing_celltype+' missing ('+task+") ##################")
        print('#################################################################################')
        pred = pd.read_csv('./MuSiC/similar/Marrow_'+task+'_'+missing_celltype+'_missing_pred.csv',index_col=0,sep=',')
        # The label file is chosen by the target side of the task (text after the '2').
        label = pd.read_csv('./TabularMuris/common/Marrow_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # Ground truth for the merged B-cell group is the sum of both B-cell fractions.
        label['Bcell_sum'] = label['late pro-B cell']+label['immature B cell']

        # The single B-cell column present in the prediction is moved to
        # 'Bcell_sum'; pop + assign leaves it as the last column, exactly as the
        # previous assign-then-drop did.
        if missing_celltype == 'lpb':
            pred['Bcell_sum'] = pred.pop('immature B cell')
        elif missing_celltype == 'ib':
            pred['Bcell_sum'] = pred.pop('late pro-B cell')

        # Keep only the cell types present in both tables, in the prediction's column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics are computed on raw arrays, so rows are matched by position, not by index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)
        for i in range(pred.shape[1]):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            celltype = pred.columns[i]
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            new_rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                             'DataType':task,'MissingCell':missing_celltype,'Method':'MuSiC'})

# Merge this method's rows into the shared table in one step. While `record`
# is still empty the new frame is used directly, which avoids pandas'
# FutureWarning about concatenating empty frames.
new_records = pd.DataFrame(new_rows, columns=record.columns)
record = new_records if record.empty else pd.concat([record, new_records], ignore_index=True)

#################################################################################
################## Evaluating MuSiC's prediction of lpb missing (counts2umi) ##################
#################################################################################
Overall ccc score is 0.676200893851492
Overall L1 error is 0.08483229525801235
hematopoietic precursor cell's ccc score is 0.31608686770456623 l1 error is 0.17502261123339177
granulocyte's ccc score is 0.8243839652456953 l1 error is 0.06043402636845816
monocyte's ccc score is 0.6659180746664995 l1 error is 0.07547848502889118
macrophage's ccc score is 0.7042954270242255 l1 error is 0.06937563994515589
basophil's ccc score is 0.9125861328349171 l1 error is 0.032561913313387404
Bcell_sum's ccc score is 0.6907579726001739 l1 error is 0.09612109565878958
#################################################################################
################## Evaluating MuSiC's prediction of lpb missing (umi2counts) ##################
#####

In [8]:
### missing & similar
# Score TAPE on the Marrow pseudobulk data for every combination of held-out
# B-cell type ('lpb' / 'ib') and conversion task, printing the metrics and
# adding one row per evaluated cell type to `record`.
tasks = ['counts2umi','umi2counts']
missing_celltypes = ['lpb','ib']
# Rows are gathered in a plain list and merged into `record` once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame per row.
new_rows = []
for missing_celltype in missing_celltypes:
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating TAPE's prediction of "+missing_celltype+' missing ('+task+") ##################")
        print('#################################################################################')
        pred = pd.read_csv('./TAPE/similar/Marrow_'+task+'_'+missing_celltype+'_missing_pred.csv',index_col=0,sep=',')
        # The label file is chosen by the target side of the task (text after the '2').
        label = pd.read_csv('./TabularMuris/common/Marrow_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # Ground truth for the merged B-cell group is the sum of both B-cell fractions.
        label['Bcell_sum'] = label['late pro-B cell']+label['immature B cell']

        # The single B-cell column present in the prediction is moved to
        # 'Bcell_sum'; pop + assign leaves it as the last column, exactly as the
        # previous assign-then-drop did.
        if missing_celltype == 'lpb':
            pred['Bcell_sum'] = pred.pop('immature B cell')
        elif missing_celltype == 'ib':
            pred['Bcell_sum'] = pred.pop('late pro-B cell')

        # Keep only the cell types present in both tables, in the prediction's column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics are computed on raw arrays, so rows are matched by position, not by index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)
        for i in range(pred.shape[1]):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            celltype = pred.columns[i]
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            new_rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                             'DataType':task,'MissingCell':missing_celltype,'Method':'TAPE'})

# Merge this method's rows into the shared table in one step. While `record`
# is still empty the new frame is used directly, which avoids pandas'
# FutureWarning about concatenating empty frames.
new_records = pd.DataFrame(new_rows, columns=record.columns)
record = new_records if record.empty else pd.concat([record, new_records], ignore_index=True)

#################################################################################
################## Evaluating TAPE's prediction of lpb missing (counts2umi) ##################
#################################################################################
Overall ccc score is 0.6995705794137994
Overall L1 error is 0.07034901914040144
basophil's ccc score is 0.5992026752938825 l1 error is 0.06409033652730463
granulocyte's ccc score is 0.7033180527801082 l1 error is 0.06669383578739839
hematopoietic precursor cell's ccc score is 0.7512606538474369 l1 error is 0.04797597221804934
macrophage's ccc score is 0.7247153424893769 l1 error is 0.05921088921801339
monocyte's ccc score is 0.6836960568334455 l1 error is 0.07876247938702421
Bcell_sum's ccc score is 0.5757213374539496 l1 error is 0.1053606017046187
#################################################################################
################## Evaluating TAPE's prediction of lpb missing (umi2counts) ##################
#########

In [9]:
### missing & similar
# Score RNAsieve on the Marrow pseudobulk data for every combination of
# held-out B-cell type ('lpb' / 'ib') and conversion task, printing the metrics
# and adding one row per evaluated cell type to `record`.
tasks = ['counts2umi','umi2counts']
missing_celltypes = ['lpb','ib']
# Rows are gathered in a plain list and merged into `record` once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame per row.
new_rows = []
for missing_celltype in missing_celltypes:
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating RNAsieve's prediction of "+missing_celltype+' missing ('+task+") ##################")
        print('#################################################################################')
        pred = pd.read_csv('./RNAsieve/similar/Marrow_'+task+'_'+missing_celltype+'_missing_pred.csv',index_col=0,sep=',')
        # The label file is chosen by the target side of the task (text after the '2').
        label = pd.read_csv('./TabularMuris/common/Marrow_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # Underscores in the prediction's column names are turned into spaces
        # before the columns are looked up by their label-style names.
        pred.columns = [x.replace("_", " ") for x in pred.columns]
        # Ground truth for the merged B-cell group is the sum of both B-cell fractions.
        label['Bcell_sum'] = label['late pro-B cell']+label['immature B cell']

        # The single B-cell column present in the prediction is moved to
        # 'Bcell_sum'; pop + assign leaves it as the last column, exactly as the
        # previous assign-then-drop did.
        if missing_celltype == 'lpb':
            pred['Bcell_sum'] = pred.pop('immature B cell')
        elif missing_celltype == 'ib':
            pred['Bcell_sum'] = pred.pop('late pro-B cell')

        # Keep only the cell types present in both tables, in the prediction's column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics are computed on raw arrays, so rows are matched by position, not by index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)
        for i in range(pred.shape[1]):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            celltype = pred.columns[i]
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            new_rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                             'DataType':task,'MissingCell':missing_celltype,'Method':'RNAsieve'})

# Merge this method's rows into the shared table in one step. While `record`
# is still empty the new frame is used directly, which avoids pandas'
# FutureWarning about concatenating empty frames.
new_records = pd.DataFrame(new_rows, columns=record.columns)
record = new_records if record.empty else pd.concat([record, new_records], ignore_index=True)

#################################################################################
################## Evaluating RNAsieve's prediction of lpb missing (counts2umi) ##################
#################################################################################
Overall ccc score is -0.05631376694637027
Overall L1 error is 0.14516718311876678
basophil's ccc score is -0.01283000366976816 l1 error is 0.10040375765892325
granulocyte's ccc score is 0.04128805307510485 l1 error is 0.09365923523182282
hematopoietic precursor cell's ccc score is -0.030780485605456414 l1 error is 0.26780037198814005
macrophage's ccc score is -0.02542714943214439 l1 error is 0.11497920963461793
monocyte's ccc score is 0.03793269347681882 l1 error is 0.10896227575285682
Bcell_sum's ccc score is -0.01679632221451993 l1 error is 0.1851982484462398
#################################################################################
################## Evaluating RNAsieve's prediction of lpb missing (umi2counts) #######

In [10]:
### missing & similar
# Score Bisque on the Marrow pseudobulk data for every combination of held-out
# B-cell type ('lpb' / 'ib') and conversion task, printing the metrics and
# adding one row per evaluated cell type to `record`.
tasks = ['counts2umi','umi2counts']
missing_celltypes = ['lpb','ib']
# Rows are gathered in a plain list and merged into `record` once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame per row.
new_rows = []
for missing_celltype in missing_celltypes:
    for task in tasks:
        print('#################################################################################')
        print("################## Evaluating Bisque's prediction of "+missing_celltype+' missing ('+task+") ##################")
        print('#################################################################################')
        # The prediction table is transposed on load so that cell types become columns.
        pred = pd.read_csv('./Bisque/similar/Marrow_'+task+'_'+missing_celltype+'_missing_pred.csv',index_col=0,sep=',').T
        # The label file is chosen by the target side of the task (text after the '2').
        label = pd.read_csv('./TabularMuris/common/Marrow_pseudobulk_'+task.split('2')[-1]+'_label.txt',sep='\t',index_col=0)
        # Underscores in the prediction's column names are turned into spaces
        # before the columns are looked up by their label-style names.
        pred.columns = [x.replace("_", " ") for x in pred.columns]
        # Ground truth for the merged B-cell group is the sum of both B-cell fractions.
        label['Bcell_sum'] = label['late pro-B cell']+label['immature B cell']

        # The single B-cell column present in the prediction is moved to
        # 'Bcell_sum'; pop + assign leaves it as the last column, exactly as the
        # previous assign-then-drop did.
        if missing_celltype == 'lpb':
            pred['Bcell_sum'] = pred.pop('immature B cell')
        elif missing_celltype == 'ib':
            pred['Bcell_sum'] = pred.pop('late pro-B cell')

        # Keep only the cell types present in both tables, in the prediction's column order.
        inter = pred.columns.intersection(label.columns)
        label = label[inter]
        pred = pred[inter]
        # Metrics are computed on raw arrays, so rows are matched by position, not by index.
        overall_ccc = CCCscore(label.values,pred.values)
        overall_l1 = L1error(label.values,pred.values)
        print('Overall ccc score is',overall_ccc)
        print('Overall L1 error is',overall_l1)
        for i in range(pred.shape[1]):
            score = CCCscore(label.iloc[:,i].values,pred.iloc[:,i].values)
            error = L1error(label.iloc[:,i].values,pred.iloc[:,i].values)
            celltype = pred.columns[i]
            print(celltype+"'s ccc score is",score,"l1 error is",error)
            new_rows.append({'CellType':celltype,'CCC':score,'L1error':error,
                             'DataType':task,'MissingCell':missing_celltype,'Method':'Bisque'})

# Merge this method's rows into the shared table in one step. While `record`
# is still empty the new frame is used directly, which avoids pandas'
# FutureWarning about concatenating empty frames.
new_records = pd.DataFrame(new_rows, columns=record.columns)
record = new_records if record.empty else pd.concat([record, new_records], ignore_index=True)

#################################################################################
################## Evaluating Bisque's prediction of lpb missing (counts2umi) ##################
#################################################################################
Overall ccc score is 0.4563394066070459
Overall L1 error is 0.14210878351054101
basophil's ccc score is 0.6858881631155442 l1 error is 0.10075361926005721
granulocyte's ccc score is 0.7094144391420409 l1 error is 0.0991015608283241
hematopoietic precursor cell's ccc score is 0.2997820644343948 l1 error is 0.1353446281592577
macrophage's ccc score is 0.3252093569342155 l1 error is 0.17099736574993613
monocyte's ccc score is 0.6332746499078205 l1 error is 0.11355189127879477
Bcell_sum's ccc score is 0.20310185807615505 l1 error is 0.23290363578687606
#################################################################################
################## Evaluating Bisque's prediction of lpb missing (umi2counts) ##################
#####

In [11]:
# Write the accumulated per-cell-type metrics of all evaluated methods to disk
# (the DataFrame index is written as the first, unnamed CSV column).
record.to_csv(path_or_buf='./pseudobulk_similar_result.csv')