In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os

In [2]:
# Project root; every input and output path in this notebook is built from it.
projdir = '/u/home/t/terencew/project-cluo/igvf/pilot/multiome/'

# Donor and sample identifiers read as strings; `s` is the first sample.
donors = list(np.loadtxt(f'{projdir}/txt/donors.txt', dtype=str))
samples = list(np.loadtxt(f'{projdir}/txt/samples.txt', dtype=str))
s = samples[0]

# Method names used as row labels of the accuracy tables. The GEX and ATAC
# lists share their first eight entries and differ only in the last one.
_shared_methods = [
    'demuxlet', 'freemuxlet', 'vireo', 'vireo_nogenos',
    'souporcell', 'souporcell_nogenos', 'scsplit', 'scsplit_nogenos',
]
gex_methods = pd.Index(_shared_methods + ['demuxalot'])
atac_methods = pd.Index(_shared_methods + ['scavengers'])

# Experiment labels, used as column labels of the accuracy tables.
doub_experiments = np.loadtxt(f'{projdir}/ambient/ambisim/cov_doub_test/txt/experiments.txt', dtype=str)
mux_experiments = np.loadtxt(f'{projdir}/ambient/ambisim/cov_mux_test/txt/experiments.txt', dtype=str)

In [4]:
# All four tables are tab-separated, have a header row, and use their first
# column as the row index, so the parser options are shared.
_read_opts = dict(sep='\t', header=0, index_col=0)

doub_gex = pd.read_csv(f'{projdir}/csv/ambisim/prop_doub/ambisim_gex.csv.gz', **_read_opts)
doub_atac = pd.read_csv(f'{projdir}/csv/ambisim/prop_doub/ambisim_atac.csv.gz', **_read_opts)
mux_gex = pd.read_csv(f'{projdir}/csv/ambisim/mux_test/ambisim_gex.csv.gz', **_read_opts)
mux_atac = pd.read_csv(f'{projdir}/csv/ambisim/mux_test/ambisim_atac.csv.gz', **_read_opts)

In [5]:
# Size check of the four loaded tables, shown as (rows, columns) tuples.
doub_gex.shape, doub_atac.shape, mux_gex.shape, mux_atac.shape

((1361090, 6), (1361090, 6), (2386320, 6), (2386320, 6))

In [6]:
### getting droplet type column
def add_droplet_type(calls):
    """Add a binary ``droplet_type`` column derived from ``donor_id``, in place.

    Rows whose ``donor_id`` is ``'doublet'`` or ``'unassigned'`` are labelled
    ``'non_singlet'``; every other row is labelled ``'singlet'``.

    Parameters
    ----------
    calls : pandas.DataFrame
        Table with a ``donor_id`` column. It is modified in place; re-running
        simply recomputes the column from ``donor_id``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, for convenience.
    """
    is_non_singlet = calls['donor_id'].isin(['doublet', 'unassigned'])
    calls['droplet_type'] = np.where(is_non_singlet, 'non_singlet', 'singlet')
    return calls


# Apply the same labelling to all four tables.
for _calls in (doub_gex, doub_atac, mux_gex, mux_atac):
    add_droplet_type(_calls)

### Accuracy metrics for the doublet (`doub`) experiments

In [7]:
# Single-experiment check of the droplet-type accuracy: for each method, the
# fraction of droplets whose singlet / non_singlet label equals the truth label.
doub_gex_drop_acc = pd.DataFrame(index=gex_methods, columns=doub_experiments)

exp = '0_0'
tmp_doub_gex = doub_gex[doub_gex['exp'] == exp]

# Rows tagged 'truth' are the reference; all remaining rows are method calls.
mask = tmp_doub_gex['method'] == 'truth'
truth_doub_gex = tmp_doub_gex[mask]
method_doub_gex = tmp_doub_gex[~mask]

n_drops = truth_doub_gex.shape[0]
tmp_methods = method_doub_gex['method'].unique()

# Key each accuracy by method name. `unique()` returns methods in order of
# appearance in the table, which need not match the row order of
# `gex_methods`; assigning a plain list would attach values to wrong rows.
drop_acc = {}
for method in tmp_methods:
    final_doub_gex = method_doub_gex[method_doub_gex['method'] == method]
    drop_acc[method] = np.sum(final_doub_gex['droplet_type'] == truth_doub_gex['droplet_type']) / n_drops

# Align on method name; methods absent from this experiment become NaN.
doub_gex_drop_acc[exp] = pd.Series(drop_acc, dtype=float).reindex(gex_methods)

In [8]:
# Show the table (rows: methods, columns: experiments); only the '0_0' column has been filled so far.
doub_gex_drop_acc

Unnamed: 0,0_0,0_10,0_20,0_30,10_0,10_10,10_20,10_30,20_0,20_10,20_20,20_30,30_0,30_10,30_20,30_30
demuxlet,0.984011,,,,,,,,,,,,,,,
freemuxlet,1.0,,,,,,,,,,,,,,,
vireo,1.0,,,,,,,,,,,,,,,
vireo_nogenos,1.0,,,,,,,,,,,,,,,
souporcell,1.0,,,,,,,,,,,,,,,
souporcell_nogenos,0.999884,,,,,,,,,,,,,,,
scsplit,0.858417,,,,,,,,,,,,,,,
scsplit_nogenos,0.88773,,,,,,,,,,,,,,,
demuxalot,0.999537,,,,,,,,,,,,,,,


In [9]:
# Droplet-type accuracy for every doublet experiment (GEX): per method, the
# fraction of droplets whose singlet / non_singlet label equals the truth label.
doub_gex_drop_acc = pd.DataFrame(index=gex_methods, columns=doub_experiments)

for exp in doub_experiments:
    tmp_doub_gex = doub_gex[doub_gex['exp'] == exp]
    # Rows tagged 'truth' are the reference; all remaining rows are method calls.
    mask = tmp_doub_gex['method'] == 'truth'
    truth_doub_gex = tmp_doub_gex[mask]
    method_doub_gex = tmp_doub_gex[~mask]

    n_drops = truth_doub_gex.shape[0]
    tmp_methods = method_doub_gex['method'].unique()

    # Key each accuracy by method name. `unique()` returns methods in order
    # of appearance, which need not match the row order of `gex_methods`;
    # assigning a plain list would attach values to the wrong rows.
    drop_acc = {}
    for method in tmp_methods:
        final_doub_gex = method_doub_gex[method_doub_gex['method'] == method]
        drop_acc[method] = np.sum(final_doub_gex['droplet_type'] == truth_doub_gex['droplet_type']) / n_drops

    # Align on method name; methods absent from this experiment become NaN.
    doub_gex_drop_acc[exp] = pd.Series(drop_acc, dtype=float).reindex(gex_methods)

In [10]:
# Show droplet-type accuracy for the GEX doublet experiments (rows: methods, columns: experiments).
doub_gex_drop_acc

Unnamed: 0,0_0,0_10,0_20,0_30,10_0,10_10,10_20,10_30,20_0,20_10,20_20,20_30,30_0,30_10,30_20,30_30
demuxlet,0.984011,0.965983,0.945747,0.924755,0.91017,0.902072,0.89379,0.890742,0.77005,0.782141,0.779126,0.787108,0.616712,0.635571,0.656306,0.687938
freemuxlet,1.0,0.951538,0.87586,0.765166,1.0,0.925377,0.832037,0.727146,0.999763,0.914962,0.815879,0.713306,0.998334,0.904204,0.807569,0.708021
vireo,1.0,0.999767,0.998717,0.9972,0.976844,0.987288,0.988392,0.990089,0.89366,0.917445,0.936388,0.948433,0.804904,0.840779,0.864041,0.891979
vireo_nogenos,1.0,0.999767,0.998367,0.996617,0.98464,0.992467,0.992687,0.991371,0.934942,0.951035,0.959241,0.968185,0.893584,0.919494,0.929885,0.931075
souporcell,1.0,0.998835,0.9965,0.992184,0.996975,0.993997,0.985258,0.976679,0.982967,0.979657,0.9636,0.943702,0.973098,0.961658,0.939969,0.896138
souporcell_nogenos,0.999884,0.998486,0.994283,0.990667,0.996393,0.993173,0.984562,0.970499,0.980128,0.977173,0.95877,0.937315,0.967742,0.956402,0.9338,0.884492
scsplit,0.858417,0.971109,0.894411,0.842044,0.902374,0.945504,0.865351,0.816115,0.919328,0.911295,0.87902,0.805795,0.881562,0.875896,0.824653,0.789542
scsplit_nogenos,0.88773,0.972973,0.891495,0.843677,0.903188,0.948093,0.874985,0.809818,0.945706,0.914252,0.870303,0.789119,0.878348,0.880554,0.853007,0.79085
demuxalot,0.999537,0.999301,0.999417,0.9986,0.951478,0.955979,0.96065,0.968284,0.759877,0.791721,0.804217,0.8356,0.532437,0.570831,0.619409,0.673203


In [11]:
# Singlet donor accuracy for every doublet experiment (GEX): among droplets
# that are singlets both in the truth and in a method's calls, the fraction
# whose `donor_id` equals the truth `donor_id`.
doub_gex_donor_acc = pd.DataFrame(index=gex_methods, columns=doub_experiments)

for exp in doub_experiments:
    tmp_doub_gex = doub_gex[doub_gex['exp'] == exp]
    mask = tmp_doub_gex['method'] == 'truth'

    truth_doub_gex = tmp_doub_gex[mask]
    method_doub_gex = tmp_doub_gex[~mask]

    # Keep only rows labelled singlet on each side.
    truth_doub_gex = truth_doub_gex[truth_doub_gex['droplet_type'] == 'singlet']
    method_doub_gex = method_doub_gex[method_doub_gex['droplet_type'] == 'singlet']

    tmp_methods = method_doub_gex['method'].unique()

    # Key each accuracy by method name: `unique()` order need not match the
    # rows of `gex_methods`, and a method with no singlet calls is missing
    # from `tmp_methods` altogether, which would break a positional assignment.
    donor_acc = {}
    for method in tmp_methods:
        final_doub_gex = method_doub_gex[method_doub_gex['method'] == method]

        # Restrict both sides to the droplets they have in common.
        final_doub_gex = final_doub_gex[final_doub_gex.index.isin(truth_doub_gex.index)]
        tmp_truth = truth_doub_gex[truth_doub_gex.index.isin(final_doub_gex.index)]
        n_drops = tmp_truth.shape[0]
        # Both sides must list the shared droplets in the same order.
        assert np.sum(final_doub_gex.index == tmp_truth.index) == n_drops

        # With no shared singlets the accuracy is undefined; record NaN
        # explicitly instead of relying on a 0/0 division.
        if n_drops:
            donor_acc[method] = np.sum(final_doub_gex['donor_id'] == tmp_truth['donor_id']) / n_drops
        else:
            donor_acc[method] = np.nan

    # Align on method name; methods without singlet calls become NaN.
    doub_gex_donor_acc[exp] = pd.Series(donor_acc, dtype=float).reindex(gex_methods)

In [12]:
# Show singlet donor accuracy for the GEX doublet experiments (rows: methods, columns: experiments).
doub_gex_donor_acc

Unnamed: 0,0_0,0_10,0_20,0_30,10_0,10_10,10_20,10_30,20_0,20_10,20_20,20_30,30_0,30_10,30_20,30_30
demuxlet,0.999647,0.999737,0.999702,0.999319,0.997315,0.997989,0.996814,0.998173,0.99278,0.994234,0.992305,0.991318,0.980892,0.987606,0.98291,0.984131
freemuxlet,1.0,1.0,1.0,1.0,1.0,1.0,0.999854,1.0,0.999527,0.999868,0.999409,0.998978,0.997258,0.997201,0.994787,0.995881
vireo,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.999868,1.0,1.0,0.999817,1.0,1.0,0.999822,1.0
vireo_nogenos,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.999691,1.0,0.0,0.0,0.0,0.0
souporcell,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.999759,1.0,0.999701,0.999656,0.999511,0.999591,0.999241,0.999826
souporcell_nogenos,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9997,0.999828,0.999385,0.999453,0.999543,0.999124
scsplit,1.0,1.0,1.0,1.0,0.999871,0.999868,0.999853,1.0,0.999614,0.999452,0.99939,0.998947,0.995679,0.998551,0.99536,0.995448
scsplit_nogenos,1.0,1.0,1.0,1.0,0.999871,1.0,0.999853,0.999831,0.99925,0.999454,0.99893,0.998044,0.996341,0.998269,0.997604,0.995251
demuxalot,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.999804,1.0,0.999776,1.0,1.0,1.0


In [13]:
# Droplet-type accuracy for every doublet experiment (ATAC): per method, the
# fraction of droplets whose singlet / non_singlet label equals the truth label.
doub_atac_drop_acc = pd.DataFrame(index=atac_methods, columns=doub_experiments)

for exp in doub_experiments:
    tmp_doub_atac = doub_atac[doub_atac['exp'] == exp]
    # Rows tagged 'truth' are the reference; all remaining rows are method calls.
    mask = tmp_doub_atac['method'] == 'truth'
    truth_doub_atac = tmp_doub_atac[mask]
    method_doub_atac = tmp_doub_atac[~mask]

    n_drops = truth_doub_atac.shape[0]
    tmp_methods = method_doub_atac['method'].unique()

    # Key each accuracy by method name. `unique()` returns methods in order
    # of appearance, which need not match the row order of `atac_methods`;
    # assigning a plain list would attach values to the wrong rows.
    drop_acc = {}
    for method in tmp_methods:
        final_doub_atac = method_doub_atac[method_doub_atac['method'] == method]
        drop_acc[method] = np.sum(final_doub_atac['droplet_type'] == truth_doub_atac['droplet_type']) / n_drops

    # Align on method name; methods absent from this experiment become NaN.
    doub_atac_drop_acc[exp] = pd.Series(drop_acc, dtype=float).reindex(atac_methods)

In [14]:
# Show droplet-type accuracy for the ATAC doublet experiments (rows: methods, columns: experiments).
doub_atac_drop_acc

Unnamed: 0,0_0,0_10,0_20,0_30,10_0,10_10,10_20,10_30,20_0,20_10,20_20,20_30,30_0,30_10,30_20,30_30
demuxlet,0.993859,0.979031,0.970249,0.95497,0.970561,0.961629,0.949042,0.940299,0.882186,0.884211,0.880434,0.881963,0.764909,0.7758,0.789892,0.802971
freemuxlet,1.0,0.898765,0.87586,0.706136,1.0,0.925377,0.797098,0.696362,0.999763,0.899349,0.815879,0.6945,1.0,0.897635,0.807569,0.693761
vireo,0.971614,0.974487,0.966515,0.958703,0.946009,0.950683,0.949971,0.938316,0.894606,0.908102,0.913064,0.902898,0.851922,0.862757,0.865583,0.862507
vireo_nogenos,0.983316,0.976817,0.962898,0.938987,0.965208,0.96057,0.943703,0.912547,0.930329,0.928445,0.910472,0.874867,0.898822,0.888318,0.872701,0.829352
souporcell,0.998146,0.987884,0.965815,0.925805,0.996975,0.978814,0.9509,0.901353,0.989946,0.967238,0.930145,0.867179,0.979407,0.946727,0.903547,0.833749
souporcell_nogenos,0.999189,0.9887,0.970132,0.938637,0.998022,0.98411,0.961114,0.91861,0.99101,0.974098,0.943574,0.896866,0.976908,0.955925,0.92419,0.866786
scsplit,0.821458,0.892241,0.821374,0.74265,0.841168,0.876766,0.801393,0.732743,0.783416,0.84175,0.773,0.707629,0.75991,0.775681,0.734844,0.682115
scsplit_nogenos,0.822964,0.885368,0.818224,0.745684,0.747266,0.73976,0.701219,0.631646,0.777857,0.696511,0.772411,0.71437,0.756458,0.7672,0.731878,0.684135
scavengers,0.990036,0.893523,0.715436,0.701003,0.985106,0.813324,0.726872,0.635844,0.911639,0.830278,0.732477,0.647782,0.925366,0.761706,0.7944,0.591444


In [15]:
# Singlet donor accuracy for every doublet experiment (ATAC): among droplets
# that are singlets both in the truth and in a method's calls, the fraction
# whose `donor_id` equals the truth `donor_id`.
doub_atac_donor_acc = pd.DataFrame(index=atac_methods, columns=doub_experiments)

for exp in doub_experiments:
    tmp_doub_atac = doub_atac[doub_atac['exp'] == exp]
    mask = tmp_doub_atac['method'] == 'truth'

    truth_doub_atac = tmp_doub_atac[mask]
    method_doub_atac = tmp_doub_atac[~mask]

    # Keep only rows labelled singlet on each side.
    truth_doub_atac = truth_doub_atac[truth_doub_atac['droplet_type'] == 'singlet']
    method_doub_atac = method_doub_atac[method_doub_atac['droplet_type'] == 'singlet']

    tmp_methods = method_doub_atac['method'].unique()

    # Key each accuracy by method name: `unique()` order need not match the
    # rows of `atac_methods`, and a method with no singlet calls is missing
    # from `tmp_methods` altogether, which would break a positional assignment.
    donor_acc = {}
    for method in tmp_methods:
        final_doub_atac = method_doub_atac[method_doub_atac['method'] == method]

        # Restrict both sides to the droplets they have in common.
        final_doub_atac = final_doub_atac[final_doub_atac.index.isin(truth_doub_atac.index)]
        tmp_truth = truth_doub_atac[truth_doub_atac.index.isin(final_doub_atac.index)]
        n_drops = tmp_truth.shape[0]
        # Both sides must list the shared droplets in the same order.
        assert np.sum(final_doub_atac.index == tmp_truth.index) == n_drops

        # With no shared singlets the accuracy is undefined; record NaN
        # explicitly instead of relying on a 0/0 division.
        if n_drops:
            donor_acc[method] = np.sum(final_doub_atac['donor_id'] == tmp_truth['donor_id']) / n_drops
        else:
            donor_acc[method] = np.nan

    # Align on method name; methods without singlet calls become NaN.
    doub_atac_donor_acc[exp] = pd.Series(donor_acc, dtype=float).reindex(atac_methods)

In [16]:
# Show singlet donor accuracy for the ATAC doublet experiments (rows: methods, columns: experiments).
doub_atac_donor_acc

Unnamed: 0,0_0,0_10,0_20,0_30,10_0,10_10,10_20,10_30,20_0,20_10,20_20,20_30,30_0,30_10,30_20,30_30
demuxlet,0.999883,0.999608,0.999559,0.999832,0.999281,0.999326,0.998649,0.998792,0.99638,0.997178,0.997153,0.997498,0.992997,0.994245,0.994159,0.991745
freemuxlet,0.502259,0.99987,1.0,0.750794,1.0,1.0,0.753277,0.999833,0.999527,0.504603,0.999409,0.751533,0.996548,0.502861,0.994787,0.509421
vireo,0.999762,0.999867,1.0,1.0,0.999754,0.999863,0.999696,1.0,0.999207,0.999565,0.999839,0.999814,0.998882,0.99907,0.999139,0.998414
vireo_nogenos,0.999764,1.0,0.999109,0.99983,0.999518,0.999594,0.999699,0.999135,0.998601,0.999299,0.998424,0.998009,0.0,0.0,0.0,0.0
souporcell,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.999663,0.999522,0.999867,0.999851,0.999656,0.999514,0.99932,0.998483,0.999122
souporcell_nogenos,1.0,1.0,1.0,1.0,1.0,0.999869,0.999562,0.999497,0.999284,0.999735,0.999851,0.999828,0.999513,0.999048,0.998324,0.998058
scsplit,0.999577,1.0,0.999846,0.999822,0.999032,0.999719,0.99905,0.998897,0.994413,0.998228,0.997473,0.997222,0.992638,0.99417,0.991854,0.991502
scsplit_nogenos,0.999578,1.0,0.999691,0.999644,0.475241,0.35798,0.274996,0.464271,0.995894,0.183827,0.99798,0.998052,0.991817,0.993434,0.993063,0.992816
scavengers,0.891867,0.99452,0.826579,0.951754,0.929955,0.812582,0.810141,0.823367,0.79058,0.79298,0.797385,0.784729,0.781065,0.854935,0.255564,0.860546


### Accuracy metrics for the `mux` experiments

In [17]:
# Droplet-type accuracy for every mux experiment (GEX): per method, the
# fraction of droplets whose singlet / non_singlet label equals the truth label.
mux_gex_drop_acc = pd.DataFrame(index=gex_methods, columns=mux_experiments)

for exp in mux_experiments:
    tmp_mux_gex = mux_gex[mux_gex['exp'] == exp]
    # Rows tagged 'truth' are the reference; all remaining rows are method calls.
    mask = tmp_mux_gex['method'] == 'truth'
    truth_mux_gex = tmp_mux_gex[mask]
    method_mux_gex = tmp_mux_gex[~mask]

    n_drops = truth_mux_gex.shape[0]

    # Iterate in `gex_methods` order so the list lines up with the table rows.
    drop_acc = []
    for method in gex_methods:
        final_mux_gex = method_mux_gex[method_mux_gex['method'] == method]
        acc = np.sum(final_mux_gex['droplet_type'] == truth_mux_gex['droplet_type']) / n_drops
        drop_acc.append(acc)
    mux_gex_drop_acc[exp] = drop_acc

In [18]:
# Show droplet-type accuracy for the GEX mux experiments (rows: methods, columns: experiments).
mux_gex_drop_acc

Unnamed: 0,2_0,2_10,2_20,2_30,6_0,6_10,6_20,6_30,8_0,8_10,...,12_20,12_30,14_0,14_10,14_20,14_30,16_0,16_10,16_20,16_30
demuxlet,0.970029,0.952237,0.907475,0.836524,0.955876,0.896763,0.798919,0.676534,0.94729,0.874186,...,0.731595,0.594442,0.937222,0.849901,0.726031,0.586697,0.932367,0.85234,0.715581,0.575347
freemuxlet,0.996888,0.990354,0.966215,0.946261,0.933,0.907946,0.903866,0.897527,0.912379,0.900698,...,0.897003,0.897233,0.900164,0.899895,0.89875,0.897301,0.89942,0.898331,0.897421,0.8977
vireo,0.999885,0.996049,0.963861,0.930924,0.999768,0.983049,0.910683,0.841298,1.0,0.98,...,0.888745,0.800096,0.999415,0.971631,0.891043,0.805469,0.999768,0.972452,0.888352,0.798155
vireo_nogenos,1.0,0.997095,0.973043,0.963144,0.999419,0.990583,0.954401,0.927841,0.999541,0.987326,...,0.789287,0.651815,0.936404,0.84339,0.636693,0.692978,0.845244,0.795961,0.69238,0.726402
souporcell,0.999885,0.998257,0.984697,0.978362,0.997445,0.989052,0.973557,0.950785,0.994488,0.980814,...,0.924847,0.899389,0.973463,0.94582,0.91218,0.766659,0.966937,0.927629,0.898952,0.751078
souporcell_nogenos,0.999654,0.997792,0.983402,0.977173,0.997097,0.98917,0.97156,0.95019,0.99621,0.982791,...,0.941482,0.910768,0.985504,0.95454,0.926428,0.903272,0.97993,0.955527,0.923331,0.898658
scsplit,1.0,0.995235,0.965274,0.952205,0.942638,0.924426,0.89846,0.874465,0.923633,0.89814,...,0.876239,0.871721,0.911503,0.899198,0.884386,0.849654,0.887471,0.89623,0.879402,0.848107
scsplit_nogenos,1.0,0.995119,0.966804,0.94638,0.941013,0.91183,0.899401,0.879933,0.925701,0.903372,...,0.877773,0.873278,0.907412,0.893733,0.874226,0.851206,0.891415,0.889109,0.879991,0.84643
demuxalot,0.999885,0.978036,0.841436,0.680537,0.999536,0.951265,0.765425,0.553495,0.999426,0.942442,...,0.721095,0.499461,0.999766,0.937798,0.724746,0.505254,0.999536,0.934283,0.723943,0.487542


In [19]:
# Singlet donor accuracy for every mux experiment (GEX): among droplets that
# are singlets both in the truth and in a method's calls, the fraction whose
# `donor_id` equals the truth `donor_id`.
mux_gex_donor_acc = pd.DataFrame(index=gex_methods, columns=mux_experiments)

for exp in mux_experiments:
    exp_rows = mux_gex.loc[mux_gex['exp'] == exp]
    is_truth = exp_rows['method'] == 'truth'
    is_singlet = exp_rows['droplet_type'] == 'singlet'

    # Singlet rows only, split into the reference and the method calls.
    truth_singlets = exp_rows.loc[is_truth & is_singlet]
    called_singlets = exp_rows.loc[~is_truth & is_singlet]

    donor_acc = []
    for method in gex_methods:
        calls = called_singlets.loc[called_singlets['method'] == method]

        # Restrict both sides to the droplets they have in common.
        calls = calls.loc[calls.index.isin(truth_singlets.index)]
        truth = truth_singlets.loc[truth_singlets.index.isin(calls.index)]
        n_shared = truth.shape[0]
        # Both sides must list the shared droplets in the same order.
        assert (calls.index == truth.index).sum() == n_shared

        n_correct = (calls['donor_id'] == truth['donor_id']).sum()
        donor_acc.append(n_correct / n_shared)

    mux_gex_donor_acc[exp] = donor_acc

In [20]:
# Show singlet donor accuracy for the GEX mux experiments (rows: methods, columns: experiments).
mux_gex_donor_acc

Unnamed: 0,2_0,2_10,2_20,2_30,6_0,6_10,6_20,6_30,8_0,8_10,...,12_20,12_30,14_0,14_10,14_20,14_30,16_0,16_10,16_20,16_30
demuxlet,0.998965,0.998133,0.995875,0.994192,0.9984,0.996948,0.989125,0.987291,0.998669,0.995289,...,0.987521,0.975952,0.997662,0.992101,0.988599,0.975983,0.997112,0.994021,0.984133,0.971953
freemuxlet,1.0,1.0,1.0,1.0,1.0,0.999869,0.998822,0.996553,1.0,1.0,...,0.995396,0.977306,1.0,0.999741,0.993762,0.975509,1.0,0.99935,0.995538,0.966373
vireo,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,0.999831,0.999871,1.0,0.999851,1.0
vireo_nogenos,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.999715,1.0,0.999869,...,0.235906,0.195136,0.923428,0.760876,0.267747,0.147747,0.676312,0.270882,0.154253,0.127464
souporcell,1.0,1.0,1.0,1.0,1.0,1.0,0.999868,0.999059,1.0,1.0,...,0.998415,0.979279,1.0,0.999871,0.996875,0.088759,1.0,1.0,0.986124,0.078125
souporcell_nogenos,1.0,1.0,1.0,1.0,1.0,1.0,0.999867,0.999593,1.0,1.0,...,0.997078,0.987591,0.99987,0.932206,0.928581,0.983847,0.999871,0.940263,0.932217,0.854543
scsplit,1.0,1.0,1.0,1.0,1.0,1.0,0.997837,0.994059,1.0,0.999211,...,0.974272,0.899958,0.931978,0.996732,0.967991,0.712011,1.0,0.996711,0.888378,0.628686
scsplit_nogenos,1.0,1.0,1.0,1.0,1.0,1.0,0.998092,0.993531,1.0,0.999472,...,0.974806,0.902405,0.937353,0.997239,0.961932,0.696525,0.939263,0.995094,0.894751,0.58362
demuxalot,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.999737,1.0,1.0,...,1.0,0.999699,1.0,0.999861,1.0,1.0,1.0,0.99986,0.999621,1.0


In [21]:
# Droplet-type accuracy for every mux experiment (ATAC): per method, the
# fraction of droplets whose singlet / non_singlet label equals the truth label.
mux_atac_drop_acc = pd.DataFrame(index=atac_methods, columns=mux_experiments)

for exp in mux_experiments:
    exp_rows = mux_atac.loc[mux_atac['exp'] == exp]
    is_truth = exp_rows['method'] == 'truth'

    # Rows tagged 'truth' are the reference; all remaining rows are method calls.
    truth_rows = exp_rows.loc[is_truth]
    call_rows = exp_rows.loc[~is_truth]
    n_truth = truth_rows.shape[0]

    # Iterate in `atac_methods` order so the list lines up with the table rows.
    drop_acc = []
    for method in atac_methods:
        method_rows = call_rows.loc[call_rows['method'] == method]
        n_match = (method_rows['droplet_type'] == truth_rows['droplet_type']).sum()
        drop_acc.append(n_match / n_truth)
    mux_atac_drop_acc[exp] = drop_acc

In [22]:
# Show droplet-type accuracy for the ATAC mux experiments (rows: methods, columns: experiments).
mux_atac_drop_acc

Unnamed: 0,2_0,2_10,2_20,2_30,6_0,6_10,6_20,6_30,8_0,8_10,...,12_20,12_30,14_0,14_10,14_20,14_30,16_0,16_10,16_20,16_30
demuxlet,0.98928,0.972342,0.915951,0.824278,0.980725,0.934667,0.822658,0.687827,0.974621,0.924302,...,0.776192,0.614684,0.974398,0.907104,0.76305,0.596131,0.97007,0.901482,0.757743,0.589722
freemuxlet,0.906052,0.899128,0.898646,0.898229,0.899907,0.897587,0.898578,0.897646,0.899518,0.898256,...,0.897003,0.897952,0.898761,0.89943,0.898634,0.897659,0.898492,0.898331,0.897539,0.89782
vireo,0.98513,0.973969,0.949029,0.917132,0.97666,0.942084,0.909978,0.854969,0.96819,0.938721,...,0.877301,0.805246,0.958031,0.926404,0.865351,0.802364,0.958237,0.92331,0.858438,0.796239
vireo_nogenos,0.989741,0.982452,0.960565,0.933421,0.969345,0.894526,0.865319,0.770804,0.790308,0.708372,...,0.731713,0.716972,0.771452,0.766074,0.74203,0.737401,0.783411,0.779736,0.759746,0.749401
souporcell,0.998617,0.997327,0.99176,0.983712,0.973874,0.961271,0.954754,0.940918,0.952458,0.942558,...,0.90184,0.902384,0.903437,0.901872,0.899334,0.897659,0.900348,0.899265,0.899423,0.898539
souporcell_nogenos,0.998386,0.997676,0.990936,0.982285,0.973177,0.728899,0.692796,0.698526,0.947749,0.714302,...,0.843322,0.855432,0.909282,0.787234,0.837207,0.870074,0.906613,0.776234,0.810976,0.876497
scsplit,0.971643,0.957467,0.929017,0.881821,0.838946,0.781048,0.782466,0.750951,0.844855,0.797907,...,0.711656,0.662355,0.780804,0.749797,0.70536,0.660736,0.786891,0.7411,0.712284,0.672976
scsplit_nogenos,0.969683,0.955956,0.92678,0.879325,0.823734,0.819305,0.76378,0.739539,0.831994,0.793721,...,0.706701,0.655049,0.782324,0.755028,0.709448,0.662766,0.781671,0.754757,0.714403,0.675611
scavengers,0.964957,0.935386,0.923014,0.910355,0.898978,0.894644,0.663063,0.615074,0.898714,0.898023,...,0.36798,0.370104,0.898059,0.447622,0.896648,0.302364,0.253712,0.257383,0.590154,0.269166


In [None]:
# Singlet donor accuracy for every mux experiment (ATAC): among droplets that
# are singlets both in the truth and in a method's calls, the fraction whose
# `donor_id` equals the truth `donor_id`.
mux_atac_donor_acc = pd.DataFrame(index=atac_methods, columns=mux_experiments)

for exp in mux_experiments:
    exp_rows = mux_atac.loc[mux_atac['exp'] == exp]
    is_truth = exp_rows['method'] == 'truth'
    is_singlet = exp_rows['droplet_type'] == 'singlet'

    # Singlet rows only, split into the reference and the method calls.
    truth_singlets = exp_rows.loc[is_truth & is_singlet]
    called_singlets = exp_rows.loc[~is_truth & is_singlet]

    donor_acc = []
    for method in atac_methods:
        calls = called_singlets.loc[called_singlets['method'] == method]

        # Restrict both sides to the droplets they have in common.
        calls = calls.loc[calls.index.isin(truth_singlets.index)]
        truth = truth_singlets.loc[truth_singlets.index.isin(calls.index)]
        n_shared = truth.shape[0]
        # Both sides must list the shared droplets in the same order.
        assert (calls.index == truth.index).sum() == n_shared

        n_correct = (calls['donor_id'] == truth['donor_id']).sum()
        donor_acc.append(n_correct / n_shared)

    mux_atac_donor_acc[exp] = donor_acc

In [None]:
# Show singlet donor accuracy for the ATAC mux experiments (rows: methods, columns: experiments).
mux_atac_donor_acc

### let's look at heatmaps

In [None]:
# Annotated heatmap of droplet-type accuracy, doublet experiments, GEX.
# The Axes is passed explicitly and labelled so the figure stands on its own.
fig, ax = plt.subplots(1, figsize=(12, 8))

sns.heatmap(doub_gex_drop_acc, annot=True, cmap='bwr', ax=ax)
ax.set(title='Doublet experiments, GEX: droplet-type accuracy',
       xlabel='experiment', ylabel='method');

In [None]:
# Annotated heatmap of singlet donor accuracy, doublet experiments, GEX.
# The Axes is passed explicitly and labelled so the figure stands on its own.
fig, ax = plt.subplots(1, figsize=(12, 8))

sns.heatmap(doub_gex_donor_acc, annot=True, cmap='bwr', ax=ax)
ax.set(title='Doublet experiments, GEX: singlet donor accuracy',
       xlabel='experiment', ylabel='method');

In [None]:
# Annotated heatmap of droplet-type accuracy, doublet experiments, ATAC.
# The Axes is passed explicitly and labelled so the figure stands on its own.
fig, ax = plt.subplots(1, figsize=(12, 8))

sns.heatmap(doub_atac_drop_acc, annot=True, cmap='bwr', ax=ax)
ax.set(title='Doublet experiments, ATAC: droplet-type accuracy',
       xlabel='experiment', ylabel='method');

In [None]:
# Annotated heatmap of singlet donor accuracy, doublet experiments, ATAC.
# The Axes is passed explicitly and labelled so the figure stands on its own.
fig, ax = plt.subplots(1, figsize=(12, 8))

sns.heatmap(doub_atac_donor_acc, annot=True, cmap='bwr', ax=ax)
ax.set(title='Doublet experiments, ATAC: singlet donor accuracy',
       xlabel='experiment', ylabel='method');

In [None]:
# Annotated heatmap of droplet-type accuracy, mux experiments, GEX.
# The Axes is passed explicitly and labelled so the figure stands on its own.
fig, ax = plt.subplots(1, figsize=(16, 6))

sns.heatmap(mux_gex_drop_acc, annot=True, cmap='bwr', ax=ax)
ax.set(title='Mux experiments, GEX: droplet-type accuracy',
       xlabel='experiment', ylabel='method');

In [None]:
# Annotated heatmap of singlet donor accuracy, mux experiments, GEX.
# The Axes is passed explicitly and labelled so the figure stands on its own.
fig, ax = plt.subplots(1, figsize=(16, 6))

sns.heatmap(mux_gex_donor_acc, annot=True, cmap='bwr', ax=ax)
ax.set(title='Mux experiments, GEX: singlet donor accuracy',
       xlabel='experiment', ylabel='method');

In [None]:
# Annotated heatmap of droplet-type accuracy, mux experiments, ATAC.
# The Axes is passed explicitly and labelled so the figure stands on its own.
fig, ax = plt.subplots(1, figsize=(16, 6))

sns.heatmap(mux_atac_drop_acc, annot=True, cmap='bwr', ax=ax)
ax.set(title='Mux experiments, ATAC: droplet-type accuracy',
       xlabel='experiment', ylabel='method');

In [None]:
# Annotated heatmap of singlet donor accuracy, mux experiments, ATAC.
# The Axes is passed explicitly and labelled so the figure stands on its own.
fig, ax = plt.subplots(1, figsize=(16, 6))

sns.heatmap(mux_atac_donor_acc, annot=True, cmap='bwr', ax=ax)
ax.set(title='Mux experiments, ATAC: singlet donor accuracy',
       xlabel='experiment', ylabel='method');

In [33]:
# Write the four doublet-experiment accuracy tables. All of them use the same
# writer options: tab-separated, with the header row and the index column.
_tsv_opts = dict(sep='\t', header=True, index=True)

doub_gex_drop_acc.to_csv(f'{projdir}/csv/final_figures/figure3/doub_gex_drop_acc_low_cov.csv.gz', **_tsv_opts)
doub_gex_donor_acc.to_csv(f'{projdir}/csv/final_figures/figure3/doub_gex_donor_acc_low_cov.csv.gz', **_tsv_opts)
doub_atac_drop_acc.to_csv(f'{projdir}/csv/final_figures/figure3/doub_atac_drop_acc_low_cov.csv.gz', **_tsv_opts)
doub_atac_donor_acc.to_csv(f'{projdir}/csv/final_figures/figure3/doub_atac_donor_acc_low_cov.csv.gz', **_tsv_opts)

In [34]:
# Write the four mux-experiment accuracy tables. All of them use the same
# writer options: tab-separated, with the header row and the index column.
_tsv_opts = dict(sep='\t', header=True, index=True)

mux_gex_drop_acc.to_csv(f'{projdir}/csv/final_figures/figure3/mux_gex_drop_acc_low_cov.csv.gz', **_tsv_opts)
mux_gex_donor_acc.to_csv(f'{projdir}/csv/final_figures/figure3/mux_gex_donor_acc_low_cov.csv.gz', **_tsv_opts)
mux_atac_drop_acc.to_csv(f'{projdir}/csv/final_figures/figure3/mux_atac_drop_acc_low_cov.csv.gz', **_tsv_opts)
mux_atac_donor_acc.to_csv(f'{projdir}/csv/final_figures/figure3/mux_atac_donor_acc_low_cov.csv.gz', **_tsv_opts)