### Calculate all the benchmarking metrics

In [1]:
# Numerical / tabular / plotting stack.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Single-cell analysis libraries.
import anndata as ad
import scanpy as sc

import sys
# Put the current working directory on the import path before importing
# match / metrics / utils (presumably project-local modules that live next to
# this notebook -- confirm).
sys.path.append("./")
import match
import metrics
import utils
from scipy.io import mmread
import os

# Display the path of the Python interpreter the kernel is running on.
sys.executable

'/home/bkzhu/python/miniconda3/envs/super_mario_testing/bin/python3'

In [4]:
# Load the per-cell metadata table and pull out the 'celltype' column as a
# NumPy array; this array is used as the label vector in the accuracy cells.
meta = pd.read_csv(
    '/atac_bench_nrz/10xpbmc/data/10x_pbmc_meta.csv'
)
annt = meta.loc[:, 'celltype'].to_numpy()

In [6]:
# MAESTRO ("ms"): read the RNA and ATAC embeddings, compute the correlation
# distance between them, and derive a kNN matching from that distance matrix.
msx = pd.read_csv("/atac_bench_nrz/10xpbmc/maestro/rna_embed.csv")   # RNA embedding
msy = pd.read_csv("/atac_bench_nrz/10xpbmc/maestro/atac_embed.csv")  # ATAC embedding

# The ATAC embedding is passed first and the RNA embedding second.
ms_dist = utils.cdist_correlation(
    msy.to_numpy(),
    msx.to_numpy(),
)
ms_full_match, ms_scores = metrics.get_knn_matching(ms_dist)

# Matching triple: [0..n-1 index array, matched indices, match scores].
msmatch = [
    np.arange(len(msx)),
    ms_full_match,
    ms_scores,
]

In [7]:
# GLUE ("gl"): read the RNA and ATAC embeddings, compute the correlation
# distance between them, and derive a kNN matching from that distance matrix.
glx = pd.read_csv("/atac_bench_nrz/10xpbmc/glue/pbmc_rna.csv")   # RNA embedding
gly = pd.read_csv("/atac_bench_nrz/10xpbmc/glue/pbmc_atac.csv")  # ATAC embedding

# The ATAC embedding is passed first and the RNA embedding second.
gl_dist = utils.cdist_correlation(
    gly.to_numpy(),
    glx.to_numpy(),
)
gl_full_match, gl_scores = metrics.get_knn_matching(gl_dist)

# Matching triple: [0..n-1 index array, matched indices, match scores].
glmatch = [
    np.arange(len(glx)),
    gl_full_match,
    gl_scores,
]

In [8]:
# scJoint ("scj"): read the RNA and ATAC embeddings, compute the correlation
# distance between them, and derive a kNN matching from that distance matrix.
scjx = pd.read_csv("/atac_bench_nrz/10xpbmc/scJoint/raw_labels/rna_embd.csv")   # RNA embedding
scjy = pd.read_csv("/atac_bench_nrz/10xpbmc/scJoint/raw_labels/atac_embd.csv")  # ATAC embedding

# The ATAC embedding is passed first and the RNA embedding second.
scj_dist = utils.cdist_correlation(
    scjy.to_numpy(),
    scjx.to_numpy(),
)
scj_full_match, scj_scores = metrics.get_knn_matching(scj_dist)

# Matching triple: [0..n-1 index array, matched indices, match scores].
scjmatch = [
    np.arange(len(scjx)),
    scj_full_match,
    scj_scores,
]

In [9]:
# "mf" method: the matching is precomputed on disk, so just read it and pack
# the three columns into the same [idx1, idx2, score] triple used for the
# other methods.
mf = pd.read_csv("/atac_bench_nrz/10xpbmc/mf/full_idx_21.csv")
mfmatch = [mf[column].tolist() for column in ('idx1', 'idx2', 'score')]

In [11]:
# Matching accuracy for the MAESTRO ("ms") matching. The same annotation array
# is supplied for both label arguments.
order = (2, 1)
acc_ann_ms = metrics.get_matching_acc(matching=msmatch, labels1=annt, labels2=annt, order=order)
acc_ann_ms

0.8026565464895635

In [12]:
# Matching accuracy for the scJoint ("scj") matching. The same annotation
# array is supplied for both label arguments.
order = (2, 1)
acc_ann_scj = metrics.get_matching_acc(matching=scjmatch, labels1=annt, labels2=annt, order=order)
acc_ann_scj

0.8185857089966679

In [13]:
# Matching accuracy for the precomputed "mf" matching. The same annotation
# array is supplied for both label arguments.
order = (2, 1)
acc_ann_mf = metrics.get_matching_acc(matching=mfmatch, labels1=annt, labels2=annt, order=order)
acc_ann_mf

0.9215661103979461

In [14]:
# Matching accuracy for the GLUE ("gl") matching. The same annotation array
# is supplied for both label arguments.
order = (2, 1)
acc_ann_gl = metrics.get_matching_acc(matching=glmatch, labels1=annt, labels2=annt, order=order)
acc_ann_gl

0.9179306608884074

In [22]:
# Accuracies collected in the fixed method order: mf, gl, scj, ms.
ann_listlv1 = [
    acc_ann_mf,
    acc_ann_gl,
    acc_ann_scj,
    acc_ann_ms,
]
ann_listlv1

[0.9215661103979461,
 0.9179306608884074,
 0.8185857089966679,
 0.8026565464895635]

### Calculate the remaining metrics

In [16]:
# "mf" method: read the two full embeddings and compute their correlation
# distance matrix.
mfx = pd.read_csv("/atac_bench_nrz/10xpbmc/mf/full_embed_x0.csv")
mfy = pd.read_csv("/atac_bench_nrz/10xpbmc/mf/full_embed_y0.csv")

# NOTE(review): the x embedding is passed first here, whereas the ms / gl / scj
# cells pass the y (ATAC) embedding first -- confirm this ordering is intended.
mf_dist = utils.cdist_correlation(
    mfx.to_numpy(),
    mfy.to_numpy(),
)

In [17]:
# FOSCTTM score for each benchmarked method, computed from its cross-modality
# distance matrix.
#
# The former `lg_fos = metrics.get_foscttm(lg_dist)` line was dropped:
# `lg_dist` is never defined in this notebook (leftover kernel state), so the
# cell raised NameError on a fresh "Restart & Run All", and `lg_fos` was not
# used by any later cell.
mf_fos = metrics.get_foscttm(mf_dist)
scj_fos = metrics.get_foscttm(scj_dist)
ms_fos = metrics.get_foscttm(ms_dist)
gl_fos = metrics.get_foscttm(gl_dist)

In [23]:
# FOSCTTM scores collected in the fixed method order: mf, gl, scj, ms.
foscttm_list = [
    mf_fos,
    gl_fos,
    scj_fos,
    ms_fos,
]
foscttm_list

[0.038037838577156126,
 0.034041833033694166,
 0.08169077237694881,
 0.07780616339370611]

## Read in the precomputed slt_f1 and ari_f1 values

In [19]:
# Read each method's precomputed metrics table from disk.
mf_int = pd.read_csv("/atac_bench_nrz/10xpbmc/mf/metrics.csv")
ms_int = pd.read_csv("/atac_bench_nrz/10xpbmc/maestro/metrics.csv")
gl_int = pd.read_csv("/atac_bench_nrz/10xpbmc/glue/metrics.csv")
scj_int = pd.read_csv("/atac_bench_nrz/10xpbmc/scJoint/metrics.csv")

# Take the row-0 value of each metric, in the fixed method order
# mf, gl, scj, ms.
slt_f1 = [table.loc[0, 'slt_f1'] for table in (mf_int, gl_int, scj_int, ms_int)]
ari_f1 = [table.loc[0, 'ari_f1'] for table in (mf_int, gl_int, scj_int, ms_int)]

slt_f1

[0.5387545117479168,
 0.5556004872371391,
 0.5720980927286078,
 0.5435032398561955]

In [20]:
# Display the collected ari_f1 values (one entry per method).
ari_f1

[0.6143712333107971,
 0.6075829158252378,
 0.6193348831892977,
 0.6040566080199844]

## Combine the metrics into a DataFrame and save it

In [24]:
# One row per method; every metric list was built in this same method order.
m = minfo = ["mf", "gl", "scj", "ms"]

data = {
    'method': minfo,
    'slt_f1': slt_f1,
    'ari_f1': ari_f1,
    'ann1': ann_listlv1,
    'foscttm': foscttm_list,
}

matching_result = pd.DataFrame(data)
matching_result

Unnamed: 0,method,slt_f1,ari_f1,ann1,foscttm
0,mf,0.538755,0.614371,0.921566,0.038038
1,gl,0.5556,0.607583,0.917931,0.034042
2,scj,0.572098,0.619335,0.818586,0.081691
3,ms,0.543503,0.604057,0.802657,0.077806


In [25]:
# Persist the per-method summary table. No `index` argument is passed, so the
# pandas default applies and the row index is written as the first column.
matching_result.to_csv(
    path_or_buf="/atac_bench_nrz/10xpbmc/metric_results.csv",
)

## knn true match percentage

In [26]:
# kNN alignment score of every method, evaluated with k_max = k on that
# method's cross-modality distance matrix.
k = 100
knn_match_mf = metrics.get_knn_alignment_score(dist=mf_dist, k_max=k)
knn_match_scj = metrics.get_knn_alignment_score(dist=scj_dist, k_max=k)
knn_match_ms = metrics.get_knn_alignment_score(dist=ms_dist, k_max=k)
knn_match_gl = metrics.get_knn_alignment_score(dist=gl_dist, k_max=k)

In [27]:
# Wide table of the kNN alignment curves: one column per method.
data = {
    'mf': knn_match_mf,
    'scj': knn_match_scj,
    'ms': knn_match_ms,
    'gl': knn_match_gl,
}
knn_tmp = pd.DataFrame(data)
knn_tmp

Unnamed: 0,mf,scj,ms,gl
0,0.018999,0.004750,0.004108,0.028370
1,0.031451,0.010783,0.007189,0.046598
2,0.042105,0.015918,0.010911,0.060719
3,0.052375,0.020796,0.013350,0.073941
4,0.061874,0.024134,0.016688,0.086521
...,...,...,...,...
95,0.410783,0.213479,0.237741,0.439024
96,0.412580,0.214249,0.238768,0.441078
97,0.414891,0.215019,0.240436,0.444159
98,0.418100,0.215918,0.242619,0.445956


In [38]:
# Reshape the per-method kNN alignment curves into a long-format table with
# one row per (method, step), stacked in the method order mf, gl, scj, ms.
#
# Curve lengths are taken from the score arrays themselves instead of the
# previously hard-coded 100 / 4, so this cell keeps working if `k` is changed
# in the kNN cell above. It also no longer re-imports numpy mid-notebook or
# rebinds `knn_tmp` (the wide DataFrame) to a plain list.
knn_curves = {
    "mf": knn_match_mf,
    "gl": knn_match_gl,
    "scj": knn_match_scj,
    "ms": knn_match_ms,
}

knn_result = pd.DataFrame({
    # Method label repeated once per step of that method's curve.
    'method': [name for name, curve in knn_curves.items() for _ in range(len(curve))],
    # All curves concatenated in method order.
    'knn_tmp': [score for curve in knn_curves.values() for score in curve.tolist()],
    # 0-based position within each method's curve.
    'step': [pos for curve in knn_curves.values() for pos in range(len(curve))],
})
knn_result

Unnamed: 0,method,knn_tmp,step
0,mf,0.018999,0
1,mf,0.031451,1
2,mf,0.042105,2
3,mf,0.052375,3
4,mf,0.061874,4
...,...,...,...
395,ms,0.237741,95
396,ms,0.238768,96
397,ms,0.240436,97
398,ms,0.242619,98


In [39]:
# Write the long-format kNN table to CSV. No `index` argument is passed, so
# the pandas default applies and the row index is written as the first column.
knn_result.to_csv(
    path_or_buf="/atac_bench_nrz/10xpbmc/knntmp.csv",
)