In [1]:
import numpy as np
import scanpy as sc
import pandas as pd
import anndata as ad
import scib_metrics
from scib_metrics.benchmark import Benchmarker
import faiss
from scipy import sparse

In [2]:
def define_path(l_coef):
    adata_path = '../results/multigrate/trimodal/coef_' + str(l_coef) + '/'
    return adata_path

In [3]:
def load_adata(path):
    print("loading trimodal adata..\n\n")
    combined = ad.read_h5ad(path+"combined.h5ad")
    return combined

In [4]:
def compute_neighbours(combined):
    print("computing neighbours on latent space..\n\n")
    sc.pp.neighbors(combined,  use_rep='latent')

In [5]:
def define_variables(combined):
    print("defining variables..\n\n")
    X = combined.obsm['latent']
    labels = combined.obs['Annotation_cell_type']
    batches = combined.obs['Domain']
    distances_nn = combined.obsp['distances']
    connectivities = combined.obsp['connectivities']
    
    return X, labels, batches, distances_nn, connectivities

In [6]:
def run_metrics(X, labels, batches, distances_nn, connectivities):
    print("running metrics..\n\n")
    batch_effect_metrics = {}
    #removal of batch effects
    print("running pcr..\n\n")
    pcr = scib_metrics.utils.principal_component_regression(X=X, covariate=batches.cat.codes.to_numpy())
    print("running graph connectivity..\n\n")
    graph_connectivity = scib_metrics.graph_connectivity(X=distances_nn, labels=labels)
    print("running ilisi knn..\n\n")
    ilisi_knn = scib_metrics.ilisi_knn(X=distances_nn, batches=batches)
    print("running silhouette batch..\n\n")
    silhouette_batch = scib_metrics.silhouette_batch(X=X, labels=labels, batch=batches)
    print("running kbet..\n\n")
    kbet = scib_metrics.kbet(X=distances_nn, batches=batches)
    
    #conservation of variance from cell identity labels (label conservation metrics)
    label_conserv_metrics = {}
    print("running silhouette label.. \n\n")
    silhouette_label = scib_metrics.silhouette_label(X=X, labels=labels)
    print("running nmi, ari with leiden..\n\n")
    nmi_ari_leiden = scib_metrics.nmi_ari_cluster_labels_leiden(X=connectivities, labels=labels)
    print("running clisi knn..\n\n")
    clisi_knn = scib_metrics.clisi_knn(X=distances_nn, labels=labels)
    
    print("generating batch effect metrics dictionary.. \n\n")
    batch_effect_metrics.update([('pcr', pcr), ('graph_connectivity', graph_connectivity),
                                 ('ilisi_knn', ilisi_knn), ('silhouette_batch', silhouette_batch),
                                 ('kbet', kbet)])
    print("generating label conservation metrics dictionary.. \n\n")
    label_conserv_metrics.update([('silhouette_label', silhouette_label), ('nmi_ari_leiden', nmi_ari_leiden),
                                 ('clisi_knn', clisi_knn)])
    
    print("batch effect metrics for current integration coefficient:")
    print(batch_effect_metrics)
    print("label conservation metrics for current integration coefficient:")
    print(label_conserv_metrics)
    
    return batch_effect_metrics, label_conserv_metrics

In [7]:
def save_metrics(adata_path, batch_effect_metrics, label_conserv_metrics):    
    with open(adata_path+"integration_metrics_cell_type.txt", "w") as f:
        f.write("batch_effect_metrics:\n\n")
        for key, value in batch_effect_metrics.items():
            f.write(f"{key}: {value}\n")
        print("\n")
        f.write("label conservation metrics:\n\n")
        for key, value in label_conserv_metrics.items():
            f.write(f"{key}: {value}\n")

In [8]:
def main():
    print("Analysis starting..\n\n")
    for l_coef in [0, 1, 1e1, 1e2, 1e3, 1e4, 1e5]:       
        print("Computing metrics for integration coefficient: {}\n\n".format(l_coef))
        adata_path = define_path(l_coef)
        combined = load_adata(adata_path)
        compute_neighbours(combined)
        X, labels, batches, distances_nn, connectivities = define_variables(combined)
        batch_effect_metrics, label_conserv_metrics = run_metrics(X, labels, batches, distances_nn, connectivities)
        save_metrics(adata_path, batch_effect_metrics, label_conserv_metrics)
    
    print("analysis finished")

In [9]:
main()

Analysis starting..


Computing metrics for integration coefficient: 0


loading trimodal adata..


computing neighbours on latent space..




  from .autonotebook import tqdm as notebook_tqdm
No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


defining variables..


running metrics..


running pcr..


running graph connectivity..


running ilisi knn..


running silhouette batch..


running kbet..


running silhouette label.. 


running nmi, ari with leiden..


running clisi knn..


generating batch effect metrics dictionary.. 


generating label conservation metrics dictionary.. 


batch effect metrics for current integration coefficient:
{'pcr': 0.05129028484225273, 'graph_connectivity': 0.8156051688668696, 'ilisi_knn': 0.0, 'silhouette_batch': 0.72287476, 'kbet': (0.0009248029108550636, array([15., 15., 15., ..., 15., 15., 15.], dtype=float32), array([0.00010753, 0.00010753, 0.00010753, ..., 0.00010753, 0.00010753,
       0.00010753], dtype=float32))}
label conservation metrics for current integration coefficient:
{'silhouette_label': 0.5301571115851402, 'nmi_ari_leiden': {'nmi': 0.5950741771523855, 'ari': 0.31522169095830066}, 'clisi_knn': 1.0}


Computing metrics for integration coefficient: 1


loading trimodal adata..
