In [2]:
import numpy as np
import scanpy as sc
import pandas as pd
import anndata as ad
import scib_metrics
import scib
from scib_metrics.benchmark import Benchmarker
from scipy import sparse
import os

In [3]:
def define_path(l_coef):
    """Return the results directory for one integration coefficient.

    The coefficient is rendered with ``str()``, so a float keeps its decimal
    part (``1e4`` gives ``coef_10000.0``) while an int does not (``0`` gives
    ``coef_0``). The returned path always ends with a ``/``.
    """
    results_root = '../results/multigrate/trimodal/all_samples'
    return f"{results_root}/coef_{l_coef!s}/"

In [4]:
def load_adata(path):
    """Read ``combined.h5ad`` from the directory ``path`` and return it.

    ``path`` is concatenated with the file name as-is, so it must already end
    with a path separator.
    """
    print("loading trimodal adata..\n\n")
    h5ad_file = path + "combined.h5ad"
    return ad.read_h5ad(h5ad_file)

In [5]:
def compute_neighbours(combined):
    """Run ``sc.pp.neighbors`` on ``combined`` using its ``'latent'`` representation.

    Nothing is returned; the call is made for its effect on ``combined``.
    """
    print("computing neighbours on latent space..\n\n")
    neighbour_options = {'use_rep': 'latent'}
    sc.pp.neighbors(combined, **neighbour_options)

In [6]:
def define_variables(combined, label_key, batch_key):
    """Collect the inputs the metric functions need from ``combined``.

    Side effect: adds ``<label_key>_code`` and ``<batch_key>_code`` columns to
    ``combined.obs`` holding the categorical codes of the two columns (both
    columns are accessed through ``.cat``).

    Returns
    -------
    tuple
        ``(latent, label_key, batch_key, distances, connectivities)`` where
        ``latent`` is ``combined.obsm['latent']`` and the two graphs are read
        from ``combined.obsp``. The two keys are returned unchanged.
    """
    print("defining variables..\n\n")
    latent = combined.obsm['latent']

    # Label column first, then batch column, matching the column order the
    # rest of the notebook has always produced.
    for key in (label_key, batch_key):
        combined.obs[key + '_code'] = combined.obs[key].cat.codes.to_numpy()

    graphs = combined.obsp
    return latent, label_key, batch_key, graphs['distances'], graphs['connectivities']

In [7]:
def run_metrics(combined, X, label_key, batch_key, distances_nn, connectivities):
    """Compute batch-removal and label-conservation metrics for one integration.

    Parameters
    ----------
    combined
        Annotated data object. ``combined.obs`` must contain the
        ``<label_key>_code`` and ``<batch_key>_code`` columns; the object is
        also handed to the ``scib`` metrics together with the ``'latent'``
        embedding name.
    X
        Latent embedding passed to principal component regression.
    label_key, batch_key
        Names of the label and batch columns in ``combined.obs``.
    distances_nn
        Neighbour distance graph passed to ``scib_metrics.graph_connectivity``.
    connectivities
        Neighbour connectivity graph passed to the Leiden NMI/ARI metric.

    Returns
    -------
    tuple of dict
        ``(batch_effect_metrics, label_conserv_metrics)``. The first has the
        keys ``pcr``, ``graph_connectivity``, ``ilisi_knn`` and
        ``silhouette_batch``; the second has ``silhouette_label``,
        ``nmi_ari_leiden`` and ``clisi_knn``.
    """
    print("running metrics..\n\n")
    label_codes = combined.obs[label_key + '_code']
    # Plain ndarray rather than a pandas Series: the PCR helper works on raw
    # arrays and then does not depend on pandas-specific behaviour.
    batch_codes = combined.obs[batch_key + '_code'].to_numpy()

    # --- removal of batch effects ---
    print("running pcr..\n\n")
    # The batch codes are arbitrary integer ids of a categorical variable.
    # categorical=True makes the regression one-hot encode them instead of
    # treating them as a continuous covariate, whose result would depend on
    # the (meaningless) ordering of the categories.
    pcr = scib_metrics.utils.principal_component_regression(
        X=X, covariate=batch_codes, categorical=True)
    print("running graph connectivity..\n\n")
    graph_connectivity = scib_metrics.graph_connectivity(X=distances_nn, labels=label_codes)
    print("running ilisi knn (scib implementation).. \n\n")
    ilisi_knn = scib.metrics.ilisi_graph(combined, batch_key, "knn", use_rep='latent')
    print("running silhouette batch (scib implementation)..\n\n")
    silhouette_batch = scib.metrics.silhouette_batch(combined, batch_key, label_key, 'latent')

    # --- conservation of variance from cell identity labels ---
    print("running silhouette label.. \n\n")
    silhouette_label = scib.metrics.silhouette(combined, label_key, 'latent')
    print("running nmi, ari with leiden..\n\n")
    nmi_ari_leiden = scib_metrics.nmi_ari_cluster_labels_leiden(X=connectivities, labels=label_codes)
    print("running clisi knn..\n\n")
    clisi_knn = scib.metrics.clisi_graph(combined, label_key, 'knn', use_rep='latent')

    print("generating batch effect metrics dictionary.. \n\n")
    batch_effect_metrics = {
        'pcr': pcr,
        'graph_connectivity': graph_connectivity,
        'ilisi_knn': ilisi_knn,
        'silhouette_batch': silhouette_batch,
    }

    print("generating label conservation metrics dictionary.. \n\n")
    label_conserv_metrics = {
        'silhouette_label': silhouette_label,
        'nmi_ari_leiden': nmi_ari_leiden,
        'clisi_knn': clisi_knn,
    }

    print("batch effect metrics for current integration coefficient:")
    print(batch_effect_metrics)
    print("label conservation metrics for current integration coefficient:")
    print(label_conserv_metrics)

    return batch_effect_metrics, label_conserv_metrics

In [8]:
def save_metrics(adata_path, batch_effect_metrics, label_conserv_metrics, label_key, batch_key):
    """Write both metric dictionaries to a text report next to the input data.

    The report is written to
    ``<adata_path>/scib_metrics/scib_metrics-<batch_key>-<label_key>.txt``;
    the ``scib_metrics`` directory is created if it does not exist and an
    existing report with the same name is overwritten.

    Parameters
    ----------
    adata_path
        Directory of the current integration run (with or without a trailing
        path separator).
    batch_effect_metrics, label_conserv_metrics
        Mappings of metric name to value; each entry becomes one
        ``name: value`` line in its section.
    label_key, batch_key
        Used only to build the report file name.
    """
    metrics_dir = os.path.join(adata_path, 'scib_metrics')
    os.makedirs(metrics_dir, exist_ok=True)
    report_file = os.path.join(metrics_dir, f"scib_metrics-{batch_key}-{label_key}.txt")

    with open(report_file, "w") as f:
        f.write("batch_effect_metrics:\n\n")
        f.writelines(f"{key}: {value}\n" for key, value in batch_effect_metrics.items())
        # Blank line between the two sections. This belongs in the report
        # itself, not on stdout.
        f.write("\n")
        f.write("label conservation metrics:\n\n")
        f.writelines(f"{key}: {value}\n" for key, value in label_conserv_metrics.items())

In [9]:
def main(l_coefs=(0, 1, 1e1, 1e2, 1e3, 1e4, 1e5),
     label_key='Annotation_major_subset',
     batch_key='Domain'):
    """Compute and save integration metrics for each integration coefficient.

    For every coefficient the combined object is loaded, the neighbour graph
    is computed on the latent space, the metrics are run and the results are
    written to a text report under that coefficient's results directory.

    Parameters
    ----------
    l_coefs
        Iterable of integration coefficients; each one selects a results
        directory via ``define_path``.
    label_key
        Name of the cell-label column in ``obs``.
    batch_key
        Name of the batch column in ``obs``.
    """
    print("analysis starting..\n\n")
    for l_coef in l_coefs:
        print(f"Computing metrics for integration coefficient: {l_coef} \n\n")
        adata_path = define_path(l_coef)
        combined = load_adata(adata_path)
        # The neighbour graph must be built BEFORE define_variables reads
        # obsp['distances'] / obsp['connectivities']; otherwise the graph
        # metrics run on whatever graph was stored in the file (or fail with
        # a KeyError when none was stored) instead of the latent-space graph.
        compute_neighbours(combined)
        X, label_key, batch_key, distances_nn, connectivities = define_variables(combined, label_key, batch_key)
        batch_effect_metrics, label_conserv_metrics = run_metrics(combined, X, label_key,
                                                                  batch_key, distances_nn, connectivities)
        save_metrics(adata_path, batch_effect_metrics, label_conserv_metrics, label_key, batch_key)

    print("analysis finished")

In [10]:
#main(batch_key='Domain', label_key='Annotation_major_subset')

In [11]:
#main(batch_key='Domain', label_key='Annotation_cell_type', l_coefs=[1e1, 1e2, 1e3, 1e4, 1e5])

In [None]:
# Metrics for the 1e4 integration coefficient, scored against the major-subset labels.
main(l_coefs=[1e4], label_key='Annotation_major_subset', batch_key='Domain')

analysis starting..


Computing metrics for integration coefficient: 10000.0 


loading trimodal adata..


defining variables..


computing neighbours on latent space..




  from .autonotebook import tqdm as notebook_tqdm


running metrics..


running pcr..




No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


running graph connectivity..


running ilisi knn (scib implementation).. 


running silhouette batch (scib implementation)..




In [None]:
# Metrics for the 1e4 integration coefficient, scored against the cell-type labels.
main(l_coefs=[1e4], label_key='Annotation_cell_type', batch_key='Domain')