In [1]:
import anndata as ad
import numpy as np

from datetime import datetime

from src.analysis.external.lisi import compute_lisi
import eighta_lib as ea

In [2]:
# Results file to analyse. Swap which assignment is active to evaluate the other model.
# RESULTS_FILE = "out/results/GAT_2024-05-23-16-24-07_results copy.h5ad"  # No-Graph model
RESULTS_FILE = "out/results/GAT_2024-05-23-16-23-45_results copy.h5ad"  # GAT model

# Entries requested from the file: `X`, `obs`, the `X_umap` coordinates and the
# five `h_2_f0` .. `h_2_f4` entries stored under `obsm`.
included_entries = ["X", "obs", "obsm/X_umap"] + [f"obsm/h_2_f{i}" for i in range(5)]

adata = ea.BackedAnnData(RESULTS_FILE).filter(include=included_entries)

In [3]:
# Benchmark: mean LISI (with respect to the "Label" column) of the raw `X`
# features and of each `h_2_f*` embedding, on random subsets of increasing size.
# Each subset size is also timed, because the runtime grows quickly with size.
rng = np.random.default_rng(42)  # fixed seed so the sampled subsets are reproducible

# Embeddings to score, in reporting order.
embedding_keys = [f"h_2_f{i}" for i in range(5)]

# Mean LISI per subset size and feature set, kept so the numbers remain
# available after the loop instead of being overwritten on every iteration.
mean_lisi_by_size = {}


def mean_label_lisi(features, labels):
    """Return the mean of the LISI scores of `features` for the "Label" column.

    Parameters
    ----------
    features
        Feature matrix passed to `compute_lisi` as its first argument.
    labels
        Metadata frame containing the "Label" column.

    Returns
    -------
    The mean of the flattened `compute_lisi` output.
    """
    return compute_lisi(features, labels, label_colnames=["Label"]).flatten().mean()


for size in [10000, 25000, 50000, 100000, 250000]:
    print(f'Computing LISI for {size} cells:')

    start = datetime.now()
    # Subsample without replacement; if the data has at most `size` cells,
    # use all of it.
    if size < adata.shape[0]:
        subset = adata[rng.choice(adata.obs_names, size, replace=False)]
    else:
        subset = adata

    labels = subset.obs[['Label']]
    scores = {}

    # `todense()` yields a matrix object; `np.asarray` turns it into a plain
    # ndarray without the extra copy that `np.array` would make.
    scores["x"] = mean_label_lisi(np.asarray(subset.X.todense()), labels)
    print(f" - x     : {scores['x']:.3f}")

    for key in embedding_keys:
        scores[key] = mean_label_lisi(subset.obsm[key], labels)
        print(f" - {key}: {scores[key]:.3f}")

    mean_lisi_by_size[size] = scores

    print(f"took : {datetime.now() - start}\n")

Computing LISI for 10000 cells:
 - x     : 2.143
 - h_2_f0: 1.853
 - h_2_f1: 1.849
 - h_2_f2: 1.866
 - h_2_f3: 1.872
 - h_2_f4: 1.858
took : 0:00:26.756671

Computing LISI for 25000 cells:
 - x     : 2.142
 - h_2_f0: 1.710
 - h_2_f1: 1.709
 - h_2_f2: 1.716
 - h_2_f3: 1.728
 - h_2_f4: 1.721
took : 0:02:02.272972

Computing LISI for 50000 cells:
 - x     : 2.148
 - h_2_f0: 1.581
 - h_2_f1: 1.580
 - h_2_f2: 1.587
 - h_2_f3: 1.598
 - h_2_f4: 1.593
took : 0:07:57.195328

Computing LISI for 100000 cells:
 - x     : 2.157
 - h_2_f0: 1.439
 - h_2_f1: 1.440
 - h_2_f2: 1.444
 - h_2_f3: 1.455
 - h_2_f4: 1.449
took : 0:32:43.412445

Computing LISI for 250000 cells:


KeyboardInterrupt: 

In [4]:
# Compute LISI on the dense `X` matrix with respect to the "Label" column and
# store the flattened scores in `adata.obs["lisi_x"]`.
# Takes 42 minutes for 100k cells and the default perplexity of 30
adata.obs["lisi_x"] = compute_lisi(
    X=np.asarray(adata.X.todense()),
    metadata=adata.obs[["Label"]].copy(),  # copy so the call receives its own frame
    label_colnames=["Label"],
).flatten()

# `IN_FILE` is never defined in this notebook (NameError on a fresh kernel);
# the file that was loaded is `RESULTS_FILE`, so derive the output path from it.
adata.write(RESULTS_FILE.replace(".h5ad", "_umap.h5ad"))

In [None]:
# Compute LISI on an embedding with respect to the "Label" column and store the
# flattened scores in `adata.obs["lisi_h"]`.
# Takes 42 minutes for 100k cells and the default perplexity of 30
# NOTE(review): "X_embedding" is not among the entries requested when `adata`
# was loaded (only "X_umap" and "h_2_f0".."h_2_f4" are listed under obsm), so
# this lookup presumably fails - confirm which embedding is intended.
adata.obs["lisi_h"] = compute_lisi(
    X=np.asarray(adata.obsm["X_embedding"]),
    metadata=adata.obs[["Label"]].copy(),  # copy so the call receives its own frame
    label_colnames=["Label"],
).flatten()

# `IN_FILE` is never defined in this notebook (NameError on a fresh kernel);
# the file that was loaded is `RESULTS_FILE`, so derive the output path from it.
adata.write(RESULTS_FILE.replace(".h5ad", "_umap.h5ad"))