In [1]:
# Compute iLISI (plus cell-type ASW and NMI on the 10k_sorted_nuclei sample) for all extended use case models
# run on max254 (leg)
# run on max042
# January 15th, 2024

# Anaconda environment: scib_v1.0.1_min

In [2]:
import anndata as ad
import numpy as np
import pandas as pd
import scib
import matplotlib.pyplot as plt
from pandas.api.types import CategoricalDtype

1: Setting LC_CTYPE failed, using "C" 
2: Setting LC_COLLATE failed, using "C" 
3: Setting LC_TIME failed, using "C" 
4: Setting LC_MESSAGES failed, using "C" 
5: Setting LC_MONETARY failed, using "C" 
6: Setting LC_PAPER failed, using "C" 
7: Setting LC_MEASUREMENT failed, using "C" 


In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Names of the model directories to evaluate; filled in by the next cell.
models = list()

In [5]:
# Build the list of model directory names to evaluate.
# The list is (re)created inside this cell so that re-running the cell on its
# own cannot append the same names a second time.
models = []
for factor in ['_x1', '_x5', '_x10', '_x25', '_x50', '_x100']:
    if factor == '_x1':
        # '_x1' carries no suffix in its directory name; the VAE model is
        # added only alongside this entry.
        models += ['BAVAE_sample_100_extended_use_case',
                   'VAE_100_extended_use_case']
    else:
        # All other factors are appended to the BAVAE name as a suffix.
        models += ['BAVAE_sample_100_extended_use_case{}'.format(factor)]

In [6]:
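# 2023-12-12
# Manual fix to line 594 of scib/metrics/lisi.py (in the installed scib package)
# changed: print(i[1] + " has not enough neighbors.")
# to:      print(str(i[1]) + " has not enough neighbors.")
# Presumably i[1] is not a str, so the original concatenation failed; the same
# patch is likely needed in any environment used to re-run this notebook.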

In [7]:
# Collect the metrics for every model: score_dict maps
# model name -> {metric name: score}.
score_dict = {}
for model in models:
    score_dict[model] = {}
    # Load the model's saved AnnData and rebuild it so that the stored
    # embedding (obsm['embedding']) becomes X; obs, obsm, uns and obsp are
    # carried over unchanged.
    # NOTE(review): uns/obsp presumably hold a precomputed neighbor graph that
    # the graph-based metrics below rely on -- confirm it was built on the
    # embedding.
    adata = ad.read_h5ad('./../../../models/ETCU/{}/adata.h5ad'.format(model))
    embedding = ad.AnnData(adata.obsm['embedding'], obs=adata.obs, obsm=adata.obsm, uns=adata.uns, obsp=adata.obsp)
    # iLISI over all cells, with 'sample' as the batch variable.
    score_dict[model]['iLISI_sample'] = scib.me.lisi.ilisi_graph(embedding, batch_key='sample', multiprocessing=True)
    
    # DOGMA-seq-specific metrics
    # Keep only cells whose sample name contains 'DOGMA', then derive two
    # categorical labels from the sample name:
    #   DOGMA_rep  -> 'Rep1' if the name contains 'DIG', otherwise 'Rep2'
    #   DOGMA_cond -> 'Ctrl' if the name contains 'CTRL', otherwise 'Stim'
    embedding_DOGMA = embedding[embedding.obs['sample'].apply(lambda x: 'DOGMA' in x).values.tolist()].copy()
    embedding_DOGMA.obs['DOGMA_rep'] = embedding_DOGMA.obs['sample'].apply(lambda x: 'Rep1' if 'DIG' in x else 'Rep2').astype('category')
    embedding_DOGMA.obs['DOGMA_cond'] = embedding_DOGMA.obs['sample'].apply(lambda x: 'Ctrl' if 'CTRL' in x else 'Stim').astype('category')
    
    # iLISI within the DOGMA subset, once per derived label.
    # NOTE(review): the saved output of this cell contains
    # "... has not enough neighbors." messages, presumably from these subset
    # runs -- check whether the neighbor graph should be recomputed on the
    # subset.
    score_dict[model]['iLISI_DOGMA_replicate'] = scib.me.lisi.ilisi_graph(embedding_DOGMA, batch_key='DOGMA_rep', multiprocessing=True)
    score_dict[model]['iLISI_DOGMA_condition'] = scib.me.lisi.ilisi_graph(embedding_DOGMA, batch_key='DOGMA_cond', multiprocessing=True)
      
    # 10x annotated data specific metrics
    ### asw_label
    # Restrict to the '10k_sorted_nuclei' sample and compute the silhouette
    # score of the 'celltype' labels using the 'embedding' representation.
    embedding_10x = embedding[embedding.obs['sample']== '10k_sorted_nuclei'].copy()
    score = scib.me.silhouette(
        embedding_10x,
        group_key='celltype',
        embed='embedding')
    score_dict[model]['asw_label_10k_sorted_nuclei'] = score
    
    ### nmi
    # opt_louvain is called with inplace=True and cluster_key='cluster'; the
    # nmi call below compares 'cluster' against 'celltype', so this call must
    # come first. The saved output shows it scanning resolutions 0.1-1.0 and
    # printing the NMI for each.
    # force=True presumably allows overwriting an existing 'cluster' column --
    # TODO confirm.
    scib.cl.opt_louvain(
        embedding_10x,
        label_key='celltype',
        cluster_key='cluster',
        plot=False,
        inplace=True,
        force=True
    )

    # NMI between the clustering written above and the annotated cell types.
    score = scib.me.nmi(
        embedding_10x,
        group1='cluster',
        group2='celltype'
    )
    
    score_dict[model]['nmi_10k_sorted_nuclei'] = score
    
    

/tmp/6661713.1.all.q/lisi_clo5fdvj/input.mtx /tmp/6661713.1.all.q/lisi_clo5fdvj/
/tmp/6661713.1.all.q/lisi_4xj862du/input.mtx /tmp/6661713.1.all.q/lisi_4xj862du/
File has no entries. Doing nothing.
7290 has not enough neighbors.
5550 has not enough neighbors.
6792 has not enough neighbors.
7709 has not enough neighbors.
/tmp/6661713.1.all.q/lisi_vwvndw94/input.mtx /tmp/6661713.1.all.q/lisi_vwvndw94/
File has no entries. Doing nothing.
7290 has not enough neighbors.
5550 has not enough neighbors.
6792 has not enough neighbors.
7709 has not enough neighbors.
Clustering...
resolution: 0.1, nmi: 0.7482322334025422
resolution: 0.2, nmi: 0.8126506752755652
resolution: 0.3, nmi: 0.7905772548251082
resolution: 0.4, nmi: 0.7894024625745543
resolution: 0.5, nmi: 0.7877649747371462
resolution: 0.6, nmi: 0.7772751762063956
resolution: 0.7, nmi: 0.7758296982227743
resolution: 0.8, nmi: 0.7618093068583202
resolution: 0.9, nmi: 0.7622435290540109
resolution: 1.0, nmi: 0.7532487887906111
resolution: 1

In [8]:
# Turn the nested {model: {metric: score}} dict into a table with one row per
# model and one column per metric. Despite its name, the frame also holds the
# ASW and NMI scores.
iLISI = pd.DataFrame(score_dict).transpose()

In [9]:
# Expose the model name (currently the row index) as a regular 'Model' column.
# assign() builds a new frame, so re-running this cell never mutates an
# already-displayed object.
iLISI = iLISI.assign(Model=iLISI.index)

In [10]:
# Sanity check: every model name should appear exactly once.
iLISI.loc[:, 'Model'].value_counts()

BAVAE_sample_100_extended_use_case         1
VAE_100_extended_use_case                  1
BAVAE_sample_100_extended_use_case_x5      1
BAVAE_sample_100_extended_use_case_x10     1
BAVAE_sample_100_extended_use_case_x25     1
BAVAE_sample_100_extended_use_case_x50     1
BAVAE_sample_100_extended_use_case_x100    1
Name: Model, dtype: int64

In [11]:
# Display the full score table (one row per model, one column per metric).
iLISI

Unnamed: 0,iLISI_sample,iLISI_DOGMA_replicate,iLISI_DOGMA_condition,asw_label_10k_sorted_nuclei,nmi_10k_sorted_nuclei,Model
BAVAE_sample_100_extended_use_case,0.378749,0.803639,0.744849,0.615471,0.812651,BAVAE_sample_100_extended_use_case
VAE_100_extended_use_case,0.002813,0.002236,0.079759,0.6225,0.840412,VAE_100_extended_use_case
BAVAE_sample_100_extended_use_case_x5,0.465444,0.838364,0.765689,0.614223,0.775028,BAVAE_sample_100_extended_use_case_x5
BAVAE_sample_100_extended_use_case_x10,0.49918,0.852161,0.77767,0.605701,0.760488,BAVAE_sample_100_extended_use_case_x10
BAVAE_sample_100_extended_use_case_x25,0.518073,0.861687,0.825707,0.608753,0.812367,BAVAE_sample_100_extended_use_case_x25
BAVAE_sample_100_extended_use_case_x50,0.525662,0.855177,0.841465,0.606339,0.791595,BAVAE_sample_100_extended_use_case_x50
BAVAE_sample_100_extended_use_case_x100,0.528294,0.856856,0.842479,0.595586,0.773547,BAVAE_sample_100_extended_use_case_x100


In [12]:
# Persist the score table. The row index (model name) is written as the first
# CSV column, in addition to the explicit 'Model' column.
output_csv = "./../Evaluation/batch_removal_scores_publication_r1_20240125.csv"
iLISI.to_csv(output_csv, index=True)
