In [9]:
import numpy as np
import scanpy as sc
import cinemaot as co
import matplotlib.colors as colors
import matplotlib.pyplot as plt

import random
import torch
import sklearn
import os

from metrics import calculate_metrics

def set_seed(seed: int) -> None:
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed handed to every generator below.

    Returns:
        None. The function only mutates global RNG state, two cuDNN flags
        (when CUDA is available) and one environment variable.

    Notes:
        scikit-learn has no global seed of its own. The previous
        ``sklearn.utils.check_random_state(seed)`` call only built a fresh
        ``RandomState`` object and discarded it, so it seeded nothing and has
        been removed. Estimators that are called with ``random_state=None``
        draw from NumPy's global generator, which is seeded here; pass
        ``random_state=seed`` explicitly where stricter control is needed.
    """
    # Python's built-in generator.
    random.seed(seed)

    # NumPy's legacy global generator.
    np.random.seed(seed)

    # PyTorch CPU generator, plus every CUDA device when one is present.
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # If using multi-GPU.

        # Ensure deterministic behavior in PyTorch (can slow down computations)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Hash randomization is fixed when the interpreter starts, so this cannot
    # change the running kernel; it is only inherited by child processes
    # started afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)

# Seed the RNGs with a fixed value before anything else in the notebook runs.
set_seed(123)

# Install two "ignore" filters: one for the base Warning class (i.e. every
# warning) and one for FutureWarning specifically.
import warnings
warnings.filterwarnings("ignore", category=Warning)
warnings.filterwarnings("ignore", category=FutureWarning)

# ECCITE dataset

In [5]:
def evaluate_ECCITE(adata, embed, *,
                    library_path="/gpfs/gibbs/project/wang_zuoheng/xx244/R/4.3/"):
    """Run ``calculate_metrics`` on an ECCITE result for three grouping keys.

    The same metric suite is evaluated three times with ``celltype_key="Phase"``
    and a different ``batch_key`` each time: ``'perturbation'``,
    ``'gene_target'`` and ``'replicate'``. A status line is printed before each
    run and a separator after it.

    Args:
        adata: Object forwarded unchanged to ``calculate_metrics``; in this
            notebook it is what ``sc.read_h5ad`` returns.
        embed: Value forwarded as the ``embed`` argument of
            ``calculate_metrics`` (presumably the name of the embedding to
            score -- confirm against ``metrics.calculate_metrics``).
        library_path: Directory prepended to R's ``.libPaths()`` before each
            run. Defaults to the cluster-specific path the notebook was
            written with; pass your own R library directory elsewhere.

    Returns:
        None. Results are whatever ``calculate_metrics`` prints or displays.
    """
    import warnings
    warnings.filterwarnings("ignore")

    # Imported here rather than at module level so that R is only required
    # when an evaluation is actually run.
    import rpy2.robjects as robjects
    import anndata2ri

    # (status message, batch_key) for each of the three evaluations.
    evaluations = (
        ("Evaluating the performance of condition (whether or not perturbed) mixing, phase as cell type",
         'perturbation'),
        ("Evaluating the performance of condition (gene target of CRISPR) mixing, phase as cell type",
         'gene_target'),
        ("Evaluating the performance of batch mixing, phase as cell type",
         'replicate'),
    )

    for description, batch_key in evaluations:
        print(description)
        # The R environment is prepared again before every run, exactly as the
        # original code did. NOTE(review): calculate_metrics may reset the
        # converter (not visible here) -- confirm before hoisting this out.
        anndata2ri.activate()
        robjects.r(f'.libPaths(c("{library_path}", .libPaths()))')
        calculate_metrics(adata, batch_key=batch_key, celltype_key="Phase", all=True,
                          n_neighbors=15, embed=embed)
        print("=" * 20)

In [None]:
# embed_names[k] is the `embed` argument used for model_names[k]; the two
# lists must stay the same length and in the same order.
model_names=["cinema_ot","mixscape","scCAPE","scgen"]
embed_names=['cf','ef','latent','latent']
assert len(model_names) == len(embed_names), "model_names and embed_names must be paired one-to-one"

# Iterate over the pairs directly instead of indexing both lists.
for model_name, embed_name in zip(model_names, embed_names):
    print(model_name)
    # Each model's result is read from ./<model_name>/ECCITE.h5ad.
    adata = sc.read_h5ad(f"./{model_name}/ECCITE.h5ad")
    evaluate_ECCITE(adata, embed_name)

Evaluating the performance of condition (whether or not perturbed) mixing, phase as cell type
LISI---
cLISI, bLISI, LISI_F1 0.9272810895608511 0.02052525161471544 0.04016153269706483
ASW---
asw_label 0.5390674099326134
asw_batch 0.6464528658971308
kBET---
0 labels consist of a single batch or is too small. Skip.
Adding diffusion to step 4
Adding diffusion to step 4
Adding diffusion to step 5
Adding diffusion to step 6
kbet_score 0.8291901086225025
graph_conn 0.9194584123095018
NMI, ARI ---
NMI 0.42181076768240433
ARI 0.5505103402976648
positive and true positive rate---
    ASW_label      ARI       NMI   1-cLISI     bLISI  ASW_batch  \
cf   0.539067  0.55051  0.421811  0.927281  0.020525   0.646453   

    kBET Accept Rate  graph connectivity  PCR_batch  pos rate  true pos rate  \
cf           0.82919            0.919458   0.995806  0.931449       0.805104   

     F1 LISI  
cf  0.040162  
Evaluating the performance of condition (gene target of CRISPR) mixing, phase as cell type
LISI--

# ASD dataset

In [10]:
def evaluate_ASD(adata, embed, *,
                 library_path="/gpfs/gibbs/project/wang_zuoheng/xx244/R/4.3/",
                 celltype_key="CellType"):
    """Run ``calculate_metrics`` on an ASD result for three grouping keys.

    The same metric suite is evaluated three times with a different
    ``batch_key`` each time: ``'Perturbation'``, ``'perturb01'`` and
    ``"Batch"``. A status line is printed before each run and a separator
    after it.

    Args:
        adata: Object forwarded unchanged to ``calculate_metrics``; in this
            notebook it is what ``sc.read_h5ad`` returns.
        embed: Value forwarded as the ``embed`` argument of
            ``calculate_metrics`` (presumably the name of the embedding to
            score -- confirm against ``metrics.calculate_metrics``).
        library_path: Directory prepended to R's ``.libPaths()`` before each
            run. Defaults to the cluster-specific path the notebook was
            written with; pass your own R library directory elsewhere.
        celltype_key: Value forwarded as ``celltype_key``. The recorded run of
            this notebook stopped with ``KeyError: 'CellType'``, so the input
            files may store the annotation under another name; override it
            here instead of editing the function.

    Returns:
        None. Results are whatever ``calculate_metrics`` prints or displays.
    """
    import warnings
    warnings.filterwarnings("ignore")

    # Imported here rather than at module level so that R is only required
    # when an evaluation is actually run.
    import rpy2.robjects as robjects
    import anndata2ri

    # (what is being mixed, batch_key) for each of the three evaluations.
    # NOTE(review): 'perturb01' reads like a binary perturbed/unperturbed flag
    # and 'Perturbation' like the targeted gene, which would make the first two
    # descriptions swapped relative to their keys -- confirm against the data.
    evaluations = (
        ("condition (whether or not perturbed) mixing", 'Perturbation'),
        ("condition (gene target of CRISPR) mixing", 'perturb01'),
        ("batch mixing", "Batch"),
    )

    for description, batch_key in evaluations:
        # The messages used to say "phase as cell type" (copied from the ECCITE
        # evaluation) although the label column here is `celltype_key`.
        print(f"Evaluating the performance of {description}, {celltype_key} as cell type")
        # The R environment is prepared again before every run, exactly as the
        # original code did. NOTE(review): calculate_metrics may reset the
        # converter (not visible here) -- confirm before hoisting this out.
        anndata2ri.activate()
        robjects.r(f'.libPaths(c("{library_path}", .libPaths()))')
        calculate_metrics(adata, batch_key=batch_key, celltype_key=celltype_key, all=True,
                          n_neighbors=15, embed=embed)
        print("=" * 20)

In [11]:
# embed_names[k] is the `embed` argument used for model_names[k]; the two
# lists must stay the same length and in the same order.
model_names=["cinema_ot","mixscape","scCAPE","scgen"]
embed_names=['cf','ef','latent','latent']
assert len(model_names) == len(embed_names), "model_names and embed_names must be paired one-to-one"

# Iterate over the pairs directly instead of indexing both lists.
for model_name, embed_name in zip(model_names, embed_names):
    print(model_name)
    # Each model's result is read from ./<model_name>/ASD.h5ad.
    adata = sc.read_h5ad(f"./{model_name}/ASD.h5ad")
    evaluate_ASD(adata, embed_name)

cinema_ot
Evaluating the performance of condition (whether or not perturbed) mixing, phase as cell type


KeyError: 'CellType'