In [1]:
import os
os.chdir('../')
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

In [2]:
import scanpy as sc
import torch
import scarches as sca
from scarches.dataset.trvae.data_handling import remove_sparsity
import matplotlib.pyplot as plt
import numpy as np
import gdown
import anndata as ad
import pandas as pd

import milopy

INFO:lightning_fabric.utilities.seed:Global seed set to 0
  from .autonotebook import tqdm as notebook_tqdm
 captum (see https://github.com/pytorch/captum).


In [3]:
from uncert_metric import *

In [4]:
sc.settings.set_figure_params(dpi=200, frameon=False)
sc.set_figure_params(dpi=200)
sc.set_figure_params(figsize=(4, 3))
torch.set_printoptions(precision=3, sci_mode=False, edgeitems=7)

# Pancreas normal

In [5]:
condition_key = 'study'
cell_type_key = 'cell_type'
source_adata = sc.read("source/source__pancreas.h5ad")
target_adata = sc.read("target/target__pancreas.h5ad")
surgery_path = "surgery_model/surgery_model_pancreas"

In [6]:
model = sca.models.TRVAE.load(surgery_path, source_adata)

AnnData object with n_obs × n_vars = 10294 × 1000
    obs: 'batch', 'study', 'cell_type', 'size_factors'

INITIALIZING NEW NETWORK..............
Encoder Architecture:
	Input Layer in, out and cond: 1000 128 5
	Hidden Layer 1 in/out: 128 128
	Mean/Var Layer in/out: 128 10
Decoder Architecture:
	First Layer in, out and cond:  10 128 5
	Hidden Layer 1 in/out: 128 128
	Output Layer in/out:  128 1000 



In [7]:
source_adata

AnnData object with n_obs × n_vars = 10294 × 1000
    obs: 'batch', 'study', 'cell_type', 'size_factors'

In [8]:
source_adata.obs["ref_or_query"] = "ref"
target_adata.obs["ref_or_query"] = "query"

In [9]:
combined_emb = ad.concat([source_adata, target_adata])

In [10]:
adata_all_latent = sc.AnnData(model.get_latent(combined_emb.X, combined_emb.obs[condition_key]))
adata_all_latent.obs['cell_type'] = combined_emb.obs[cell_type_key].tolist()
adata_all_latent.obs['batch'] = combined_emb.obs[condition_key].tolist()
adata_all_latent.obs['ref_or_query'] = combined_emb.obs[condition_key].tolist()

In [11]:
combined_emb

AnnData object with n_obs × n_vars = 15681 × 1000
    obs: 'batch', 'study', 'cell_type', 'size_factors', 'ref_or_query'

In [12]:
adata_all_latent

AnnData object with n_obs × n_vars = 15681 × 10
    obs: 'cell_type', 'batch', 'ref_or_query'

In [13]:
adata_all_latent.obs

Unnamed: 0,cell_type,batch,ref_or_query
0,Pancreas Endothelial,Pancreas inDrop,Pancreas inDrop
1,Pancreas Acinar,Pancreas inDrop,Pancreas inDrop
2,Pancreas Acinar,Pancreas inDrop,Pancreas inDrop
3,Pancreas Acinar,Pancreas inDrop,Pancreas inDrop
4,Pancreas Endothelial,Pancreas inDrop,Pancreas inDrop
...,...,...,...
15676,Pancreas Gamma,Pancreas SS2,Pancreas SS2
15677,Pancreas Alpha,Pancreas SS2,Pancreas SS2
15678,Pancreas Delta,Pancreas SS2,Pancreas SS2
15679,Pancreas Alpha,Pancreas SS2,Pancreas SS2


In [14]:
model.get_latent().shape

(10294, 10)

In [15]:
uncertainties, _, _ = classification_uncert_milo(adata_latent = adata_all_latent, cell_type_key="cell_type")

AnnData object with n_obs × n_vars = 15681 × 10
    obs: 'cell_type', 'batch', 'ref_or_query'
    obsm: 'X_trVAE'


  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


       nhood_ixs_refined  nhood_kth_distance
10                     1            1.101127
11                     1            1.170677
24                     1            0.924800
30                     1            0.948773
31                     1            1.054615
...                  ...                 ...
15645                  1            0.940934
15648                  1            1.157476
15649                  1            0.917160
15654                  1            1.062785
15673                  1            1.094246

[1308 rows x 2 columns]


RuntimeError: Install Bioconductor library `'edgeR'` first as `BiocManager::install('edgeR').`

In [None]:
uncertainties

# Pancreas no alpha cells in reference

In [None]:
condition_key = 'study'
cell_type_key = 'cell_type'
source_adata = sc.read("source/source__pancreas_no_alpha.h5ad")
target_adata = sc.read("target/target__pancreas_no_alpha.h5ad")
surgery_path = "surgery_model/surgery_model_pancreas_no_alpha"

In [None]:
model = sca.models.TRVAE.load(surgery_path, source_adata)

In [None]:
combined_emb = ad.concat([source_adata, target_adata])

In [None]:
adata_all_latent = sc.AnnData(model.get_latent(combined_emb.X, combined_emb.obs[condition_key]))
adata_all_latent.obs['cell_type'] = combined_emb.obs[cell_type_key].tolist()
adata_all_latent.obs['batch'] = combined_emb.obs[condition_key].tolist()

In [None]:
uncertainties, _, _ = classification_uncert_milo(adata= combined_emb, adata_latent=adata_all_latent, cell_type_key="cell_type")

In [None]:
uncertainties

# PBMC normal

In [None]:
condition_key = 'batch' 
cell_type_key = 'final_annotation'
source_adata = sc.read("source/source__pbmc.h5ad")
target_adata = sc.read("target/target__pbmc.h5ad")
surgery_path = "surgery_model/surgery_model_pbmc"

In [None]:
model = sca.models.TRVAE.load(surgery_path, source_adata)

In [None]:
combined_emb = ad.concat([source_adata, target_adata])

In [None]:
adata_all_latent = sc.AnnData(model.get_latent(combined_emb.X, combined_emb.obs[condition_key]))
adata_all_latent.obs['cell_type'] = combined_emb.obs[cell_type_key].tolist()
adata_all_latent.obs['batch'] = combined_emb.obs[condition_key].tolist()

In [None]:
uncertainties, _, _ = classification_uncert_milo(adata= combined_emb, adata_latent=adata_all_latent, cell_type_key="cell_type")

In [None]:
uncertainties

# PBMC 10X and Villani

In [None]:
condition_key = 'batch' 
cell_type_key = 'final_annotation'
source_adata = sc.read("source/source__pbmc_10x_villani.h5ad")
target_adata = sc.read("target/target__pbmc_10x_villani.h5ad")
surgery_path = "surgery_model/surgery_model_pbmc_10x_villani"

In [None]:
model = sca.models.TRVAE.load(surgery_path, source_adata)

In [None]:
combined_emb = ad.concat([source_adata, target_adata])

In [None]:
combined_emb.obs.batch.unique()

In [None]:
adata_all_latent = sc.AnnData(model.get_latent(combined_emb.X, combined_emb.obs[condition_key]))
adata_all_latent.obs['cell_type'] = combined_emb.obs[cell_type_key].tolist()
adata_all_latent.obs['batch'] = combined_emb.obs[condition_key].tolist()

In [None]:
uncertainties, _, _ = classification_uncert_milo(adata= combined_emb, adata_latent=adata_all_latent, cell_type_key="cell_type")

In [None]:
uncertainties