# Automated annotation

In [1]:
import os
import tempfile

import anndata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scvi
import seaborn as sns
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Fix the scvi-tools seed setting, then record which library version produced
# the outputs saved in this notebook.
scvi.settings.seed = 0
scvi_version = scvi.__version__
print("Last run with scvi-tools version:", scvi_version)

Seed set to 0


Last run with scvi-tools version: 1.2.0


In [3]:
# Plot and compute configuration. Statement order is kept as-is: both scanpy and
# seaborn write matplotlib rcParams, so the later call wins for any they share.
sc.set_figure_params(figsize=(6, 6), frameon=False)
sns.set_theme()
# Set PyTorch's float32 matmul precision mode to "high".
torch.set_float32_matmul_precision("high")
# Left disabled: no temporary directory is created in the visible cells.
# save_dir = tempfile.TemporaryDirectory()

# Inline figures: white facecolor when rendered, "retina" output format.
%config InlineBackend.print_figure_kwargs={"facecolor": "w"}
%config InlineBackend.figure_format="retina"

In [4]:
# Load the reference atlas as an AnnData object.
# The location can be overridden with the REF_ATLAS_PATH environment variable so
# the notebook is not tied to one machine's directory layout; when the variable
# is unset, the original absolute path is used unchanged.
ref_path = os.environ.get("REF_ATLAS_PATH", "/home/sources/atlases/SRP398011.h5ad")
ref = sc.read(filename=ref_path)
# Bare expression so the AnnData summary is shown as the cell output.
ref

AnnData object with n_obs × n_vars = 22882 × 53678
    obs: 'Orig.ident', 'nCount_RNA', 'nFeature_RNA', 'Percent.mt', 'Seurat_clusters', 'Celltype', 'Dataset', 'Tissue', 'Organ', 'Condition', 'Genotype', 'Libraries', 'ACE'
    var: 'features'
    obsm: 'X_umap'

In [15]:
# Count the distinct values present in the "Libraries" obs column.
library_column = ref.obs["Libraries"]
library_column.nunique()

1

In [5]:
# Read the L1 and L2 annotation files; both paths are relative to the kernel's
# working directory.
l1 = sc.read_h5ad("L1_annotation.h5ad")
l2 = sc.read_h5ad("L2_annotation.h5ad")

In [17]:
# Inspect the reference gene identifiers.
# NOTE(review): the recorded output lists 2000 genes, while the load cell reports
# 53678 and the only visible subsetting (n_top_genes=6000) comes later. This
# output likely reflects out-of-order execution; re-run from a fresh kernel.
ref.var_names

Index(['AT1G01120', 'AT1G01610', 'AT1G01620', 'AT1G01680', 'AT1G01900',
       'AT1G02065', 'AT1G02205', 'AT1G02220', 'AT1G02310', 'AT1G02335',
       ...
       'AthLNC024295', 'AthLNC024305', 'AthLNC024543', 'AthLNC024612',
       'AthLNC024666', 'AthLNC024729', 'AthLNC024776', 'AthLNC024832',
       'AthLNC024979', 'ATMG00180'],
      dtype='object', length=2000)

In [18]:
# Inspect the L1 gene identifiers, for comparison with the reference's before
# the gene sets are intersected further down.
l1.var_names

Index(['ATCG00010', 'ATCG00030', 'ATCG00060', 'ATCG00090', 'ATCG00100',
       'ATCG00110', 'ATCG00200', 'ATCG00230', 'ATCG00240', 'ATCG00250',
       ...
       'AT5G67540', 'AT5G67560', 'AT5G67570', 'AT5G67580', 'AT5G67590',
       'AT5G67600', 'AT5G67610', 'AT5G67620', 'AT5G67630',
       'TRV-PDS3-transcript'],
      dtype='object', length=16396)

In [20]:
# Keep the top 6000 highly variable genes of the reference, selected per
# "Orig.ident" batch. `subset=True` shrinks `ref` in place, so re-running this
# cell operates on the already reduced object rather than on the loaded atlas.
# NOTE(review): no `flavor` is passed; per the scanpy docs the default flavor
# expects logarithmized data in `ref.X` — confirm that holds for this atlas.
sc.pp.highly_variable_genes(ref, n_top_genes=6000, batch_key="Orig.ident", subset=True)

# Genes present in both the (now reduced) reference and each of l1 / l2.
shared_genes_l1 = l1.var_names.intersection(ref.var_names)
shared_genes_l2 = l2.var_names.intersection(ref.var_names)

# Fail early with a clear message instead of silently producing objects with
# zero genes when the identifier schemes do not overlap.
for query_name, shared_genes in (("l1", shared_genes_l1), ("l2", shared_genes_l2)):
    if len(shared_genes) == 0:
        raise ValueError(
            f"{query_name} shares no genes with the reference after HVG selection; "
            "check that both use the same gene identifier scheme."
        )

# Restrict l1 / l2 to the shared genes; `.copy()` turns the sliced views into
# standalone AnnData objects.
l1 = l1[:, shared_genes_l1].copy()
l2 = l2[:, shared_genes_l2].copy()

In [None]:
# Register `ref` for scVI, using "Orig.ident" as the batch key and the "counts"
# layer as the expression source.
# NOTE(review): the AnnData summary printed when `ref` was loaded lists no
# layers — confirm a "counts" layer exists before this cell runs.
scvi.model.SCVI.setup_anndata(
    ref,
    batch_key="Orig.ident",
    layer="counts",
)