In [None]:
import sys
import pandas as pd
import numpy as np
import anndata
import scvi
import scanpy as sc
import matplotlib

# Single source of truth for randomness: reused for scvi-tools here and for the
# Leiden clustering's random_state further down in the notebook.
seed = 10

# Reuse `seed` instead of repeating the literal so the two cannot drift apart.
scvi.settings.seed = seed

# Print the package versions into the notebook output.
sc.logging.print_versions()

# Directory handed to scvi.model.SCVI.load() later in the notebook.
path_to_save = ".../Atlas/human_extension/scVI/A_Ext_II_No1"

In [None]:
# Read the .h5ad file into `adata`; the bare expression on the last line
# displays the object's summary as the cell output.
adata = sc.read(
    ".../Atlas/Atlas_human_extension_II_3000HVG_integrated.h5ad"
)
adata

In [None]:
# UMAP coloured by 'proj' with the default legend placement.
sc.pl.umap(adata, color=['proj'])

# UMAPs coloured by the two annotation columns, labels drawn on the embedding.
for annotation_key in ('annotation_final_level1B', 'annotation_final_level1'):
    sc.pl.umap(adata, color=[annotation_key], legend_loc="on data")

In [None]:
# One UMAP per distinct value of `annotation_final_level1B`; `groups` restricts
# the colouring to that single value on each plot.
celltypes = adata.obs["annotation_final_level1B"].unique().tolist()

for ct in celltypes:
    sc.pl.umap(
        adata,
        color=['annotation_final_level1B'],
        groups=ct,
    )

In [None]:
# One UMAP per distinct value of `proj`; `groups` restricts the colouring to
# that single value on each plot. Note that `celltypes` is re-bound here.
celltypes = adata.obs["proj"].unique().tolist()

for ct in celltypes:
    sc.pl.umap(
        adata,
        color=['proj'],
        groups=ct,
    )

In [None]:
# Leiden clustering at resolution 3.0; labels are stored in
# adata.obs["leiden_scVI_3_0"], seeded with the notebook-wide `seed`.
sc.tl.leiden(
    adata,
    resolution=3.0,
    random_state=seed,
    key_added="leiden_scVI_3_0",
)

In [None]:
# UMAP coloured by the resolution-3.0 Leiden labels, drawn on the embedding.
sc.pl.umap(
    adata,
    color=['leiden_scVI_3_0'],
    legend_loc='on data',
)

In [None]:
# Use a wide figure for the per-cluster violins.
sc.set_figure_params(figsize=(15, 5))

# One violin plot per QC column in adata.obs, grouped by Leiden cluster.
for qc_key in ('nCount_RNA', 'nFeature_RNA', 'percent_mt'):
    sc.pl.violin(adata, keys=qc_key, groupby='leiden_scVI_3_0', rotation=90)

# Switch to a square figure size for the cells that follow.
sc.set_figure_params(figsize=(5, 5))

In [None]:
# QC columns on the UMAP, each with its own upper colour limit (vmax).
qc_vmax = {"nCount_RNA": 10000, "nFeature_RNA": 5000, "percent_mt": 10}

for qc_key, upper_limit in qc_vmax.items():
    sc.pl.umap(adata, color=qc_key, vmax=upper_limit)

In [None]:
# Wilcoxon gene ranking per Leiden cluster, stored under the "wilcoxon3" key.
sc.tl.rank_genes_groups(
    adata,
    'leiden_scVI_3_0',
    method='wilcoxon',
    key_added="wilcoxon3",
)

# Plot the 25 top-ranked genes of each cluster, with independent y-axes.
sc.pl.rank_genes_groups(
    adata,
    key="wilcoxon3",
    n_genes=25,
    sharey=False,
)

In [None]:
# Persist `adata` to disk. NOTE: this is the same path the notebook read from,
# so the input file is overwritten in place.
adata.write(
    ".../Atlas/Atlas_human_extension_II_3000HVG_integrated.h5ad"
)

In [None]:
# Load the saved scVI model from `path_to_save`, attach it to `adata`,
# and keep it off the GPU.
model = scvi.model.SCVI.load(
    path_to_save,
    adata=adata,
    use_gpu=False,
)


In [None]:
# Set the clustering of interest: expose the resolution-3.0 Leiden labels under
# the generic 'leiden' key used by the rest of this cell.
adata.obs['leiden'] = adata.obs['leiden_scVI_3_0']

# Differential expression from the loaded scVI model, grouped by cluster.
de_df = model.differential_expression(groupby="leiden")
# A bare `de_df.head()` in the middle of a cell displays nothing; print it.
print(de_df.head())

# Collect up to five marker genes per cluster from its "<cluster> vs Rest"
# comparison, keeping the row order of `de_df`.
markers = {}
cats = adata.obs.leiden.cat.categories
for c in cats:
    cid = "{} vs Rest".format(c)
    leiden_scVI_df = de_df.loc[de_df.comparison == cid]
    # The former `lfc_mean > 0` filter is implied by `lfc_mean > 0.75`.
    keep = (
        (leiden_scVI_df["lfc_mean"] > 0.75)
        & (leiden_scVI_df["bayes_factor"] > 3)
        & (leiden_scVI_df["non_zeros_proportion1"] > 0.1)
    )
    markers[c] = leiden_scVI_df[keep].index.tolist()[:5]

# Compute the dendrogram and draw the dotplot once, after `markers` is complete.
# These calls used to sit inside the loop, recomputing the dendrogram and
# drawing a partial dotplot for every cluster.
sc.tl.dendrogram(adata, groupby="leiden", use_rep="X_scVI")
sc.pl.dotplot(
    adata,
    markers,
    groupby='leiden',
    dendrogram=True,
    color_map="Blues",
    swap_axes=True,
    use_raw=True,
    standard_scale="var",
)