In [None]:
import sys
import pandas as pd
import numpy as np
import anndata
import scvi
import scanpy as sc
import matplotlib
import os

# Single source of truth for the random seed used in this notebook.
seed = 10

# Hand the same value to scvi-tools so changing `seed` above is enough
# (previously the literal was duplicated and the two could drift apart).
scvi.settings.seed = seed

# Record the package versions of this session alongside the results.
sc.logging.print_versions()



In [None]:
# Cell types to process. For each one, a trained scVI model directory and a
# subclustered AnnData file are read from the paths built inside the loop.
celltypes = ["Podo"]

# Alternative: process every annotated cell type. NOTE(review): `adata_raw` is
# not defined in the cells visible here - load it first before enabling this.
#celltypes = adata_raw.obs.annotation_level0.unique().tolist()

# adata.obs column holding the clustering that every plot and test below uses.
cluster_key = 'leiden_scVI_3_0'

# QC covariates to inspect, mapped to the colour-scale cap used on the UMAP.
qc_vmax = {"nCount_RNA": 10000, "nFeature_RNA": 5000, "percent_mt": 10}

# A gene counts as a cluster marker only if its "<cluster> vs Rest" scVI
# result exceeds all three thresholds; at most `n_markers_per_cluster` are kept.
min_lfc_mean = 0.75
min_bayes_factor = 3
min_nonzero_fraction = 0.1
n_markers_per_cluster = 5

for celltype in celltypes:

    path_to_save = f'.../Atlas/human_extension/Cleaning_II/Sub_cleaning/{celltype}/scVI'

    path_to_save_anndata = f'.../Atlas/human_extension/Cleaning_II/Sub_cleaning/{celltype}/{celltype}_subcluster.h5ad'

    adata = sc.read(path_to_save_anndata)
    model = scvi.model.SCVI.load(path_to_save, adata=adata, use_gpu=False)

    # --- UMAP overview: project of origin, clusters, then QC covariates ---
    sc.pl.umap(adata, color=['proj'])

    sc.pl.umap(adata, color=[cluster_key], legend_loc='on data')

    for qc_key, vmax in qc_vmax.items():
        sc.pl.umap(adata, color=qc_key, vmax=vmax)

    # --- Per-cluster QC distributions (wide figures so all clusters fit) ---
    sc.set_figure_params(figsize=(15,5))

    for qc_key in qc_vmax:
        sc.pl.violin(adata, keys=qc_key, groupby=cluster_key, rotation=90)

    sc.set_figure_params(figsize=(5,5))

    # --- Wilcoxon marker ranking, stored under adata.uns["wilcoxon3"] ---
    sc.tl.rank_genes_groups(adata, cluster_key, method='wilcoxon', key_added = "wilcoxon3")
    sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False, key = "wilcoxon3")

    # Persist the Wilcoxon results by overwriting the input file. This happens
    # before the temporary 'leiden' column below is added, so that column is
    # not saved.
    adata.write(path_to_save_anndata)

    # --- scVI differential expression: each cluster vs. the rest ---
    # set clustering of interest; `model` was loaded with this same `adata`
    # object, so the new obs column is visible to it.
    adata.obs['leiden'] = adata.obs[cluster_key]
    # get marker genes
    de_df = model.differential_expression(
        groupby="leiden")

    markers = {}
    for cluster in adata.obs['leiden'].cat.categories:
        cluster_de = de_df.loc[de_df.comparison == f"{cluster} vs Rest"]
        is_marker = (
            (cluster_de["lfc_mean"] > min_lfc_mean)
            & (cluster_de["bayes_factor"] > min_bayes_factor)
            & (cluster_de["non_zeros_proportion1"] > min_nonzero_fraction)
        )
        # Keep the first few passing genes, in the row order of `de_df`.
        top_genes = cluster_de.index[is_marker].tolist()[:n_markers_per_cluster]
        # Clusters without any passing gene are left out so the dot plot does
        # not receive empty gene groups.
        if top_genes:
            markers[cluster] = top_genes

    # Build the dendrogram and draw the dot plot once, after all clusters have
    # contributed their markers (previously this ran on every loop iteration
    # with a partially filled `markers` dict).
    if markers:
        # presumably "X_scVI" is the scVI latent representation in adata.obsm
        # - TODO confirm it is stored in the saved file.
        sc.tl.dendrogram(adata, groupby="leiden", use_rep="X_scVI")
        sc.pl.dotplot(
            adata,
            markers,
            groupby='leiden',
            dendrogram=True,
            color_map="Blues",
            swap_axes=True,
            use_raw=True,
            standard_scale="var",
        )
    else:
        print(f"{celltype}: no cluster has genes passing the marker thresholds; skipping dot plot")