# Importing modules and settings

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import scanpy as sc

In [None]:
from matplotlib.pyplot import rc_context

General settings of Scanpy

In [None]:
# Session-wide scanpy configuration: logging level, version header and the
# default look of every figure drawn below.
sc.settings.verbosity = 3  # scanpy levels: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(
    dpi=80,
    facecolor="white",
)


In [None]:
import seaborn as sns

In [None]:
# Light-to-"medium blue" seaborn palette returned as a colormap object; it is
# passed as `cmap` to the marker-gene dot plots further down.
umap_cmap = sns.light_palette(color="xkcd:medium blue", as_cmap=True)

# Declaring the input and output files

In [None]:
name_of_analysis = 'pristina_atlas'

In [None]:
sc.settings.figdir = './figures/'+name_of_analysis+'_analysing_clusters_numbers'

In [None]:
results_file = './'+name_of_analysis+'.h5ad'

In [None]:
# Load the AnnData object from `results_file` rather than from a second,
# hand-built copy of the same path: the final cell writes back to
# `results_file`, so reading and writing can no longer drift apart.
adata = sc.read_h5ad(results_file)

In [None]:
leiden_names = adata.obs.columns[adata.obs.columns.str.contains('leiden')].to_list()

In [None]:
leiden_names

In [None]:
adata.var

In [None]:
adata.obs

In [None]:
adata.obs.columns

In [None]:
with rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata, color='n_counts', legend_loc='on data', legend_fontoutline = 5, title= 'n counts', size = 30,
        frameon=False, add_outline = True, save = '_n_counts')

In [None]:
with rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata, color='n_genes', legend_loc='on data', legend_fontoutline = 5, title= 'n genes', size = 30,
        frameon=False, add_outline = True, save = '_n_genes')

In [None]:
with rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata, color='n_genes_by_counts', legend_loc='on data', legend_fontoutline = 5, title= 'n_genes_by_counts', size = 30,
        frameon=False, add_outline = True, save = '_n_genes_by_counts')

In [None]:
adata.obs['Experiment'].cat.categories

In [None]:
with rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata, color= 'Experiment', groups = 'lib_12', legend_loc='on data', legend_fontoutline = 5, title= 'lib 12', size = 30,
        frameon=False, add_outline = True, save = '_experiment_lib_12')

In [None]:
with rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata, color= 'Experiment', groups = 'lib_21', legend_loc='on data', legend_fontoutline = 5, title= 'lib_21', size = 30,
        frameon=False, add_outline = True, save = '_experiment_lib_21')

In [None]:
with rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata, color= 'Experiment', groups = 'lib_30', legend_loc='on data', legend_fontoutline = 5, title= 'lib_30', size = 30,
        frameon=False, add_outline = True, save = '_experiment_lib_30')

# Plots res 0.5

In [None]:
clusteringlayer = 'leiden_0.5'

In [None]:
with rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata, color=clusteringlayer, legend_loc='on data', legend_fontoutline = 5, title= 'Clustering layer '+str(clusteringlayer), size = 30,
        frameon=False, add_outline = True)

In [None]:
with rc_context({'figure.figsize': (15, 5)}):
    sc.pl.violin(adata, keys = "n_genes" , groupby = clusteringlayer, jitter = False, save = "_n_genes_"+clusteringlayer)

In [None]:
with rc_context({'figure.figsize': (15, 5)}):
    sc.pl.violin(adata, keys = "n_counts" , groupby = clusteringlayer, jitter = False, save = "_n_counts_"+clusteringlayer)

In [None]:
with rc_context({'figure.figsize': (15, 5)}):
    sc.pl.violin(adata, keys = "total_counts" , groupby = clusteringlayer, jitter = False, save = "_total_counts_"+clusteringlayer)

In [None]:
adata.obs.columns

In [None]:
sc.pl.violin(adata, keys = ['n_genes', 'total_counts', 'n_counts'],
            groupby = 'Experiment', log = True, jitter = False, multi_panel = True, rotation = 90,
            save = '_experiment_'+clusteringlayer)

In [None]:
sc.pl.violin(adata, keys = ['n_genes', 'total_counts', 'n_counts'],
            groupby = 'Library', log = True, jitter = False, multi_panel = True, rotation = 90,
            save = '_library_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_dotplot(adata, n_genes=3, key = 'rank_genes_groups_wilcox_'+clusteringlayer, cmap = umap_cmap,
                               values_to_plot = 'scores', save = '_wilcox_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_heatmap(adata, n_genes=3, key = 'rank_genes_groups_wilcox_'+clusteringlayer,
                               save = '_wilcox_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_matrixplot(adata, n_genes=3, key = 'rank_genes_groups_wilcox_'+clusteringlayer,
                               values_to_plot = 'scores', save = '_wilcox_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_heatmap(adata, n_genes=5, key = 'rank_genes_groups_wilcox_'+clusteringlayer, figsize = (15,15))

In [None]:
# Count how many cells of each sample fall into each cluster of the current
# clustering layer and export the table as an Excel sheet in the figure folder.
samples_name = 'Experiment'

# One grouped count replaces the per-category `get_group` loop, which raised
# KeyError for any declared sample category that has no cells. With
# observed=False every declared category of both columns is kept, so empty
# cluster/sample combinations are reported as 0.
# Rows are clusters, columns are samples.
counts_df = (
    adata.obs
    .groupby([clusteringlayer, samples_name], observed=False)
    .size()
    .unstack(level=samples_name, fill_value=0)
)

# Write into scanpy's configured figure folder instead of a hand-built copy of
# that path, and create it first in case no figure has been saved yet.
output_dir = Path(sc.settings.figdir)
output_dir.mkdir(parents=True, exist_ok=True)
counts_df.to_excel(output_dir / f'{clusteringlayer}_{samples_name}.xlsx')

# Plots res 1

In [None]:
clusteringlayer = 'leiden_1'

In [None]:
with rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata, color=clusteringlayer, legend_loc='on data', legend_fontoutline = 5, title= 'Clustering layer '+str(clusteringlayer), size = 30,
        frameon=False, add_outline = True)

In [None]:
with rc_context({'figure.figsize': (15, 5)}):
    sc.pl.violin(adata, keys = "n_genes" , groupby = clusteringlayer, jitter = False, save = "_n_genes_"+clusteringlayer)

In [None]:
with rc_context({'figure.figsize': (15, 5)}):
    sc.pl.violin(adata, keys = "n_counts" , groupby = clusteringlayer, jitter = False, save = "_n_counts_"+clusteringlayer)

In [None]:
with rc_context({'figure.figsize': (15, 5)}):
    sc.pl.violin(adata, keys = "total_counts" , groupby = clusteringlayer, jitter = False, save = "_total_counts_"+clusteringlayer)

In [None]:
adata.obs.columns

In [None]:
sc.pl.violin(adata, keys = ['n_genes', 'total_counts', 'n_counts'],
            groupby = 'Experiment', log = True, jitter = False, multi_panel = True, rotation = 90,
            save = '_experiment_'+clusteringlayer)

In [None]:
sc.pl.violin(adata, keys = ['n_genes', 'total_counts', 'n_counts'],
            groupby = 'Library', log = True, jitter = False, multi_panel = True, rotation = 90,
            save = '_library_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_dotplot(adata, n_genes=3, key = 'rank_genes_groups_wilcox_'+clusteringlayer, cmap = umap_cmap,
                               values_to_plot = 'scores', save = '_wilcox_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_heatmap(adata, n_genes=3, key = 'rank_genes_groups_wilcox_'+clusteringlayer,
                               save = '_wilcox_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_matrixplot(adata, n_genes=3, key = 'rank_genes_groups_wilcox_'+clusteringlayer,
                               values_to_plot = 'scores', save = '_wilcox_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_heatmap(adata, n_genes=5, key = 'rank_genes_groups_wilcox_'+clusteringlayer, figsize = (15,15))

In [None]:
# Count how many cells of each sample fall into each cluster of the current
# clustering layer and export the table as an Excel sheet in the figure folder.
samples_name = 'Experiment'

# One grouped count replaces the per-category `get_group` loop, which raised
# KeyError for any declared sample category that has no cells. With
# observed=False every declared category of both columns is kept, so empty
# cluster/sample combinations are reported as 0.
# Rows are clusters, columns are samples.
counts_df = (
    adata.obs
    .groupby([clusteringlayer, samples_name], observed=False)
    .size()
    .unstack(level=samples_name, fill_value=0)
)

# Write into scanpy's configured figure folder instead of a hand-built copy of
# that path, and create it first in case no figure has been saved yet.
output_dir = Path(sc.settings.figdir)
output_dir.mkdir(parents=True, exist_ok=True)
counts_df.to_excel(output_dir / f'{clusteringlayer}_{samples_name}.xlsx')

# Plots res 1.5

In [None]:
clusteringlayer = 'leiden_1.5'

In [None]:
with rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata, color=clusteringlayer, legend_loc='on data', legend_fontoutline = 5, title= 'Clustering layer '+str(clusteringlayer), size = 30,
        frameon=False, add_outline = True)

In [None]:
with rc_context({'figure.figsize': (15, 5)}):
    sc.pl.violin(adata, keys = "n_genes" , groupby = clusteringlayer, jitter = False, save = "_n_genes_"+clusteringlayer)

In [None]:
with rc_context({'figure.figsize': (15, 5)}):
    sc.pl.violin(adata, keys = "n_counts" , groupby = clusteringlayer, jitter = False, save = "_n_counts_"+clusteringlayer)

In [None]:
with rc_context({'figure.figsize': (15, 5)}):
    sc.pl.violin(adata, keys = "total_counts" , groupby = clusteringlayer, jitter = False, save = "_total_counts_"+clusteringlayer)

In [None]:
adata.obs.columns

In [None]:
sc.pl.violin(adata, keys = ['n_genes', 'total_counts', 'n_counts'],
            groupby = 'Experiment', log = True, jitter = False, multi_panel = True, rotation = 90,
            save = '_experiment_'+clusteringlayer)

In [None]:
sc.pl.violin(adata, keys = ['n_genes', 'total_counts', 'n_counts'],
            groupby = 'Library', log = True, jitter = False, multi_panel = True, rotation = 90,
            save = '_library_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_dotplot(adata, n_genes=3, key = 'rank_genes_groups_wilcox_'+clusteringlayer, cmap = umap_cmap,
                               values_to_plot = 'scores', save = '_wilcox_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_heatmap(adata, n_genes=3, key = 'rank_genes_groups_wilcox_'+clusteringlayer,
                               save = '_wilcox_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_matrixplot(adata, n_genes=3, key = 'rank_genes_groups_wilcox_'+clusteringlayer,
                               values_to_plot = 'scores', save = '_wilcox_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_heatmap(adata, n_genes=5, key = 'rank_genes_groups_wilcox_'+clusteringlayer, figsize = (15,15))

In [None]:
# Count how many cells of each sample fall into each cluster of the current
# clustering layer and export the table as an Excel sheet in the figure folder.
samples_name = 'Experiment'

# One grouped count replaces the per-category `get_group` loop, which raised
# KeyError for any declared sample category that has no cells. With
# observed=False every declared category of both columns is kept, so empty
# cluster/sample combinations are reported as 0.
# Rows are clusters, columns are samples.
counts_df = (
    adata.obs
    .groupby([clusteringlayer, samples_name], observed=False)
    .size()
    .unstack(level=samples_name, fill_value=0)
)

# Write into scanpy's configured figure folder instead of a hand-built copy of
# that path, and create it first in case no figure has been saved yet.
output_dir = Path(sc.settings.figdir)
output_dir.mkdir(parents=True, exist_ok=True)
counts_df.to_excel(output_dir / f'{clusteringlayer}_{samples_name}.xlsx')

# Plots res 2

In [None]:
clusteringlayer = 'leiden_2'

In [None]:
with rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata, color=clusteringlayer, legend_loc='on data', legend_fontoutline = 5, title= 'Clustering layer '+str(clusteringlayer), size = 30,
        frameon=False, add_outline = True)

In [None]:
with rc_context({'figure.figsize': (15, 5)}):
    sc.pl.violin(adata, keys = "n_genes" , groupby = clusteringlayer, jitter = False, save = "_n_genes_"+clusteringlayer)

In [None]:
with rc_context({'figure.figsize': (15, 5)}):
    sc.pl.violin(adata, keys = "n_counts" , groupby = clusteringlayer, jitter = False, save = "_n_counts_"+clusteringlayer)

In [None]:
with rc_context({'figure.figsize': (15, 5)}):
    sc.pl.violin(adata, keys = "total_counts" , groupby = clusteringlayer, jitter = False, save = "_total_counts_"+clusteringlayer)

In [None]:
adata.obs.columns

In [None]:
sc.pl.violin(adata, keys = ['n_genes', 'total_counts', 'n_counts'],
            groupby = 'Experiment', log = True, jitter = False, multi_panel = True, rotation = 90,
            save = '_experiment_'+clusteringlayer)

In [None]:
sc.pl.violin(adata, keys = ['n_genes', 'total_counts', 'n_counts'],
            groupby = 'Library', log = True, jitter = False, multi_panel = True, rotation = 90,
            save = '_library_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_dotplot(adata, n_genes=3, key = 'rank_genes_groups_wilcox_'+clusteringlayer, cmap = umap_cmap,
                               values_to_plot = 'scores', save = '_wilcox_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_heatmap(adata, n_genes=3, key = 'rank_genes_groups_wilcox_'+clusteringlayer,
                               save = '_wilcox_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_matrixplot(adata, n_genes=3, key = 'rank_genes_groups_wilcox_'+clusteringlayer,
                               values_to_plot = 'scores', save = '_wilcox_'+clusteringlayer)

In [None]:
sc.pl.rank_genes_groups_heatmap(adata, n_genes=5, key = 'rank_genes_groups_wilcox_'+clusteringlayer, figsize = (15,15))

In [None]:
# Count how many cells of each sample fall into each cluster of the current
# clustering layer and export the table as an Excel sheet in the figure folder.
samples_name = 'Experiment'

# One grouped count replaces the per-category `get_group` loop, which raised
# KeyError for any declared sample category that has no cells. With
# observed=False every declared category of both columns is kept, so empty
# cluster/sample combinations are reported as 0.
# Rows are clusters, columns are samples.
counts_df = (
    adata.obs
    .groupby([clusteringlayer, samples_name], observed=False)
    .size()
    .unstack(level=samples_name, fill_value=0)
)

# Write into scanpy's configured figure folder instead of a hand-built copy of
# that path, and create it first in case no figure has been saved yet.
output_dir = Path(sc.settings.figdir)
output_dir.mkdir(parents=True, exist_ok=True)
counts_df.to_excel(output_dir / f'{clusteringlayer}_{samples_name}.xlsx')

In [None]:
# Write `adata` to `results_file`. This is the same './<name_of_analysis>.h5ad'
# location the object was read from at the top of the notebook, so the input
# file is overwritten.
adata.write(results_file)