In [None]:
import numpy as np
import pandas as pd
import scanpy as sc

In [None]:
import seaborn as sns

In [None]:
import matplotlib.pyplot as plt

In [None]:
import os

General settings of Scanpy

In [None]:
# Global scanpy configuration for this notebook.
# Logging verbosity is set to level 4 (NOTE(review): presumably the most detailed level — confirm in the scanpy docs).
sc.settings.verbosity = 4
# Emit scanpy's logging header for this session.
sc.logging.print_header()
# Default figure style for every scanpy plot below: 80 dpi on a white background.
sc.settings.set_figure_params(dpi=80, facecolor='white')

In [None]:
# Running counter used as the numeric prefix of the sweep output folders and PDFs.
# Each sweep cell does `stepcount += 1` before creating its folder, so the first sweep is numbered 4.
stepcount = 3

In [None]:
# Colormap built from seaborn's light palette for 'xkcd:medium blue';
# process() passes it as `color_map` to the per-gene UMAP panels.
umap_cmap = sns.light_palette('xkcd:medium blue', as_cmap = True)

# Declaring the input and output files

In [None]:
# Base name of this analysis; used below to build the scanpy figure directory
# and the path of the input .h5ad file.
name_of_analysis = 'pristina_atlas'

In [None]:
# Directory scanpy uses when it saves figures itself: ./figures/<analysis>_parameter_space
sc.settings.figdir = f'./figures/{name_of_analysis}_parameter_space'

In [None]:
# Load the pre-filtering AnnData object written by an earlier step: ./<analysis>_prefiltering.h5ad
prefiltering_path = f'./{name_of_analysis}_prefiltering.h5ad'
adata = sc.read_h5ad(prefiltering_path)

In [None]:
# Display the summary of the loaded AnnData object as the cell output.
adata

In [None]:
# Keep an untouched copy of the loaded data. process() starts every run from a fresh
# copy of this object, so successive parameter runs do not affect each other.
adata_unprocessed = adata.copy()

In [None]:
def process(minimum_genes_counts, slice_ngenes, slice_ncounts, n_top, k_neighb, princ_comps, res):
    """Run the full filtering -> clustering pipeline once and return a summary figure.

    Every call starts from a fresh copy of the notebook-level ``adata_unprocessed``,
    so calls with different parameters are independent of each other. The function
    also reads the notebook-level ``umap_cmap`` for the gene-expression panels.

    Parameters
    ----------
    minimum_genes_counts
        Threshold passed to ``sc.pp.filter_cells`` twice: once as ``min_counts``
        and once as ``min_genes``.
    slice_ngenes
        Exclusive upper bound on ``obs['n_genes_by_counts']``; cells at or above it are dropped.
    slice_ncounts
        Exclusive upper bound on ``obs['total_counts']``; cells at or above it are dropped.
    n_top
        ``n_top_genes`` for ``sc.pp.highly_variable_genes``.
    k_neighb
        ``n_neighbors`` for ``sc.pp.neighbors``.
    princ_comps
        ``n_pcs`` for ``sc.pp.neighbors``. PCA is computed with 150 components,
        so this must not exceed 150.
    res
        ``resolution`` for ``sc.tl.leiden``.

    Returns
    -------
    matplotlib.figure.Figure
        One row of 9 UMAP panels: 8 marker genes followed by the Leiden clustering,
        whose title reports the number of clusters.

    Raises
    ------
    ValueError
        If ``princ_comps`` is larger than the number of PCA components computed.
    """
    n_pca_comps = 150
    # Fail before the expensive pipeline instead of inside sc.pp.neighbors.
    if princ_comps is not None and princ_comps > n_pca_comps:
        raise ValueError(
            'princ_comps (%s) exceeds the %d PCA components computed by process()'
            % (princ_comps, n_pca_comps)
        )

    adata = adata_unprocessed.copy()
    sc.pl.highest_expr_genes(adata, n_top=20)

    # --- Cell / gene filtering -------------------------------------------------
    # The same threshold is applied to total counts and to detected genes.
    sc.pp.filter_cells(adata, min_counts=minimum_genes_counts)
    sc.pp.filter_cells(adata, min_genes=minimum_genes_counts)
    sc.pp.filter_genes(adata, max_counts=1000000)
    sc.pl.highest_expr_genes(adata, n_top=20, show=False)

    # --- QC metrics ------------------------------------------------------------
    # Flag genes whose name starts with 'MT-' as mitochondrial.
    # NOTE(review): the marker gene IDs below look like 'PrileiEVm...'; this prefix
    # may match nothing in this dataset — confirm.
    adata.var['mt'] = adata.var_names.str.startswith('MT-')
    sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
    sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')

    # Apply both upper bounds in one mask and take an explicit copy, so the in-place
    # preprocessing below operates on real data rather than on a view.
    keep_cells = (adata.obs['n_genes_by_counts'] < slice_ngenes) & (adata.obs['total_counts'] < slice_ncounts)
    adata = adata[keep_cells, :].copy()
    sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')

    # --- Normalisation and feature selection -----------------------------------
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, n_top_genes=n_top)
    sc.pl.highly_variable_genes(adata)

    # Stash the log-normalised data for all genes in .raw before subsetting to the
    # highly variable genes, so genes outside that subset stay available via adata.raw.
    adata.raw = adata
    adata = adata[:, adata.var.highly_variable].copy()
    sc.pp.scale(adata, zero_center=False)

    # --- Embedding and clustering ----------------------------------------------
    sc.tl.pca(adata, svd_solver='arpack', n_comps=n_pca_comps)
    sc.pp.neighbors(adata, n_neighbors=k_neighb, n_pcs=princ_comps)
    sc.tl.umap(adata, min_dist=0.5, spread=1, alpha=1, gamma=1.0)
    sc.tl.leiden(adata, resolution=res)

    # --- Summary figure --------------------------------------------------------
    marker_genes = [
        'PrileiEVm023936t1',
        'PrileiEVm008309t1',
        'PrileiEVm011741t1',
        'PrileiEVm021316t1',
        'PrileiEVm022250t1',
        'PrileiEVm000325t1',
        'PrileiEVm013699t1',
        'PrileiEVm020595t1',
    ]
    # One panel per marker gene plus a final panel for the clustering (9 in total).
    fig, axs = plt.subplots(1, len(marker_genes) + 1, figsize=(50, 5))

    for ax, gene in zip(axs, marker_genes):
        sc.pl.umap(adata, color=gene, title=gene, color_map=umap_cmap, show=False, ax=ax)

    n_of_cl = len(adata.obs['leiden'].cat.categories)
    sc.pl.umap(adata, color='leiden', legend_loc='on data', legend_fontsize=7,
               title=str(n_of_cl) + ' clusters', show=False, ax=axs[-1])
    return fig

In [None]:
# Baseline run with the currently chosen value for every parameter.
mynewfig = process(
    minimum_genes_counts=50,
    slice_ngenes=700,
    slice_ncounts=900,
    n_top=18000,
    k_neighb=45,
    princ_comps=105,
    res=1,
)

In [None]:
# Create the output folder for this analysis.
# makedirs(exist_ok=True) also creates a missing './figures' parent and does not
# raise when the folder already exists, so the notebook can be re-run from the top.
os.makedirs('./figures/pristina_atlas_parameter_space', exist_ok=True)

In [None]:
# Display the baseline summary figure as the cell output.
mynewfig

In [None]:
# Save the baseline summary figure as a PDF in the analysis output folder.
mynewfig.savefig('./figures/pristina_atlas_parameter_space/figure.pdf', format = 'pdf')

# Parameter space exploration

In [None]:
# Sweep `minimum_genes_counts` while every other parameter is held fixed.
step = "minimum_genes_counts"
values_to_try = [30, 40, 50, 60, 70, 80, 90, 100]
stepcount += 1  # NOTE: re-running this cell alone bumps the folder number again
step_dir = os.path.join('./figures/pristina_atlas_parameter_space', str(stepcount) + '_' + step)
# exist_ok=True: the folder is left over from a previous Restart & Run All; do not fail on it.
os.makedirs(step_dir, exist_ok=True)
for value in values_to_try:
    newfig = process(minimum_genes_counts=value, slice_ngenes=700, slice_ncounts=900,
                     n_top=16000, k_neighb=45, princ_comps=95, res=1)
    newfig.savefig(os.path.join(step_dir, f'{stepcount}_{step}_{value}.pdf'), format='pdf')

In [None]:
# Sweep `slice_ngenes` (upper bound on genes per cell) while every other parameter is held fixed.
step = "slice_ngenes"
values_to_try = [300, 400, 500, 600, 700, 800, 900, 1000]
stepcount += 1  # NOTE: re-running this cell alone bumps the folder number again
step_dir = os.path.join('./figures/pristina_atlas_parameter_space', str(stepcount) + '_' + step)
# exist_ok=True: the folder is left over from a previous Restart & Run All; do not fail on it.
os.makedirs(step_dir, exist_ok=True)
for value in values_to_try:
    newfig = process(minimum_genes_counts=50, slice_ngenes=value, slice_ncounts=900,
                     n_top=16000, k_neighb=45, princ_comps=95, res=1)
    newfig.savefig(os.path.join(step_dir, f'{stepcount}_{step}_{value}.pdf'), format='pdf')

In [None]:
# Sweep `slice_ncounts` (upper bound on counts per cell) while every other parameter is held fixed.
step = "slice_ncounts"
values_to_try = [500, 600, 700, 800, 900, 1000, 1100, 1200]
stepcount += 1  # NOTE: re-running this cell alone bumps the folder number again
step_dir = os.path.join('./figures/pristina_atlas_parameter_space', str(stepcount) + '_' + step)
# exist_ok=True: the folder is left over from a previous Restart & Run All; do not fail on it.
os.makedirs(step_dir, exist_ok=True)
for value in values_to_try:
    newfig = process(minimum_genes_counts=50, slice_ngenes=700, slice_ncounts=value,
                     n_top=16000, k_neighb=45, princ_comps=95, res=1)
    newfig.savefig(os.path.join(step_dir, f'{stepcount}_{step}_{value}.pdf'), format='pdf')

In [None]:
# Sweep `n_top` (number of highly variable genes) while every other parameter is held fixed.
step = "n_top"
values_to_try = [4000, 6000, 8000, 10000, 12000, 14000, 18000, 22000]
stepcount += 1  # NOTE: re-running this cell alone bumps the folder number again
step_dir = os.path.join('./figures/pristina_atlas_parameter_space', str(stepcount) + '_' + step)
# exist_ok=True: the folder is left over from a previous Restart & Run All; do not fail on it.
os.makedirs(step_dir, exist_ok=True)
for value in values_to_try:
    newfig = process(minimum_genes_counts=50, slice_ngenes=700, slice_ncounts=900,
                     n_top=value, k_neighb=45, princ_comps=95, res=1)
    newfig.savefig(os.path.join(step_dir, f'{stepcount}_{step}_{value}.pdf'), format='pdf')

In [None]:
# Sweep `k_neighb` (neighbours in the kNN graph) while every other parameter is held fixed.
step = "k_neighb"
values_to_try = [15, 25, 35, 45, 55, 65, 75, 85]
stepcount += 1  # NOTE: re-running this cell alone bumps the folder number again
step_dir = os.path.join('./figures/pristina_atlas_parameter_space', str(stepcount) + '_' + step)
# exist_ok=True: the folder is left over from a previous Restart & Run All; do not fail on it.
os.makedirs(step_dir, exist_ok=True)
for value in values_to_try:
    newfig = process(minimum_genes_counts=50, slice_ngenes=700, slice_ncounts=900,
                     n_top=18000, k_neighb=value, princ_comps=95, res=1)
    newfig.savefig(os.path.join(step_dir, f'{stepcount}_{step}_{value}.pdf'), format='pdf')

In [None]:
# Sweep `princ_comps` (principal components used for the kNN graph) while every other
# parameter is held fixed. process() computes 150 components, so all values stay below that.
step = "princ_comps"
values_to_try = [15, 25, 45, 65, 85, 105, 125, 145]
stepcount += 1  # NOTE: re-running this cell alone bumps the folder number again
step_dir = os.path.join('./figures/pristina_atlas_parameter_space', str(stepcount) + '_' + step)
# exist_ok=True: the folder is left over from a previous Restart & Run All; do not fail on it.
os.makedirs(step_dir, exist_ok=True)
for value in values_to_try:
    newfig = process(minimum_genes_counts=50, slice_ngenes=700, slice_ncounts=900,
                     n_top=18000, k_neighb=45, princ_comps=value, res=1)
    newfig.savefig(os.path.join(step_dir, f'{stepcount}_{step}_{value}.pdf'), format='pdf')