# Importing modules and settings

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc

In [None]:
from matplotlib.pyplot import rc_context

In [None]:
import seaborn as sns

In [None]:
import scipy as sp

General display and logging settings (pandas and Scanpy)

In [None]:
# Render floats in pandas output with thousands separators and four decimals.
pd.set_option("display.float_format", "{:,.4f}".format)

In [None]:
# Scanpy logging and figure defaults used by every plot in this notebook.
# Verbosity 4 is the most detailed level on Scanpy's documented 0-4 scale
# (errors, warnings, info, hints, debug).
sc.settings.verbosity = 4
# Log the versions of the packages in use, so the run can be reproduced.
sc.logging.print_header()
# 80 dpi figures on a white background.
sc.settings.set_figure_params(dpi=80, facecolor='white')


In [None]:
# Sequential light-to-blue colormap; reused for the marker-gene dot plot below.
umap_cmap = sns.light_palette(
    'xkcd:medium blue',
    as_cmap=True,
)

# Declaring the input and output files

In [None]:
# Load the full atlas from disk; this object is used for the overview plot
# and is later replaced by its `.raw` contents.
adata = sc.read_h5ad(
    filename='pristina_atlas.h5ad',
)

In [None]:
# Display the AnnData summary to check what was loaded.
adata

In [None]:
# Name of the `adata.obs` column holding the cluster labels used throughout
# (plots, per-cluster means, cell counts and the final filter).
clusteringlayer = "leiden_1.5"

In [None]:
# Overview UMAP coloured by the chosen clustering, with the cluster labels
# drawn directly on the embedding. The figure size is changed only inside
# this `with` block.
with rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(
        adata,
        color=clusteringlayer,
        title=f'Clustering layer {clusteringlayer!s}',
        legend_loc='on data',
        legend_fontoutline=5,
        size=30,
        add_outline=True,
        frameon=False,
    )

In [None]:
# Rebind `adata` to an AnnData built from `.raw`, so the per-cluster means
# computed below use the genes and values stored there. The processed object
# is no longer reachable after this line; the file is read again further down
# before the final filtering.
# NOTE(review): fails with AttributeError if the loaded file has no `.raw`
# (i.e. `adata.raw` is None) — confirm the input always carries it.
adata = adata.raw.to_adata()

In [None]:
# Display the summary again to confirm the shape after switching to `.raw`.
adata

In [None]:
# Mean expression of every gene within each cluster of `clusteringlayer`.
# Result: one row per cluster category, one column per gene in `adata.var_names`.
#
# The frame is created with an explicit float dtype. Without it pandas builds
# an object-typed frame, so the means are stored as generic Python objects and
# later numeric work (mean, comparison, float formatting) goes through object
# fallbacks.
cluster_names = adata.obs[clusteringlayer].cat.categories
mean_exp = pd.DataFrame(index=cluster_names, columns=adata.var_names, dtype=float)

for cluster in cluster_names:
    in_cluster = adata.obs[clusteringlayer].isin([cluster])
    # On a sparse `X`, `.mean(axis=0)` returns a 1 x n_genes `numpy.matrix`;
    # flatten it to a plain 1-D array so it is assigned as a single row.
    # For a dense `X` this is a no-op.
    mean_exp.loc[cluster] = np.asarray(adata[in_cluster, :].X.mean(axis=0)).ravel()

In [None]:
# Display the cluster-by-gene table of mean expression.
mean_exp

In [None]:
# Gene identifiers of the marker genes used to score clusters.
# Names noted by the author: nanos, vasa, piwi, pcna, mcm2
# (presumably in the same order as the identifiers below — confirm).
stemgenes = [
    'PrileiEVm016887t1',
    'PrileiEVm004300t1',
    'PrileiEVm003567t1',
    'PrileiEVm016982t1',
    'PrileiEVm003521t1',
]

In [None]:
# Dot plot of the marker genes per cluster: genes on one axis (axes swapped),
# clusters ordered by a dendrogram, coloured with the blue colormap.
sc.pl.dotplot(
    adata,
    stemgenes,
    groupby=clusteringlayer,
    cmap=umap_cmap,
    swap_axes=True,
    dendrogram=True,
)

In [None]:
# Display the per-cluster means restricted to the marker genes.
mean_exp[stemgenes]

In [None]:
# One score per cluster: the average of the marker genes' mean expression,
# ordered from the highest-scoring cluster to the lowest.
ranked_exp = (
    mean_exp[stemgenes]
    .mean(axis=1)
    .sort_values(ascending=False)
)

In [None]:
# Display the clusters ranked by their marker-gene score.
ranked_exp

In [None]:
# Clusters whose marker-gene score is strictly above 0.05, kept in ranked order.
top_exp = ranked_exp.loc[ranked_exp > 0.0500].index.tolist()

In [None]:
# Display the clusters that passed the expression threshold.
top_exp

In [None]:
# Number of observations (cells) per cluster, largest cluster first.
# The sort arguments are pandas' defaults, spelled out because the variable
# name relies on the ranking.
ranked_counts = adata.obs[clusteringlayer].value_counts(
    sort=True,
    ascending=False,
)

In [None]:
# Display the cluster sizes.
ranked_counts

In [None]:
# Clusters with strictly fewer than 175 observations (cells).
bot_counts = ranked_counts.loc[ranked_counts < 175].index.tolist()

In [None]:
# Clusters to drop: those that are both high in marker-gene score (`top_exp`)
# and small in cell count (`bot_counts`).
# This is an ordered filter over `top_exp` rather than list(set & set).
# It selects the same clusters, but keeps the expression ranking and gives the
# same order on every run. Iteration order of a set of strings depends on
# Python's per-process hash seed.
_small_clusters = set(bot_counts)
excluded = [cluster for cluster in top_exp if cluster in _small_clusters]

In [None]:
# Display the clusters selected for removal.
excluded

In [None]:
# Read the atlas from disk again: `adata` was rebound to the `.raw` contents
# earlier, and the filtering below is applied to the object as stored in the file.
adata = sc.read_h5ad(
    filename='pristina_atlas.h5ad',
)

In [None]:
# Preview the subset of cells belonging to the excluded clusters.
# The result is only displayed, not assigned, so `adata` is unchanged.
adata[adata.obs[clusteringlayer].isin(excluded)]

In [None]:
# Keep every cell whose cluster is NOT in `excluded`; `.copy()` turns the
# subset into an independent object rather than a view of `adata`.
adata_excluded_removed = adata[
    ~adata.obs[clusteringlayer].isin(excluded)
].copy()

In [None]:
# Display the summary of the filtered object to check how many cells remain.
adata_excluded_removed

In [None]:
# Save the filtered atlas as an .h5ad file (`write` is AnnData's shorthand
# for `write_h5ad`).
adata_excluded_removed.write_h5ad('pristina_atlas_cut_PAGA.h5ad')