# Importing modules and settings

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc

In [None]:
from matplotlib.pyplot import rc_context

General display settings for pandas and Scanpy

In [None]:
# Render floats in pandas output with a thousands separator and two decimals.
pd.options.display.float_format = "{:,.2f}".format

In [None]:
# Allow up to 500 characters per column before pandas truncates the cell text.
pd.set_option('display.max_colwidth', 500)

In [None]:
# Verbosity 4 is the most detailed ("debug") logging level described in the Scanpy settings docs.
sc.settings.verbosity = 4
# Print version information of the installed packages for reproducibility.
sc.logging.print_header()
# Default figure resolution and a white figure background for all Scanpy plots.
sc.settings.set_figure_params(dpi=80, facecolor='white')


# Declaring the input and output files

In [None]:
# Label for this analysis; it is not referenced again in the cells visible here.
name_of_analysis = 'pristina_counts'

In [None]:
# Processed atlas: used below for its .obs annotation columns, its UMAP plot
# and the gene list of its .raw slot.
adata_processed = sc.read_h5ad('pristina_atlas_coloured_subcl.h5ad')

In [None]:
# Unprocessed 10x count matrix. Genes are indexed by gene symbol, and
# cache=True stores a cache file so that later reads are faster.
adata = sc.read_10x_mtx(
    "./pristina/",
    var_names='gene_symbols',
    cache=True)

In [None]:
adata_processed

In [None]:
# Make sure gene names are unique so they can be matched against the processed object by name.
adata.var_names_make_unique()

In [None]:
adata.var_names

In [None]:
adata.var

In [None]:
adata

In [None]:
adata.obs

In [None]:
# Key in adata_processed.obs holding the cluster labels used for plotting and counting below.
clusteringlayer = 'leiden_1.5'

In [None]:
# UMAP of the processed atlas coloured by the chosen clustering layer.
# The figure size is overridden to 12 x 12 only inside this context; cluster
# labels are drawn on top of the data and the axes frame is hidden.
with rc_context({'figure.figsize': (12, 12)}):
    sc.pl.umap(adata_processed, color=clusteringlayer, legend_loc='on data', legend_fontoutline = 5, title= 'Clustering layer '+str(clusteringlayer), size = 30,
        frameon=False, add_outline = True)

# Function to extract counts

In [None]:
def get_counts_per_cluster(adata_unprocessed, adata_processed, clusteringlayer):
    """Sum the unprocessed counts of every gene over the cells of each cluster.

    The unprocessed object is restricted to the cells present in the processed
    object and to the genes present in its ``.raw`` slot (or in its ``.var``
    when no ``.raw`` was stored). The cluster labels are then copied over by
    cell name and the count matrix is summed cluster by cluster.

    Parameters
    ----------
    adata_unprocessed
        The AnnData object just after reading (e.g. with ``sc.read_10x_mtx``).
        Its ``.X`` is expected to hold integer-valued counts.
    adata_processed
        The AnnData object after processing. It provides the cells to keep,
        the genes to keep and the cluster labels.
    clusteringlayer
        Key in ``adata_processed.obs`` that gives the cluster of each cell.

    Returns
    -------
    pandas.DataFrame
        Table with one row per retained gene and one column per cluster
        category, holding the summed counts as 64-bit integers. Categories
        without any cell yield a column of zeros.

    Raises
    ------
    KeyError
        If ``clusteringlayer`` is not a column of ``adata_processed.obs``.
    """
    # Fail early with a clear message, before the expensive slice-and-copy.
    if clusteringlayer not in adata_processed.obs:
        raise KeyError(
            "{!r} is not a column of adata_processed.obs".format(clusteringlayer))

    # Only the gene names are needed, so read them from .raw directly instead
    # of materialising a full AnnData copy; fall back to .var without .raw.
    raw = adata_processed.raw
    processed_genes = (adata_processed.var if raw is None else raw.var).index

    keep_cells = adata_unprocessed.obs.index.isin(adata_processed.obs.index)
    keep_genes = adata_unprocessed.var.index.isin(processed_genes)
    # Copy so that adding the cluster column never touches the caller's object.
    adata_filtered = adata_unprocessed[keep_cells, keep_genes].copy()

    labels = adata_processed.obs[clusteringlayer]
    if not isinstance(labels.dtype, pd.CategoricalDtype):
        # Plain string/integer label columns are accepted as well.
        labels = labels.astype('category')
    # Assignment aligns on the cell names (obs index).
    adata_filtered.obs[clusteringlayer] = labels

    clusters = adata_filtered.obs[clusteringlayer]
    categories = clusters.cat.categories
    genes = adata_filtered.var.index

    summed = {}
    for clust in categories:
        # (n cells in cluster) x (genes) matrix of this cluster
        cluster_matrix = adata_filtered[clusters == clust].X
        # Sparse matrices return a 1 x n_genes np.matrix; flatten to 1-D.
        gene_totals = np.asarray(cluster_matrix.sum(axis=0)).ravel()
        # Convert through pandas with an explicit 64-bit integer type so the
        # result does not depend on the platform's default integer width.
        summed[clust] = pd.Series(gene_totals, dtype='int64').to_numpy()

    # Build the table in one go, with columns in category order.
    return pd.DataFrame(summed, index=genes, columns=categories)
    


In [None]:
# Per-gene counts summed within each cluster of the chosen clustering layer.
counts = get_counts_per_cluster(adata, adata_processed, clusteringlayer)

In [None]:
counts

In [None]:
# Tab-separated export; to_csv does not create missing directories, so the
# target folder has to exist already.
counts.to_csv('./figures/pristina_atlas_counts/Pristina_atlas_counts.tsv', sep="\t")

In [None]:
adata_processed.obs['broad_names']

In [None]:
# Same count table, grouped by the 'broad_names' annotation instead of the clustering layer.
counts_broad = get_counts_per_cluster(adata, adata_processed, 'broad_names')

In [None]:
counts_broad

In [None]:
counts_broad.to_csv('./figures/pristina_atlas_counts/Pristina_atlas_counts_broad.tsv', sep="\t")

In [None]:
# Experiment labels available in the processed object; they drive the per-experiment export below.
adata_processed.obs['Experiment'].cat.categories

In [None]:
# Inspect the subset of the processed object belonging to a single experiment.
adata_processed[adata_processed.obs['Experiment'] == 'lib_12']

In [None]:
# Repeat both exports (clustering layer and 'broad_names') separately for each
# experiment category, writing one pair of TSV files per experiment.
for i in adata_processed.obs['Experiment'].cat.categories:
    # Restrict the processed object to the cells of experiment i.
    adata_processed_e = adata_processed[adata_processed.obs['Experiment'] == i]
    counts_exp = get_counts_per_cluster(adata, adata_processed_e, clusteringlayer)
    # The file name is built by string concatenation, so i must be a str.
    counts_exp.to_csv('./figures/pristina_atlas_counts/Pristina_atlas_counts_exp_'+i+'.tsv', sep="\t")
    counts_exp_broad = get_counts_per_cluster(adata, adata_processed_e, 'broad_names')
    counts_exp_broad.to_csv('./figures/pristina_atlas_counts/Pristina_atlas_counts_broad_exp_'+i+'.tsv', sep="\t")    