In [None]:
from sctoolbox.utilities import bgcolor

# Marker genes and cell type assignment
<hr style="border:2px solid black">

<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:
%bgcolor PowderBlue

# Column in adata.obs holding the final clustering used for cell type assignment
clustering_col = "clustering"

# Species passed to the SCSA annotation step
species = "human"

# Known marker genes, given as {cell type: [marker gene, ...]}
marker_genes_dict = {
    "Atrial Cardiomyocyte": ["KCNJ3"],
    "Ventricular Cardiomyocyte": ["MYH7"],
    "Endothelium": ["APLN"],
    "Fibroblast": ["DCN"],
    "Macrophages": ["CD163"],
}

# Name of the column that receives the marker-gene-based cell type prediction
celltype_column_name = "marker_pred_celltype"

<hr style="border:2px solid black">

## Loading packages

In [None]:
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)  #no limit to the number of columns shown

import sctoolbox.utilities as utils
import sctoolbox.celltype_annotation as celltype_anno
import sctoolbox.plotting as pl
import sctoolbox.marker_genes as mg

## Loading adata

In [None]:
# Load the AnnData object; the arguments request the output of notebook 4.
adata = utils.load_anndata(is_from_previous_note=True, which_notebook=4)

# Workaround for a scanpy error: make sure adata.uns["log1p"] has a "base"
# entry. Only patch it when a "log1p" entry exists, so that data without that
# entry does not fail right here with a KeyError.
if "log1p" in adata.uns:
    adata.uns["log1p"]["base"] = None

display(adata)

In [None]:
# Prefix for all saved figures: the path stored in adata.uns["infoprocess"],
# with a trailing slash so file names can simply be appended.
anndata_dir = adata.uns["infoprocess"]["Anndata_path"]
figure_path = anndata_dir + "/"

--------------

## Automatic cell type prediction with SCSA

In [None]:
# Rank genes for each group of the chosen clustering column
sc.tl.rank_genes_groups(adata, groupby=clustering_col)

In [None]:
# Run the SCSA cell type annotation for the configured species.
# NOTE(review): presumably this consumes the rank_genes_groups result from the
# previous cell and adds the "SCSA_pred_celltype" column plotted below - confirm.
celltype_anno.run_scsa(adata, species=species)

In [None]:
# UMAP colored by the SCSA prediction, saved as PDF
pl.umap_pub(
    adata,
    color="SCSA_pred_celltype",
    title="Automatic celltype assignment",
    save=figure_path + "SCSA_assignment.pdf",
)

-------

## Automatic markers per cluster using rank_genes_groups

In [None]:
# Rank marker genes per cluster with the sctoolbox helper.
# NOTE(review): presumably this creates the "rank_genes_groups_filtered" entry
# that the dotplot in the next cell reads - confirm.
mg.run_rank_genes(adata, clustering_col)

In [None]:
# Dotplot of the top 15 genes per group from the filtered ranking;
# show=False so the axes can be adjusted before the figure is saved.
dotplot_axes = sc.pl.rank_genes_groups_dotplot(
    adata,
    key="rank_genes_groups_filtered",
    n_genes=15,
    show=False,
)

# Rotate the gene labels and right-align them
main_ax = dotplot_axes["mainplot_ax"]
main_ax.set_xticklabels(main_ax.get_xticklabels(), ha="right", rotation=45)

utils.save_figure(figure_path + "celltype_marker_genes_dotplot.pdf")

-----

## Plot expression of known markers

In [None]:
# Flatten the marker dictionary into (gene, cell type) pairs, keeping the
# dictionary order. A single gene given as a plain string is treated like a
# one-element list.
marker_pairs = [
    (gene, celltype)
    for celltype, celltype_genes in marker_genes_dict.items()
    for gene in ([celltype_genes] if isinstance(celltype_genes, str) else celltype_genes)
]

# Gene names to plot and the matching panel titles "<gene> (<cell type>)"
marker_gene_names = [gene for gene, celltype in marker_pairs]
marker_gene_titles = [f"{gene} ({celltype})" for gene, celltype in marker_pairs]

In [None]:
# One UMAP panel per known marker gene, three panels per row
sc.pl.umap(
    adata,
    color=marker_gene_names,
    title=marker_gene_titles,
    cmap=pl.sc_colormap(),
    ncols=3,
    show=False,
)
utils.save_figure(figure_path + "marker_gene_expression_umap.png")

## Assign celltypes

In [None]:
# Compute the dendrogram over the clusters, then draw the known-marker dotplot
sc.tl.dendrogram(adata, groupby=clustering_col)
pl.marker_gene_clustering(
    adata,
    clustering_col,
    marker_genes_dict,
    save=figure_path + "marker_gene_expression_dotplot.png",
)

In [None]:
# Derive a cluster -> cell type mapping from the known marker genes and show it
cluster2celltype = mg.get_celltype_assignment(
    adata,
    clustering_col,
    marker_genes_dict,
    column_name=celltype_column_name,
)
cluster2celltype

In [None]:
# Optional manual curation: override the automatic assignment of individual
# clusters by editing cluster2celltype before it is mapped onto the cells in
# the next cell. Uncomment and adapt the example below.

#cluster2celltype["1"] = "replacement"

In [None]:
# Map every cell's cluster to its (possibly manually adjusted) cell type.
adjusted_celltypes = adata.obs[clustering_col].map(cluster2celltype)

# Write the result to the configured column: it is the one colored in the
# final plot, so manual changes to cluster2celltype become visible there.
adata.obs[celltype_column_name] = adjusted_celltypes

# Keep the "celltype" column as well, since this cell always filled it before.
adata.obs["celltype"] = adjusted_celltypes

In [None]:
# Final plot of celltypes
ax = pl.umap_pub(adata, color=celltype_column_name, legend_loc="on data",
                 save=figure_path + "celltype_assignment_umap.png")

-------------

## Run DESeq2 between conditions/clusters

In [None]:
# Normalize raw counts across cells
d = sc.pp.normalize_total(adata, layer="raw", inplace=False) # returns a dict
adata.layers["raw_norm"] = d["X"]
adata.layers["raw_norm"] = adata.layers["raw_norm"].ceil().astype(int)

In [None]:
# Adjust which columns to use for DEseq2
sample_col = "sample"
condition_col = "chamber"

In [None]:
# Run DEseq2
deseq_table = mg.run_deseq2(adata, sample_col, condition_col, layer="raw_norm")

In [None]:
# Preview the top of the DESeq2 result table
deseq_table.head(10)

--------------

## Save adata

In [None]:
# Remove the filtered ranking before saving; leaving it in causes an error
# when the filtered names are written.
# The membership check keeps this cell re-runnable: a second run, or a run
# where the entry was never created, no longer raises a KeyError.
if "rank_genes_groups_filtered" in adata.uns:
    del adata.uns["rank_genes_groups_filtered"]

In [None]:
# Show the AnnData object as it will be saved
adata

In [None]:
# Save the annotated AnnData object, registered as the output of notebook 5.
# NOTE(review): the target location is decided inside utils.saving_anndata - confirm.
utils.saving_anndata(adata, current_notebook=5)