In [None]:
from sctoolbox.utilities import bgcolor

# Marker genes and cell type assignment
<hr style="border:2px solid black">

<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:
%bgcolor PowderBlue

#Final clustering column to use for cell type assignment
clustering_col = "clustering"

#SCSA options
species = "human"

#Known marker genes
marker_genes_dict = {"Atrial Cardiomyocyte": ["KCNJ3"],
                     "Ventricular Cardiomyocyte": ["MYH7"],
                     "Endothelium": ["APLN"],
                     "Fibroblast": ["DCN"],
                     "Macrophages": ["CD163"]
                    }

#Name of column to add with the marker gene predicted celltype
celltype_column_name = "marker_pred_celltype"

<hr style="border:2px solid black">

## Loading packages

In [None]:
# Third-party and sctoolbox modules used throughout this notebook
import scanpy as sc
import pandas as pd
pd.set_option('display.max_columns', None)  # no limit to the number of columns shown when displaying DataFrames
import sctoolbox.utilities as utils
import sctoolbox.tools as tools
import sctoolbox.plotting as pl
# Load settings from config.yaml using key "05"
# NOTE(review): presumably the section belonging to this notebook - confirm against config.yaml
utils.settings_from_config("config.yaml", key="05")

## Loading adata

In [None]:
# Load the AnnData object from "anndata_4.h5ad"
# NOTE(review): presumably the output of the previous notebook - confirm
adata = utils.load_h5ad("anndata_4.h5ad")
display(adata)  # show a summary of the loaded object as cell output

## Check for custom marker genes

In [None]:
# Pass the user-supplied marker lists through utils.check_marker_lists and keep its return value.
# NOTE(review): presumably removes genes that are not present in adata - confirm in the sctoolbox docs.
# The name is rebound, so re-running this cell operates on the already-checked dict.
marker_genes_dict = utils.check_marker_lists(adata, marker_genes_dict)

--------------

## Automatic markers per cluster using rank_genes_groups

In [None]:
# Identify markers per cluster (adjust group fraction and fold change to filter genes)
# NOTE(review): the cells below read the key "rank_genes_groups_filtered", which is
# presumably written by this call - confirm against tools.run_rank_genes.
tools.run_rank_genes(adata, clustering_col,
                     min_in_group_fraction=0.25,
                     min_fold_change=0.5,
                     max_out_group_fraction=0.8)

In [None]:
# Plot dotplot of markers from the filtered ranking (n_genes=15) and save it as
# marker_genes_dotplot_<clustering_col>.pdf.
# The return value is bound to `_` so it is not echoed as cell output.
_ = pl.rank_genes_plot(adata, key="rank_genes_groups_filtered", n_genes=15, 
                       save=f"marker_genes_dotplot_{clustering_col}.pdf")

In [None]:
# Write marker genes to table: collect the filtered ranking into marker_tables and
# export it to rank_genes_<clustering_col>.xlsx.
# marker_tables is indexed by cluster name in the next cell.
marker_tables = tools.get_rank_genes_tables(adata, out_group_fractions=True,
                                            key="rank_genes_groups_filtered",
                                            save_excel=f"rank_genes_{clustering_col}.xlsx")

In [None]:
# Markers for cluster "1" (change key to access markers for other clusters)
# Assumes marker_tables has an entry named "1"; .head() limits the display to the first rows.
marker_tables["1"].head()

---------

## Automatic cell type prediction with SCSA

In [None]:
# Run SCSA on adata for the species configured at the top of the notebook.
# NOTE(review): the next cell colors the UMAP by "SCSA_pred_celltype", presumably
# written by this call - confirm against tools.run_scsa.
tools.run_scsa(adata, species=species)

In [None]:
# UMAP colored by the SCSA prediction, saved as SCSA_assignment.pdf
pl.umap_pub(adata, color="SCSA_pred_celltype", title="Automatic celltype assignment", 
            save="SCSA_assignment.pdf")

-------

## Plot expression of known markers

In [None]:
# Flatten marker_genes_dict into (gene, panel title) pairs in dict order.
# A value given as a bare string is treated as a single-gene list.
labelled_markers = [
    (gene, f"{gene} ({celltype})")
    for celltype, genes in marker_genes_dict.items()
    for gene in ([genes] if isinstance(genes, str) else genes)
]

# Parallel lists: gene names to plot and the matching "<gene> (<cell type>)" titles
marker_gene_names = [gene for gene, _ in labelled_markers]
marker_gene_titles = [title for _, title in labelled_markers]

In [None]:
# One UMAP panel per known marker gene, titled "<gene> (<cell type>)", three panels per row.
# show=False so that the figure can still be saved by the call below.
sc.pl.umap(adata, color=marker_gene_names, title=marker_gene_titles, cmap=pl.sc_colormap(), ncols=3, show=False)
# NOTE(review): _save_figure has a leading underscore, i.e. it is a private sctoolbox helper
# and may change without notice - consider a public alternative if one exists.
pl._save_figure("marker_gene_expression_umap.png")

### Assign cell types based on markers

In [None]:
# Compute a dendrogram for the clustering column, then plot the known markers per cluster
# and save the figure as marker_gene_expression_dotplot.png.
# NOTE(review): the dendrogram is presumably consumed by marker_gene_clustering - confirm.
sc.tl.dendrogram(adata, clustering_col)
_ = pl.marker_gene_clustering(adata, clustering_col, marker_genes_dict, 
                              save="marker_gene_expression_dotplot.png")

In [None]:
# Derive a cluster -> cell type mapping from the known markers.
# column_name=celltype_column_name is presumably the adata.obs column the call writes - confirm.
# The returned mapping is shown as cell output so it can be reviewed before use.
cluster2celltype = tools.get_celltype_assignment(adata, clustering_col, marker_genes_dict, column_name=celltype_column_name)
cluster2celltype

In [None]:
# Adjust cluster2celltype if needed: uncomment and edit the line below to override
# the cell type assigned to a single cluster (the key is the cluster name).

#cluster2celltype["1"] = "replacement"

In [None]:
# Write the (possibly manually adjusted) cluster -> cell type mapping to the column
# named by celltype_column_name. The final plots color by that column, so writing
# only to a hard-coded "celltype" column would leave manual adjustments invisible there.
adata.obs[celltype_column_name] = adata.obs[clustering_col].map(cluster2celltype)

# Keep the fixed-name "celltype" column as well, for anything that already reads it.
adata.obs["celltype"] = adata.obs[celltype_column_name]

In [None]:
# Final plot of celltypes, colored by the column named in celltype_column_name.
# First version: labels drawn on the data (legend_loc="on data").
ax = pl.umap_pub(adata, color=celltype_column_name, legend_loc="on data",
                 save="celltype_assignment_umap_ondata.png")

# Second version: same coloring, without legend_loc, saved under a separate file name.
ax = pl.umap_pub(adata, color=celltype_column_name,
                 save="celltype_assignment_umap_legend.png")

-------------

## Run DESeq2 between conditions/clusters

In [None]:
# Normalize raw counts across cells. With inplace=False the result is returned
# as a dict instead of being written to adata; its "X" entry is the normalized matrix.
normalized = sc.pp.normalize_total(adata, layer="raw", inplace=False)

# Round the normalized values up and cast them to integers, then store the result as
# the "raw_norm" layer, which is the layer handed to tools.run_deseq2 further down.
adata.layers["raw_norm"] = normalized["X"].ceil().astype(int)

In [None]:
%bgcolor PowderBlue

# Adjust which columns to use for DESeq2
# NOTE(review): both are presumably column names in adata.obs - confirm against tools.run_deseq2
sample_col = "sample"  # passed to tools.run_deseq2 as the sample column
condition_col = "chamber"  # passed to tools.run_deseq2 as the condition column

In [None]:
# Run DESeq2 on the "raw_norm" layer, using the sample and condition columns chosen above
deseq_table = tools.run_deseq2(adata, sample_col, condition_col, layer="raw_norm")

In [None]:
# Show the first 10 rows of the DESeq2 result
deseq_table.head(10)

--------------

## Save adata

In [None]:
# Fix error when saving filtered rank gene names: remove every adata.uns entry whose
# key starts with "rank_genes_" and is followed somewhere by "_filtered".
import re

filtered_key_pattern = re.compile("rank_genes_.*_filtered")

# Collect the matching keys first so adata.uns is not modified while being iterated.
keys_to_drop = [name for name in adata.uns.keys() if filtered_key_pattern.match(name)]
for uns_key in keys_to_drop:
    del adata.uns[uns_key]

In [None]:
# Display the final object before it is saved
adata

In [None]:
# Save the annotated object as "anndata_5.h5ad"
# NOTE(review): presumably the input of the next notebook - confirm
utils.save_h5ad(adata, "anndata_5.h5ad")