In [None]:
from sctoolbox.utilities import bgcolor

# Cell type annotation
<hr style="border:2px solid black">

<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:
%bgcolor PowderBlue

# User input: general settings for the cell type annotation.

# Final clustering column to use for cell type assignment
# (handed to the annotation step as its clustering column).
clustering_col = "leiden"

# Name of the column to add with the final cell type annotation
# (the column returned by the annotation step is renamed to this in adata.obs).
celltype_column_name = "pred_celltype"

# List of marker list paths.
# If left as None (or empty), marker lists are assembled from the MarkerRepo
# further below, using the MarkerRepo settings of this notebook.
marker_lists = None

Fill out the following cell only if no marker list has been specified and you wish to assemble marker lists using the MarkerRepo.

In [None]:
%bgcolor Yellow

# User input: settings that are only used when marker lists are assembled from the MarkerRepo.

# Assemble marker lists from MarkerRepo using specified search terms (values) for targeted columns (keys).
# If None, marker lists can be interactively assembled.
column_specific_terms = {"Organism name": "human", "Source": "panglao.se"}

# File name of the assembled marker list
file_name = "marker_list"

# Specify whether the index of the .var table contains Ensembl IDs (True) or gene symbols (False)
ensembl = False

# The style of the marker list: "two_column" or "score", which adds an extra column with a marker weight
style = "two_column"

# Path of the MarkerRepo
# NOTE(review): this is an absolute, user-specific path; adjust it to where the MarkerRepo lives on your system.
repo_path = "/mnt/workspace/mkessle/projects/annotate_by_marker_and_features"

<hr style="border:2px solid black">

## Loading packages

In [None]:
import scanpy as sc
import pandas as pd
pd.set_option('display.max_columns', None)  #no limit to the number of columns shown
import sctoolbox.utilities as utils
from sctoolbox.tools import celltype_annotation
import sctoolbox.plotting as pl
# utils.settings_from_config("config.yaml", key="05")

%load_ext autoreload
%autoreload 2

In [None]:
# The MarkerRepo wrappers are required below to assemble marker lists and to
# run the annotation, so fail early with an actionable message if the package
# cannot be imported.
try:
    import markerrepo.wrappers as wrap
except ModuleNotFoundError as err:
    # Chain explicitly: the traceback then reports the failed import as the
    # direct cause instead of "another exception occurred during handling",
    # and the name of the module that is actually missing stays visible.
    raise ModuleNotFoundError("Please install the latest MarkerRepo version.") from err

--------------

## Loading adata

In [None]:
# Read the AnnData object that is going to be annotated ...
adata = utils.load_h5ad(
    "/mnt/workspace/mkessle/master/refdata/hs.h5ad"
)

# ... and show its summary as a quick sanity check.
display(adata)

--------------

## Create suitable marker lists

The paths of the marker lists are stored in the <b>marker_lists</b> variable and serve as input for the cell type annotation in the next cell. If the index of adata.var contains Ensembl IDs, set <b>ensembl=True</b>; otherwise, gene symbols are used.

In [None]:
# Only assemble marker lists from the MarkerRepo when none were provided
# in the input section (marker_lists is None or empty).
if not marker_lists:
    marker_lists = wrap.create_marker_lists(
        organism=None,
        repo_path=repo_path,
        style=style,
        file_name=file_name,
        ensembl=ensembl,
        column_specific_terms=column_specific_terms,
    )

--------------

## Annotate adata

In [None]:
# Run the annotation on the chosen clustering with the prepared marker lists.
# The returned value is used afterwards as the name of the .obs column to rename.
annotation_column = wrap.run_annotation(
    adata,
    marker_repo=True,
    SCSA=True,
    marker_lists=marker_lists,
    mr_obs="mr",
    scsa_obs="scsa",
    rank_genes_column=None,
    clustering_column=clustering_col,
    reference_obs=None,
    show_comparison=True,
    ignore_overwrite=True,
    show_plots=True,
)

--------------

## Add annotation to adata .obs table

In [None]:
# Give the column returned by the annotation step its final, user-chosen name.
adata.obs.rename(
    columns={annotation_column: celltype_column_name},
    inplace=True,
)

# Show the updated .obs table.
display(adata.obs)

--------------

## Save adata

In [None]:
# utils.save_h5ad(adata, "anndata_xx.h5ad")