In [None]:
from sctoolbox.utilities import bgcolor

# Cell type annotation
<hr style="border:2px solid black">

<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:
%bgcolor PowderBlue

# Final clustering column to use for cell type assignment. If None, you can select one interactively.
clustering_col = "leiden"

# Name of column to add with the final cell type annotation. If None, all annotation columns will be kept.
celltype_column_name = None

# List of marker list paths. If None, you can assemble marker lists using MarkerRepo.
marker_lists = None

# The settings below are only defined when no marker list paths were given above;
# they are consumed by the "Assemble marker lists" cell under the same condition.
if not marker_lists:
    # Assemble marker lists from MarkerRepo using specified search terms (values) for targeted columns (keys).
    # If None, marker lists can be interactively assembled.
    column_specific_terms = {"Organism name": "human", "Source": "panglao.se"}

    # File name of assembled marker list
    file_name = "panglao"

    # The style of the marker list: "two_column" or "score", which adds an extra column with a marker weight
    style = "two_column"

    # Path of MarkerRepo.
    # NOTE(review): machine-specific absolute path - adjust to your environment.
    repo_path = "/mnt/workspace/mkessle/projects/annotate_by_marker_and_features"

<hr style="border:2px solid black">

## Loading packages

In [None]:
import sctoolbox.utilities as utils
import pandas as pd
pd.set_option('display.max_columns', None)  #no limit to the number of columns shown
from sctoolbox import settings

%load_ext autoreload
%autoreload 2

In [None]:
# sctoolbox settings
# Input/output locations registered on the shared sctoolbox settings object.
# NOTE(review): presumably utils.load_h5ad / utils.save_h5ad resolve file names
# against these directories - confirm against the sctoolbox documentation.
settings.adata_input_dir = "../adatas/"
settings.adata_output_dir = "./"
# File name of the clustered adata that is loaded in the "Loading adata" section
clustered_adata = "hs.h5ad"

In [None]:
# Import MarkerRepo in its own guarded block so a failed import produces an
# install hint instead of a bare import error.
try:
    import markerrepo.wrappers as wrap
    import markerrepo.marker_repo as mr
except ModuleNotFoundError as err:
    # Chain explicitly: the original error (which names the module that could
    # not be found) is reported as the direct cause of this one.
    raise ModuleNotFoundError("Please install the latest MarkerRepo version.") from err

--------------

## Loading adata

In [None]:
# Load the clustered adata (file name given by `clustered_adata`) and display it
adata = utils.load_h5ad(clustered_adata)
display(adata)

--------------

## Assemble marker lists

The paths of the marker lists are stored in the <b>marker_lists</b> variable and serve as input for the cell type annotation in the next section. If the index of adata.var contains Ensembl IDs, <b>ensembl</b> must be True; otherwise gene symbols are used. In the cell below, this value is taken from <code>mr.check_ensembl(adata)</code> rather than set by hand.

In [None]:
# Only assemble lists from MarkerRepo when no marker list paths were provided.
if not marker_lists:
    marker_lists = wrap.create_marker_lists(
        organism=None,
        repo_path=repo_path,
        style=style,
        file_name=file_name,
        ensembl=mr.check_ensembl(adata),
        show_lists=False,
        column_specific_terms=column_specific_terms,
    )

--------------

## Annotate adata

In [None]:
# Run the annotation with both MarkerRepo and SCSA enabled, using the configured
# clustering column and the marker lists from the previous step.
annotation_column = wrap.run_annotation(
    adata,
    marker_repo=True,
    SCSA=True,
    marker_lists=marker_lists,
    mr_obs="mr",
    scsa_obs="scsa",
    rank_genes_column=None,
    clustering_column=clustering_col,
    reference_obs=None,
    show_comparison=True,
    ignore_overwrite=True,
    show_plots=True,
    celltype_column_name=celltype_column_name,
)

--------------

## Show annotated .obs table

In [None]:
# Show the .obs table after the annotation step
display(adata.obs)

--------------

## Save adata

In [None]:
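# Saving is disabled by default; uncomment the line below and adjust the file name to save the annotated adata.
# utils.save_h5ad(adata, "anndata_xx.h5ad")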