In [None]:
import warnings
# Silence FutureWarning messages for the whole notebook session
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
import sctoolbox
from sctoolbox.utils.jupyter import bgcolor

# Embedding and clustering
<hr style="border:2px solid black">

<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:
%bgcolor PowderBlue

# Column in adata.obs used to color the embeddings and to group the plots
condition_column = "timepoint"

# Number of threads to use for multiprocessing
threads = 4

# Search embedding parameters (or set the final parameters further below)
embedding = "umap"   # "umap" or "tsne"
search_parameters = True
dist_range = (0.1, 1, 0.2) # min_dist range for umap (presumably (min, max, step) - confirm in the sctoolbox docs)
spread_range = (1.0, 2.0, 0.5) # spread range for umap (presumably (min, max, step) - confirm)
n_components = 2 # Number of components for umap; the 3D plot at the end only runs for values > 2
perplexity_range = (30, 60, 10)        # perplexity range for tsne (presumably (min, max, step) - confirm)
learning_rate_range = (600, 1000, 200)   # learning_rate range for tsne (presumably (min, max, step) - confirm)


# Search different clustering resolutions
search_clustering_parameters = True
clustering_method = "leiden" # "leiden" or "louvain"

<hr style="border:2px solid black">

## Loading packages

In [None]:
import scanpy as sc
import matplotlib.pyplot as plt
import sctoolbox.utils as utils
import sctoolbox.tools as tools
import sctoolbox.plotting as pl
import sctoolbox.tools.marker_genes as marker_genes
from sctoolbox._settings import settings

# Load the sctoolbox settings stored under key "04" in config.yaml
# (presumably the section belonging to this notebook - confirm against the config file)
settings.settings_from_config("config.yaml", key="04")

# Set additional options for figures (vector-friendly output, saved at 600 dpi)
sc.set_figure_params(vector_friendly=True, dpi_save=600, scanpy=False)

-----------

## Load anndata from previous notebook

In [None]:
# Load the anndata object written by the previous notebook and show its summary
adata = utils.adata.load_h5ad("anndata_3.h5ad")
display(adata)

------------

In [None]:
# Sort cells by timepoint: first by the alphabetic part of the label (descending),
# then by the numeric part (ascending). The timepoint categories are reordered
# to follow the same order so that plots and legends are sorted accordingly.
timepoint_order = adata.obs[['timepoint']].copy()

# Raw strings keep the regex escapes intact (a plain '\d' is an invalid string escape)
timepoint_order['number'] = timepoint_order['timepoint'].str.extract(r'(\d+)').astype(int)
timepoint_order['unit'] = timepoint_order['timepoint'].str.extract(r'([A-Za-z]+)')
cell_ids = timepoint_order.sort_values(by=['unit', 'number'], ascending=[False, True]).index

# Take an explicit copy of the reordered object; assigning to .obs of a view
# would otherwise trigger an implicit copy (and a warning) on top of a full copy
adata = adata[cell_ids].copy()
adata.obs['timepoint'] = adata.obs['timepoint'].cat.reorder_categories(adata.obs['timepoint'].unique().to_list())

## Calculate UMAP/TSNE and find best setting

- NOTE: min_dist: the minimum distance between embedded points; smaller values make the plot look more 'clustered'.
- NOTE: spread: the effective scale of the embedded points; the default value is 1.

In [None]:
# Optional grid search over the embedding parameters chosen in the settings cell.
# The resulting overview is written to a PDF named after the embedding method.
if search_parameters:
    shared_search_kwargs = dict(color=condition_column, threads=threads)

    if embedding == "umap":
        pl.search_umap_parameters(
            adata,
            min_dist_range=dist_range,
            spread_range=spread_range,
            n_components=n_components,
            save="UMAP_parameter_search.pdf",
            **shared_search_kwargs,
        )
    elif embedding == "tsne":
        pl.search_tsne_parameters(
            adata,
            perplexity_range=perplexity_range,
            learning_rate_range=learning_rate_range,
            save="TSNE_parameter_search.pdf",
            **shared_search_kwargs,
        )

In [None]:
%bgcolor PowderBlue

# Final choice of min_dist / spread for umap (only used if embedding == "umap")
min_dist = 0.1
spread = 1.5

# Final choice of perplexity / learning_rate for tsne (only used if embedding == "tsne")
perplexity = 50
learning_rate = 800

In [None]:
# Compute the final embedding with the parameters selected above
if embedding == "umap":
    umap_params = {"min_dist": min_dist, "spread": spread, "n_components": n_components}
    sc.tl.umap(adata, **umap_params)
elif embedding == "tsne":
    tsne_params = {"perplexity": perplexity, "learning_rate": learning_rate}
    sc.tl.tsne(adata, **tsne_params)

## Plot final embedding with quality measures

In [None]:
# Columns to plot: the condition first, followed by every stored obs metric (adjust if needed)
qc_columns = [condition_column, *adata.uns["sctoolbox"]["obs_metrics"]]

In [None]:
# Draw the final embedding once per QC column (three panels per row) and save it
quality_plot_kwargs = {"basis": embedding, "color": qc_columns, "ncols": 3, "show": False}
sc.pl.embedding(adata, **quality_plot_kwargs)
pl._save_figure("embedding_quality.pdf")

In [None]:
# Show how the cells of each condition are distributed in the embedding
_ = pl.plot_group_embeddings(
    adata,
    groupby=condition_column,
    embedding=embedding,
    save="embedding_sample_distribution.pdf",
)

----------

## Cell clustering
- NOTE: resolution: controls the coarseness of the clustering. Higher values lead to more clusters.

In [None]:
# Optional overview of the clustering results across resolutions, plotted on the chosen embedding
if search_clustering_parameters:
    clustering_search_kwargs = dict(
        ncols=4,
        method=clustering_method,
        embedding=embedding,
        save="clustering_search.png",
    )
    pl.search_clustering_parameters(adata, **clustering_search_kwargs)

In [None]:
%bgcolor PowderBlue

# Choose final resolution: name of the adata.obs column holding the clustering to continue with
# (presumably one of the "<method>_<resolution>" columns created by the search above - confirm)
clustering_column = "leiden_0.1"

### Reclustering

Here you can use the `tools.recluster` function to iteratively adjust the clustering, e.g. by joining or splitting individual clusters.

In [None]:
# Join clusters "1" and "3" of the chosen clustering.
# The output column name is derived from the chosen clustering column and passed
# explicitly, so the updated clustering_column always matches the column that
# recluster writes (a hard-coded name breaks as soon as another resolution is chosen).
recluster_column = clustering_column + "_recluster"
tools.recluster(adata, clustering_column, ["1", "3"], task="join", embedding=embedding,
                key_added=recluster_column)
clustering_column = recluster_column  #update clustering column

In [None]:
# Split cluster "3" at resolution 0.15; key_added equals the input column, so the result overwrites it
split_kwargs = dict(
    task="split",
    resolution=0.15,
    embedding=embedding,
    key_added=clustering_column,
)
tools.recluster(adata, clustering_column, ["3"], **split_kwargs)

In [None]:
#Create final clustering: store the chosen clustering with renamed categories in adata.obs["clustering"]
adata.obs["clustering"] = utils.rename_categories(adata.obs[clustering_column])

# Register "clustering" as an obs metric only once, so re-running this cell does not add duplicates
obs_metrics = list(adata.uns["sctoolbox"]["obs_metrics"])
if "clustering" not in obs_metrics:
    obs_metrics.append("clustering")
adata.uns["sctoolbox"]["obs_metrics"] = obs_metrics

### Final clustering of cells

In [None]:
# Plot the condition and the final clustering side by side on the embedding, then save the figure
final_basis = f"X_{embedding}"
final_colors = [condition_column, "clustering"]
sc.pl.embedding(adata, basis=final_basis, color=final_colors, show=False)
pl._save_figure("embedding_clustering.pdf")

---------

## Plot distribution of cells across clusters

In [None]:
# Barplot of the number of cells per final cluster, grouped by condition
_ = pl.n_cells_barplot(
    adata,
    "clustering",
    groupby=condition_column,
    save="cell_distribution_barplot.pdf",
)

------

## Generating 3D Object with UMAP coordinates in HTML

In [None]:
# adata.obs column used to color the 3D HTML plot; defaults to the final "clustering",
# change to an individual leiden column or another obs column if needed
column_3d = "clustering"

In [None]:
# The 3D plot is only produced for a UMAP with more than two components
show_3d_umap = embedding == "umap" and n_components > 2

if show_3d_umap:
    from IPython.display import IFrame

    plot_name = f"umap_3d_{column_3d}"
    pl.plot_3D_UMAP(adata, column_3d, save=plot_name)

    # Embed the HTML file (figure prefix + plot name) in the notebook
    html_file = sctoolbox.settings.full_figure_prefix + plot_name + ".html"
    display(IFrame(src=html_file, width=800, height=400))

---------

## Saving adata for next notebook

In [None]:
# Write the processed anndata object to "anndata_4.h5ad" as input for the next notebook
utils.save_h5ad(adata, "anndata_4.h5ad")

In [None]:
# Close the sctoolbox logfile at the end of the notebook
sctoolbox.settings.close_logfile()