# Load Alkon dataset and predict cell type annotation using CellTypist 

The annotations that this package uses as a reference come from the Reynolds dataset. The intention is to obtain subgroups of KC (differentiated and undifferentiated) and T cells (Th, Tc).

In [0]:
!pip install scanpy

In [0]:
!pip install celltypist

In [0]:
!pip install --upgrade typing_extensions

In [0]:
# Restart the Python process so the packages installed in the cells above are picked up.
# Everything defined before this point is lost, which is why the imports come afterwards.
dbutils.library.restartPython()

In [0]:
import scanpy as sc

In [0]:
import celltypist
from celltypist import models

In [0]:
# Read the Alkon AnnData object from the mounted sandbox, then display its summary.
pilot_data = sc.read_h5ad("/dbfs/mnt/sandbox/TFM_PAULA/ALKON_PROCESSED_TFM.h5ad")
pilot_data

In [0]:
# Inspect the per-cell metadata table that came with the dataset.
pilot_data.obs

In [0]:
# pilot_data.X.expm1().sum(axis = 1)
# This step is not necessary, because the values in X are the scaled, log-transformed values from Seurat

In [0]:
# Inspect the expression matrix that will be passed to CellTypist.
pilot_data.X

## Annotate with human adult skin

In [0]:
# Load the CellTypist reference model by name so it can be inspected in the next cells.
model = models.Model.load('Adult_Human_Skin.pkl')

In [0]:
# Display the loaded model's summary.
model

In [0]:
# List the cell type labels exposed by the loaded model.
model.cell_types

In [0]:
# Reuse the Model object loaded earlier instead of repeating the file name, so the model
# that was inspected is exactly the one used for prediction and is not resolved twice.
# majority_voting = True also refines the per-cell labels after over-clustering.
# NOTE(review): CellTypist documents log1p-normalised expression (10,000 counts per cell)
# as the expected content of .X — confirm the Seurat export matches this.
predictions = celltypist.annotate(pilot_data, model = model, majority_voting = True)

In [0]:
# Inspect the label table produced by the annotation step.
predictions.predicted_labels

In [0]:
# Convert the prediction result into an AnnData object; the plots below colour cells by
# its `predicted_labels` and `majority_voting` columns.
adata = predictions.to_adata()
adata

In [0]:
# Persist the per-cell table (index included) as CSV outside the notebook,
# then display the same table.
adata.obs.to_csv(
    '/dbfs/mnt/sandbox/TFM_PAULA/Celltypist_ALKON_predictions_Adult_Human_Skin.csv',
    index=True,
)
adata.obs


In [0]:
# Side-by-side UMAP: existing `h_celltype_v4` column vs. CellTypist per-cell labels.
# NOTE(review): this runs before sc.tl.umap, so it relies on an embedding already stored
# in the loaded file — confirm it is present.
sc.pl.umap(
    adata,
    color=['h_celltype_v4', 'predicted_labels'],
    legend_loc='on data',
)

In [0]:
# Side-by-side UMAP: existing `h_celltype_v4` column vs. majority-voting labels.
sc.pl.umap(
    adata,
    color=['h_celltype_v4', 'majority_voting'],
    legend_loc='on data',
)

In [0]:
# Side-by-side UMAP: per-cell labels vs. majority-voting labels.
sc.pl.umap(
    adata,
    color=['predicted_labels', 'majority_voting'],
    legend_loc='on data',
)

In [0]:
# Recompute the UMAP embedding with scanpy; the plot in the next cell uses this new layout.
# NOTE(review): sc.tl.umap needs a neighbourhood graph on `adata` — confirm one exists here.
sc.tl.umap(adata)
adata

In [0]:
# Same label comparison as before, drawn after the UMAP was recomputed in the previous cell.
sc.pl.umap(
    adata,
    color=['predicted_labels', 'majority_voting'],
    legend_loc='on data',
)