In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc

In [None]:
import anndata2ri
import logging

import rpy2.rinterface_lib.callbacks as rcb
import rpy2.robjects as ro

# Raise the rpy2 callbacks logger threshold to ERROR so lower-level messages are not shown.
rcb.logger.setLevel(logging.ERROR)
# Activate the pandas2ri and anndata2ri converters.
ro.pandas2ri.activate()
anndata2ri.activate()

# Load the rpy2 IPython extension; the %%R cell magic used further down relies on it.
%load_ext rpy2.ipython

In [None]:
from rpy2.robjects import pandas2ri
# Switch the pandas2ri converter off again.
# NOTE(review): this undoes the ro.pandas2ri.activate() call made during setup -- confirm that is intended.
pandas2ri.deactivate()

In [None]:
%%R
# Attach the Seurat package in the R session.
library(Seurat)

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Plotting / logging defaults -- adjust these to taste.
sc.set_figure_params(dpi=80, figsize=(4, 4))
sc.settings.verbosity = 0
# Pin the matplotlib figure defaults explicitly as well.
plt.rcParams.update(
    {
        "figure.dpi": 80,
        "figure.figsize": (4, 4),
    }
)

In [None]:
# Read the full dataset from "pan_endo.h5ad" and show its summary as the cell output.
adata_total=sc.read("pan_endo.h5ad")
adata_total

In [None]:
# Restrict to the genes flagged in var["highly_variable"]; .copy() makes the
# subset its own object instead of a slice of adata_total.
hvg_mask = adata_total.var["highly_variable"]
adata = adata_total[:, hvg_mask].copy()
adata

In [None]:
# Keep a copy of the current X matrix under layers["count"] before X is replaced.
# NOTE(review): presumably X holds raw counts at this point -- confirm against how pan_endo.h5ad was written.
adata.layers["count"]=adata.X.copy()

In [None]:
# Replace X with a copy of the "log1p_norm" layer; the steps below operate on X.
adata.X=adata.layers["log1p_norm"].copy()

In [None]:
# Principal component analysis with 50 components.
sc.pp.pca(
    adata,
    n_comps=50,
)

In [None]:
# Variance-ratio plot for the first 50 principal components.
sc.pl.pca_variance_ratio(
    adata,
    n_pcs=50,
)

In [None]:
# NOTE(review): identical to the earlier sc.pp.pca(adata, n_comps=50) cell, so the
# PCA is simply computed a second time here -- likely redundant.
sc.pp.pca(adata, n_comps=50)

In [None]:
# PCA scatter plots, one panel per listed obs column.
sc.pl.pca_scatter(
    adata,
    color=[
        "total_counts",
        "day",
        "doublet_score",
    ],
)

In [None]:
# t-SNE embedding computed from the "X_pca" representation.
sc.tl.tsne(
    adata,
    use_rep="X_pca",
)

In [None]:
# t-SNE plots, one panel per listed obs column.
sc.pl.tsne(
    adata,
    color=[
        "total_counts",
        "day",
        "doublet_score",
    ],
)

In [None]:
# Neighbor graph with scanpy's defaults; n_neighbors is the parameter to experiment with.
sc.pp.neighbors(adata)

# UMAP embedding with scanpy's defaults; min_dist is the parameter to experiment with.
sc.tl.umap(adata)

In [None]:
# UMAP plots, one panel per listed obs column.
sc.pl.umap(
    adata,
    color=[
        "total_counts",
        "day",
        "doublet_score",
    ],
)

In [None]:
# Leiden clustering at two resolutions; labels are stored under
# "leiden_0.5" and "leiden_1" respectively.
for resolution in (0.5, 1):
    sc.tl.leiden(
        adata,
        resolution=resolution,
        key_added=f"leiden_{resolution}",
    )

In [None]:
# Display the AnnData summary to check what has been added so far.
adata

In [None]:
# Compare the two Leiden clusterings side by side on the UMAP.
sc.pl.umap(
    adata,
    color=[
        "leiden_0.5",
        "leiden_1",
    ],
    wspace=0.4,
)

In [None]:
# Leiden at resolution 0.2 restricted to category "0" of "leiden_0.5";
# the resulting labels are stored under "leiden_0.5_2".
sc.tl.leiden(
    adata,
    resolution=0.2,
    restrict_to=["leiden_0.5", ["0"]],
    key_added="leiden_0.5_2",
)

In [None]:
# UMAP coloured by the "leiden_0.5_2" labels produced by the restricted clustering.
sc.pl.umap(adata, color="leiden_0.5_2")

In [None]:
# Marker genes per cell type: maps a cell-type label to the list of gene
# symbols used to recognise it. Insertion order is the plotting order.
marker_genes = {
    "Multipotent": "Dlk1 Mdk".split(),
    "Tip": "Vtn Myc Jam3".split(),
    "Trunk": "Notch2 Cbx3".split(),
    "Acinar": "Cpa1 Cel Rbpjl Reep5".split(),
    "Ductal": "Sox9 Anxa2 Spp1".split(),
    "EP": "Neurog3 Hes6 Btbd17 Gadd45a".split(),
    "Fev+": "Fev Cck Neurod1 Vwa5b2 Tox3".split(),
    "Endocrine": "Rbp4 Pyy Chgb Tmem27 Fam183b".split(),
}

In [None]:
# For every cell type, draw one UMAP panel per marker gene that exists in
# adata.var_names. Genes absent from the dataset are reported and get no
# panel, so the figure no longer contains empty axes for them.
for cell_type, genes in marker_genes.items():
    print(f"Cell type: {cell_type}")

    # Split the marker list into genes we can plot and genes we can only report.
    present_genes = []
    for gene in genes:
        if gene in adata.var_names:
            present_genes.append(gene)
        else:
            print(f"{gene} not found in the dataset")

    # Nothing to draw for this cell type; skip instead of building a zero-column figure.
    num_genes = len(present_genes)
    if num_genes == 0:
        continue

    # squeeze=False always yields a 2-D axes array, so the single-gene case
    # needs no special handling.
    fig, axs = plt.subplots(1, num_genes, figsize=(5 * num_genes, 5), squeeze=False)

    for ax, gene in zip(axs[0], present_genes):
        sc.pl.umap(adata, color=gene, title=f"{gene} in {cell_type}", ax=ax, show=False, use_raw=False,
                   cmap="Reds", frameon=False)

    plt.tight_layout()
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)

In [None]:
# Wilcoxon-based gene ranking per "leiden_0.5" cluster; results are stored
# under the key "dea_leiden_0.5".
sc.tl.rank_genes_groups(
    adata,
    groupby="leiden_0.5",
    method="wilcoxon",
    key_added="dea_leiden_0.5",
)

In [None]:
# Dot plot of the top 5 ranked genes per "leiden_0.5" cluster, read from the
# "dea_leiden_0.5" results and scaled per gene (standard_scale="var").
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby="leiden_0.5",
    key="dea_leiden_0.5",
    n_genes=5,
    standard_scale="var",
)

In [None]:
# Cluster id -> cell-type label. Clusters "3", "5" and "6" are all labelled
# "Acinar"; extend this mapping as more clusters are annotated.
# (An explicit dict literal or a dict comprehension works just as well.)
cluster_annotation = dict.fromkeys(["3", "5", "6"], "Acinar")

In [None]:
# Translate each cell's "leiden_0.5" cluster id into a label via cluster_annotation.
# Cluster ids that are not keys of the dictionary end up as missing values.
adata.obs["manual_celltype_annotation"] = adata.obs["leiden_0.5"].map(cluster_annotation)

# Alternative that keeps the original cluster id whenever it is not a key of cluster_annotation:
##adata.obs["manual_celltype_annotation"] = adata.obs["leiden_0.5"].map(lambda x: cluster_annotation.get(x, x))

In [None]:
# UMAP coloured by the manual cell-type annotation assigned above.
sc.pl.umap(adata, color="manual_celltype_annotation")

In [None]:
# Read the second dataset from "GSE132188_adata.h5ad.h5" and show its summary.
# Its obs columns are used below as curated annotations.
adata_curated=sc.read("GSE132188_adata.h5ad.h5")
adata_curated

In [None]:
# Overview of the cluster annotations stored in adata_curated.obs,
# laid out three panels per row.
sc.pl.umap(
    adata_curated,
    color=[
        "clusters_fig3_final_noep",
        "clusters_fig4_final",
        "clusters_fig2_final",
        "clusters_fig6_broad_final",
        "clusters_fig6_fine_final",
        "clusters_fig6_alpha_final",
    ],
    wspace=1,
    ncols=3,
)

In [None]:
# Display the working AnnData summary again before adding the curated labels.
adata

In [None]:
# Copy the curated "clusters_fig6_broad_final" labels onto adata.
# The assignment aligns on the obs index (cell barcodes), so any cell of adata
# that is missing from adata_curated silently becomes NaN. Check the overlap
# first so a barcode mismatch is noticed instead of yielding an empty column.
curated_labels = adata_curated.obs["clusters_fig6_broad_final"]
shared_cells = adata.obs_names.intersection(curated_labels.index)
if len(shared_cells) == 0:
    raise ValueError(
        "adata and adata_curated share no cell barcodes; "
        "cannot transfer 'clusters_fig6_broad_final'"
    )
if len(shared_cells) < adata.n_obs:
    # print() rather than warnings.warn(): warnings are globally silenced in this notebook.
    print(f"{adata.n_obs - len(shared_cells)} cells have no curated annotation (left as NaN)")
adata.obs["annotation_curated"] = curated_labels

In [None]:
# UMAP of the working dataset coloured by the transferred curated annotation.
sc.pl.umap(adata, color="annotation_curated")

In [None]:
# Save the annotated object to "adata_version02.h5ad" in the working directory.
adata.write("adata_version02.h5ad")