In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
import anndata as ad
sc.set_figure_params(frameon=False, dpi=100)
import matplotlib.pyplot as plt

In [None]:
adata = ad.read_zarr('../Finalized/adata_scpoli_final_refined.zarr')

In [None]:
# adata.write('../Finalized/adata_scpoli_final_refined_compressed.h5ad',  compression='gzip')

# import gc
# del adata
# gc.collect()

# adata = ad.read_zarr('../Finalized/adata_scpoli_final.zarr')

# adata.write('../Finalized/adata_scpoli_final_compressed.h5ad', compression='gzip')

In [None]:
# NOTE(review): this rebinds `adata`, discarding the object read from
# '../Finalized/adata_scpoli_final_refined.zarr' in the first load cell.
# Later cells read obs columns such as 'Level_1_refined' and 'Leiden_whole_object';
# confirm that this store (and not the "refined" one) is the intended input.
adata = ad.read_zarr('../Finalized/adata_scpoli_final.zarr')


In [None]:
adata

In [None]:
adata

In [None]:
adata_all = ad.read_zarr('../Finalized/adata_all_embeddings.zarr')

In [None]:
sc.pl.umap(adata, color='Leiden_whole_object')

In [None]:
# Leiden clustering of the full object at resolution 0.75; the labels are stored
# under the obs key 'Leiden_whole_object_075'.
# NOTE(review): no sc.pp.neighbors call precedes this cell, so the loaded object
# presumably already carries a neighbors graph — confirm which representation it was built on.
sc.tl.leiden(adata, resolution=0.75, key_added='Leiden_whole_object_075')

In [None]:
sc.pl.umap(adata, color=['Level_1_refined', 'Leiden_whole_object_075'], ncols=1)

In [None]:
# Candidate marker genes per cell-type label, used for the UMAP marker panels.
# Several labels share the same gene list (e.g. "Pericyte" / "Smooth Muscle Cell",
# "Malignant" / "Ductal Cell/Malignant", "Neuronal Cell" / "Intra-pancreatic Neurons").
cell_type_markers = {
    "Acinar Cell": ["CPB1", "PRSS1", "AMY2B"],
    "Adipocyte": ["PLIN1", "LPL"],
    "B Cell": ["CD19", "CD74", "MS4A1"],
    "Ductal Cell": ["ANXA4", "CFTR", "MUC1"],
    "Ductal Cell/Malignant": ["KRT19", "MUC1", "EPCAM", "KRT7", "KRT17"],
    "EMT": ["VIM", "CDH2", "ZEB1"],
    "Endocrine Cell": ["INS", "GCG", "SST"],
    "Endothelial Cell": ["PECAM1", "VWF", "TIE1"],
    "Erythroid Cell": ["HBB", "HBA1"],
    "Fibroblast": ["COL1A1", "FAP", "PDPN", "COL6A3"],
    "Intra-pancreatic Neurons": ["ENO2","CHAT","TH"],
    "Malignant": ["KRT19", "MUC1", "EPCAM", "KRT7", "KRT17"],
    "Myeloid Cell": ["CD163", "ITGAM", "CXCL8"],
    "Natural Killer": ["NKG7", "KLRD1", "IL18R1"],
    "Neuronal Cell": ["ENO2","CHAT","TH"],
    "Pericyte": ["DLK1", "RGS5"],
    "Schwann Cell": ["SOX10", "S100B"],
    "Smooth Muscle Cell": ["DLK1", "RGS5"],
    "T Cell": ["CD3D", "CD4", "CD8A", "THEMIS"]
}

# De-duplicate genes while keeping first-seen order. A set would also remove
# duplicates, but its iteration order for strings changes between kernel sessions,
# which makes `all_markers` / `present_markers` (and anything plotted from them)
# come out in a different order on every restart.
all_markers = list(dict.fromkeys(
    gene for markers in cell_type_markers.values() for gene in markers
))

# Keep only the genes that actually exist in this object.
present_markers = [gene for gene in all_markers if gene in adata.var_names]
# for gene in present_markers:
#     sc.pl.umap(adata, color=gene, title=f"Expression of {gene}", show=True)

In [None]:
len(present_markers)

In [None]:
 sc.pl.umap(
        adata,
        color=['S100A8', 'CD68'],
        legend_fontsize=6,  
        layer='log_norm',
        size=1, 
        vmax=5,
        vmin=0
    )

In [None]:
# Draw one UMAP panel per (cell type, marker gene) pair on a 4-column grid,
# colouring by the 'log_norm' layer with a fixed [0, 5] colour scale.
all_marker_pairs = [
    (cell_type, marker)
    for cell_type, markers in cell_type_markers.items()
    for marker in markers
]

# Only plot genes that exist in this object: sc.pl.umap raises on an unknown key,
# which would otherwise abort the whole figure because of a single absent marker.
flat_markers = [
    (cell_type, marker)
    for cell_type, marker in all_marker_pairs
    if marker in adata.var_names
]
skipped_markers = sorted(
    {marker for _, marker in all_marker_pairs} - {marker for _, marker in flat_markers}
)
if skipped_markers:
    print(f"Skipping markers not found in adata.var_names: {skipped_markers}")

num_markers = len(flat_markers)
n_cols = 4
# Ceiling division; keep at least one row so plt.subplots is never asked for an empty grid.
n_rows = max(1, (num_markers + n_cols - 1) // n_cols)

# squeeze=False guarantees a 2-D array of axes whatever the grid shape is.
fig, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 5), squeeze=False)
axs = axs.flatten()

for ax, (cell_type, gene) in zip(axs, flat_markers):
    sc.pl.umap(
        adata,
        color=gene,
        title=f"{cell_type}: {gene}",
        ax=ax,
        show=False,
        legend_fontsize=6,
        layer='log_norm',
        size=1,
        vmax=5,
        vmin=0
    )

# Drop the unused axes at the end of the last row.
for extra_ax in axs[num_markers:]:
    extra_ax.remove()

plt.tight_layout()
plt.show()

In [None]:
# Rank marker genes for every 'Level_1_refined' group using the 'log_norm' layer.
# use_raw=False is stated explicitly: when `adata.raw` is set, scanpy defaults to
# use_raw=True, and that combination with `layer=` is rejected with a ValueError.
sc.tl.rank_genes_groups(
    adata,
    groupby='Level_1_refined',
    layer='log_norm',
    use_raw=False,
)

In [None]:
# First 10 ranked gene names for the 'EMT' group.
ranked_gene_names = pd.DataFrame(adata.uns['rank_genes_groups']['names'])
ranked_gene_names['EMT'].head(10)

In [None]:
# Reduced marker panel (two to three genes per label).
# This rebinds `cell_type_markers`, replacing the longer panel defined earlier.
# Insertion order is kept exactly as written because later cells iterate over it.
cell_type_markers = {
    "Acinar Cell":              ["PRSS1", "AMY2B"],
    "Ductal Cell":              ["ANXA4", "CFTR"],
    "Ductal Cell/Malignant":    ["KRT19", "EPCAM"],
    "EMT":                      ["VIM", "CDH2", "FN1"],
    "Malignant":                ["KRT19", "EPCAM"],
    "T Cell":                   ["CD4", "CD8A"],
    "B Cell":                   ["CD74", "MS4A1"],
    "Natural Killer":           ["NKG7", "IL18R1"],
    "Myeloid Cell":             ["CD163", "ITGAM"],
    "Fibroblast":               ["COL1A1", "COL6A3"],
    "Neuronal Cell":            ["ENO2", "CHAT"],
    "Intra-pancreatic Neurons": ["ENO2", "CHAT"],
    "Adipocyte":                ["PLIN1", "LPL"],
    "Schwann Cell":             ["SOX10", "S100B"],
    "Pericyte":                 ["DLK1", "RGS5"],
    "Smooth Muscle Cell":       ["DLK1", "RGS5"],
    "Endocrine Cell":           ["INS", "GCG"],
    "Endothelial Cell":         ["PECAM1", "VWF"],
    "Erythroid Cell":           ["HBB", "HBA1"],
}

In [None]:
# Order the refined labels like the 'Level_1' categories, leaving out every
# category whose name contains 'Ambiguous'.
new_order = [
    category
    for category in adata.obs['Level_1'].cat.categories.tolist()
    if 'Ambiguous' not in category
]

# pd.Categorical turns every value that is missing from `categories` into NaN
# without any warning. Refined labels that 'Level_1' does not contain would be
# wiped that way, so append them (first-seen order) after the 'Level_1' ones.
# Labels containing 'Ambiguous' are still left out, as before.
refined_labels = adata.obs['Level_1_refined'].dropna().unique().tolist()
new_order += [
    label
    for label in refined_labels
    if label not in new_order and 'Ambiguous' not in label
]

adata.obs['Level_1_refined'] = pd.Categorical(
    adata.obs['Level_1_refined'],
    categories=new_order,
    ordered=True,
)

In [None]:
# Use your annotation to align the authors' annotations.
# Try CellHint on top of scPoli.
# Is CellBender a major effect?


In [None]:
level_1_order = list(adata.obs['Level_1_refined'].cat.categories)

In [None]:
# Re-key the marker panel so that it follows `level_1_order`; labels that have
# no entry in `cell_type_markers` are left out.
rearranged_cell_type_markers = {}
for cell_type in level_1_order:
    if cell_type in cell_type_markers:
        rearranged_cell_type_markers[cell_type] = cell_type_markers[cell_type]


In [None]:
# Matrix plot of the ordered marker panel, grouped by 'Level_1_refined'.
# Values come from the 'log_norm' layer and are scaled per gene (standard_scale='var').
plt.rcParams['figure.figsize'] = (24, 4)

matrixplot_options = dict(
    var_names=rearranged_cell_type_markers,
    groupby="Level_1_refined",
    layer='log_norm',
    standard_scale='var',
    # Optional extras, currently disabled:
    # dendrogram=True
    # swap_axes=True
)
sc.pl.matrixplot(adata, **matrixplot_options)

# Chat with Malte:
# Can you recover the fine-level annotation from Regev according to the Level 1 labels you have annotated?

In [None]:
%matplotlib inline

In [None]:
# Bring the 'Level 3 Annotation' column of `adata_all` into `adata.obs`.
# Assigning a Series to an obs column aligns on the index (cell barcodes), so
# cells of `adata` that are absent from `adata_all` end up as NaN.
regev_level_3 = adata_all.obs['Level 3 Annotation'].copy()
adata.obs['Level 3 Annotation_Regev'] = regev_level_3

In [None]:
# Number of cells per dataset (rows) and level-3 label (columns).
# observed=False is spelled out: for categorical keys pandas deprecated relying on
# the implicit default, and a later release switches it to True, which would
# change the shape of this table.
(
    adata.obs
    .groupby(['Dataset', 'Level 3 Annotation_Regev'], observed=False)
    .size()
    .unstack()
)

In [None]:
# Side-by-side listing of the distinct labels found in each annotation column.
# NOTE: values that share a row are NOT matched to each other; each column is an
# independent list, padded with None so that both have the same length.
level_1_unique = adata.obs['Level_1_refined'].unique().tolist()
level_3_unique = adata.obs['Level 3 Annotation_Regev'].unique().tolist()

max_length = max(len(level_1_unique), len(level_3_unique))
for labels in (level_1_unique, level_3_unique):
    # No-op for the longer list, since the padding is then empty.
    labels.extend([None] * (max_length - len(labels)))

mapping_df = pd.DataFrame(
    {
        'Level_1_refined': level_1_unique,
        'Level_3_Annotation_Regev': level_3_unique,
    }
)

In [None]:
mapping_df

In [None]:
# Three UMAP panels comparing the refined level-1 labels, the Regev level-3
# labels and the resolution-0.75 Leiden clusters, with labels drawn on the data.
plt.rcParams['figure.figsize'] = (6, 6)

annotation_keys = ['Level_1_refined', 'Level 3 Annotation_Regev', 'Leiden_whole_object_075']
sc.pl.umap(
    adata,
    color=annotation_keys,
    size=3,
    wspace=0.2,
    legend_loc="on data",
    legend_fontsize=5,
    legend_fontoutline=2,
)

In [None]:
# Subset of `adata` restricted to cells whose 'Level_1' label is 'Fibroblast'.
# NOTE(review): the same expression is evaluated again in a later cell before
# `stromal_cells_two` is first read, so this cell looks redundant — confirm.
stromal_cells_two = adata[adata.obs.Level_1 == 'Fibroblast']

In [None]:
# Subset the cells labelled 'Fibroblast' in 'Level_1' and re-embed them on their own.
# .copy() materialises the subset: `adata[mask]` is only a view of `adata`, and
# writing to a view's .obsm makes anndata copy it implicitly (with an
# ImplicitModificationWarning). Copying up front keeps that step explicit.
stromal_cells = adata[adata.obs.Level_1 == 'Fibroblast'].copy()

# Keep the whole-object UMAP coordinates before sc.tl.umap overwrites 'X_umap'.
stromal_cells.obsm['X_umap_global'] = stromal_cells.obsm['X_umap'].copy()

# Neighbours on the 'X_scpoli' representation, then clustering and a subset-specific UMAP.
sc.pp.neighbors(stromal_cells, use_rep='X_scpoli')
sc.tl.leiden(stromal_cells, resolution=0.25)
sc.tl.umap(stromal_cells)

In [None]:
sc.pl.umap(stromal_cells, color='leiden', legend_loc="on data", legend_fontoutline=2, legend_fontsize=6)

In [None]:
sc.pl.umap(stromal_cells, color='COL5A2', layer='log_norm')

In [None]:
# Fresh 'Fibroblast' subset of `adata` (same mask as `stromal_cells`); the next
# cell reads its 'X_umap' as the whole-object coordinates for these cells.
stromal_cells_two = adata[adata.obs.Level_1 == 'Fibroblast']

In [None]:
# Show the stromal subset on the whole-object UMAP while keeping its own embedding:
#   'X_umap_global' - coordinates taken from the full object
#   'X_umap_local'  - embedding computed on the subset alone
#   'X_umap'        - what sc.pl.umap draws; set to the global coordinates here

# Both objects are built from the same mask; verify this so the rows really line up.
if not stromal_cells.obs_names.equals(stromal_cells_two.obs_names):
    raise ValueError("stromal_cells and stromal_cells_two do not contain the same cells in the same order")

global_umap = stromal_cells_two.obsm['X_umap'].copy()
current_umap = stromal_cells.obsm['X_umap']

# Only store the current embedding as "local" when it is not already the global one.
# Without this guard, re-running the cell would overwrite 'X_umap_local' with the
# global coordinates and lose the subset embedding.
if 'X_umap_local' not in stromal_cells.obsm or not np.array_equal(current_umap, global_umap):
    stromal_cells.obsm['X_umap_local'] = current_umap.copy()

stromal_cells.obsm['X_umap_global'] = global_umap
# Separate copy so that 'X_umap' and 'X_umap_global' never share one array.
stromal_cells.obsm['X_umap'] = global_umap.copy()


In [None]:
sc.pl.umap(stromal_cells, color='leiden')

In [None]:
# NOTE(review): `epi_cells` is not defined anywhere in the visible part of this
# notebook; on a fresh kernel this cell raises NameError unless the object is
# created in a cell not shown here — confirm and add its definition above.
sc.pl.umap(epi_cells, color='leiden')