In [4]:
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
from natsort import natsorted
import matplotlib.pyplot as plt
import decoupler as dc

sns.set_context("paper", font_scale=2)  # global seaborn context: "paper" preset with fonts scaled 2x

In [5]:
# Tissue label: interpolated into the input/output file paths and used to
# filter the cluster-annotation table.
tissue = 'DiencephalonPituitary'

In [6]:
# Colour palettes for the categorical covariates plotted in this notebook.
plate_palette = sns.color_palette("husl", n_colors=3)
sex_palette = [
    'hotpink',
    'dodgerblue',
]
geno_palette = [
    '#DA9CC1', '#F4C245', '#C0BFBF', '#55AF5B',
    '#4F6EAF', '#52A5DB', '#D83026', '#683C91',
]


In [7]:
# Load the processed, sub-clustered AnnData object for the selected tissue.
adata = sc.read_h5ad(
    f'../IGVF_analysis/cellbender_tissues/processed/{tissue}_processed_subclustered_res0.1.h5ad'
)



KeyboardInterrupt: 

In [None]:
# Display the AnnData summary right after loading.
adata

In [None]:
# Peek at the first rows of the observation metadata (adata.obs).
adata.obs.head()

In [None]:
# Peek at the first rows of the variable metadata (adata.var).
adata.var.head()

In [None]:
def stacked_barplot_proportions(adata, cluster_key, var_key, fsize=(12, 6), annotations=True, reverse_order=False, custom_order=None, custom_colors=None):
    """Plot the proportions of ``var_key`` within each ``cluster_key`` group as horizontal stacked bars.

    Parameters
    ----------
    adata : pandas.DataFrame
        Table with one row per observation. Despite the name, it is used as a
        DataFrame (column lookup and ``groupby``), e.g. ``adata.obs``.
    cluster_key : str
        Column whose values define the bars (one bar per group).
    var_key : str
        Column whose values define the stacked segments inside each bar.
    fsize : tuple
        Figure size forwarded to ``DataFrame.plot``.
    annotations : bool
        If True, write each group's row count next to its bar and place the
        legend outside the axes; if False, remove the legend.
    reverse_order : bool
        If True, reverse the order of the bars.
    custom_order : sequence, optional
        Group labels selecting and ordering the bars (applied before any
        reversal). ``None`` or an empty sequence keeps the default order.
    custom_colors : sequence, optional
        Segment colours. ``None`` or an empty sequence falls back to a "husl"
        palette with one colour per unique ``var_key`` value.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Explicit None/length checks: truth-testing a NumPy array (for example a
    # colour array stored in ``adata.uns``) raises ValueError.
    use_custom_order = custom_order is not None and len(custom_order) > 0
    use_custom_colors = custom_colors is not None and len(custom_colors) > 0

    if use_custom_colors:
        colors = custom_colors
    else:
        colors = sns.color_palette("husl", n_colors=len(adata[var_key].unique()))

    # Count rows per (cluster, var) pair and normalise every cluster row to sum to 1.
    # observed=False spells out the long-standing pandas default for categorical keys.
    grouped_data = adata.groupby([cluster_key, var_key], observed=False).size().unstack().fillna(0)
    proportions = grouped_data.div(grouped_data.sum(axis=1), axis=0)

    if use_custom_order:
        # list(...) so that a tuple is not interpreted by .loc as a multi-axis key.
        proportions = proportions.loc[list(custom_order)]
    if reverse_order:
        proportions = proportions.iloc[::-1]

    ax = proportions.plot(kind='barh', color=colors, stacked=True, figsize=fsize, width=0.8, edgecolor=None)

    if annotations:
        # Per-cluster row counts, re-aligned to the final bar order so each
        # label sits next to its own bar (including when custom_order is used).
        counts = adata.groupby(cluster_key, observed=False).size().reindex(proportions.index)
        # x is given in axes coordinates, y in data coordinates (bar position i).
        for i, txt in enumerate(counts):
            ax.text(0.875, i, txt, fontsize=14, va='center', transform=ax.get_yaxis_transform())

    # Extra room on the right of the 0-1 proportion range for the count labels.
    ax.set_xlim(0, 1.15)
    ax.tick_params(axis="x", labelsize=14)
    ax.tick_params(axis="y", labelsize=14)
    ax.set_xlabel("Proportion")
    ax.set_ylabel(cluster_key)
    ax.set_title(f'{var_key} by {cluster_key}')

    if annotations:
        ax.legend(title=var_key, bbox_to_anchor=(1.05, 1), loc='upper left')
    else:
        ax.get_legend().remove()

    ax.grid(False)

    plt.show()

In [None]:
# Sex composition of each leiden_R cluster.
stacked_barplot_proportions(
    adata.obs,
    'leiden_R',
    'Sex',
    fsize=(6.5, 15),
    annotations=True,
    reverse_order=True,
    custom_colors=sex_palette,
)


In [None]:
# Genotype composition of each leiden_R cluster.
stacked_barplot_proportions(
    adata.obs,
    'leiden_R',
    'Genotype',
    fsize=(6.5, 15),
    annotations=True,
    reverse_order=True,
    custom_colors=geno_palette,
)


In [None]:
# Plate composition of each leiden_R cluster.
stacked_barplot_proportions(
    adata.obs,
    'leiden_R',
    'plate',
    fsize=(6.5, 15),
    annotations=True,
    reverse_order=True,
    custom_colors=plate_palette,
)


In [None]:
# Marker-gene panel: group label -> list of gene names. Passed to
# sc.pl.dotplot, which draws the groups in the order they are listed here.
marker_genes_dict = {
    "Endothelial": ["Flt1","Pecam1"],
    "Pericyte": ["Vtn"],
    "Fibroblast": ["Bnc2","Fbxl7"],
    "VLMC1": ["Dcn"],
    "VLMC2": ['Egfr','Tbx15','Rspo3','Slc26a7'],
    "Epithelial":["Kl","Ttr","Clic6"], # http://mousebrain.org/celltypes/CHOR.html
    "Ependymal":["Tmem212","Dnah6"],
    "Hypendymal":['Spp2','Sspo'], # http://mousebrain.org/celltypes/HYPEN.html
    "Microglia": ["Cx3cr1"],
    "Astrocyte": ["Gfap","Clu"],
    "Excitatory neuron, thalamus": ["Synpo2","Prkcd","Ramp3","Ptpn3","Shox2"], # http://mousebrain.org/celltypes/DEGLU1.html
    "Cholinergic neurons, habenula": ['Chat','Lrrc55','Tac2','Nwd2'],
    "Dopaminergic neuron, midbrain": ["Slc6a3","En1"], # http://mousebrain.org/celltypes/MBDOP2.html
    "GABAergic": ["Gad1","Gad2"],
    "OPC": ["Pdgfra"],
    "COP": ["C1ql1"],
    "MFOL": ["Mag","Mog"],
    "MOL": ["Plp1"],
    "Corticotrope": ["Crhr1","Tbx19"],
    "Gonadotrope": ["Cga","Fshb","Nr5a1"],
    "Lactotrope": ["Prl","Greb1","Agtr1a"],
    "Melanotrope": ["Oacyl","Pomc","Esm1"],
    "Tanycyte": ['Col23a1','Rax','Lhx2','Prdx6','Ptn'],
    "Pituitary_stem": ["Rbpms","Cyp2f2","Sox2","Aldh1a2"],
    "Somatotrope": ["Gh","Pappa2"],
    "Thyrotrope": ["Tshb","Dio2"],
    "Cycling": ["Top2a","Mki67"],
}


In [None]:
# Dot plot of the marker panel across leiden_R clusters.
sc.pl.dotplot(
    adata,
    marker_genes_dict,
    'leiden_R',
    log=True,
    dendrogram=True,
    mean_only_expressed=True,
)


In [None]:
# Second marker-gene panel: group label -> list of gene names, used for an
# additional dot plot of the same clusters.
# NOTE(review): the "ctx_hc" prefix presumably means cortex/hippocampus, and
# the "Heart"/"Gastrocnemius" entries look like off-tissue controls — confirm.
ctx_hc_marker_genes_dict = {
    "Endothelial": ["Flt1"],
    "Pericyte": ["Vtn"],
    "VLMC": ["Dcn",'Slc6a13','Ptgds'], 
    "ABC (type of VLMC)": ['Mgp','Slc47a1','Dapl1','Bnc2'],
    "OEC": ['Prss56'],
    "Epithelial":["Kl","Ttr","Clic6"],
    "Ependymal":["Tmem212","Dnah6"],
    "Microglia": ["Cx3cr1","Hexb","Inpp5d"],
    "Astrocyte": ["Gfap","Clu","Slc1a3"],
    "Neuron": ["Mir124a-1hg", "Snap25"],
    "Neuroblast":["Igfbpl1","Dlx2",'Cdca7','Top2a'],
    "Dopaminergic neuron": ['Slc18a2', 'Th'],
    "Glutamatergic neuron": ["Slc17a7"],
    "Cajal-Retzius": ["Reln"],
    "GABAergic neuron": ["Gad1","Gad2",],
    "Medium spiny neuron" : ["Drd1","Drd2","Ppp1r1b","Adora2a"],
    "Lamp5 GABAergic": ["Lamp5"],
    "Vip GABAergic": ["Vip"],
    "Sst GABAergic": ["Sst"],
    "Pvalb GABAergic": ["Pvalb"],
    "Car3": ["Car3","Oprk1"],
    "OPC": ["Pdgfra","Cspg4"],
    "COP": ["C1ql1"],
    "MFOL": ["Mag","Mog","Mbp"],
    "MOL": ["Plp1"],
    "DG_early": ["Prox1"],
    "DG": ["Calb1"],
    "Heart": ["Ryr1","Gata4"],
    "Gastrocnemius": ["Myh4","Ttn"]
}

In [None]:
# Dot plot of the second marker panel across leiden_R clusters.
sc.pl.dotplot(
    adata,
    ctx_hc_marker_genes_dict,
    'leiden_R',
    log=True,
    dendrogram=True,
    mean_only_expressed=True,
)


In [None]:
# Wide default figure, then one violin panel per metric grouped by leiden_R cluster.
plt.rcParams['figure.figsize'] = (16, 6)

sc.pl.violin(
    adata,
    ['pct_counts_mt_cb', 'doublet_score', 'total_counts_raw', 'n_genes_by_counts_cb'],
    groupby="leiden_R",
    multi_panel=True,
    jitter=0.4,
    size=0,
)

In [None]:
# Narrower default figure, then the same four metrics grouped by plate.
plt.rcParams['figure.figsize'] = (8, 6)

sc.pl.violin(
    adata,
    ['pct_counts_mt_cb', 'doublet_score', 'total_counts_raw', 'n_genes_by_counts_cb'],
    groupby="plate",
    multi_panel=True,
    jitter=0.4,
    size=0,
)

In [None]:
# Square default figure size for the plots that follow.
plt.rcParams['figure.figsize'] = (8, 8)


In [None]:
# UMAP coloured by leiden_R cluster, with labels drawn on the embedding.
sc.pl.umap(
    adata,
    color=['leiden_R'],
    size=1,
    legend_loc='on data',
    legend_fontsize=10,
)


In [None]:
# UMAP coloured by genotype.
sc.pl.umap(
    adata,
    color=['Genotype'],
    palette=geno_palette,
    size=0.8,
)


In [None]:
# UMAP coloured by plate.
sc.pl.umap(
    adata,
    color=['plate'],
    palette=plate_palette,
    size=0.8,
)


In [None]:
# UMAP coloured by sex.
sc.pl.umap(
    adata,
    color=['Sex'],
    palette=sex_palette,
    size=0.8,
)


In [None]:
# Re-display the AnnData summary after the plotting cells.
adata

In [None]:
# Square default figure size for the plots that follow.
plt.rcParams['figure.figsize'] = (8, 8)


In [None]:
# UMAPs coloured by two numeric per-cell metrics. No `palette` is passed:
# scanpy applies `palette` to categorical annotations only, so the plate
# palette had no effect on these colormap-based plots.
sc.pl.umap(adata, color=['pct_counts_mt_cb', 'doublet_score'], size=0.8)


In [None]:
# UMAPs coloured by expression of three genes. No `palette` is passed:
# scanpy applies `palette` to categorical annotations only, so the plate
# palette had no effect on these colormap-based plots.
sc.pl.umap(adata, color=['Slc17a7', 'Gad2', 'Snap25'], size=0.8)


In [None]:
# Re-display the AnnData summary before the annotation carry-over.
adata

In [None]:
# Annotations from the earlier analysis (the file is named for HypothalamusPituitary).
old_annots = pd.read_csv('../IGVF_analysis/annotated_tissues/HypothalamusPituitary_annotated_metadata.csv')
# Harmonise naming: "Sublibrary_<n>" -> "Subpool_<n>".
old_annots['subpool'] = old_annots['subpool'].str.replace(r'Sublibrary_(\d+)', r'Subpool_\1', regex=True)

subpool_mapping = {
    "13A": "Subpool_1",
    "13B": "Subpool_2",
    "13C": "Subpool_3",
    "13D": "Subpool_4",
    "13E": "Subpool_5",
    "13F": "Subpool_6",
    "13G": "Subpool_7",
    "13H": "Subpool_8"
}

# Rename the subpool labels for plate igvf_012 only. `replace` leaves labels
# that are not in the mapping unchanged; `map` would turn them into NaN and
# silently produce NaN cellIDs for those rows.
# NOTE(review): the mapping keys are 13A-13H while the plate filter is
# igvf_012 — confirm this pairing is intended.
on_plate_012 = old_annots['plate'] == 'igvf_012'
old_annots.loc[on_plate_012, 'subpool'] = old_annots.loc[on_plate_012, 'subpool'].replace(subpool_mapping)

# Build the cell identifier "<bc1>_<bc2>_<bc3>_<subpool>_<plate>", which is
# compared against adata.obs.index in the following cells.
old_annots['cellID'] = (
    old_annots['bc1_well'] + "_"
    + old_annots['bc2_well'] + "_"
    + old_annots['bc3_well'] + "_"
    + old_annots['subpool'] + "_"
    + old_annots['plate']
)



In [None]:
# Compare the cell IDs of the current object with the previously annotated cells.
new_cells = adata.obs.index.to_list()
old_cells = old_annots['cellID'].to_list()

new_cells_series, old_cells_series = pd.Series(new_cells), pd.Series(old_cells)

# Membership masks in both directions.
old_in_new = old_cells_series.isin(new_cells)
new_in_old = new_cells_series.isin(old_cells)

# IDs present on one side only.
old_not_in_new = old_cells_series.loc[~old_in_new]
new_not_in_old = new_cells_series.loc[~new_in_old]

print(f"Number of old_cells not in new_cells: {old_not_in_new.size}")
print(f"Number of new_cells not in old_cells: {new_not_in_old.size}")


In [None]:
# Look-up table of the old labels keyed by cellID. Built without mutating
# old_annots (no in-place set_index), so this cell and the earlier cells that
# read old_annots['cellID'] can be re-run.
transfer_df = old_annots.set_index('cellID')[['subtype', 'celltype']]

# Left-join onto a copy of adata.obs: every current cell is kept and cells
# without an old annotation get NaN. Existing 'subtype'/'celltype' columns are
# dropped from the copy first, because the join raises on overlapping names.
adata_obs = (
    adata.obs
    .drop(columns=['subtype', 'celltype'], errors='ignore')
    .join(transfer_df, how='left')
)


In [None]:
# Label cells without a carried-over annotation as "NA". The result is assigned
# back to the column: calling fillna(inplace=True) on a column selection is
# chained assignment, which under pandas copy-on-write modifies a temporary
# object and leaves adata_obs unchanged.
adata_obs['subtype'] = adata_obs['subtype'].fillna("NA")
adata_obs['celltype'] = adata_obs['celltype'].fillna("NA")

In [None]:
# Check the joined table, including the added 'subtype' and 'celltype' columns.
adata_obs.head()

In [None]:
# Store the carried-over labels on the AnnData object under *_carryover names.
for source_col in ('celltype', 'subtype'):
    adata.obs[f'{source_col}_carryover'] = adata_obs[source_col]

In [None]:
# UMAP coloured by the carried-over subtype labels, drawn on the embedding.
sc.pl.umap(
    adata,
    color=['subtype_carryover'],
    size=1,
    legend_loc="on data",
    legend_fontsize=14,
)


In [None]:
# Plot UMAP
ax = sc.pl.umap(adata, color=['subtype_carryover'], size=1, legend_fontsize=20, show=False)
handles, labels = ax.get_legend_handles_labels()
plt.legend(handles, labels, loc='center left', bbox_to_anchor=(1, 0.5), markerscale=2, fontsize=20, ncol=2)
plt.show()

In [None]:
# Carried-over subtype composition of each leiden_R cluster, reusing the
# colours stored in adata.uns for that annotation.
stacked_barplot_proportions(
    adata.obs,
    'leiden_R',
    'subtype_carryover',
    fsize=(6.5, 15),
    annotations=True,
    reverse_order=True,
    custom_colors=adata.uns["subtype_carryover_colors"],
)


In [None]:
# UMAP coloured by leiden_R cluster with larger on-data labels.
sc.pl.umap(
    adata,
    color=['leiden_R'],
    size=1,
    legend_loc='on data',
    legend_fontsize=20,
)


# Annotate

In [None]:
# Load the cluster annotations and keep the rows for this tissue. `.copy()`
# makes the filtered frame independent, so the column assignment below does not
# trigger pandas' SettingWithCopyWarning.
annots = pd.read_csv("post_cellbender_annotations.csv")
annots = annots[annots['tissue'] == tissue].copy()
# Cluster labels are matched as strings.
annots['leiden_R'] = annots['leiden_R'].astype(str)

# One row per cluster (its first occurrence), indexed by cluster label.
cluster_annots = annots.drop_duplicates(subset='leiden_R', keep='first').set_index('leiden_R')

# Copy each annotation level onto the cells through their cluster label.
for annotation_col in ['general_celltype', 'general_CL_ID', 'celltype', 'CL_ID', 'subtype']:
    annotation_dict = cluster_annots[annotation_col].to_dict()
    adata.obs[annotation_col] = adata.obs['leiden_R'].map(annotation_dict)

# Report clusters that have no row in the annotation table; their cells get
# NaN labels, which would otherwise go unnoticed into the saved outputs.
unannotated_clusters = [c for c in adata.obs['leiden_R'].unique() if c not in cluster_annots.index]
if unannotated_clusters:
    print(f"Clusters without an annotation for {tissue}: {unannotated_clusters}")


In [None]:
# Square default figure size for the plots that follow.
plt.rcParams['figure.figsize'] = (8, 8)


In [None]:
# UMAP coloured by the new subtype annotation.
sc.pl.umap(
    adata,
    color=['subtype'],
    size=1,
    legend_loc='on data',
    legend_fontsize=10,
)


In [None]:
# UMAP coloured by the new celltype annotation.
sc.pl.umap(
    adata,
    color=['celltype'],
    size=1,
    legend_loc='on data',
    legend_fontsize=10,
)


In [None]:
# UMAP coloured by the new general_celltype annotation.
sc.pl.umap(
    adata,
    color=['general_celltype'],
    size=1,
    legend_loc='on data',
    legend_fontsize=10,
)


In [None]:
# Re-display the AnnData summary after adding the annotation columns.
adata

In [None]:
# Remove the temporary carry-over columns before export. errors='ignore' keeps
# this cell re-runnable once the columns are already gone.
adata.obs.drop(columns=['celltype_carryover', 'subtype_carryover'], inplace=True, errors='ignore')

In [None]:
# Export the annotated observation metadata as CSV.
meta = adata.obs
metadata_csv = f"../IGVF_analysis/cellbender_tissues/obs_tables/{tissue}_annotated_metadata.csv"
meta.to_csv(metadata_csv)


In [None]:
# Number of rows (cells) per subtype label.
# NOTE(review): the trailing "4600" is unexplained — presumably a count noted from an earlier run; confirm or remove.
meta['subtype'].value_counts() # 4600

In [None]:
# Save the annotated AnnData object.
adata.write_h5ad(
    f'../IGVF_analysis/cellbender_tissues/{tissue}_annotated.h5ad'
)
