In [None]:
import os
import warnings

import anndata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
from scipy.stats import pearsonr
from scipy.stats import spearmanr

# Session-wide configuration: warning filter, pandas display width, R bridge and fonts.
# NOTE(review): this silences every warning for the whole session, including
# pandas/anndata warnings about writing to views or chained assignment —
# consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")
# Let pandas display up to 500 columns before truncating.
pd.set_option('display.max_columns', 500)
# R interface
from rpy2.robjects import pandas2ri
from rpy2.robjects import r
import rpy2.rinterface_lib.callbacks
import anndata2ri
#import numpy2ri

# Activate the pandas and AnnData converters; the %%R cells below pass
# AnnData objects into R and read results back out.
pandas2ri.activate()
anndata2ri.activate()

# Render all matplotlib text in Arial.
plt.rcParams.update({
    'font.family': 'Arial'
})


# Load the rpy2 IPython extension that provides the %%R cell magic.
%load_ext rpy2.ipython

In [None]:
# Load the developing-brain reference (tagged below as the 'Braun' study) and
# keep only the two fetal POMC subtypes of interest.
adata_dev_brain = sc.read('/maps/projects/dan1/people/bns631/Reference/Data/human_dev_POMC_subtypes.h5ad')
dev_pomc_subtypes = ['POMC+/TBX3+/NR5A1-', 'POMC+/NR5A1+/TBX3-']
adata_dev_pomc = adata_dev_brain[adata_dev_brain.obs['pomc_subtype_1'].isin(dev_pomc_subtypes)].copy()

# Harmonise the metadata columns shared by all datasets in this notebook.
# Cell type labels get a ' | Fetal' suffix so the stage is visible in the label.
dev_age = adata_dev_pomc.obs['Age_round'].astype(str)
adata_dev_pomc.obs['Cell_types'] = adata_dev_pomc.obs['pomc_subtype_1'].astype(str) + ' | Fetal'
adata_dev_pomc.obs['Timepoint'] = 'PCW' + dev_age
adata_dev_pomc.obs['batch_key'] = dev_age
adata_dev_pomc.obs['Cell_types_orig'] = adata_dev_pomc.obs['Subregion']
adata_dev_pomc.obs['Study'] = 'Braun'
adata_dev_pomc.obs['Stage'] = 'Fetal'

print(adata_dev_pomc.obs['Cell_types'].value_counts())

# Sanity check the value ranges of X and of the two layers used downstream.
dev_log_layer = adata_dev_pomc.layers['log_transformed']
dev_counts_layer = adata_dev_pomc.layers['counts']
print('X min:', adata_dev_pomc.X.min(), '| X max:', adata_dev_pomc.X.max())
print('Layers log min:', dev_log_layer.min(), '| Layers log max:', dev_log_layer.max())
print('Layers counts min:', dev_counts_layer.min(), '| Layers counts max:', dev_counts_layer.max())

# In vitro neurons (d50-d70): keep the two POMC clusters.
# NOTE(review): the next cell reassigns `adata_in_vitro_pomc` from the Parse
# dataset, so on a top-to-bottom run this subset is superseded.
adata_neurons = sc.read('Data/adata_d50_d70_neurons.h5ad')
# .copy() turns the subset into an independent AnnData; writing obs columns on
# a view would otherwise trigger an implicit copy (and a suppressed warning).
adata_in_vitro_pomc = adata_neurons[adata_neurons.obs.Cell_types.isin(['POMC+/NR5A2+/TRH+', 'POMC+/PRDM12+/LEPR+'])].copy()

# Harmonise the metadata columns shared by all datasets in this notebook.
adata_in_vitro_pomc.obs['Timepoint'] = adata_in_vitro_pomc.obs['day']
adata_in_vitro_pomc.obs['batch_key'] = adata_in_vitro_pomc.obs['diff_batch_2']
adata_in_vitro_pomc.obs['Study'] = 'Abay-Nørgaard'
adata_in_vitro_pomc.obs['Stage'] = 'In Vitro'

# Suffix the labels so the origin stays visible after concatenation.
adata_in_vitro_pomc.obs['Cell_types'] = adata_in_vitro_pomc.obs['Cell_types'].astype(str) + ' | In vitro'


print(adata_in_vitro_pomc.obs.Cell_types.value_counts())

# Sanity check the value ranges of X and of the two layers used downstream.
print('X min:', adata_in_vitro_pomc.X.min(), '| X max:', adata_in_vitro_pomc.X.max())
print('Layers log min:', adata_in_vitro_pomc.layers['log_transformed'].min(), '| Layers log max:', adata_in_vitro_pomc.layers['log_transformed'].max())
print('Layers counts min:', adata_in_vitro_pomc.layers['counts'].min(), '| Layers counts max:', adata_in_vitro_pomc.layers['counts'].max())

In [None]:
# In vitro late-stage Parse dataset: keep the two POMC clusters used in the comparison.
adata_parse = sc.read('../Data/SC/parse_annotated_late_stage.h5ad')
# .copy() turns the subset into an independent AnnData; writing obs columns on
# a view would otherwise trigger an implicit copy (and a suppressed warning).
adata_in_vitro_pomc = adata_parse[adata_parse.obs.Cell_types.isin(['POMC+/SOX14+/NR5A1+', 'POMC+/TBX3+/NR5A2+'])].copy()

# Harmonise the metadata columns shared by all datasets in this notebook.
# Timepoint combines differentiation day and cell line; batch_key is the cell line.
adata_in_vitro_pomc.obs['Timepoint'] = adata_in_vitro_pomc.obs['day'].astype(str) + adata_in_vitro_pomc.obs['cell_line'].astype(str)
adata_in_vitro_pomc.obs['batch_key'] = adata_in_vitro_pomc.obs['cell_line']
adata_in_vitro_pomc.obs['Study'] = 'Abay-Nørgaard'
adata_in_vitro_pomc.obs['Stage'] = 'In Vitro'

# Suffix the labels so the origin stays visible after concatenation.
adata_in_vitro_pomc.obs['Cell_types'] = adata_in_vitro_pomc.obs['Cell_types'].astype(str) + ' | In vitro'

#adata_in_vitro_pomc = adata_in_vitro_pomc[adata_in_vitro_pomc.obs.day.isin(['d50','d60'])]



print(adata_in_vitro_pomc.obs.Cell_types.value_counts())

# Sanity check the value ranges of X and of the two layers used downstream.
print('X min:', adata_in_vitro_pomc.X.min(), '| X max:', adata_in_vitro_pomc.X.max())
print('Layers log min:', adata_in_vitro_pomc.layers['log_transformed'].min(), '| Layers log max:', adata_in_vitro_pomc.layers['log_transformed'].max())
print('Layers counts min:', adata_in_vitro_pomc.layers['counts'].min(), '| Layers counts max:', adata_in_vitro_pomc.layers['counts'].max())

In [None]:
# Fetal + adult hypothalamus reference: keep only ARC and VMH cells.
adata_hypo = sc.read('Data/fetal_adult_hypo_ref_annotations.h5ad')

adata_hypo = adata_hypo[adata_hypo.obs.Cell_types.isin(['ARC', 'VMH'])].copy()

# Preserve the matrix currently in X as the 'counts' layer before normalising.
adata_hypo.layers['counts'] = adata_hypo.X.copy()

sc.pp.normalize_total(adata_hypo)
sc.pp.log1p(adata_hypo)

# Keep the normalised, log1p-transformed matrix as its own layer as well.
adata_hypo.layers['log_transformed'] = adata_hypo.X.copy()

print(adata_hypo.obs.Cell_types.value_counts())

# Fetal ARC cells are integrated and re-clustered in the following cells.
# .copy() makes this an independent AnnData: later cells write obsm/obs on it,
# which on a view would force an implicit copy (and a suppressed warning).
adata_fetal_arc = adata_hypo[(adata_hypo.obs.Cell_types == 'ARC') & (adata_hypo.obs.Stage == 'Fetal')].copy()
print(adata_fetal_arc.obs.Timepoint.value_counts())

# Sanity check the value ranges of X and of the two layers used downstream.
print('X min:', adata_hypo.X.min(), '| X max:', adata_hypo.X.max())
print('Layers log min:', adata_hypo.layers['log_transformed'].min(), '| Layers log max:', adata_hypo.layers['log_transformed'].max())
print('Layers counts min:', adata_hypo.layers['counts'].min(), '| Layers counts max:', adata_hypo.layers['counts'].max())

In [None]:
# Drop timepoints with fewer than MIN_CELLS_PER_TIMEPOINT cells to avoid errors in batch correction.
MIN_CELLS_PER_TIMEPOINT = 50
timepoint_sizes = adata_fetal_arc.obs.Timepoint.value_counts()
kept_timepoints = timepoint_sizes[timepoint_sizes >= MIN_CELLS_PER_TIMEPOINT].index
# .copy() so that later writes to obs/obsm act on a real AnnData, not a view.
adata_fetal_arc = adata_fetal_arc[adata_fetal_arc.obs.Timepoint.isin(kept_timepoints)].copy()
adata_fetal_arc.obs.Timepoint.value_counts()

In [None]:
%%R -i adata_fetal_arc -o adata_integrated -o umap_emb -o pca_emb -o hvg_list

# Integrate the fetal ARC cells across timepoints with Seurat (anchors found
# with reduction = "rpca"), cluster the integrated data at several low
# resolutions, and hand the embeddings and clustered object back to Python.

# NOTE(review): presumably a workaround for a Matrix / SeuratObject version
# mismatch — confirm it is still required.
Csparse_validate = "CsparseMatrix_validate"
library(Seurat)
library(dplyr)

# Build a Seurat object from the 'counts' assay only (no pre-normalised data slot).
sobj <- as.Seurat(adata_fetal_arc, counts = "counts", data = NULL)

# One Seurat object per timepoint; each timepoint is treated as a batch.
sobj_list <- SplitObject(sobj, split.by = 'Timepoint')

# Normalise and pick 2000 variable features independently within each timepoint.
sobj_list <- lapply(X = sobj_list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

# Features used for integration, selected across all timepoints.
features <- SelectIntegrationFeatures(object.list = sobj_list, nfeatures = 2000)

# Scale and run PCA per timepoint on the shared features before anchor finding.
sobj_list <- lapply(X = sobj_list, FUN = function(x) {
    x <- ScaleData(x, features = features, verbose = FALSE)
    x <- RunPCA(x, features = features, verbose = FALSE)
  })

anchors <- FindIntegrationAnchors(object.list = sobj_list, anchor.features = features, reduction = "rpca")
# NOTE(review): k.weight = 60 is set explicitly; presumably chosen to suit the
# small per-timepoint cell numbers kept by the >= 50 cells filter — confirm.
sobj <- IntegrateData(anchorset = anchors,k.weight=60)
# Downstream dimensionality reduction and clustering run on the integrated assay.
DefaultAssay(sobj) <- "integrated"
sobj <- ScaleData(sobj) %>% RunPCA(.)
sobj <- RunUMAP(sobj, dims = 1:40)
sobj <- FindNeighbors(sobj, dims = 1:40)

# Cluster at several resolutions; each call adds its own metadata column
# (read back in Python as 'integrated_snn_res.<resolution>').
sobj <- FindClusters(sobj,resolution=0.10)
sobj <- FindClusters(sobj,resolution=0.08)
sobj <- FindClusters(sobj,resolution=0.07)
sobj <- FindClusters(sobj,resolution=0.06)
sobj <- FindClusters(sobj,resolution=0.05)


pca_emb = Embeddings(object = sobj, reduction = "pca")
# Taken while "integrated" is still the default assay, so these are the
# feature names of the integrated assay.
hvg_list = rownames(sobj)

# Switch the default assay to "originalexp" before converting back
# (presumably the assay created by as.Seurat from the input — confirm).
DefaultAssay(sobj) <- "originalexp"

umap_emb = Embeddings(object = sobj, reduction = "umap")
adata_integrated = as.SingleCellExperiment(sobj) 

In [None]:
# Copy the Seurat embeddings and cluster labels back onto the fetal ARC AnnData.
#
# The embedding matrices arrive as plain arrays without cell names, and the
# Seurat object was split by Timepoint and merged again during integration, so
# its cell order is not guaranteed to match `adata_fetal_arc`. Align rows by
# cell name using `adata_integrated`, which comes from the same Seurat object.
cell_pos = adata_integrated.obs_names.get_indexer(adata_fetal_arc.obs_names)
if (cell_pos < 0).any():
    raise ValueError('Some cells of adata_fetal_arc are missing from the integrated Seurat object')

adata_fetal_arc.obsm['X_umap'] = np.asarray(umap_emb)[cell_pos]
adata_fetal_arc.obsm['X_pca'] = np.asarray(pca_emb)[cell_pos]
#adata_fetal_arc.var['highly_variable'] = adata_fetal_arc.var.index.isin(hvg_list)

# Cluster labels are pandas Series and therefore align on cell name automatically.
for cluster_res in ('0.1', '0.08', '0.07', '0.05', '0.06'):
    cluster_col = f'integrated_snn_res.{cluster_res}'
    adata_fetal_arc.obs[cluster_col] = adata_integrated.obs[cluster_col]

In [None]:
# UMAPs of the integrated fetal ARC cells, one panel per clustering resolution.
cluster_res_keys = [
    f'integrated_snn_res.{res_label}'
    for res_label in ('0.1', '0.08', '0.07', '0.06', '0.05')
]
cluster_umap_rc = {"figure.dpi": 250, "figure.figsize": (4, 4)}

with plt.rc_context(cluster_umap_rc):
    sc.pl.umap(
        adata_fetal_arc,
        color=cluster_res_keys,
        frameon=False,
        use_raw=False,
        size=8,
        ncols=5,
        cmap='jet',
        colorbar_loc=None,
        wspace=-0.02,
        legend_loc='on data',
    )

In [None]:
# UMAPs coloured by POMC, PRDM12 and TBX3 expression to locate the POMC cluster.
pomc_marker_genes = ['POMC', 'PRDM12', 'TBX3']
marker_umap_rc = {"figure.dpi": 250, "figure.figsize": (4, 4)}

with plt.rc_context(marker_umap_rc):
    sc.pl.umap(
        adata_fetal_arc,
        color=pomc_marker_genes,
        frameon=False,
        use_raw=False,
        size=8,
        ncols=4,
        cmap='jet',
        colorbar_loc=None,
        wspace=-0.02,
        legend_loc='on data',
    )

In [None]:
# Annotate cluster 3 (resolution 0.07) as POMC, then combine the annotated
# fetal ARC POMC cluster with the entire fetal VMH cluster.

adata_fetal_arc.obs["Cell_types"] = 'ARC'

# Assign through a single .loc call on the DataFrame. The chained form
# `obs[col].loc[rows] = value` writes to a temporary Series and is a silent
# no-op under pandas Copy-on-Write.
is_pomc_cluster = adata_fetal_arc.obs['integrated_snn_res.0.07'].isin(['3'])
adata_fetal_arc.obs.loc[is_pomc_cluster, "Cell_types"] = "POMC+"

fetal_pomc = adata_fetal_arc[adata_fetal_arc.obs.Cell_types == "POMC+"]
# Fetal VMH cells from the full reference, excluding timepoint GW22.
fetal_vmh = adata_hypo[
    (adata_hypo.obs.Cell_types == 'VMH')
    & (adata_hypo.obs.Stage == 'Fetal')
    & (adata_hypo.obs.Timepoint != 'GW22')
]
adata_fetal_pomc_vmh = fetal_pomc.concatenate(fetal_vmh, batch_key=None, join='outer')

# Suffix the labels so the stage stays visible after concatenation.
adata_fetal_pomc_vmh.obs['Cell_types'] = adata_fetal_pomc_vmh.obs['Cell_types'].astype(str) + ' | Fetal'


adata_fetal_pomc_vmh.obs.Cell_types.value_counts()

In [None]:
# Extract the adult ARC POMC cluster (by its C4_named label) and the entire adult VMH.

adult_pomc_cluster = 'C4-373 Mid-2 GABA-GLU-3 POMC PRDM12'
is_adult_vmh = (adata_hypo.obs.Cell_types == 'VMH') & (adata_hypo.obs.Stage == 'Adult')
is_adult_pomc = adata_hypo.obs.C4_named == adult_pomc_cluster
# .copy() so the obs edits below act on a real AnnData rather than a view.
adata_adult_pomc_vmh = adata_hypo[is_adult_vmh | is_adult_pomc].copy()

# Work on plain strings: cells labelled 'ARC' take their C4_named cluster name,
# which is then shortened. This avoids replace() on a categorical column, whose
# behaviour is deprecated in recent pandas.
adult_labels = adata_adult_pomc_vmh.obs['Cell_types'].astype(str)
adult_labels = adult_labels.mask(adult_labels == 'ARC', adata_adult_pomc_vmh.obs['C4_named'].astype(str))
adult_labels = adult_labels.replace({adult_pomc_cluster: 'POMC+/PRDM12+'})

# Suffix the labels so the stage stays visible after concatenation.
adata_adult_pomc_vmh.obs['Cell_types'] = adult_labels + ' | Adult'
adata_adult_pomc_vmh.obs.Cell_types.value_counts()

In [None]:
#adata_concat.obs

In [None]:
# Combine the in vitro POMC clusters with the fetal and adult VMH and POMC clusters.
adata_concat = adata_fetal_pomc_vmh.concatenate(
    [adata_adult_pomc_vmh, adata_dev_pomc, adata_in_vitro_pomc],
    batch_key=None,
    join='inner',
)

# Keep only obs columns without missing values, i.e. those present in every dataset.
adata_concat.obs = adata_concat.obs.dropna(axis='columns', how='any')
# Drop all var annotations but keep the gene index.
adata_concat.var = adata_concat.var[[]]

# One batch label per (timepoint, study) pair.
adata_concat.obs['Timepoint_study'] = (
    adata_concat.obs['Timepoint'].astype(str) + adata_concat.obs['Study'].astype(str)
)

print(adata_concat.obs['Stage'].value_counts())

# Select 3000 highly variable genes across batches and subset the object to them.
sc.pp.highly_variable_genes(
    adata_concat,
    n_top_genes=3000,
    inplace=True,
    batch_key='Timepoint_study',
    subset=True,
)
features = adata_concat.var_names[adata_concat.var.highly_variable].tolist()

adata_concat.obs.groupby('Cell_types')['Stage'].value_counts().reset_index()

# Run MetaNeighbor analysis

In [None]:
%%R -i adata_concat -i features -o auroc -o auroc_col -o auroc_row
# Run MetaNeighborUS on the combined dataset and return the AUROC matrix
# together with its row and column names.

# NOTE(review): presumably a workaround for a Matrix / SeuratObject version
# mismatch — confirm it is still required.
Csparse_validate = "CsparseMatrix_validate"

library(MetaNeighbor)
library(SummarizedExperiment)
library(Seurat)

# Round-trip through Seurat, using only the 'counts' assay, to obtain a
# SingleCellExperiment for MetaNeighbor.
sobj <- as.Seurat(adata_concat, counts = "counts", data = NULL)

sce_data = as.SingleCellExperiment(sobj)

# Restricted to the genes in `features`; Timepoint_study is the study id and
# Cell_types the cell type label. Uses the 'counts' assay and the fast implementation.
auroc = MetaNeighborUS(var_genes = features, dat = sce_data, i = 'counts',fast_version=T,
                      study_id=sce_data$Timepoint_study, cell_type = sce_data$Cell_types)

# Exported separately so the matrix can be rebuilt as a labelled DataFrame in Python.
auroc_col = colnames(auroc)
auroc_row = rownames(auroc)

In [None]:
# Average the MetaNeighbor AUROC matrix over studies/timepoints so that each
# row and column corresponds to one cell type label.
auroc_df = pd.DataFrame(auroc, index=auroc_row, columns=auroc_col)

# Row/column names are split on '|' and the second part is used as the group label.
group_rows = auroc_df.index.str.split('|').str[1]
group_cols = auroc_df.columns.str.split('|').str[1]

# Group rows, then columns, and compute the mean. Columns are grouped by
# transposing: DataFrame.groupby(axis=1) is deprecated and removed in newer pandas.
mean_auroc_df = auroc_df.groupby(group_rows).mean().T.groupby(group_cols).mean().T

# Put '|' back in place of '.' in the labels (presumably MetaNeighbor replaced
# '|' inside the cell type names with '.' — confirm).
mean_auroc_df.columns = [label.replace('.', '|') for label in mean_auroc_df.columns]
mean_auroc_df.index = [label.replace('.', '|') for label in mean_auroc_df.index]

mean_auroc_df

In [None]:
# Lookup tables: cell type label -> hypothalamic nucleus / source publication.
nuclei_dict = {
    'POMC+ | Fetal': 'ARC',
    'POMC+/SOX14+/NR5A1+ | In vitro': 'VMH',
    'POMC+/NR5A1+/TBX3- | Fetal': 'VMH',
    'POMC+/PRDM12+ | Adult': 'ARC',
    'POMC+/TBX3+/NR5A2+ | In vitro': 'ARC',
    'POMC+/TBX3+/NR5A1- | Fetal': 'ARC',
    'VMH | Adult': 'VMH',
    'VMH | Fetal': 'VMH',
}

publication_dict = {
    'POMC+ | Fetal': 'Herb',
    'POMC+/SOX14+/NR5A1+ | In vitro': 'Abay-Nørgaard',
    'POMC+/NR5A1+/TBX3- | Fetal': 'Braun',
    'POMC+/PRDM12+ | Adult': 'Tadross',
    'POMC+/TBX3+/NR5A2+ | In vitro': 'Abay-Nørgaard',
    'POMC+/TBX3+/NR5A1- | Fetal': 'Braun',
    'VMH | Adult': 'Tadross',
    'VMH | Fetal': 'Herb',
}

# Colours for the annotation strips of the heatmap.
nuclei_color_dict = {'VMH': '#33A02C', 'ARC': '#FDBF6F'}
publication_color_dict = {
    'Herb': '#ACC3A6',
    'Abay-Nørgaard': '#B26E63',
    'Braun': '#CEC075',
    'Tadross': '#F49D6E',
}

# One row per column of the mean AUROC matrix, with its nucleus, publication and colours.
df = (
    pd.DataFrame({'cell_type': mean_auroc_df.columns.values.tolist()})
    .assign(nuclei=lambda tbl: tbl['cell_type'].map(nuclei_dict))
    .assign(nuclei_color=lambda tbl: tbl['nuclei'].map(nuclei_color_dict))
    .assign(publication=lambda tbl: tbl['cell_type'].map(publication_dict))
    .assign(publication_color=lambda tbl: tbl['publication'].map(publication_color_dict))
)

df

In [None]:
# Draw the mean AUROC matrix as a clustered heatmap with publication colour
# strips, then redraw the heatmap so rows and columns share the same order.
with plt.rc_context({ "figure.dpi": 400}): 
    # NOTE(review): GridSpec is imported here but not used in this cell.
    from matplotlib.gridspec import GridSpec
    from matplotlib.lines import Line2D
    
    # Horizontal colour bar with three fixed ticks.
    kws = dict(cbar_kws=dict(ticks=[0.25,0.5,0.75], orientation='horizontal'))

    # Initial clustermap
    g = sns.clustermap(mean_auroc_df, cmap="coolwarm",
                       row_colors=[df['publication_color'].values], col_colors=[df['publication_color'].values],
                       dendrogram_ratio=(.1, .2),
                       figsize=(8, 9), **kws)

    # Remove dendrograms
    g.ax_row_dendrogram.remove()
    g.ax_col_dendrogram.remove()

    # Extract the row order found by the clustering; it is applied to both axes below
    order = g.dendrogram_row.reordered_ind

    # Reorder the heatmap data (rows and columns use the same row-dendrogram order)
    reordered_data = mean_auroc_df.iloc[order, order]

    # Clear existing heatmap
    g.ax_heatmap.clear()

    # Redraw heatmap manually with reordered data; the colour bar drawn by
    # clustermap is kept, so no new one is requested here
    sns.heatmap(reordered_data, cmap="coolwarm", ax=g.ax_heatmap, cbar=False)

    
    # Move the heatmap's y-axis ticks to the left
    g.ax_heatmap.yaxis.tick_left()
    plt.setp(g.ax_heatmap.get_yticklabels(), rotation=360, fontsize=22, rotation_mode="anchor")

    plt.setp(g.ax_heatmap.get_xticklabels(), rotation=45, ha='right', fontsize=22, rotation_mode="anchor")

    
    # Place and label the colour bar (figure coordinates)
    g.ax_cbar.set_position([0.77, 0.36, 0.25, 0.03])
    g.ax_cbar.set_title('AUROC', fontsize=24)
    g.ax_cbar.tick_params(labelsize=16)



    # Reposition the colour strips to leave a gap between them and the heatmap
    g.ax_row_colors.set_position([.7055, .2675, .027, .5502])  # X, Y, width, height
    g.ax_col_colors.set_position([.1075, .8237, .591, .022])  


    
    # One round marker per publication for the legend.
    # NOTE(review): these colours repeat the values of publication_color_dict
    # defined in the previous cell; keep the two in sync.
    handles2 = [Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=16, label=label)
                              for label, color in {'Abay-Nørgaard':'#B26E63','Braun':'#CEC075','Herb':'#ACC3A6','Tadross':'#F49D6E'}.items()]


    # Add the publication legend to the right of the heatmap.
    # NOTE(review): `legend2` receives the return value of the trailing
    # set_position() call, not the Legend object.
    legend2 = g.ax_heatmap.legend(
        handles=handles2, loc='upper right', bbox_to_anchor=(1.76, 0.9), frameon=False,
        handletextpad=0.1, prop={'size': 24}, title='Publication', title_fontsize=24
    ).get_title().set_position((-10,0))

    
    # Save the figure to PDF.
    # NOTE(review): tight_layout() is called after savefig(), so it can only
    # affect the figure displayed by plt.show(), not the saved file.
    plt.savefig('arc_vmh_correlation.pdf', dpi=400, bbox_inches='tight')
    
    plt.tight_layout()
    plt.show()    

# Dotplot

In [None]:
# Rebuild the combined object (all genes shared by the datasets) for the dot plot:
# in vitro POMC clusters plus fetal and adult VMH and POMC clusters.
adata_concat = adata_fetal_pomc_vmh.concatenate(
    [adata_adult_pomc_vmh, adata_dev_pomc, adata_in_vitro_pomc],
    batch_key=None,
    join='inner',
)

# Keep only obs columns without missing values, i.e. those present in every dataset.
adata_concat.obs = adata_concat.obs.dropna(axis='columns', how='any')

# Order the cell type categories like the rows of the AUROC heatmap drawn above.
heatmap_row_order = [
    tick_label.get_text().split(', ')[0]
    for tick_label in g.ax_heatmap.get_yticklabels()
]
adata_concat.obs['Cell_types'] = adata_concat.obs['Cell_types'].astype('category')
adata_concat.obs['Cell_types'] = adata_concat.obs['Cell_types'].cat.reorder_categories(heatmap_row_order)

In [None]:
# Dot plot of VMH / ARC-POMC marker genes across the combined cell type groups.
dotplot_markers = ['GPR149', 'CBLN1', 'FEZF1', 'SOX14', 'NR5A1', 'POMC', 'PRDM12', 'TBX3', 'NR5A2']

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)

# font.size is set through rc_context so it is scoped to this figure; the
# original in-block rcParams.update was likewise reverted on leaving the context.
with plt.rc_context({"figure.dpi": 300, 'font.size': 12}):
    dp = sc.pl.dotplot(adata_concat, dotplot_markers, "Cell_types",
                       var_group_rotation=45, show=False, cmap='Blues')
    ax = dp["mainplot_ax"]
    ax.xaxis.set_tick_params(labelsize=12)
    ax.yaxis.set_tick_params(labelsize=12)
    plt.tight_layout()
    plt.savefig('figures/pomc_marker_dotplot.pdf', bbox_inches='tight')
    plt.show()