In [None]:
import scanpy as sc
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
import os, scipy
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as pyplot
from matplotlib.ticker import MaxNLocator
from sklearn.mixture import GaussianMixture as GMM

# R interface
from rpy2.robjects import pandas2ri
from rpy2.robjects import r
import rpy2.rinterface_lib.callbacks
import anndata2ri
#import numpy2ri


plt.rcParams.update({
    'font.family': 'Arial'
})

pandas2ri.activate()
anndata2ri.activate()

%load_ext rpy2.ipython


# Function determining cutoff for cell positive for marker
def expression_cutoff(gene, adata_temp):
    """Estimate the expression value above which a cell counts as positive for ``gene``.

    A two-component Gaussian mixture (fixed ``random_state=42``) is fitted to
    the ``log_transformed`` layer values of ``gene`` over all cells of
    ``adata_temp``. Each cell is assigned to one component and the cutoff is the
    larger of the two per-component minima, i.e. the lowest value seen in the
    component whose values start higher.

    Parameters
    ----------
    gene : str
        Variable name used to select one column of ``adata_temp``.
    adata_temp : AnnData-like
        Object that supports ``adata_temp[:, gene].layers['log_transformed']``.
        The layer may be a sparse matrix or a dense array.

    Returns
    -------
    numpy scalar
        The cutoff value. If the mixture assigns every cell to a single
        component, the minimum of that component is returned.
    """
    layer = adata_temp[:, gene].layers['log_transformed']
    # Sparse matrices expose .toarray(); dense layers are used as they are.
    data = layer.toarray() if hasattr(layer, 'toarray') else np.asarray(layer)
    data = data.reshape(-1, 1)  # GaussianMixture expects (n_samples, n_features)

    gmm = GMM(n_components=2, random_state=42).fit(data)
    labels = gmm.predict(data)

    expression = data.ravel()
    # Skip a component that received no cells: .min() of an empty array raises.
    component_minima = [
        expression[labels == component].min()
        for component in (0, 1)
        if np.any(labels == component)
    ]
    return max(component_minima)

# d50_d70 all cells

## umaps/Featureplots

In [None]:
# Load the d50/d70 dataset used by every cell of the "all cells" section.
adata_d50_d70 = sc.read('../Data/adata_d50_d70.h5ad')

# Leftover fragment of a cell-type -> colour mapping (its first key is cut off):
#'#EF6B46', 'Immature ARC neurons': '#3A89BE', 'Tanycytes': '#FAAA4E', 'Telencephalic neurons': '#EDDB7E'

In [None]:
# UMAP of all cells, coloured by annotated cell type.
with plt.rc_context({"figure.figsize": (4, 4), "figure.dpi": 250}):
    sc.pl.umap(
        adata_d50_d70,
        color='Cell_types',
        palette=['#EF6B46', '#3A89BE', '#FAAA4E', '#EDDB7E'],
        size=8,
        frameon=False,
    )

# Uncoloured UMAP of all cells as background, with the tanycytes drawn on top
# and coloured by `day`.
with plt.rc_context({"figure.figsize": (4, 4), "figure.dpi": 250}):
    ax = sc.pl.umap(adata_d50_d70, size=8, frameon=False, show=False)

    tanycytes = adata_d50_d70[adata_d50_d70.obs.Cell_types.isin(['Tanycytes'])]
    sc.pl.umap(
        tanycytes,
        color=['day'],
        palette=['#C8B3D7', '#A7CFE4'],
        size=9,
        frameon=False,
        legend_loc=False,
        ax=ax,
        save='_d50_pseudotime_color.pdf',
    )

In [None]:
# One feature plot per gene, drawn from the log-transformed layer and saved as PDF.
for color in ['NPFFR2']:
    with plt.rc_context({"figure.figsize": (4, 4), "figure.dpi": 300}):
        sc.pl.umap(
            adata_d50_d70,
            color=color,
            layer='log_transformed',
            use_raw=False,
            cmap='jet',
            size=12,
            ncols=2,
            frameon=False,
            colorbar_loc=None,
            show=False,
            save=f'd50_d70_{color}.pdf',
        )

# Receptor analysis

In [None]:
# Load the HYPOMAP file and keep only the cells whose `region` is 'ARC'.
adata_adult = sc.read('/datasets/renew_kirkeby/erno/human_HYPOMAP.h5ad')
# .copy() turns the subset into an independent object, so the layer added
# below is not written onto a view of the full dataset.
adata_adult_arc = adata_adult[adata_adult.obs.region == 'ARC'].copy()
# expression_cutoff() reads the 'log_transformed' layer, so expose .X under that name.
# NOTE(review): assumes .X already holds log-transformed values — confirm.
adata_adult_arc.layers['log_transformed'] = adata_adult_arc.X.copy()

# The full object is no longer needed once the ARC subset has been copied.
del adata_adult
adata_adult_arc

In [None]:
# Parallel lists: one entry per (gene, batch or donor) combination.
gene_list = ['LEPR', 'GLP1R']
gene_l, diff_l, value_l, data_l = [], [], [], []

adata_adult_arc.obs.Donor_ID = adata_adult_arc.obs.Donor_ID.astype('category')

for gene in gene_list:
    # d50/d70 dataset: one cutoff for the whole dataset, one percentage per batch.
    cutoff = expression_cutoff(gene, adata_d50_d70)
    print(gene, cutoff)

    for batch in adata_d50_d70.obs.diff_batch.cat.categories:
        adata_temp = adata_d50_d70[adata_d50_d70.obs.diff_batch == batch]
        print(batch)

        is_positive = adata_temp[:, gene].layers['log_transformed'] >= cutoff
        percent_positive = adata_temp[is_positive, :].shape[0] / adata_temp.shape[0] * 100

        gene_l.append(gene)
        diff_l.append(batch)
        value_l.append(percent_positive)
        data_l.append('In vitro')

    # ARC subset: the cutoff is refitted on this dataset, one percentage per donor.
    cutoff = expression_cutoff(gene, adata_adult_arc)

    for batch in adata_adult_arc.obs.Donor_ID.cat.categories:
        adata_temp = adata_adult_arc[adata_adult_arc.obs.Donor_ID == batch]
        print(batch)

        is_positive = adata_temp[:, gene].layers['log_transformed'] >= cutoff
        percent_positive = adata_temp[is_positive, :].shape[0] / adata_temp.shape[0] * 100

        gene_l.append(gene)
        diff_l.append(batch)
        value_l.append(percent_positive)
        data_l.append('Adult human')

        


In [None]:
# Long-format table: one row per (gene, batch/donor) with its percentage and dataset label.
df = pd.DataFrame(
    dict(
        zip(
            ['gene', 'diff', 'value', 'data'],
            [gene_l, diff_l, value_l, data_l],
        )
    )
)
df

In [None]:
from matplotlib.lines import Line2D

# Grouped bar plot of `value` per gene, split by dataset (`data`), with the
# individual rows of `df` overlaid as points. Saved as receptor_exp.pdf.
with plt.rc_context({ "figure.dpi": 300, "figure.figsize": [2.8, 2.3]}):
    # Bars without error bars (errorbar=None).
    ax = sns.barplot(x='gene', y='value', hue='data',
                     palette=['gray', "#C0C0C0"], data=df, errorbar=None, alpha=1)

    # Hide the top and right frame lines.
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    ax.set_ylabel('% positive cells', fontsize=15)

    plt.xticks(fontsize=11)
    plt.yticks(fontsize=10)

    # Add stripplot but exclude it from the legend
    sns.stripplot(
        x="gene", y="value", hue='data',
        data=df, dodge=True, size=4, color='black',
        ax=ax, edgecolor='black', alpha=0.9, legend=False
    )

    ax.set_xlabel('')
    
    # Custom round markers with same colors as the palette
    # Only `labels` is used below; `handles` is replaced by the custom markers.
    handles, labels = ax.get_legend_handles_labels()
    custom_lines = [
        Line2D([0], [0], marker='o', color='w', label=labels[0],
               markerfacecolor='gray', markersize=8),
        Line2D([0], [0], marker='o', color='w', label=labels[1],
               markerfacecolor='#C0C0C0', markersize=8)
    ]

    # Legend anchored at 85% of the axes width, vertically centred.
    ax.legend(custom_lines, [labels[0], labels[1]],
              loc='center left', bbox_to_anchor=(0.85, 0.5),
              frameon=False, handletextpad=-0.3)

    plt.tight_layout()
    plt.savefig('receptor_exp.pdf')
    plt.show()

# Tanycyte barplot

In [None]:
# Split the dataset by culture format.
adata_2d = adata_d50_d70[adata_d50_d70.obs.dimensionality == '2D']
adata_3d = adata_d50_d70[adata_d50_d70.obs.dimensionality == 'spheroid']

# Percentage of cells annotated as Tanycytes in each subset, in the order of `keys`.
keys = ['2D', '3D']
values = [
    subset.obs.Cell_types.value_counts(normalize=True)['Tanycytes'] * 100
    for subset in (adata_2d, adata_3d)
]

In [None]:
# Per-batch cell counts within the 2D subset: Tanycytes only (`cell`) and all cells (`batch`).
# NOTE(review): `cell` is not read again in the visible cells and `batch` is only
# rebound as a loop variable — confirm whether this cell is still needed.
cell = adata_2d[adata_2d.obs.Cell_types == 'Tanycytes'].obs.diff_batch.value_counts()
batch = adata_2d.obs.diff_batch.value_counts()


In [None]:
# Bar plot of the Tanycyte percentage in 2D vs 3D cultures (`keys` / `values`).
with plt.rc_context({ "figure.dpi": 500, "figure.figsize": [2.1, 2.6 ]}):
    # Vertical bars, one per entry of `keys`. No hue is mapped, so a single
    # `color` is passed instead of a one-element `palette`.
    ax = sns.barplot(x=keys, y=values, color='#faaa4e', width=0.75)

    # Hide the top and right frame lines.
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')

    ax.set_ylabel('% of cells', fontsize=15)
    ax.set_xlabel(None)
    plt.xticks(fontsize=15)
    plt.yticks(fontsize=12)
    plt.locator_params(axis='y', nbins=4)
    plt.title('Tanycytes', fontsize=15)
    plt.tight_layout()
    plt.savefig('tanycyte_percentage.pdf')
    plt.show()

# Dotplot

In [None]:
# Marker-gene dot plot grouped by cell type, with one coloured circle per row
# drawn to the left of the y tick labels. Saved under figures/.
with plt.rc_context({"figure.dpi": 300}): 
    plt.rcParams.update({'font.size': 12})
    # show=False so the axes can be customised before display.
    dp = sc.pl.dotplot(adata_d50_d70, ['STMN2','TBX3', 'NR5A2', 'SOX2','RAX','NFIA','CRYM','DIO2','LHX8', 'FOXG1'], "Cell_types",
                       show=False, cmap='Blues')
    ax = dp["mainplot_ax"]
    ax.xaxis.set_tick_params(labelsize=15)
    ax.yaxis.set_tick_params(labelsize=15)
    
    # Pair each y tick with a cell-type colour taken from .uns['Cell_types_colors'].
    # NOTE(review): assumes the dot plot rows follow the category order — confirm.
    ytick_positions = ax.get_yticks()
    cell_type_colors = dict(zip(list(adata_d50_d70.obs.Cell_types.cat.categories), list(adata_d50_d70.uns['Cell_types_colors'])))
    
    from matplotlib.patches import Circle
    
    for y_pos, cell_type in zip(ytick_positions, list(adata_d50_d70.obs.Cell_types.cat.categories)):
        color = cell_type_colors.get(cell_type, "black")  # Default to black if no match

        # Add a circle left of the plot area, in data coordinates and unclipped
        # (adjust the x offset -9.6 and the radius as needed).
        circle = Circle((-9.6, y_pos), radius=0.3, color=color, transform=ax.transData, clip_on=False)
        ax.add_patch(circle)
    
    #ax.tick_params(axis='y', pad=11)
    #ax.tick_params(axis='y', length=3.5, width=2)

    plt.tight_layout()
    plt.savefig('figures/d50_70_marker_dotplot.pdf', bbox_inches='tight')
    plt.show()

# Cluster percentages

In [None]:
# Number of cells per (diff_batch_2, Cell_types) combination.
cell_metadata = adata_d50_d70.obs
counts = (
    cell_metadata
    .groupby(['diff_batch_2', 'Cell_types'])
    .size()
    .reset_index(name='counts')
)

# Total number of cells per batch.
totals = cell_metadata.groupby('diff_batch_2').size().reset_index(name='total_counts')

# Attach the batch totals and express each count as a fraction of its batch.
counts = counts.merge(totals, on='diff_batch_2')
counts['normalized_counts'] = counts['counts'].div(counts['total_counts'])

# Display the table.
counts

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.lines as mlines

category_names = list(adata_d50_d70.obs['Cell_types'].cat.categories)

# Per-batch cell-type percentages (rounded to 0.1), ordered by the sorted
# value_counts index; the dict is keyed in the order batch-3, batch-2, batch-1.
results = {
    batch_name: list(
        adata_d50_d70[adata_d50_d70.obs.diff_batch_2.isin([batch_name])]
        .obs['Cell_types']
        .value_counts(normalize=True)
        .mul(100)
        .round(1)
        .sort_index(ascending=True)
        .values
    )
    for batch_name in ('batch-3', 'batch-2', 'batch-1')
}



def survey_horizontal(results, category_names, category_colors=None):
    """Draw one horizontal stacked bar per entry of ``results``.

    Parameters
    ----------
    results : dict
        Maps a bar label to a list of values, one per category, in the same
        order as ``category_names``. All lists must have the same length.
    category_names : list
        Category labels, one per stacked segment.
    category_colors : list, optional
        One colour per category, in the same order as ``category_names``.
        Defaults to the four-colour palette originally hard-coded here.

    Returns
    -------
    (fig, ax)
        The created matplotlib figure and axes.
    """
    if category_colors is None:
        category_colors = ['#EF6B46', '#3A89BE', '#FAAA4E', '#EDDB7E']

    # Names, colours and data columns are all reversed together, so the last
    # category is stacked first (left-most).
    category_names = category_names[::-1]
    category_colors = list(reversed(category_colors))
    labels = list(results.keys())
    data = np.array(list(results.values()))[:, ::-1]
    data_cum = data.cumsum(axis=1)

    with plt.rc_context({"figure.dpi": 250}):
        fig, ax = plt.subplots(figsize=(3.5, 2.5))
        ax.set_xlim(0, np.sum(data, axis=1).max())
        ax.set_ylim(-0.5, len(labels) - 0.5)

        for i, (colname, color) in enumerate(zip(category_names, category_colors)):
            widths = data[:, i]
            # Each segment starts where the cumulative sum of the previous ones ends.
            starts = data_cum[:, i] - widths
            ax.barh(labels, widths, left=starts, height=0.7,
                    label=colname, color=color)

        ax.set_xlabel('% of cells', fontsize=11)
        ax.set_ylabel('')
        ax.set_yticks(range(len(labels)))
        ax.set_yticklabels(labels, fontsize=11)
        ax.set_xticks(np.linspace(0, 100, 5))
        ax.tick_params(axis='x', labelsize=8)

    return fig, ax

# Draw the stacked bars; the returned (fig, ax) is not kept, so the pyplot
# calls below act on the current figure.
survey_horizontal(results, category_names)
# These tick font sizes override the ones set inside survey_horizontal.
plt.xticks(fontsize=9)
plt.yticks(fontsize=11)
plt.savefig('figures/cluster_percentages_d50.pdf', bbox_inches='tight')
plt.show()
#plt.ylabel('% of cells', fontsize=11, labelpad=-1)

## DE analysis

In [None]:
# Re-read the dataset from disk so the DE analysis starts from the saved object
# rather than from whatever earlier cells left in memory.
adata_d50_d70 = sc.read('../Data/adata_d50_d70.h5ad')


In [None]:
%%R -i adata_d50_d70

# Pseudo-bulk marker detection with edgeR: each cell type is tested against the
# average of all other cell types, with batch as a covariate. The top
# up-regulated genes per cell type are written to a CSV file.

# NOTE(review): presumably a compatibility shim for a Matrix/Seurat version mismatch — confirm it is still needed.
Csparse_validate = "CsparseMatrix_validate"
library(Seurat)
library(edgeR)

# Convert the imported object to Seurat, taking the "counts" assay as counts and no normalised data slot.
seur <- as.Seurat(adata_d50_d70, counts = "counts", data = NULL)

#seur <- readRDS("Data/d50_d70_neurons_seurat.rds")
seur <- RenameAssays(seur, originalexp="RNA")

# Build the pseudo-bulk object from sample = diff_batch_2 and cluster = Cell_types.
y <- Seurat2PB(seur, sample = "diff_batch_2", cluster = "Cell_types")
# Drop pseudo-bulk samples with a library size of 5e4 or less.
keep.samples <- y$samples$lib.size > 5e4
y <- y[, keep.samples]
# Drop lowly expressed genes, then recompute library sizes (keep=FALSE) and normalise.
keep.genes <- filterByExpr(y, group=y$samples$cluster)
y <- y[keep.genes, , keep=FALSE]
y <- normLibSizes(y)

cluster <- as.factor(y$samples$cluster)

# Additive design: intercept + cluster + batch.
batch <- factor(y$samples$sample)
design <- model.matrix(~ cluster + batch)
# Strip the "batch" prefix from the design column names and rename the intercept.
colnames(design) <- gsub("batch", "", colnames(design))
colnames(design)[1] <- "Int"
head(design)

y <- estimateDisp(y, design, robust=TRUE)
fit <- glmQLFit(y, design, robust=TRUE)

# Contrast matrix with one column per cluster: 1 for that cluster and
# 1/(1-ncls) for every other cluster. The intercept row and all rows beyond
# the first ncls (the batch coefficients) are 0.
ncls <- nlevels(cluster)
contr <- rbind( matrix(1/(1-ncls), ncls, ncls), matrix(0, ncol(design)-ncls, ncls) )
diag(contr) <- 1
contr[1,] <- 0
rownames(contr) <- colnames(design)
colnames(contr) <- paste0("cluster", levels(cluster))
contr

# One quasi-likelihood F-test per cluster, labelled "cluster<name>_vs_others".
qlf <- list()
for(i in 1:ncls){
 qlf[[i]] <- glmQLFTest(fit, contrast=contr[,i])
 qlf[[i]]$comparison <- paste0("cluster", levels(cluster)[i], "_vs_others")
}

# Number of top up-regulated genes kept per cluster.
top <- 500
topMarkers <- list()

# Empty data frame that accumulates the per-cluster result tables.
de_df = data.frame(matrix( 
  vector(), 0, 7, dimnames=list(c(), c("gene","logFC","logCPM","F","PValue","FDR",'comparison'))), 
                stringsAsFactors=F)

for(i in 1:ncls) {
    #print(head(qlf[[i]])$comparison)
    # Rank genes by p-value and keep the first `top` of those with positive logFC.
    ord <- order(qlf[[i]]$table$PValue, decreasing=FALSE)
    up <- qlf[[i]]$table$logFC[ord] > 0
    topMarkers[[i]] <- rownames(y)[ord[up][1:top]]
    #genes = 
    # Full result table, restricted to the selected marker genes.
    df = as.data.frame(topTags(qlf[[i]], n='all'))
    df =df[rownames(df) %in% rownames(y)[ord[up][1:top]], ]  
    df$comparison <- head(qlf[[i]])$comparison
    de_df = rbind(de_df, df)
}
print(dim(de_df))

write.csv(de_df, "DE_lists/d50_70_arc_de_list.csv")

In [None]:
# Read the edgeR marker table and pull the cluster name out of the comparison label.
de_genes = pd.read_csv('DE_lists/d50_70_arc_de_list.csv', index_col=0)
de_genes['cluster'] = de_genes['comparison'].str.extract(r'cluster(.*?)_vs_others')

# Keep the reporting columns and only genes with FDR < 0.05 and logFC > 1.
report_columns = ['gene', 'logFC', 'logCPM', 'F', 'PValue', 'FDR', 'comparison', 'cluster']
is_marker = (de_genes['FDR'] < 0.05) & (de_genes['logFC'] > 1)
de_genes = de_genes.loc[is_marker, report_columns]

de_genes.to_excel("DE_lists/d50_70_arc_de_list.xlsx")
de_genes

In [None]:
# For each cluster, the first 15 genes in the row order of `de_genes`.
# Grouping once (sort=False keeps clusters in order of first appearance)
# avoids rebuilding the groupby on every iteration.
de_dict = {
    str(cluster): list(cluster_genes)[:15]
    for cluster, cluster_genes in de_genes.groupby('cluster', sort=False)['gene']
}
#de_dict
de_genes['cluster'].value_counts()

In [None]:
# Heatmap of the per-cluster marker genes in `de_dict`, grouped by cell type.
# NOTE(review): figsize (100, 10) inches at 600 dpi is a very large canvas — confirm this is intended.
with plt.rc_context({ "figure.dpi": 600, "figure.figsize": (100,10)}):
    sc.pl.heatmap(adata_d50_d70,de_dict, groupby='Cell_types', show_gene_labels=True, layer='log_transformed',cmap='jet')

# Neurons d50_d70


In [None]:
# Load the neuron dataset used by the "Neurons d50_d70" section.
adata_neurons = sc.read('../Data/adata_d50_d70_neurons.h5ad')


In [None]:
# Parallel lists: one entry per (gene, batch) combination.
gene_list = ['SST', 'PNOC', 'TRH']
gene_l, diff_l, value_l, data_l = [], [], [], []

for gene in gene_list:
    # One cutoff per gene, fitted on all neurons.
    cutoff = expression_cutoff(gene, adata_neurons)
    print(gene, cutoff)

    for batch in adata_neurons.obs.diff_batch.cat.categories:
        adata_temp = adata_neurons[adata_neurons.obs.diff_batch == batch]
        print(batch)

        # Percentage of this batch's cells at or above the cutoff.
        is_positive = adata_temp[:, gene].layers['log_transformed'] >= cutoff
        percent_positive = adata_temp[is_positive, :].shape[0] / adata_temp.shape[0] * 100

        gene_l.append(gene)
        diff_l.append(batch)
        value_l.append(percent_positive)
        data_l.append('In vitro')

df = pd.DataFrame(
    dict(
        zip(
            ['gene', 'diff', 'value', 'data'],
            [gene_l, diff_l, value_l, data_l],
        )
    )
)

In [None]:
gene1 = 'PNOC'
gene2 = 'LEPR'
#gene3 = 'GLP1R'

# Cutoffs are fitted on the 'log_transformed' layer (see expression_cutoff) ...
cutoff_gene1 = expression_cutoff(gene1,adata_neurons)
cutoff_gene2 = expression_cutoff(gene2,adata_neurons)

# ... so they are applied to that same layer rather than to .X.
layer_gene1 = adata_neurons[:, gene1].layers['log_transformed']
layer_gene2 = adata_neurons[:, gene2].layers['log_transformed']
# Flatten each (n_cells, 1) column to 1-D; sparse layers are densified first.
expr_gene1 = (layer_gene1.toarray() if hasattr(layer_gene1, 'toarray') else np.asarray(layer_gene1)).ravel()
expr_gene2 = (layer_gene2.toarray() if hasattr(layer_gene2, 'toarray') else np.asarray(layer_gene2)).ravel()

# A cell is co-expressing when both genes reach their cutoff.
adata_neurons.obs['coex'] = (expr_gene1 >= cutoff_gene1) & (expr_gene2 >= cutoff_gene2)
# Convert to strings and give the positive group a readable label.
adata_neurons.obs['coex'] = adata_neurons.obs['coex'].astype(str)
adata_neurons.obs.loc[adata_neurons.obs['coex'] == 'True', 'coex'] = f'{gene1}+/{gene2}+'


from matplotlib.colors import ListedColormap
adata_neurons.uns['coex_colors'] = ['#4C72B0', '#C44E52', '#C44E52']
# Only the co-expressing group is drawn in colour (`groups=`).
with pyplot.rc_context({ "figure.dpi": 300, "figure.figsize": (3.8, 4)}):
    sc.pl.umap(adata_neurons, color=['coex'], 
               frameon=False, colorbar_loc=None, ncols=4, size=15, title='Co-expression',
                na_in_legend=False,  groups=[f'{gene1}+/{gene2}+'], save='co_exp_pnoc_lepr.pdf')

## Dotplot

In [None]:
# Re-read the neuron dataset from disk; this discards the `coex` column and
# `coex_colors` entry added in the co-expression cell.
adata_neurons = sc.read('../Data/adata_d50_d70_neurons.h5ad')

In [None]:
# Marker-gene dot plot for the neuron subtypes, with one coloured circle per
# row drawn to the left of the y tick labels. Saved under figures/.
with plt.rc_context({"figure.dpi": 300}): 
    plt.rcParams.update({'font.size': 12})
    # show=False so the axes can be customised before display.
    dp = sc.pl.dotplot(adata_neurons, ['OTP','AGRP','CRABP1','TRH','DLX6-AS1','FOXP2',  'GHRH','PNOC','NR5A2','ONECUT1','ADGRL4','PCSK1','NPFFR2','POMC','PRDM12','UNC13C'], "Cell_types",
                       show=False, cmap='Blues')
    ax = dp["mainplot_ax"]
    ax.xaxis.set_tick_params(labelsize=15)
    ax.yaxis.set_tick_params(labelsize=15)
    
    # Pair each y tick with a cell-type colour taken from .uns['Cell_types_colors'].
    # NOTE(review): assumes the dot plot rows follow the category order — confirm.
    ytick_positions = ax.get_yticks()
    cell_type_colors = dict(zip(list(adata_neurons.obs.Cell_types.cat.categories), list(adata_neurons.uns['Cell_types_colors'])))
    
    from matplotlib.patches import Circle
    
    for y_pos, cell_type in zip(ytick_positions, list(adata_neurons.obs.Cell_types.cat.categories)):
        color = cell_type_colors.get(cell_type, "black")  # Default to black if no match

        # Add a circle left of the plot area, in data coordinates and unclipped
        # (adjust the x offset -9.8 and the radius as needed).
        circle = Circle((-9.8, y_pos), radius=0.3, color=color, transform=ax.transData, clip_on=False)
        ax.add_patch(circle)
    
    #ax.tick_params(axis='y', pad=11)
    #ax.tick_params(axis='y', length=3.5, width=2)

    plt.tight_layout()
    plt.savefig('figures/d50_70_neurons_marker_dotplot.pdf', bbox_inches='tight')
    plt.show()

# Cluster percentages

In [None]:
# Number of neurons per (diff_batch_2, Cell_types) combination.
neuron_metadata = adata_neurons.obs
counts = (
    neuron_metadata
    .groupby(['diff_batch_2', 'Cell_types'])
    .size()
    .reset_index(name='counts')
)

# Total number of neurons per batch.
totals = neuron_metadata.groupby('diff_batch_2').size().reset_index(name='total_counts')

# Attach the batch totals and express each count as a fraction of its batch.
counts = counts.merge(totals, on='diff_batch_2')
counts['normalized_counts'] = counts['counts'].div(counts['total_counts'])

# Display the table.
counts

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.lines as mlines

category_names = list(adata_neurons.obs['Cell_types'].cat.categories)

# Per-batch neuron-subtype percentages (rounded to 0.1), ordered by the sorted
# value_counts index; the dict is keyed in the order batch-3, batch-2, batch-1.
results = {
    batch_name: list(
        adata_neurons[adata_neurons.obs.diff_batch_2.isin([batch_name])]
        .obs['Cell_types']
        .value_counts(normalize=True)
        .mul(100)
        .round(1)
        .sort_index(ascending=True)
        .values
    )
    for batch_name in ('batch-3', 'batch-2', 'batch-1')
}



def survey_horizontal(results, category_names, category_colors=None):
    """Draw one horizontal stacked bar per entry of ``results``.

    Note: this definition replaces any earlier ``survey_horizontal`` in the
    notebook and returns three values.

    Parameters
    ----------
    results : dict
        Maps a bar label to a list of values, one per category, in the same
        order as ``category_names``. All lists must have the same length.
    category_names : list
        Category labels, one per stacked segment.
    category_colors : list, optional
        One colour per category, in the same order as ``category_names``.
        Defaults to the ten-colour palette originally hard-coded here.

    Returns
    -------
    (fig, ax, category_colors)
        The figure, the axes, and the colour list in the reversed order in
        which it was applied to the segments.
    """
    if category_colors is None:
        category_colors = ['#3586bd', '#FAAA4E', '#ED9892', '#B15A27', '#e85b3d',
                           '#51a148', '#91cc6a', '#EDDB7E', '#91c2da', '#D0A9B7']

    # Names, colours and data columns are all reversed together, so the last
    # category is stacked first (left-most).
    category_names = category_names[::-1]
    category_colors = list(reversed(category_colors))
    labels = list(results.keys())
    data = np.array(list(results.values()))[:, ::-1]
    data_cum = data.cumsum(axis=1)

    with plt.rc_context({"figure.dpi": 250}):
        fig, ax = plt.subplots(figsize=(3.5, 2.5))
        ax.set_xlim(0, np.sum(data, axis=1).max())
        ax.set_ylim(-0.5, len(labels) - 0.5)

        for i, (colname, color) in enumerate(zip(category_names, category_colors)):
            widths = data[:, i]
            # Each segment starts where the cumulative sum of the previous ones ends.
            starts = data_cum[:, i] - widths
            ax.barh(labels, widths, left=starts, height=0.7,
                    label=colname, color=color)

        ax.set_xlabel('% of cells', fontsize=11)
        ax.set_ylabel('')
        ax.set_yticks(range(len(labels)))
        ax.set_yticklabels(labels, fontsize=11)
        ax.set_xticks(np.linspace(0, 100, 5))
        ax.tick_params(axis='x', labelsize=8)

    return fig, ax, category_colors

# Draw the stacked bars; only the (reversed) colour list is kept, in `col`.
# The pyplot calls below act on the current figure.
_,_, col = survey_horizontal(results, category_names)
# These tick font sizes override the ones set inside survey_horizontal.
plt.xticks(fontsize=9)
plt.yticks(fontsize=11)
plt.savefig('figures/cluster_percentages_d50_neurons.pdf', bbox_inches='tight')
plt.show()

## Subtype proportions

In [None]:
# Re-read the neuron dataset from disk so this section starts from the saved object.
adata_neurons = sc.read('../Data/adata_d50_d70_neurons.h5ad')

In [None]:
# Percentage of neurons positive for each marker (or co-positive for an
# 'A/B' marker pair), computed separately for 2D and spheroid cultures in
# every batch.
gene_list, perc_2d, perc_3d = ['AGRP','POMC/PRDM12','GHRH','PNOC','SST','TRH'], [], []

adata_2d = adata_neurons[adata_neurons.obs.dimensionality.isin(['2D'])].copy()
adata_3d = adata_neurons[adata_neurons.obs.dimensionality.isin(['spheroid'])].copy()

# The cutoffs depend only on the gene and the full neuron dataset, so each
# mixture model is fitted once here instead of once per batch.
gene_cutoffs = {
    single_gene: expression_cutoff(single_gene, adata_neurons)
    for gene in gene_list
    for single_gene in gene.split('/')
}
print(gene_cutoffs)


def _percent_positive(adata_subset, genes, cutoffs):
    """Percentage of cells in ``adata_subset`` at or above the cutoff for every gene in ``genes``.

    Returns NaN for an empty subset instead of raising ZeroDivisionError.
    """
    n_cells = adata_subset.shape[0]
    if n_cells == 0:
        return float('nan')
    positive = np.ones(n_cells, dtype=bool)
    for single_gene in genes:
        layer = adata_subset[:, single_gene].layers['log_transformed']
        # Sparse layers are densified; either way the column is flattened to 1-D.
        expression = layer.toarray() if hasattr(layer, 'toarray') else np.asarray(layer)
        positive &= expression.ravel() >= cutoffs[single_gene]
    return float(positive.sum()) / n_cells * 100


gene_dict = {}
for batch in adata_neurons.obs.diff_batch_2.cat.categories.to_list():
    # The per-batch subsets do not depend on the gene, so build them once per batch.
    adata_2d_temp = adata_2d[adata_2d.obs.diff_batch_2 == batch]
    adata_3d_temp = adata_3d[adata_3d.obs.diff_batch_2 == batch]

    for gene in gene_list:
        co_genes = gene.split('/')  # a single gene yields a one-element list
        gene_2d = _percent_positive(adata_2d_temp, co_genes, gene_cutoffs)
        gene_3d = _percent_positive(adata_3d_temp, co_genes, gene_cutoffs)

        # Key is gene + batch; a later cell splits it on 'batch' to recover the gene.
        gene_dict.update({gene+batch:[gene_2d,gene_3d, batch]})


In [None]:
data['difference'] = data['3d'] - data['2d']


df = data.groupby(['genes'])['3d'].agg(
    mean='mean',
    std=lambda x: x.std(ddof=0)
).reset_index()

df['mean'] = df['mean'].round(2)
df['std'] = df['std'].round(2)

df

In [None]:
data = pd.DataFrame({'genes': gene_dict.keys(), '2d': [v[0] for v in gene_dict.values()],'3d':[v[1] for v in gene_dict.values()], 'batch':[v[2] for v in gene_dict.values()]})
data.genes = [gene.split('batch')[0] for gene in data.genes.values]

gene_df = pd.melt(data, id_vars=['genes'], value_vars=['2d', '3d'], 
                      var_name='batch', value_name='percentage')
gene_df

In [None]:
from matplotlib.lines import Line2D

# Horizontal grouped bars of the mean percentage per gene for 2D vs 3D, with
# standard-deviation error bars and the per-batch values overlaid as points.
with plt.rc_context({ "figure.dpi": 500, "figure.figsize": [2.8, 2.6 ]}):
    # errorbar="sd" replaces the deprecated ci="sd" and matches the
    # errorbar=None usage elsewhere in this notebook.
    ax = sns.barplot(gene_df, x="percentage", y="genes", hue="batch", palette=['#E1664D','#698BF0'], width=0.85, orient='h', errorbar="sd", errwidth=0.7,capsize=0.2)
    sns.stripplot(
        y="genes", orient='h',
        x="percentage", hue='batch',color='black',size=2,
        data=gene_df, dodge=True, ax=ax, edgecolor='black', linewidth=0, alpha=0.7)
    
    # Hide the top and right frame lines.
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')

    ax.set_xlabel('% of neurons', fontsize=8)
    ax.set_ylabel(None)

    # Replace the automatic legend with two round markers in the bar colours.
    line1 = Line2D([], [], color="white", marker='o', markerfacecolor='#E1664D', markersize=8)
    line2 = Line2D([], [], color="white", marker='o', markerfacecolor='#698BF0', markersize=8)
    ax.legend((line1, line2), ('2D', '3D'), loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, handletextpad=-0.2, fontsize=8)

    plt.yticks(fontsize=8)
    ax.set_xticks([0, 10, 20, 30, 40])
    plt.xticks(fontsize=6)

    plt.title('Cell positive for neuropeptide', fontsize=10)
    plt.tight_layout()
    plt.savefig('figures/neuropeptide_percentages_neurons.pdf', bbox_inches='tight')

    plt.show()  # Show your plot
    


In [None]:
# Load the HYPOMAP file and keep only the cells whose `region` is 'ARC'.
adata_hypomap = sc.read('/datasets/renew_kirkeby/erno/human_HYPOMAP.h5ad')
# .copy() turns the subset into an independent object, so the layer added
# below is not written onto a view of the full dataset.
adata_hypomap_arc = adata_hypomap[adata_hypomap.obs.region == 'ARC'].copy()
# expression_cutoff() reads the 'log_transformed' layer, so expose .X under that name.
# NOTE(review): assumes .X already holds log-transformed values — confirm.
adata_hypomap_arc.layers['log_transformed'] = adata_hypomap_arc.X.copy()
adata_hypomap_arc

In [None]:
# Percentage of ARC cells positive for each marker (or co-positive for an
# 'A/B' marker pair), computed per donor.
gene_list = ['AGRP','POMC/PRDM12','GHRH','PNOC','SST','TRH']
gene_dict = {}

adata_hypomap_arc.obs.Donor_ID = adata_hypomap_arc.obs.Donor_ID.astype('category')

# The cutoffs depend only on the gene and the full ARC dataset, so each
# mixture model is fitted once here instead of once per donor.
gene_cutoffs = {
    single_gene: expression_cutoff(single_gene, adata_hypomap_arc)
    for gene in gene_list
    for single_gene in gene.split('/')
}
print(gene_cutoffs)

for batch in adata_hypomap_arc.obs.Donor_ID.cat.categories.to_list():
    # The donor subset does not depend on the gene, so build it once per donor.
    adata_arc_temp = adata_hypomap_arc[adata_hypomap_arc.obs.Donor_ID == batch]
    n_cells = adata_arc_temp.shape[0]

    for gene in gene_list:
        # A cell is positive when every gene of the (possibly one-element)
        # marker set reaches its cutoff.
        positive = np.ones(n_cells, dtype=bool)
        for single_gene in gene.split('/'):
            layer = adata_arc_temp[:, single_gene].layers['log_transformed']
            # Sparse layers are densified; either way the column is flattened to 1-D.
            expression = layer.toarray() if hasattr(layer, 'toarray') else np.asarray(layer)
            positive &= expression.ravel() >= gene_cutoffs[single_gene]

        # NaN rather than ZeroDivisionError for a donor category without cells.
        gene_exp = float(positive.sum()) / n_cells * 100 if n_cells else float('nan')

        # Key is gene + '+' + donor; a later cell splits it on '+' to recover the gene.
        gene_dict.update({gene+'+'+batch:[gene_exp, batch]})
    
    



In [None]:
# One row per (gene, donor): percentage of positive cells and the donor id.
data = pd.DataFrame(
    {
        'genes': gene_dict.keys(),
        'exp': [entry[0] for entry in gene_dict.values()],
        'batch': [entry[1] for entry in gene_dict.values()],
    }
)
# Keys are gene + '+' + donor; the part before the first '+' is the gene label.
data.genes = [key.split('+')[0] for key in data.genes.values]
data


In [None]:
# Mean and population standard deviation (ddof=0) of the donor percentages per gene.
df = (
    data
    .groupby(['genes'])['exp']
    .agg(mean='mean', std=lambda x: x.std(ddof=0))
    .reset_index()
)

# Round both summary columns to two decimals.
for summary_column in ('mean', 'std'):
    df[summary_column] = df[summary_column].round(2)

df

In [None]:
from matplotlib.lines import Line2D

# Horizontal bars of the mean percentage per gene across donors, with the
# per-donor values overlaid as points. Saved under figures/.
with plt.rc_context({ "figure.dpi": 500, "figure.figsize": [2.5, 2.6 ]}):
    # Create the horizontal bar plot
    ax = sns.barplot(data, x="exp", y="genes", palette=['#C0C0C0'], width=0.85, orient='h', errorbar=None)
    sns.stripplot(
        y="genes", orient='h',
        x="exp", hue='batch',color='black',size=2,
        data=data, dodge=False, ax=ax, edgecolor='black', linewidth=0, alpha=0.8)
    
    # Hide the top and right frame lines.
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')

    ax.set_xlabel(f'% of neurons', fontsize=8)
    ax.set_ylabel(None)

    # The legend markers are white and labelcolor='linecolor' makes the text
    # white too, so this '2D'/'3D' legend is invisible on a white background.
    # NOTE(review): presumably kept to reserve the same layout width as the 2D/3D figure — confirm.
    line1 = Line2D([], [], color="white", marker='o', markerfacecolor='white', markersize=8)
    line2 = Line2D([], [], color="white", marker='o', markerfacecolor='white', markersize=8)
    ax.legend((line1, line2), ('2D', '3D'), loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, handletextpad=-0.2, fontsize=8, labelcolor='linecolor')

    plt.yticks(fontsize=8)
    ax.set_xticks([0, 10, 20, 30, 40])
    plt.xticks(fontsize=6)
    

   
    plt.title('Cell positive for neuropeptide', fontsize=10)
    plt.tight_layout()
    plt.savefig('figures/neuropeptide_percentages_hypomap.pdf', bbox_inches='tight')
    plt.show()

## DEG analysis

In [None]:
# Re-read the neuron dataset from disk so the DE analysis starts from the saved object.
adata_neurons = sc.read('../Data/adata_d50_d70_neurons.h5ad')


In [None]:
%%R -i adata_neurons

# Pseudo-bulk marker detection with edgeR for the neuron subtypes: each subtype
# is tested against the average of all other subtypes, with batch as a
# covariate. The top up-regulated genes per subtype are written to a CSV file.

# NOTE(review): presumably a compatibility shim for a Matrix/Seurat version mismatch — confirm it is still needed.
Csparse_validate = "CsparseMatrix_validate"
library(Seurat)
library(edgeR)

# Convert the imported object to Seurat, taking the "counts" assay as counts and no normalised data slot.
seur <- as.Seurat(adata_neurons, counts = "counts", data = NULL)

#seur <- readRDS("Data/d50_d70_neurons_seurat.rds")
seur <- RenameAssays(seur, originalexp="RNA")

# Build the pseudo-bulk object from sample = diff_batch_2 and cluster = Cell_types.
y <- Seurat2PB(seur, sample = "diff_batch_2", cluster = "Cell_types")
# Drop pseudo-bulk samples with a library size of 5e4 or less.
keep.samples <- y$samples$lib.size > 5e4
y <- y[, keep.samples]
# Drop lowly expressed genes, then recompute library sizes (keep=FALSE) and normalise.
keep.genes <- filterByExpr(y, group=y$samples$cluster)
y <- y[keep.genes, , keep=FALSE]
y <- normLibSizes(y)

cluster <- as.factor(y$samples$cluster)

# Additive design: intercept + cluster + batch.
batch <- factor(y$samples$sample)
design <- model.matrix(~ cluster + batch)
# Strip the "batch" prefix from the design column names and rename the intercept.
colnames(design) <- gsub("batch", "", colnames(design))
colnames(design)[1] <- "Int"
head(design)

y <- estimateDisp(y, design, robust=TRUE)
fit <- glmQLFit(y, design, robust=TRUE)

# Contrast matrix with one column per cluster: 1 for that cluster and
# 1/(1-ncls) for every other cluster. The intercept row and all rows beyond
# the first ncls (the batch coefficients) are 0.
ncls <- nlevels(cluster)
contr <- rbind( matrix(1/(1-ncls), ncls, ncls), matrix(0, ncol(design)-ncls, ncls) )
diag(contr) <- 1
contr[1,] <- 0
rownames(contr) <- colnames(design)
colnames(contr) <- paste0("cluster", levels(cluster))
contr

# One quasi-likelihood F-test per cluster, labelled "cluster<name>_vs_others".
qlf <- list()
for(i in 1:ncls){
 qlf[[i]] <- glmQLFTest(fit, contrast=contr[,i])
 qlf[[i]]$comparison <- paste0("cluster", levels(cluster)[i], "_vs_others")
}

# Number of top up-regulated genes kept per cluster.
top <- 200
topMarkers <- list()

# Empty data frame that accumulates the per-cluster result tables.
de_df = data.frame(matrix( 
  vector(), 0, 7, dimnames=list(c(), c("gene","logFC","logCPM","F","PValue","FDR",'comparison'))), 
                stringsAsFactors=F)

for(i in 1:ncls) {
    #print(head(qlf[[i]])$comparison)
    # Rank genes by p-value and keep the first `top` of those with positive logFC.
    ord <- order(qlf[[i]]$table$PValue, decreasing=FALSE)
    up <- qlf[[i]]$table$logFC[ord] > 0
    topMarkers[[i]] <- rownames(y)[ord[up][1:top]]
    #genes = 
    # Full result table, restricted to the selected marker genes.
    df = as.data.frame(topTags(qlf[[i]], n='all'))
    df =df[rownames(df) %in% rownames(y)[ord[up][1:top]], ]  
    df$comparison <- head(qlf[[i]])$comparison
    de_df = rbind(de_df, df)
}
print(dim(de_df))

write.csv(de_df, "DE_lists/d50_70_arc_neurons_de_list.csv")

In [None]:
# Read the neuron edgeR marker table and pull the cluster name out of the comparison label.
de_genes = pd.read_csv("DE_lists/d50_70_arc_neurons_de_list.csv", index_col=0)
de_genes['cluster'] = de_genes['comparison'].str.extract(r'cluster(.*?)_vs_others')

# Keep the reporting columns and only genes with FDR < 0.05 and logFC > 1.
report_columns = ['gene', 'logFC', 'logCPM', 'F', 'PValue', 'FDR', 'comparison', 'cluster']
is_marker = (de_genes['FDR'] < 0.05) & (de_genes['logFC'] > 1)
de_genes = de_genes.loc[is_marker, report_columns]

de_genes.to_excel("DE_lists/d50_70_arc_neurons_de_list.xlsx")
de_genes

In [None]:
# Rebuild the marker dictionary from the neuron DE table currently held in
# `de_genes`. Without this, the heatmap would reuse the `de_dict` built
# earlier from the all-cells DE table.
de_dict = {
    str(cluster): list(cluster_genes)[:15]
    for cluster, cluster_genes in de_genes.groupby('cluster', sort=False)['gene']
}

# Heatmap of the per-subtype marker genes, grouped by neuron subtype.
# NOTE(review): figsize (100, 10) inches at 600 dpi is a very large canvas — confirm this is intended.
with plt.rc_context({ "figure.dpi": 600, "figure.figsize": (100,10)}):
    sc.pl.heatmap(adata_neurons,de_dict, groupby='Cell_types', show_gene_labels=True, layer='log_transformed',cmap='jet')

# Subtype correlation

In [None]:
# Load the neurons and keep the three subtypes used for the correlation analysis.
adata_neurons = sc.read('../Data/adata_d50_d70_neurons.h5ad')
# .copy() makes the subset an independent object, so the .obs edits below do
# not write onto a view of the full dataset.
adata_neurons = adata_neurons[adata_neurons.obs.Cell_types.isin(['AGRP+/OTP+', 'POMC+/TBX3+/NR5A2+','GHRH+/PNOC+'])].copy()

# Tag every subtype label with its origin.
adata_neurons.obs['Cell_types'] = adata_neurons.obs['Cell_types'].astype(str) + ' | In vitro'

# Metadata columns added under fixed names (day -> Timepoint,
# diff_batch_2 -> batch_key) plus constant Study / Stage labels.
adata_neurons.obs['Timepoint'] = adata_neurons.obs['day']
adata_neurons.obs['batch_key'] = adata_neurons.obs['diff_batch_2']
adata_neurons.obs['Study'] = 'Abay-Nørgaard'
adata_neurons.obs['Stage'] = 'In Vitro'

#adata_neurons.obs['Cell_types'] = 'ARC | In vitro'

# Drop the per-gene annotations.
del adata_neurons.var

print(adata_neurons.obs.Cell_types.value_counts())

# Value ranges of .X and of the two layers, printed as a sanity check.
print('X min:', adata_neurons.X.min(), '| X max:', adata_neurons.X.max())
print('Layers log min:', adata_neurons.layers['log_transformed'].min(), '| Layers log max:', adata_neurons.layers['log_transformed'].max())
print('Layers counts min:', adata_neurons.layers['counts'].min(), '| Layers counts max:', adata_neurons.layers['counts'].max())

In [None]:
adata_hypo = sc.read('../Data/fetal_adult_hypo_ref_annotations.h5ad')

# Reference cells kept: fetal cells annotated as ARC, plus every cell that
# belongs to one of the selected C4 clusters (POMC, AGRP and GHRH neurons).
selected_c4_clusters = [
    'C4-373 Mid-2 GABA-GLU-3 POMC PRDM12',
    'C4-355 Mid-2 GABA-GLU-1 RGS22 AGRP',
    'C4-160 Mid-1 GABA-6 IL13RA1 GHRH',
    'C4-161 Mid-1 GABA-6 IL13RA1 GHRH',
]
is_fetal_arc = adata_hypo.obs['Cell_types'].isin(['ARC']) & adata_hypo.obs['Stage'].isin(['Fetal'])
in_selected_c4 = adata_hypo.obs['C4_named'].isin(selected_c4_clusters)
adata_hypo = adata_hypo[is_fetal_arc | in_selected_c4].copy()

adata_hypo.obs['dimensionality'] = ''

# Keep the incoming X as the 'counts' layer, then normalise and log-transform X
# (assumes X holds raw counts at this point - TODO confirm).
adata_hypo.layers['counts'] = adata_hypo.X.copy()

sc.pp.normalize_total(adata_hypo)
sc.pp.log1p(adata_hypo)

adata_hypo.layers['log_transformed'] = adata_hypo.X.copy()

# Format the adult arc data.
# .copy() so that the .obs edits below act on a real object, not on a view.
adata_adult_arc = adata_hypo[adata_hypo.obs['Stage'].isin(['Adult'])].copy()

# Short subtype labels derived from the C4 cluster names. The column is cast to
# str first because two clusters collapse onto the same label, which is not a
# safe operation on a categorical column.
adult_subtype_labels = {
    'C4-373 Mid-2 GABA-GLU-3 POMC PRDM12': 'POMC+/PRDM12+',
    'C4-355 Mid-2 GABA-GLU-1 RGS22 AGRP': 'AGRP+/RGS22+',
    'C4-160 Mid-1 GABA-6 IL13RA1 GHRH': 'GHRH+/IL13RA1',
    'C4-161 Mid-1 GABA-6 IL13RA1 GHRH': 'GHRH+/IL13RA1',
}
adata_adult_arc.obs['Cell_types'] = (
    adata_adult_arc.obs['C4_named'].astype(str).replace(adult_subtype_labels) + ' | Adult'
)

print(adata_adult_arc.obs.Cell_types.value_counts())

# Value ranges of X and of both layers, as a quick sanity check.
print('X min:', adata_hypo.X.min(), '| X max:', adata_hypo.X.max())
print('Layers log min:', adata_hypo.layers['log_transformed'].min(), '| Layers log max:', adata_hypo.layers['log_transformed'].max())
print('Layers counts min:', adata_hypo.layers['counts'].min(), '| Layers counts max:', adata_hypo.layers['counts'].max())


In [None]:
# Fetal ARC cells for integration; timepoints with fewer than 50 cells are dropped.
fetal_arc_mask = (adata_hypo.obs.Cell_types == 'ARC') & (adata_hypo.obs.Stage == 'Fetal')
adata_fetal_arc = adata_hypo[fetal_arc_mask]

cells_per_timepoint = adata_fetal_arc.obs.Timepoint.value_counts()
kept_timepoints = cells_per_timepoint[cells_per_timepoint >= 50].index
adata_fetal_arc = adata_fetal_arc[adata_fetal_arc.obs.Timepoint.isin(kept_timepoints)]

adata_fetal_arc.obs.Timepoint.value_counts()

In [None]:
%%R -i adata_fetal_arc -o adata_integrated -o umap_emb -o pca_emb -o hvg_list
# Integrates the fetal ARC cells across timepoints with Seurat (reciprocal PCA),
# clusters the integrated data at several resolutions and hands the embeddings,
# the integrated feature names and the annotated object back to Python.

# NOTE(review): looks like a compatibility workaround for sparse-matrix
# validation in the Matrix / Seurat stack - confirm it is still required.
Csparse_validate = "CsparseMatrix_validate"
library(Seurat)
library(dplyr)

# Build the Seurat object from the "counts" layer only (no pre-normalised data slot).
sobj <- as.Seurat(adata_fetal_arc, counts = "counts", data = NULL)

# One object per timepoint; each timepoint is treated as a batch.
sobj_list <- SplitObject(sobj, split.by = 'Timepoint')

# Normalise each timepoint and pick its 2000 variable features.
sobj_list <- lapply(X = sobj_list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

# Features shared across timepoints that drive the integration.
features <- SelectIntegrationFeatures(object.list = sobj_list, nfeatures = 2000)

# Scale and run PCA per timepoint on the shared features, ahead of the
# "rpca" anchor search below.
sobj_list <- lapply(X = sobj_list, FUN = function(x) {
    x <- ScaleData(x, features = features, verbose = FALSE)
    x <- RunPCA(x, features = features, verbose = FALSE)
  })

anchors <- FindIntegrationAnchors(object.list = sobj_list, anchor.features = features, reduction = "rpca")
# NOTE(review): k.weight = 60 is presumably chosen because the smallest retained
# timepoints hold few cells - confirm against the timepoint sizes.
sobj <- IntegrateData(anchorset = anchors,k.weight=60)
# Downstream scaling, PCA, UMAP, neighbours and clustering all run on the "integrated" assay.
DefaultAssay(sobj) <- "integrated"
sobj <- ScaleData(sobj) %>% RunPCA(.)
sobj <- RunUMAP(sobj, dims = 1:40)
sobj <- FindNeighbors(sobj, dims = 1:40)

# Cluster at five resolutions; the Python side later reads the results from the
# metadata columns integrated_snn_res.0.1 ... integrated_snn_res.0.25.
sobj <- FindClusters(sobj,resolution=0.10)
sobj <- FindClusters(sobj,resolution=0.15)
sobj <- FindClusters(sobj,resolution=0.18)
sobj <- FindClusters(sobj,resolution=0.20)
sobj <- FindClusters(sobj,resolution=0.25)


pca_emb = Embeddings(object = sobj, reduction = "pca")
# rownames() is evaluated while the default assay is still "integrated".
hvg_list = rownames(sobj)

# Switch the default assay to "originalexp" before converting the object back.
DefaultAssay(sobj) <- "originalexp"

umap_emb = Embeddings(object = sobj, reduction = "umap")
adata_integrated = as.SingleCellExperiment(sobj) 

In [None]:
# Attach the embeddings computed in R to the Python object.
for obsm_key, embedding in (('X_umap', umap_emb), ('X_pca', pca_emb)):
    adata_fetal_arc.obsm[obsm_key] = embedding
#adata_fetal_arc.var['highly_variable'] = adata_fetal_arc.var.index.isin(hvg_list)

# Copy the cluster assignment of every tested resolution into .obs.
for resolution in ('0.1', '0.15', '0.18', '0.2', '0.25'):
    cluster_column = f'integrated_snn_res.{resolution}'
    adata_fetal_arc.obs[cluster_column] = adata_integrated.obs[cluster_column]

In [None]:
umap_rc = {"figure.dpi": 250, "figure.figsize": (4, 4)}

# Cluster assignments at each resolution, side by side.
resolution_columns = [
    'integrated_snn_res.0.1',
    'integrated_snn_res.0.15',
    'integrated_snn_res.0.18',
    'integrated_snn_res.0.2',
    'integrated_snn_res.0.25',
]
with plt.rc_context(umap_rc):
    sc.pl.umap(
        adata_fetal_arc,
        color=resolution_columns,
        ncols=5,
        size=8,
        cmap='jet',
        frameon=False,
        use_raw=False,
        colorbar_loc=None,
        wspace=-0.02,
        legend_loc='on data',
    )

# Marker genes used to identify the neuronal subtypes.
marker_genes = ['POMC', 'AGRP', 'GHRH', 'KISS1']
with plt.rc_context(umap_rc):
    sc.pl.umap(
        adata_fetal_arc,
        color=marker_genes,
        ncols=4,
        size=8,
        cmap='jet',
        frameon=False,
        use_raw=False,
        wspace=-0.02,
        legend_loc='on data',
    )

In [None]:
# Annotate AGRP POMC GHRH clusters (clustering at resolution 0.25)

# 0 AGRP,
# 3, 7 GHRH
# 4 POMC
cluster_to_subtype = {'0': 'AGRP+', '3': 'GHRH+', '7': 'GHRH+', '4': 'POMC+'}

# Build the label column in a single vectorised step. Writing through
# obs["Cell_types"].loc[...] is chained assignment and silently does nothing
# once pandas copy-on-write is active.
adata_fetal_arc.obs['Cell_types'] = (
    adata_fetal_arc.obs['integrated_snn_res.0.25']
    .astype(str)
    .map(cluster_to_subtype)
    .fillna('unknown')
)

# Keep the annotated subtypes only; .copy() materialises the subset so the
# label edit below acts on a real object instead of a view.
annotated_subtypes = ['AGRP+', 'GHRH+', 'POMC+']
adata_fetal_arc = adata_fetal_arc[adata_fetal_arc.obs['Cell_types'].isin(annotated_subtypes)].copy()

# Suffix the labels with their origin so they stay distinct after concatenation.
adata_fetal_arc.obs['Cell_types'] = adata_fetal_arc.obs['Cell_types'].astype(str) + ' | Fetal'


print(adata_fetal_arc.obs.Cell_types.value_counts())

with plt.rc_context({ "figure.dpi": 250, "figure.figsize": (4,4)}):
    sc.pl.umap(adata_fetal_arc, color=[ 'Cell_types'],frameon=False, use_raw=False, size=8,ncols=5,cmap='jet', colorbar_loc=None,wspace=-0.02)

In [None]:
# Stack fetal, adult and in vitro cells on their shared genes.
adata_concat = adata_fetal_arc.concatenate(
    [adata_adult_arc, adata_neurons],
    join='inner',
    batch_key=None,
)

# Keep columns present in both datasets: any obs column with a missing value is dropped.
has_missing = adata_concat.obs.isna().any()
complete_columns = adata_concat.obs.columns[~has_missing.to_numpy()].tolist()
adata_concat.obs = adata_concat.obs[complete_columns]
# Strip every gene-level annotation column.
adata_concat.var = adata_concat.var.loc[:, []]

# Batch identifier: timepoint, study and dimensionality glued into one string.
timepoint_str = adata_concat.obs['Timepoint'].astype(str)
study_str = adata_concat.obs['Study'].astype(str)
dimensionality_str = adata_concat.obs['dimensionality'].astype(str)
adata_concat.obs['Timepoint_study_dimensionality'] = timepoint_str + study_str + dimensionality_str

print(adata_concat.obs.Stage.value_counts())

# Highly variable genes, selected per batch.
sc.pp.highly_variable_genes(
    adata_concat,
    batch_key='Timepoint_study_dimensionality',
    n_top_genes=2000,
)
features = adata_concat.var_names[adata_concat.var['highly_variable'].to_numpy()].tolist()

adata_concat.obs.groupby('Cell_types')['Timepoint_study_dimensionality'].value_counts().reset_index()

In [None]:
%%R -i adata_concat -i features -o auroc -o auroc_col -o auroc_row
# Pairwise cell-type similarity (AUROC) between all batch / cell-type
# combinations, computed with MetaNeighbor on the selected features.
Csparse_validate <- "CsparseMatrix_validate"

library(MetaNeighbor)
library(SummarizedExperiment)
library(Seurat)

# Round-trip through Seurat to obtain a SingleCellExperiment built from the counts layer.
sobj <- as.Seurat(adata_concat, counts = "counts", data = NULL)
sce_data <- as.SingleCellExperiment(sobj)

auroc <- MetaNeighborUS(
    dat = sce_data,
    var_genes = features,
    study_id = sce_data$Timepoint_study_dimensionality,
    cell_type = sce_data$Cell_types,
    i = 'counts',
    fast_version = TRUE
)

# Dimnames are exported separately so the labelled table can be rebuilt in Python.
auroc_row <- rownames(auroc)
auroc_col <- colnames(auroc)

In [None]:
auroc_df = pd.DataFrame(auroc, index=auroc_row, columns=auroc_col)

# Row/column names are split on '|' and the second field is used as the
# cell-type key (presumably names look like "<study>|<cell type>" - verify
# against auroc_row).
group_rows = auroc_df.index.str.split('|').str[1]
group_cols = auroc_df.columns.str.split('|').str[1]

# Average the AUROC over all batches of the same cell type, first along the
# rows and then along the columns. The column step is done on the transpose
# because DataFrame.groupby(axis=1) is deprecated and removed in recent pandas.
mean_auroc_df = (
    auroc_df.groupby(group_rows).mean()
    .T.groupby(group_cols).mean()
    .T
)

# Turn '.' back into '|' in the labels (presumably MetaNeighbor substituted the
# '|' of the original " | " cell-type labels - TODO confirm).
mean_auroc_df.columns = [label.replace('.', '|') for label in mean_auroc_df.columns]
mean_auroc_df.index = [label.replace('.', '|') for label in mean_auroc_df.index]

mean_auroc_df

In [None]:
import re

# Reorder rows and columns in descending label order. Selecting with .loc moves
# the data together with the labels; assigning a sorted list to .index/.columns
# would only rename the axes and attach every value to the wrong subtype.
row_order = sorted(mean_auroc_df.index, reverse=True)
col_order = sorted(mean_auroc_df.columns, reverse=True)
mean_auroc_df = mean_auroc_df.loc[row_order, col_order]

# Marker gene of each subtype = text before the first '+' of its label.
genes = [re.search(r'^(.*?)\+', cluster).group(1) for cluster in mean_auroc_df.columns if re.search(r'^(.*?)\+', cluster)]

color_df = pd.DataFrame(genes, columns=['genes'],index = mean_auroc_df.columns)

# One colour per marker gene.
color_df['color'] = color_df['genes'].map({'AGRP':'#fb7e0e','GHRH':'#fdbb79','POMC':'#2fa02c'})
color_df

In [None]:


# Source publication of every subtype label shown in the AUROC heatmap.
publication_dict = {
    'POMC+ | Fetal': 'Herb',
    'GHRH+ | Fetal': 'Herb',
    'AGRP+ | Fetal': 'Herb',
    'POMC+/TBX3+/NR5A2+ | In vitro': 'Abay-Nørgaard',
    'GHRH+/PNOC+ | In vitro': 'Abay-Nørgaard',
    'AGRP+/OTP+ | In vitro': 'Abay-Nørgaard',
    'POMC+/PRDM12+ | Adult': 'Tadross',
    'GHRH+/IL13RA1 | Adult': 'Tadross',
    'AGRP+/RGS22+ | Adult': 'Tadross',
}

# Colour assigned to each publication.
publication_color_dict = {
    'Herb': '#ACC3A6',
    'Abay-Nørgaard': '#B26E63',
    'Braun': '#FB9A99',
    'Tadross': '#F49D6E',
}

# Marker gene of each subtype = text before the first '+' of its label.
gene_matches = (re.search(r'^(.*?)\+', cluster) for cluster in mean_auroc_df.columns)
genes = [match.group(1) for match in gene_matches if match]

# Annotation table: one row per heatmap column.
df = pd.DataFrame({'genes': genes}, index=mean_auroc_df.columns)
df['cluster_color'] = df['genes'].map({'AGRP': '#3b89bf', 'GHRH': '#B15A27', 'POMC': '#EDDB7E'})
df['publication'] = df.index.map(publication_dict)
df['publication_color'] = df['publication'].map(publication_color_dict)

df

In [None]:
# Clustered heatmap of the mean AUROC between subtypes, annotated with the
# source publication of each subtype and saved as 'Subtype_correlation.pdf'.
with plt.rc_context({ "figure.dpi": 400}): 
    from matplotlib.gridspec import GridSpec
    from matplotlib.lines import Line2D
    # Horizontal colour bar with ticks at 0.25, 0.5 and 0.75.
    kws = dict(cbar_kws=dict(ticks=[0.25,0.5,0.75], orientation='horizontal'))


    # Draw the full plot with the `kws` dictionary containing colorbar parameters.
    # The same publication colours (ordered like mean_auroc_df.columns) annotate
    # both rows and columns - assumes rows and columns share one label order.
    g = sns.clustermap(mean_auroc_df, cmap="coolwarm",
                       row_colors=[df['publication_color'].values], col_colors=[df['publication_color'].values],
                       dendrogram_ratio=(.1, .2),
                       figsize=(8, 9), **kws)

    # Remove the dendrograms
    g.ax_row_dendrogram.remove()
    g.ax_col_dendrogram.remove()
    
    # Place the colour bar by hand (figure coordinates) and style it.
    g.ax_cbar.set_position([0.82, 0.4, 0.25, 0.03])
    g.ax_cbar.set_title('AUROC', fontsize=24)
    g.ax_cbar.tick_params(labelsize=16)

    # Row labels on the left side; column labels rotated 45 degrees.
    g.ax_heatmap.yaxis.tick_left()
    g.ax_heatmap.tick_params(axis='y', labelsize=22)
    plt.setp(g.ax_heatmap.get_xticklabels(), rotation=45, ha='right', fontsize=22, rotation_mode="anchor")


    # Adjust the position of the heatmap to add a gap between row_colors and the heatmap
    #g.ax_heatmap.set_position([0.1, 0.1, 0.6, 0.8])  # [left, bottom, width, height]
    # Hand-tuned [left, bottom, width, height] of the two colour strips.
    g.ax_row_colors.set_position([.7185, .2558, .026, .56])  # Adjust left and width
    g.ax_col_colors.set_position([.109, .8215, .6022, .022])  # Adjust left and width


    
    # Legend handles: one round marker per publication.
    handles2 = [Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=16, label=label)
                              for label, color in {'Abay-Nørgaard':'#B26E63','Herb':'#ACC3A6','Tadross':'#F49D6E'}.items()]



    # Add the publication legend, anchored outside the heatmap.
    # Note: legend2 receives the return value of the chained set_position call,
    # not the Legend object itself.
    legend2 = g.ax_heatmap.legend(
        handles=handles2, loc='upper right', bbox_to_anchor=(1.807, 0.9), frameon=False,
        handletextpad=0.1, prop={'size': 24}, title='Publication', title_fontsize=24
    ).get_title().set_position((-10,0))
    plt.tight_layout()
    
    
    # Save the figure to PDF, then show the plot
    plt.savefig('Subtype_correlation.pdf', dpi=400, bbox_inches='tight')
    
    plt.show()    