In [None]:
import scanpy as sc
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
import os, scipy
import scipy.sparse as sp
import warnings
import anndata
# Silence every Python warning for the rest of the session. This also hides deprecation
# and pandas/anndata warnings, so comment this line out when debugging.
warnings.filterwarnings("ignore")

# R interface
from rpy2.robjects import pandas2ri
from rpy2.robjects import r
import rpy2.rinterface_lib.callbacks
import anndata2ri
#import numpy2ri

pandas2ri.activate()
anndata2ri.activate()

plt.rcParams.update({
    'font.family': 'Arial'
})

%load_ext rpy2.ipython

In [None]:
# Load the concatenated dataset and keep only the day-25 cells.
adata_concat = sc.read('Data/adata_concat.h5ad')
# .copy() turns the slice into an independent AnnData. A bare slice is only a view of
# adata_concat, and the later writes to .obs/.obsm/.var would force an implicit copy.
adata_D25 = adata_concat[adata_concat.obs['day'].isin(['D25'])].copy()

# Integration and annotation

In [None]:
%%R -i adata_D25 -o adata_integrated -o umap_emb -o pca_emb -o hvg_list

# Seurat reciprocal-PCA integration of the day-25 cells across 'reactionID' groups.
# Input  (-i): adata_D25.
# Output (-o): adata_integrated, umap_emb, pca_emb, hvg_list.

# NOTE(review): this variable is not referenced again in this cell; presumably a workaround
# for a Matrix/Seurat sparse-matrix validation issue -- confirm it is still needed.
Csparse_validate = "CsparseMatrix_validate"
library(Seurat)
library(dplyr)

# Build a Seurat object using the "counts" assay as counts and no pre-computed data slot.
sobj <- as.Seurat(adata_D25, counts = "counts", data = NULL)

# One Seurat object per reactionID value.
sobj_list <- SplitObject(sobj, split.by = 'reactionID')

# Normalise each object and select 2000 variable features (vst) per object.
sobj_list <- lapply(X = sobj_list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

# Shared feature set used for anchoring and integration.
features <- SelectIntegrationFeatures(object.list = sobj_list,nfeatures = 2000)

# Scale and run PCA per object on the shared features (input for reduction = "rpca" below).
sobj_list <- lapply(X = sobj_list, FUN = function(x) {
    x <- ScaleData(x, features = features, verbose = FALSE)
    x <- RunPCA(x, features = features, verbose = FALSE)
  })

# Find anchors, integrate, then do all downstream steps on the "integrated" assay.
anchors <- FindIntegrationAnchors(object.list = sobj_list, anchor.features = features, reduction = "rpca")
sobj <- IntegrateData(anchorset = anchors,k.weight=80)
DefaultAssay(sobj) <- "integrated"
sobj <- ScaleData(sobj) %>% RunPCA(.)
sobj <- RunUMAP(sobj, dims = 1:40)
sobj <- FindNeighbors(sobj, dims = 1:40)
# Cluster at three resolutions; the Python side reads the results back as the
# 'integrated_snn_res.0.85', 'integrated_snn_res.1' and 'integrated_snn_res.1.2' columns.
sobj <- FindClusters(sobj,resolution=0.85)
sobj <- FindClusters(sobj,resolution=1)
sobj <- FindClusters(sobj,resolution=1.2)

# Export the PCA embedding and the row names of sobj while "integrated" is the default assay.
pca_emb = Embeddings(object = sobj, reduction = "pca")
hvg_list = rownames(sobj)

# Switch back to the original assay before exporting.
DefaultAssay(sobj) <- "originalexp"

# NOTE(review): the rows of umap_emb/pca_emb follow the cell order of the integrated sobj,
# which was rebuilt from the per-reactionID objects; this may differ from the cell order of
# adata_D25 -- confirm before assigning the matrices positionally in Python.
umap_emb = Embeddings(object = sobj, reduction = "umap")
adata_integrated = as.SingleCellExperiment(sobj) 

# Keep the integrated Seurat object on disk.
saveRDS(sobj,'Data/d25_seurat.rds')

In [None]:
# Copy the Seurat results (embeddings, integration features, clusterings) onto adata_D25.

# The embedding matrices arrive without cell names. Their rows are assumed to follow
# adata_integrated.obs_names (both were exported from the same Seurat object), so re-order
# them to adata_D25's cell order rather than trusting that integration preserved it.
row_order = adata_integrated.obs_names.get_indexer(adata_D25.obs_names)
if (row_order < 0).any():
    raise ValueError('Some adata_D25 cells are missing from the integrated object.')

adata_D25.obsm['X_umap'] = np.asarray(umap_emb)[row_order]
# Fixed: this slot previously received umap_emb, so 'X_pca' held UMAP coordinates.
adata_D25.obsm['X_pca'] = np.asarray(pca_emb)[row_order]
adata_D25.var['highly_variable'] = adata_D25.var.index.isin(hvg_list)

# Cluster labels are pandas Series indexed by cell name, so assignment aligns on the index.
for resolution in ('0.85', '1', '1.2'):
    cluster_key = f'integrated_snn_res.{resolution}'
    adata_D25.obs[cluster_key] = adata_integrated.obs[cluster_key]

# The integration cell of this notebook only computes resolutions 0.85, 1 and 1.2. The other
# resolutions are plotted only if they already exist in .obs, instead of raising a KeyError.
requested_keys = [
    f'integrated_snn_res.{resolution}'
    for resolution in ('0.5', '0.55', '0.6', '0.65', '0.7', '0.75', '0.8', '0.85')
]
available_keys = [key for key in requested_keys if key in adata_D25.obs.columns]

with plt.rc_context({ "figure.dpi": 250}):
    sc.pl.umap(adata_D25, color=available_keys, legend_loc='on data',
               use_raw=False, size=10, frameon=False, ncols=2, cmap='jet',
               colorbar_loc=None, wspace=-0.05)

In [None]:
# UMAPs of the two clusterings (resolution 0.85 and 1.2) with cluster ids drawn on the data.
cluster_umap_kwargs = dict(
    legend_loc='on data',
    use_raw=False,
    size=10,
    frameon=False,
    ncols=2,
    cmap='jet',
    colorbar_loc=None,
    wspace=-0.05,
)
with plt.rc_context({ "figure.dpi": 250}):
    sc.pl.umap(
        adata_D25,
        color=['integrated_snn_res.0.85', 'integrated_snn_res.1.2'],
        **cluster_umap_kwargs,
    )

In [None]:
# Annotate cells: map Seurat clusters at resolution 0.85 to cell-type labels.
# Cells whose cluster is not listed keep the 'Unassigned' label.
CELL_TYPE_CLUSTERS = {
    "POMC+ neurons": ['3'],
    "DLX6-AS1+ neurons": ['6', '14'],
    "OTP+ neurons": ['10', '8'],
    "NR5A2/ONECUT1/3+ neurons": ['15'],
    "Telencephalic neurons": ['17', '21'],
    "Posterior tuberal precursors": ['7', '5', '19', '16'],
    "Optic area progenitors": ['18', '13', '22'],
    "Tanycytes": ['12', '4', '1', '0', '9', '2', '11', '20'],
}

adata_D25.obs['Cell_types'] = 'Unassigned'

# Write through adata_D25.obs.loc[mask, column]. The previous chained form
# (obs["Cell_types"].loc[...] = ...) assigns into a temporary Series and is silently
# ignored when pandas Copy-on-Write is active.
clusters_res_085 = adata_D25.obs["integrated_snn_res.0.85"]
for cell_type, cluster_ids in CELL_TYPE_CLUSTERS.items():
    adata_D25.obs.loc[clusters_res_085.isin(cluster_ids), "Cell_types"] = cell_type

# Cluster 21 of the resolution-1.2 clustering is reset to 'Unassigned'. This must run last
# because it overrides the labels assigned above.
is_res_12_cluster_21 = adata_D25.obs['integrated_snn_res.1.2'].isin(['21'])
adata_D25.obs.loc[is_res_12_cluster_21, "Cell_types"] = "Unassigned"

# Reference colours for selected cell types.
POMC = '#eddb7e'
OTP_AGRP = '#3586bd'  # was `OTP/AGRP = ...`, which is a SyntaxError (not a valid assignment target)
Tele = '#d0a9b7'
Tany = '#faaa4e'

In [None]:
# Colours handed to scanpy as the palette for the 'Cell_types' annotation.
palette_cell_types = [
    '#bb9c8a', '#f4a989', '#3586bd',
    '#4F9E46', '#eddb7e', '#e85b3d',
    '#FAAA4E', '#d0a9b7', '#a4cde0',
]
cell_type_umap_rc = { "figure.dpi": 300, "figure.figsize": (4.2,4) }
with plt.rc_context(cell_type_umap_rc):
    sc.pl.umap(
        adata_D25,
        color=['Cell_types'],
        palette=palette_cell_types,
        ncols=2,
        use_raw=False,
        cmap='jet',
        frameon=False,
        colorbar_loc=None,
        size=14,
        save='_d25_matched_colors.pdf',
    )
    
#palette_batch=['#ff7f0eff', '#8c564bff', '#279e68ff']
#with plt.rc_context({ "figure.dpi": 300, "figure.figsize": (4.2,4) }):
#    sc.pl.umap(adata_D25, color=['diff_batch'],palette=palette_batch, ncols=2, use_raw=False, cmap='jet', frameon=False, colorbar_loc=None,size=14)


In [None]:
# Persist the annotated day-25 object (h5ad) for the downstream sections.
adata_D25.write_h5ad('../Data/adata_d25_annotated.h5ad')

# Dotplot

In [None]:
# Reload the annotated object so this section can be run on its own.
adata_D25 = sc.read(filename='../Data/adata_d25_annotated.h5ad')


In [None]:
from matplotlib.patches import Circle

# Marker genes shown on the x-axis of the dotplot.
dotplot_markers = [
    'DLX6-AS1', 'DLX6', 'DLX1', 'DLX2', 'NR5A2', 'ONECUT1', 'ONECUT3', 'OTP', 'BSX',
    'NR2F1', 'VSX2', 'POMC', 'PRDM12', 'DLL1', 'NFIA', 'DIO2', 'LHX8', 'FOXG1',
]

# The font size is applied through the same rc context, so it is restored on exit.
with plt.rc_context({"figure.dpi": 300, 'font.size': 12}):
    dotplot_axes = sc.pl.dotplot(adata_D25, dotplot_markers, "Cell_types",
                                 show=False, cmap='Blues')
    main_ax = dotplot_axes["mainplot_ax"]
    for axis in (main_ax.xaxis, main_ax.yaxis):
        axis.set_tick_params(labelsize=16)

    # Pair every cell-type category with the colour stored by scanpy for it.
    cell_type_names = list(adata_D25.obs.Cell_types.cat.categories)
    colour_by_cell_type = dict(zip(cell_type_names, list(adata_D25.uns['Cell_types_colors'])))

    # Draw one coloured dot per row, left of the y tick labels. The x position (-12.4, in
    # data coordinates) and the radius are hand-tuned; clipping is disabled so the dots are
    # drawn outside the axes area. Categories without a stored colour fall back to black.
    for y_pos, cell_type in zip(main_ax.get_yticks(), cell_type_names):
        dot = Circle(
            (-12.4, y_pos),
            radius=0.3,
            color=colour_by_cell_type.get(cell_type, "black"),
            transform=main_ax.transData,
            clip_on=False,
        )
        main_ax.add_patch(dot)

    plt.tight_layout()
    plt.savefig('figures/d25_marker_dotplot.pdf', bbox_inches='tight')
    plt.show()

# Cluster percentage

In [None]:
# Composition per batch: cells of each type within every 'diff_batch_2' batch, expressed
# as a percentage of that batch's total number of cells.
batch_key = 'diff_batch_2'

# Total number of cells per batch.
totals = adata_D25.obs.groupby(batch_key).size().reset_index(name='total_counts')

# Cells per (batch, cell type), joined with the batch totals.
counts = (
    adata_D25.obs
    .groupby([batch_key, 'Cell_types'])
    .size()
    .reset_index(name='counts')
    .merge(totals, on=batch_key)
)

# Fraction of the batch, scaled to percent.
counts['normalized_counts'] = (counts['counts'] / counts['total_counts']).mul(100)

counts

In [None]:
# Mean and population standard deviation (ddof=0) of each cell type's percentage across
# batches, both rounded to two decimals.
df = (
    counts
    .groupby(['Cell_types'])['normalized_counts']
    .agg(mean='mean', std=lambda pct: pct.std(ddof=0))
    .reset_index()
    .round({'mean': 2, 'std': 2})
)

df

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.lines as mlines

category_names = list(adata_D25.obs['Cell_types'].cat.categories)

# Percentage of each cell type per batch, one list per batch, in category_names order.
# Counting is done on adata_D25.obs directly: subsetting the AnnData object may drop
# categories that are unused in the subset, which would shorten (and misalign) the list for
# any batch lacking a cell type. Reindexing pins the order and fills absent types with 0.
results = {
    batch: list(
        adata_D25.obs.loc[adata_D25.obs['diff_batch_2'].isin([batch]), 'Cell_types']
        .value_counts(normalize=True)
        .mul(100)
        .round(1)
        .reindex(category_names, fill_value=0.0)
        .values
    )
    for batch in ('batch-3', 'batch-2', 'batch-1')
}



def survey_horizontal(results, category_names, category_colors=None):
    """Draw one horizontal stacked bar per entry of ``results``.

    Parameters
    ----------
    results : dict
        Maps a bar label to a list of values, one per category, given in the
        same order as ``category_names``.
    category_names : list of str
        Category labels; used as the legend label of each bar segment.
    category_colors : list of str, optional
        One colour per category, in the same order as ``category_names``.
        Defaults to the nine-colour palette previously hard-coded here.

    Returns
    -------
    tuple
        The ``(fig, ax)`` pair of the created figure.
    """
    if category_colors is None:
        category_colors = ['#bb9c8a', '#f4a989', '#3586bd', '#4f9e46', '#eddb7e', '#e85b3d',
                           '#faaa4e', '#d0a9b7', '#a4cde0']

    # Names, colours and data columns are all reversed together, so segments are stacked
    # from the last category to the first.
    category_names = category_names[::-1]
    category_colors = list(category_colors)[::-1]
    labels = list(results.keys())
    # dtype=float makes rows of unequal length fail here instead of yielding an object array.
    data = np.array(list(results.values()), dtype=float)[:, ::-1]
    data_cum = data.cumsum(axis=1)

    with plt.rc_context({"figure.dpi": 250}):
        fig, ax = plt.subplots(figsize=(3.5, 2.5))
        ax.set_xlim(0, np.sum(data, axis=1).max())
        ax.set_ylim(-0.5, len(labels) - 0.5)

        for i, (colname, color) in enumerate(zip(category_names, category_colors)):
            widths = data[:, i]
            # Each segment starts where the previous segments of the same bar end.
            starts = data_cum[:, i] - widths
            ax.barh(labels, widths, left=starts, height=0.7,
                    label=colname, color=color)

        ax.set_xlabel('% of cells', fontsize=11)
        ax.set_ylabel('')
        ax.set_yticks(range(len(labels)))
        ax.set_yticklabels(labels, fontsize=11)
        ax.set_xticks(np.linspace(0, 100, 5))
        ax.tick_params(axis='x', labelsize=8)

    return fig, ax

# Draw the stacked bars, adjust the tick label sizes on the current axes, then save and show.
pct_fig, pct_ax = survey_horizontal(results, category_names)
plt.xticks(fontsize=9)
plt.yticks(fontsize=11)
pct_fig.savefig('figures/cluster_percentages_d25.pdf', bbox_inches='tight')
plt.show()
#plt.ylabel('% of cells', fontsize=11, labelpad=-1)


# Featureplots

In [None]:
# Read the annotated object from the location the annotation section writes it to
# ('../Data/'). The previous 'Data/' path pointed at a file this notebook never creates.
adata = sc.read('../Data/adata_d25_annotated.h5ad')

In [None]:
# UMAP feature plots coloured by the 'log_transformed' layer; one PDF is saved per gene.
# Genes noted for this panel earlier: RAX, TBX3 and NKX2-1.
feature_genes = ['CRYM']
with plt.rc_context({ "figure.dpi": 300, "figure.figsize": (4.2,4) }):
    for gene in feature_genes:
        sc.pl.umap(
            adata,
            color=gene,
            ncols=2,
            use_raw=False,
            cmap='jet',
            frameon=False,
            colorbar_loc=None,
            size=18,
            save=f'd25_{gene}.pdf',
            show=False,
            layer='log_transformed',
        )

# DGE list

In [None]:
# Reload the annotated object so the DGE section can be run on its own.
adata_D25 = sc.read(filename='../Data/adata_d25_annotated.h5ad')

In [None]:
%%R -i adata_D25

# Pseudo-bulk marker detection with edgeR: each cell type is tested against the average of
# all other cell types, with batch included in the design. The result is written to
# DE_lists/d25_arc_de_list.csv.

# NOTE(review): this variable is not referenced again in this cell; presumably a workaround
# for a Matrix/Seurat sparse-matrix validation issue -- confirm it is still needed.
Csparse_validate = "CsparseMatrix_validate"
library(Seurat)
library(edgeR)

seur <- as.Seurat(adata_D25, counts = "counts", data = NULL)

#seur <- readRDS("Data/d50_d70_neurons_seurat.rds")
seur <- RenameAssays(seur, originalexp="RNA")

# Aggregate counts into pseudo-bulk samples per (diff_batch_2, Cell_types) combination.
y <- Seurat2PB(seur, sample = "diff_batch_2", cluster = "Cell_types")
# Drop pseudo-bulk samples with a library size of 5e4 or less, then lowly expressed genes.
keep.samples <- y$samples$lib.size > 5e4
y <- y[, keep.samples]
keep.genes <- filterByExpr(y, group=y$samples$cluster)
y <- y[keep.genes, , keep=FALSE]
y <- normLibSizes(y)

cluster <- as.factor(y$samples$cluster)

# Additive design: cluster effect plus batch effect.
batch <- factor(y$samples$sample)
design <- model.matrix(~ cluster + batch)
# NOTE(review): gsub removes every occurrence of "batch" from the column names, so it also
# strips it from level names that themselves contain "batch" -- confirm this is intended.
colnames(design) <- gsub("batch", "", colnames(design))
colnames(design)[1] <- "Int"

y <- estimateDisp(y, design, robust=TRUE)
fit <- glmQLFit(y, design, robust=TRUE)

# Contrast matrix with one column per cluster level: the cluster rows are filled with
# 1/(1-ncls) and the diagonal set to 1, the remaining (batch) rows are 0, and the first
# row ("Int") is zeroed afterwards.
ncls <- nlevels(cluster)
contr <- rbind( matrix(1/(1-ncls), ncls, ncls), matrix(0, ncol(design)-ncls, ncls) )
diag(contr) <- 1
contr[1,] <- 0
rownames(contr) <- colnames(design)
colnames(contr) <- paste0("cluster", levels(cluster))

# One quasi-likelihood F-test per cluster, labelled "cluster<level>_vs_others".
qlf <- list()
for(i in 1:ncls){
 qlf[[i]] <- glmQLFTest(fit, contrast=contr[,i])
 qlf[[i]]$comparison <- paste0("cluster", levels(cluster)[i], "_vs_others")
}

# Number of top up-regulated genes kept per comparison.
top <- 500
topMarkers <- list()

# Empty frame with the expected columns; per-comparison results are appended below.
de_df = data.frame(matrix( 
  vector(), 0, 7, dimnames=list(c(), c("gene","logFC","logCPM","F","PValue","FDR",'comparison'))), 
                stringsAsFactors=F)

for(i in 1:ncls) {
    #print(head(qlf[[i]])$comparison)
    # Rank genes by p-value and keep the first `top` with a positive logFC.
    ord <- order(qlf[[i]]$table$PValue, decreasing=FALSE)
    up <- qlf[[i]]$table$logFC[ord] > 0
    topMarkers[[i]] <- rownames(y)[ord[up][1:top]]
    # Full topTags table restricted to those genes, tagged with the comparison label.
    df = as.data.frame(topTags(qlf[[i]], n='all'))
    df =df[rownames(df) %in% rownames(y)[ord[up][1:top]], ]  
    df$comparison <- head(qlf[[i]])$comparison
    de_df = rbind(de_df, df)
}
print(dim(de_df))

write.csv(de_df, "DE_lists/d25_arc_de_list.csv")

In [None]:
# Load the edgeR marker table and keep significant, up-regulated genes
# (FDR < 0.05 and logFC > 1); the filtered table is also exported to Excel.
de_columns = ['gene', 'logFC', 'logCPM', 'F', 'PValue', 'FDR', 'comparison', 'cluster']

de_genes = pd.read_csv("DE_lists/d25_arc_de_list.csv", index_col=0)
# The cluster name is the text between 'cluster' and '_vs_others' in the comparison label.
de_genes['cluster'] = de_genes['comparison'].str.extract(r'cluster(.*?)_vs_others')
de_genes = de_genes.loc[
    lambda table: (table.FDR < 0.05) & (table.logFC > 1),
    de_columns,
]
de_genes.to_excel("DE_lists/d25_arc_de_list.xlsx")
de_genes

In [None]:
# Genes shown per cluster in the heatmap: the first N_TOP_GENES rows of each cluster, in the
# order they appear in de_genes.
N_TOP_GENES = 8

# A single groupby pass replaces rebuilding the groupby for every cluster. sort=False keeps
# clusters in order of first appearance, and rows without a cluster are skipped instead of
# raising a KeyError in get_group.
de_dict = {
    str(cluster): list(cluster_genes['gene'])[:N_TOP_GENES]
    for cluster, cluster_genes in de_genes.groupby('cluster', sort=False)
}

# Number of significant marker genes per cluster.
de_genes['cluster'].value_counts()

In [None]:
# Heatmap of the selected marker genes (de_dict) from the 'log_transformed' layer,
# with cells grouped by cell type.
heatmap_rc = { "figure.dpi": 600, "figure.figsize": (100,10)}
with plt.rc_context(heatmap_rc):
    sc.pl.heatmap(
        adata_D25,
        de_dict,
        groupby='Cell_types',
        show_gene_labels=True,
        layer='log_transformed',
        cmap='jet',
    )