In [None]:
import scanpy as sc
import scvelo as scv
import pandas as pd
import numpy as np
import loompy as lp
import warnings
import matplotlib as mpl
from copy import copy
warnings.filterwarnings("ignore")
import seaborn as sns

# R interface
#from rpy2.robjects import pandas2ri
from rpy2.robjects import r

import rpy2.rinterface_lib.callbacks
import anndata2ri
from scipy import sparse
import matplotlib.pyplot as plt

import numpy as np
from matplotlib.colors import LogNorm
from sklearn.mixture import GaussianMixture as GMM

# Switch on anndata2ri's conversion layer; the %%R cell further down passes
# `adata` into R through it.
anndata2ri.activate()

# Use Arial for all text in matplotlib figures.
plt.rcParams['font.family'] = 'Arial'


%load_ext rpy2.ipython

In [None]:
# Load the AnnData object that every plot and table below is derived from.
adata = sc.read('../Data/adata_d16_annotated.h5ad')



In [None]:
# UMAP overview coloured by the `Cell_types` annotation.
with plt.rc_context({"figure.figsize": (4.5, 4), "figure.dpi": 300}):
    sc.pl.umap(
        adata,
        color='Cell_types',
        size=20,
        frameon=False,
    )


In [None]:
import os

from matplotlib.patches import Circle

# Genes shown on the x-axis of the dot plot, in display order.
marker_genes = ['DLX6-AS1', 'DLX1', 'DLX2', 'DLX6', 'OTP', 'NR2F1', 'VSX2', 'POMC',
                'PRDM12', 'ASCL1', 'DLL1', 'FGF10', 'FOXG1']

# Placement of the colour swatches drawn beside the y tick labels, expressed in
# data coordinates of the main dot-plot axes. Both values are hand-tuned and
# need adjusting if the tick labels change length.
swatch_x = -9.2
swatch_radius = 0.3

# font.size is passed to rc_context rather than set with rcParams.update inside
# the block, so all figure styling is declared in one place. rc_context restores
# the previous rcParams on exit in either case.
with plt.rc_context({"figure.dpi": 300, "font.size": 12}):
    dp = sc.pl.dotplot(adata, marker_genes, "Cell_types", show=False, cmap='Blues')
    ax = dp["mainplot_ax"]
    ax.xaxis.set_tick_params(labelsize=12)
    ax.yaxis.set_tick_params(labelsize=12)

    cell_types = list(adata.obs.Cell_types.cat.categories)
    # Map each cell type to the colour stored for it in adata.uns.
    cell_type_colors = dict(zip(cell_types, adata.uns['Cell_types_colors']))

    # One swatch per y tick. Ticks are paired with categories by position, which
    # assumes the dot-plot rows follow the category order — TODO confirm.
    for y_pos, cell_type in zip(ax.get_yticks(), cell_types):
        swatch = Circle(
            (swatch_x, y_pos),
            radius=swatch_radius,
            color=cell_type_colors.get(cell_type, "black"),  # black when no colour is stored
            transform=ax.transData,
            clip_on=False,  # do not clip the swatch to the axes rectangle
        )
        ax.add_patch(swatch)

    plt.tight_layout()
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs('figures', exist_ok=True)
    plt.savefig('figures/d16_marker_dotplot.pdf', bbox_inches='tight')
    plt.show()

# Compute cluster percentages

In [None]:
# Number of cells per (batch, cell type) pair.
# observed=False is spelled out because both keys are categorical: it keeps
# combinations with zero cells in the table, so they enter the per-cell-type
# mean/std computed afterwards. The implicit default for this flag differs
# between pandas versions, which would otherwise change the result.
counts = (
    adata.obs
    .groupby(['diff_batch_2', 'Cell_types'], observed=False)
    .size()
    .reset_index(name='counts')
)

# Total number of cells per batch.
totals = (
    adata.obs
    .groupby('diff_batch_2', observed=False)
    .size()
    .reset_index(name='total_counts')
)

# Attach the batch totals and express each count as a percentage of its batch.
counts = counts.merge(totals, on='diff_batch_2')
counts['normalized_counts'] = (counts['counts'] / counts['total_counts']).mul(100)

counts

In [None]:
# Mean and population standard deviation (ddof=0) of the per-batch percentage
# of each cell type, rounded to two decimals.
# observed=False keeps every cell-type category in the summary regardless of
# the pandas version's default for categorical group keys.
df = (
    counts
    .groupby('Cell_types', observed=False)['normalized_counts']
    .agg(mean='mean', std=lambda pct: pct.std(ddof=0))
    .round(2)
    .reset_index()
)

df

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.lines as mlines

category_names = list(adata.obs['Cell_types'].cat.categories)

# Percentage of cells per cell type for each batch, as one list per batch in
# the order of `category_names`.
# The percentages are taken from `adata.obs` directly instead of from a subset
# AnnData: subsetting the whole object is unnecessary here and drops unused
# categories, which would make a batch lacking a cell type produce a shorter,
# misaligned list. Reindexing on `category_names` makes the alignment explicit
# and fills absent cell types with 0.
results = {
    batch: (
        adata.obs.loc[adata.obs['diff_batch_2'].isin([batch]), 'Cell_types']
        .value_counts(normalize=True)
        .reindex(category_names, fill_value=0)
        .mul(100)
        .round(1)
        .tolist()
    )
    # Insertion order is kept by the dict and determines the bar order in the plot.
    for batch in ('batch-3', 'batch-2', 'batch-1')
}



def survey_horizontal(results, category_names, category_colors=None):
    """Draw a horizontal stacked bar chart with one bar per entry of ``results``.

    Parameters
    ----------
    results : dict
        Maps a bar label to a list of values, one value per category, given in
        the same order as ``category_names``.
    category_names : list of str
        Category labels. Segments are stacked in the reverse of this order, so
        the last category is drawn first (starting at x = 0).
    category_colors : sequence of colour specs, optional
        Segment colours in the order of ``category_names``. Defaults to the
        eight-colour palette that was previously hard-coded here. Names and
        colours are both reversed before pairing, so when more colours than
        categories are supplied the trailing colours are the ones used.

    Returns
    -------
    (fig, ax)
        The created matplotlib figure and axes. Each segment is drawn with its
        category name as ``label``, so a legend can be added with
        ``ax.legend()``.

    Raises
    ------
    ValueError
        If there are fewer colours than categories; pairing them would
        otherwise silently leave some categories undrawn.
    """
    if category_colors is None:
        category_colors = ['#bb9c8a', '#3586bd', '#4f9e46', '#eddb7e', '#e85b3d', '#ed9892',
                           '#fac384', '#a4cde0']
    category_colors = list(category_colors)
    if len(category_colors) < len(category_names):
        raise ValueError(
            f"{len(category_names)} categories but only {len(category_colors)} colours supplied"
        )

    # Reverse names, colours and data columns together so they stay aligned.
    stacked_names = category_names[::-1]
    stacked_colors = category_colors[::-1]
    labels = list(results.keys())
    data = np.array(list(results.values()))[:, ::-1]
    data_cum = data.cumsum(axis=1)

    with plt.rc_context({"figure.dpi": 250}):
        fig, ax = plt.subplots(figsize=(3.5, 2.5))
        ax.set_xlim(0, np.sum(data, axis=1).max())
        ax.set_ylim(-0.5, len(labels) - 0.5)

        for i, (colname, color) in enumerate(zip(stacked_names, stacked_colors)):
            widths = data[:, i]
            # Each segment starts where the cumulative sum of the previous ones ends.
            starts = data_cum[:, i] - widths
            ax.barh(labels, widths, left=starts, height=0.7,
                    label=colname, color=color)

        ax.set_xlabel('% of cells', fontsize=11)
        ax.set_ylabel('')
        ax.set_yticks(range(len(labels)))
        ax.set_yticklabels(labels, fontsize=11)
        ax.set_xticks(np.linspace(0, 100, 5))
        ax.tick_params(axis='x', labelsize=8)

    return fig, ax

# Draw the per-batch composition chart. The returned (fig, ax) is not captured;
# the pyplot calls below act on the current figure, which is the one
# survey_horizontal has just created with plt.subplots.
survey_horizontal(results, category_names)
# Overrides the x tick label size of 8 that survey_horizontal sets.
plt.xticks(fontsize=9)
plt.yticks(fontsize=11)

# Assumes the figures/ directory already exists; savefig does not create it.
plt.savefig('figures/cluster_percentages_d16.pdf', bbox_inches='tight')
plt.show()

## Velocity

In [None]:
# Re-read the same file as at the top of the notebook, rebinding `adata` to a
# freshly loaded object for the velocity section.
adata = sc.read('../Data/adata_d16_annotated.h5ad')

In [None]:
# scVelo steps run in order on `adata`: moments, then velocity, then the
# velocity graph. Later steps presumably rely on what the earlier ones stored
# on the object — keep the order.
scv.pp.moments(adata)
scv.tl.velocity(adata)
# n_jobs=1: build the graph with a single job.
scv.tl.velocity_graph(adata, n_jobs=1)

In [None]:
# Velocity stream plot on the UMAP embedding, coloured by cell type; scVelo
# writes the image itself through `save`.
with plt.rc_context({"figure.dpi": 300}):
    scv.pl.velocity_embedding_stream(
        adata,
        basis='umap',
        color='Cell_types',
        legend_loc='right',
        save='d16_velocity.png',
    )


# Featureplots

In [None]:
# One expression UMAP per marker gene, read from the 'log_transformed' layer.
# The plots are not shown inline (show=False); scanpy saves each one via `save`.
with plt.rc_context({"figure.figsize": (5.1, 4), "figure.dpi": 300}):
    for gene in ('VSX2', 'NR2F1'):
        sc.pl.umap(
            adata,
            color=gene,
            layer='log_transformed',
            use_raw=False,
            cmap='jet',
            size=18,
            ncols=2,
            frameon=False,
            colorbar_loc=None,
            show=False,
            save=f'd16_{gene}.pdf',
        )
    

# DGE list

In [None]:
# Re-read the saved object before handing it to R, so `adata` no longer refers
# to the object used in the velocity section.
adata = sc.read('../Data/adata_d16_annotated.h5ad')


In [None]:
%%R -i adata
# Pseudo-bulk marker analysis with edgeR: each cluster is tested against the
# average of all other clusters, and the top up-regulated genes per cluster are
# written to DE_lists/d16_arc_de_list.csv.
# `-i adata` passes the Python `adata` object into this R cell.

# NOTE(review): presumably a compatibility shim for a Matrix/SeuratObject
# validator name; nothing in this cell reads the variable directly — confirm
# it is still needed.
Csparse_validate = "CsparseMatrix_validate"
library(Seurat)
library(edgeR)

# Convert to a Seurat object using the "counts" assay as counts and no data slot.
seur <- as.Seurat(adata, counts = "counts", data = NULL)

# Rename the assay produced by the conversion ("originalexp") to "RNA".
seur <- RenameAssays(seur, originalexp="RNA")

# Aggregate cells into pseudo-bulk samples, one per batch x cell type combination.
y <- Seurat2PB(seur, sample = "diff_batch_2", cluster = "Cell_types")
# Keep only pseudo-bulk samples whose library size exceeds 5e4.
keep.samples <- y$samples$lib.size > 5e4
y <- y[, keep.samples]
# Keep genes passing edgeR's expression filter, with clusters as the groups.
keep.genes <- filterByExpr(y, group=y$samples$cluster)
# NOTE(review): `keep=FALSE` presumably partial-matches `keep.lib.sizes`, i.e.
# library sizes are recomputed after gene filtering — confirm.
y <- y[keep.genes, , keep=FALSE]
y <- normLibSizes(y)

cluster <- as.factor(y$samples$cluster)

# Additive design: cluster effect plus batch effect.
batch <- factor(y$samples$sample)
design <- model.matrix(~ cluster + batch)
# Strip the "batch" prefix from the coefficient names and call the intercept "Int".
colnames(design) <- gsub("batch", "", colnames(design))
colnames(design)[1] <- "Int"

y <- estimateDisp(y, design, robust=TRUE)
fit <- glmQLFit(y, design, robust=TRUE)

# Contrast matrix with one column per cluster: within the first `ncls` rows the
# column holds 1 on its own row and 1/(1-ncls) elsewhere; the remaining rows
# (the other design coefficients) are 0. The first row (the "Int" coefficient)
# is then zeroed in every column.
ncls <- nlevels(cluster)
contr <- rbind( matrix(1/(1-ncls), ncls, ncls), matrix(0, ncol(design)-ncls, ncls) )
diag(contr) <- 1
contr[1,] <- 0
rownames(contr) <- colnames(design)
colnames(contr) <- paste0("cluster", levels(cluster))

# Quasi-likelihood F-test for each contrast column; the comparison label is
# stored on the result so it can be copied into the output table below.
qlf <- list()
for(i in 1:ncls){
 qlf[[i]] <- glmQLFTest(fit, contrast=contr[,i])
 qlf[[i]]$comparison <- paste0("cluster", levels(cluster)[i], "_vs_others")
}

# Maximum number of up-regulated genes kept per cluster.
top <- 500
topMarkers <- list()

# Empty frame with the output columns; per-cluster results are appended to it.
de_df = data.frame(matrix( 
  vector(), 0, 7, dimnames=list(c(), c("gene","logFC","logCPM","F","PValue","FDR",'comparison'))), 
                stringsAsFactors=F)

for(i in 1:ncls) {
    # Rank genes by p-value and flag those with positive log fold change.
    ord <- order(qlf[[i]]$table$PValue, decreasing=FALSE)
    up <- qlf[[i]]$table$logFC[ord] > 0
    # Indexing with 1:top yields NA entries when fewer than `top` genes are
    # up-regulated; they do not match any row name in the %in% filter below.
    topMarkers[[i]] <- rownames(y)[ord[up][1:top]]
    # NOTE(review): n='all' is a string where a count is expected; n=Inf is the
    # usual way to request every gene — confirm this behaves as intended.
    df = as.data.frame(topTags(qlf[[i]], n='all'))
    # Restrict the full table to the selected top up-regulated genes.
    df =df[rownames(df) %in% rownames(y)[ord[up][1:top]], ]  
    # Copy the comparison label stored on the test result in the loop above.
    df$comparison <- head(qlf[[i]])$comparison
    de_df = rbind(de_df, df)
}
print(dim(de_df))

# Assumes the DE_lists/ directory already exists.
write.csv(de_df, "DE_lists/d16_arc_de_list.csv")

In [None]:
# Read the marker table written by the R cell; the first CSV column is the index.
de_genes = pd.read_csv("DE_lists/d16_arc_de_list.csv", index_col=0)

# Pull the cluster name out of labels shaped like "cluster<name>_vs_others".
de_genes['cluster'] = de_genes['comparison'].str.extract(r'cluster(.*?)_vs_others', expand=False)

# Keep significant (FDR < 0.05), clearly up-regulated (logFC > 1) genes and fix
# the column order in a single selection.
de_genes = de_genes.loc[
    (de_genes['FDR'] < 0.05) & (de_genes['logFC'] > 1),
    ['gene', 'logFC', 'logCPM', 'F', 'PValue', 'FDR', 'comparison', 'cluster'],
]

de_genes.to_excel("DE_lists/d16_arc_de_list.xlsx")
de_genes

In [None]:
# First 8 genes per cluster, in the row order of the filtered table.
# The table is grouped once instead of re-running groupby for every cluster.
# sort=False yields clusters in order of first appearance, matching the order
# the previous unique()-based loop produced.
de_dict = {
    str(cluster): genes.tolist()[:8]
    for cluster, genes in de_genes.groupby('cluster', sort=False)['gene']
}

# Number of marker genes that passed the filters, per cluster.
de_genes['cluster'].value_counts()

In [None]:
# Heatmap of the selected marker genes (grouped by the cluster they mark) across
# cells grouped by cell type, using the 'log_transformed' layer.
with plt.rc_context({"figure.figsize": (100, 10), "figure.dpi": 600}):
    sc.pl.heatmap(
        adata,
        de_dict,
        groupby='Cell_types',
        layer='log_transformed',
        cmap='jet',
        show_gene_labels=True,
    )