In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
pd.set_option('display.max_columns', 500)
import warnings
warnings.filterwarnings("ignore")
from sklearn.neighbors import KNeighborsClassifier
import scanpy.external as sce

from rpy2.robjects import pandas2ri
from rpy2.robjects import r
import rpy2.rinterface_lib.callbacks
import anndata2ri
import rpy2.robjects.numpy2ri
#import numpy2ri
import anndata
# Switch on the rpy2 conversion layers (pandas, AnnData, numpy) so that objects can be
# handed to and returned from R cells.
pandas2ri.activate()
anndata2ri.activate()
rpy2.robjects.numpy2ri.activate()

# Use Arial as the matplotlib font family for every figure in this notebook.
plt.rcParams.update({
    'font.family': 'Arial'
})


# Load the rpy2 IPython extension; it provides the %%R cell magic used further down.
%load_ext rpy2.ipython

In [None]:
# Load the processed BMP-timing dataset and keep only the cells whose `day` is 'd16'.
adata = sc.read('../Data/SC/processed_bmp_timing_exp.h5ad')
# Indexing with a boolean mask yields a *view* of the full object. Later cells write to
# .obsm / .var / .obs and pass the object to R, so materialise an independent copy here
# instead of relying on AnnData's implicit copy-on-modification of views.
adata = adata[adata.obs.day == 'd16'].copy()

In [None]:
%%R -i adata -o adata_integrated -o umap_emb -o pca_emb -o hvg_list
# Seurat integration across cell lines. Takes `adata` from Python and hands back
# `adata_integrated`, `umap_emb`, `pca_emb` and `hvg_list`.
# NOTE(review): the embeddings are exported as matrices in the Seurat object's cell order;
# confirm that order matches `adata` wherever they are assigned positionally.

# NOTE(review): this variable is not referenced again in this cell — presumably a workaround
# for a Matrix / Seurat version mismatch; confirm it is still required.
Csparse_validate = "CsparseMatrix_validate"
library(Seurat)
library(dplyr)

# Build a Seurat object from the incoming data, taking raw counts from "counts" and no
# pre-normalised data slot.
sobj <- as.Seurat(adata, counts = "counts", data = NULL)

# One Seurat object per value of `cell_line`.
sobj_list <- SplitObject(sobj, split.by = 'cell_line')

# Normalise each object and pick its 3000 most variable features (vst).
sobj_list <- lapply(X = sobj_list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 3000)
})

# Choose 3000 features to drive the integration across the per-cell-line objects.
features <- SelectIntegrationFeatures(object.list = sobj_list,nfeatures = 3000)

# Scale and run PCA on each object using the shared integration features; the anchors
# below are searched with reduction = "rpca".
sobj_list <- lapply(X = sobj_list, FUN = function(x) {
    x <- ScaleData(x, features = features, verbose = FALSE)
    x <- RunPCA(x, features = features, verbose = FALSE)
  })

anchors <- FindIntegrationAnchors(object.list = sobj_list, anchor.features = features, reduction = "rpca")
# NOTE(review): k.weight=70 is hard-coded; presumably lowered to suit the smallest
# cell-line group — confirm.
sobj <- IntegrateData(anchorset = anchors, k.weight=70)
# Work on the integrated assay for scaling, PCA, UMAP, neighbours and clustering.
DefaultAssay(sobj) <- "integrated"
sobj <- ScaleData(sobj) %>% RunPCA(.)
sobj <- RunUMAP(sobj, dims = 1:50)
sobj <- FindNeighbors(sobj, dims = 1:50)


# Cluster at three resolutions; the Python side reads the resulting
# "integrated_snn_res.<resolution>" metadata columns.
sobj <- FindClusters(sobj,resolution=1.2)
sobj <- FindClusters(sobj,resolution=1.3)
sobj <- FindClusters(sobj,resolution=1.6)

# Export the PCA embedding and the row names of the object while "integrated" is the
# default assay (presumably the integration feature set — confirm).
pca_emb = Embeddings(object = sobj, reduction = "pca")
hvg_list = rownames(sobj)

# Switch the default assay back to the original one before exporting the object.
DefaultAssay(sobj) <- "originalexp"

umap_emb = Embeddings(object = sobj, reduction = "umap")
adata_integrated = as.SingleCellExperiment(sobj)  

# Keep the full Seurat object on disk as well.
saveRDS(sobj,'../Data/SC/d16_parse_seurat.rds')

In [None]:
# Attach the Seurat results to `adata`.
#
# The embeddings are assigned by position, whereas the cluster columns below are aligned
# by cell name. Splitting by cell line and re-merging during integration can change the
# cell order, so map every `adata` cell to its row in the Seurat output by name rather
# than assuming both objects share one order. `adata_integrated`, `umap_emb` and `pca_emb`
# are all exported from the same Seurat object, so they are taken to share its cell order.
seurat_row = adata_integrated.obs_names.get_indexer(adata.obs_names)
if (seurat_row < 0).any():
    raise ValueError(
        f"{int((seurat_row < 0).sum())} cells of `adata` are missing from the Seurat output"
    )

adata.obsm['X_umap'] = np.asarray(umap_emb)[seurat_row]
adata.obsm['X_pca'] = np.asarray(pca_emb)[seurat_row]

# Flag the genes whose names appear in the feature list returned from R.
adata.var['highly_variable'] = adata.var.index.isin(hvg_list)

# Copy the cluster labels for each resolution; Series assignment aligns on cell name.
for res_key in ('integrated_snn_res.1.2',
                'integrated_snn_res.1.3',
                'integrated_snn_res.1.6'):
    adata.obs[res_key] = adata_integrated.obs[res_key]


In [None]:
# Persist `adata` (now carrying the Seurat embeddings, feature flags and cluster labels).
adata.write('../Data/SC/parse_integrated_early_stage.h5ad')

In [None]:
# Reload the integrated object from the file written by the previous cell, so the
# notebook can be resumed here without re-running the R integration.
adata = sc.read('../Data/SC/parse_integrated_early_stage.h5ad')

In [None]:
# Overview UMAPs at 250 dpi: first the experimental covariates, then two of the
# clustering resolutions with the cluster ids drawn on the data.
_overview_kwargs = dict(
    use_raw=False,
    size=10,
    frameon=False,
    ncols=2,
    cmap='jet',
    colorbar_loc=None,
    wspace=0.1,
)

with plt.rc_context({"figure.dpi": 250}):
    sc.pl.umap(adata, color=['bmp_treatment', 'cell_line'], **_overview_kwargs)
    sc.pl.umap(
        adata,
        color=["integrated_snn_res.1.3", "integrated_snn_res.1.6"],
        legend_loc='on data',
        **_overview_kwargs,
    )


In [None]:
def assign_cell_type(adata, label, clusters, cluster_key="integrated_snn_res.1.3"):
    """Write ``label`` into ``adata.obs['Cell_types']`` for all cells in ``clusters``.

    Parameters
    ----------
    adata
        Object whose ``obs`` table holds ``cluster_key`` and an existing
        ``Cell_types`` column.
    label : str
        Cell-type name to assign.
    clusters : list of str
        Cluster ids (as strings) in ``cluster_key`` that receive ``label``.
    cluster_key : str
        Name of the ``obs`` column with the cluster assignment.

    Returns
    -------
    pandas.Index
        Names of the cells that were labelled.
    """
    in_clusters = adata.obs[cluster_key].isin(clusters).to_numpy()
    # A single .loc call with row mask and column name writes into `obs` itself. The
    # chained form obs["Cell_types"].loc[...] = ... may write to a temporary object and
    # is a silent no-op under pandas Copy-on-Write.
    adata.obs.loc[in_clusters, "Cell_types"] = label
    return adata.obs.index[in_clusters]


# Start from a clean slate so that re-running this cell is idempotent.
adata.obs['Cell_types'] = 'Unassigned'

# Assignments are applied in order and later ones overwrite earlier ones, so the order
# below is significant (notably for the resolution-1.6 override of the OTP+ neurons).
ant_prog = assign_cell_type(adata, "Anterior tuberal progenitors",
                            ['3', '0', '8', '2', '26', '19', '10', '15'])
tel_prog = assign_cell_type(adata, "Telencephalic progenitors", ['1', '7', '30'])
# NOTE(review): cluster '23' used to be listed here as well as under "Posterior tuberal
# precursors"; the later precursor assignment always won, so it is listed only there now.
# Confirm that precursors is the intended label for cluster 23.
post_prog = assign_cell_type(adata, "Posterior tuberal progenitors",
                             ['4', '5', '9', '20', '21', '27'])
ant_pre = assign_cell_type(adata, "Anterior tuberal precursors", ['11'])
tel_pre = assign_cell_type(adata, "Telencephalic precursors", ['16'])
post_pre = assign_cell_type(adata, "Posterior tuberal precursors", ['23'])
dev_eye = assign_cell_type(adata, "Optic area progenitors", ['17', '18'])
prdm12_nptx2 = assign_cell_type(adata, "PRDM12+/NPTX2+ neurons", ['12', '25', '24'])
pomc_prdm12 = assign_cell_type(adata, "POMC+ neurons", ['6'])
nr5a1_sox14_nptx2 = assign_cell_type(adata, "NR5A1+/SOX14+ neurons", ['14'])
otp_sst = assign_cell_type(adata, "OTP+ neurons", ['22', '29'])
# Additional OTP+ cells are only separated at the finer 1.6 resolution.
otp_sst = assign_cell_type(adata, "OTP+ neurons", ['21'],
                           cluster_key="integrated_snn_res.1.6")
tele_neurons = assign_cell_type(adata, "Telencephalic neurons", ['13'])
lhx1 = assign_cell_type(adata, "LHX1+/ARX+ neurons", ['31'])
vm = assign_cell_type(adata, "Ventral midbrain progenitors", ['28'])


In [None]:
# Colours handed to scanpy for the `Cell_types` categories.
palette_cell_types = [
    '#bb9c8a', '#85c668', '#8ebfaf', '#b15a27', '#3586bd',
    '#4f9e46', '#eddb7e', '#929bfc', '#e85b3d', '#ed9892',
    '#d0a9b7', '#fa8016', '#fac384', '#a4cde0', '#8B67AD',
]

# UMAP coloured by annotated cell type, rendered at 300 dpi and saved by scanpy
# with the file-name suffix given in `save`.
with plt.rc_context({"figure.dpi": 300}):
    sc.pl.umap(
        adata,
        color=['Cell_types'],
        use_raw=False,
        size=15,
        palette=palette_cell_types,
        frameon=False,
        ncols=3,
        cmap='jet',
        colorbar_loc=None,
        save='_d16_parse_annotations.pdf',
    )
    


In [None]:
# Persist the current `adata` to the annotated early-stage file.
adata.write('../Data/SC/parse_annotated_early_stage.h5ad')

In [None]:
# Shuffle the cell order with a fixed seed (presumably so that no group is systematically
# drawn on top of the others in the scatter plots — confirm).
# Indexing returns a view; copy it because a later cell writes to `adata.obs`.
adata = adata[adata.obs.sample(frac=1, random_state=42).index].copy()

In [None]:
# Five colours for the `bmp_treatment` groups.
palette = ['#8B67AD', '#32A02D', '#B2DF8A', '#2078B4', '#A6CEE3']

# UMAP coloured by BMP treatment, rendered at 300 dpi and saved by scanpy
# with the file-name suffix given in `save`.
with plt.rc_context({"figure.dpi": 300}):
    sc.pl.umap(
        adata,
        color=['bmp_treatment'],
        palette=palette,
        use_raw=False,
        size=15,
        frameon=False,
        ncols=3,
        cmap='jet',
        colorbar_loc=None,
        save='_d16_bmp_timing.pdf',
    )
    


In [None]:
# Fix the category order of `cell_line` before plotting it with a three-colour palette.
cell_line_categorical = adata.obs['cell_line'].cat
adata.obs['cell_line'] = cell_line_categorical.reorder_categories(['KOLF', 'RC17', 'Bio-N'])
palette = ['#CDBD6F', '#9CA99F', '#B58379']

# UMAP coloured by cell line, rendered at 300 dpi and saved by scanpy
# with the file-name suffix given in `save`.
with plt.rc_context({"figure.dpi": 300}):
    sc.pl.umap(
        adata,
        color=['cell_line'],
        palette=palette,
        use_raw=False,
        size=15,
        frameon=False,
        ncols=3,
        cmap='jet',
        colorbar_loc=None,
        save='_d16_cell_line.pdf',
    )

In [None]:
# Reload the annotated object from disk, replacing the in-memory `adata`
# (which earlier cells shuffled and modified for plotting).
adata = sc.read('../Data/SC/parse_annotated_early_stage.h5ad')