In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
pd.set_option('display.max_columns', 500)

import warnings
warnings.filterwarnings("ignore")
from sklearn.neighbors import KNeighborsClassifier


from rpy2.robjects import pandas2ri
from rpy2.robjects import r
import rpy2.rinterface_lib.callbacks
import anndata2ri
import rpy2.robjects.numpy2ri
#import numpy2ri
import anndata
# Turn on the global rpy2 converters so pandas, AnnData and NumPy objects are
# converted automatically when passed to / returned from the %%R cells below.
pandas2ri.activate()
anndata2ri.activate()
rpy2.robjects.numpy2ri.activate()

# Use Arial for all text rendered by matplotlib in this notebook.
plt.rcParams.update({
    'font.family': 'Arial'
})


# Load the rpy2 IPython extension; it provides the %%R cell magic used below.
%load_ext rpy2.ipython

In [None]:
# Load the processed BMP-timing experiment and drop every cell whose `day` is 'd16'.
adata = sc.read('../Data/SC/processed_bmp_timing_exp.h5ad')
# Boolean indexing returns a *view* of the parent AnnData. Later cells write to
# `adata.obsm`, `adata.obs` and `adata.layers`, so materialise the subset now
# instead of relying on an implicit view-to-copy conversion (whose warning is
# silenced by the global warnings filter).
adata = adata[adata.obs.day != 'd16'].copy()
# Cell counts per remaining day (displayed as the cell output).
adata.obs.day.value_counts()

In [None]:
%%R -i adata -o adata_integrated -o umap_emb -o pca_emb -o hvg_list

# Seurat reciprocal-PCA integration across cell lines.
# Input : `adata` (converted by anndata2ri; raw counts expected in the "counts" layer).
# Output: `adata_integrated` (SingleCellExperiment with cluster labels),
#         `pca_emb` / `umap_emb` (embeddings, rows in the SAME order as the input cells),
#         `hvg_list` (feature names of the integrated assay).

# NOTE(review): presumably a workaround for a Matrix / SeuratObject version
# mismatch around `Csparse_validate` -- confirm it is still required.
Csparse_validate = "CsparseMatrix_validate"
library(Seurat)
library(dplyr)

sobj <- as.Seurat(adata, counts = "counts", data = NULL)

# Remember the input cell order. The integrated object is assembled from the
# per-cell-line objects, so its cell order is not guaranteed to match the
# input, while the Python side assigns the embeddings to `adata.obsm` by position.
input_cells <- colnames(sobj)

sobj_list <- SplitObject(sobj, split.by = 'cell_line')

# Per-cell-line normalisation and variable-feature selection.
sobj_list <- lapply(X = sobj_list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 3000)
})

features <- SelectIntegrationFeatures(object.list = sobj_list, nfeatures = 3000)

# rPCA anchors need each object scaled and PCA-reduced on the shared features.
sobj_list <- lapply(X = sobj_list, FUN = function(x) {
    x <- ScaleData(x, features = features, verbose = FALSE)
    x <- RunPCA(x, features = features, verbose = FALSE)
})

anchors <- FindIntegrationAnchors(object.list = sobj_list, anchor.features = features, reduction = "rpca")
sobj <- IntegrateData(anchorset = anchors, k.weight = 60)
DefaultAssay(sobj) <- "integrated"
sobj <- ScaleData(sobj) %>% RunPCA(.)
sobj <- RunUMAP(sobj, dims = 1:50)
sobj <- FindNeighbors(sobj, dims = 1:50)

# One clustering per resolution; each call adds an `integrated_snn_res.<res>`
# metadata column that the Python side copies into `adata.obs`.
for (res in c(0.5, 0.6, 0.7, 0.8, 1.2, 1.4)) {
    sobj <- FindClusters(sobj, resolution = res)
}

# Fail loudly if integration changed the set of cell names.
stopifnot(setequal(input_cells, colnames(sobj)))

# Export embeddings with rows reordered to the input cell order.
pca_emb = Embeddings(object = sobj, reduction = "pca")[input_cells, , drop = FALSE]
# Taken while the "integrated" assay is the default, i.e. the integration features.
hvg_list = rownames(sobj)

DefaultAssay(sobj) <- "originalexp"

umap_emb = Embeddings(object = sobj, reduction = "umap")[input_cells, , drop = FALSE]
adata_integrated = as.SingleCellExperiment(sobj)

saveRDS(sobj,'../Data/SC/late_stage_parse_seurat.rds')

In [None]:
%%R
# Sanity check: print the dimensions of the PCA embedding stored on the R-side `sobj`.
dim(Embeddings(object = sobj, reduction = "pca"))

In [None]:
# Bring the Seurat results back onto the Python-side AnnData.
adata.obsm['X_umap'] = umap_emb
adata.obsm['X_pca'] = pca_emb
# Flag the genes that Seurat returned in `hvg_list`.
adata.var['highly_variable'] = adata.var.index.isin(hvg_list)

# Copy the cluster labels for every resolution computed in the R cell.
for resolution in ('0.5', '0.6', '0.7', '0.8', '1.2', '1.4'):
    cluster_key = f'integrated_snn_res.{resolution}'
    adata.obs[cluster_key] = adata_integrated.obs[cluster_key]



In [None]:
# Checkpoint: persist the integrated object so the R integration need not be re-run.
adata.write('../Data/SC/parse_integrated_late_stage.h5ad')

In [None]:
# Reload the checkpoint written by the previous cell (replaces the in-memory `adata`).
adata = sc.read('../Data/SC/parse_integrated_late_stage.h5ad')

In [None]:
# UMAP overview coloured by sample, BMP treatment and cell line (one panel each).
overview_keys = ['sample', 'bmp_treatment', 'cell_line']

with plt.rc_context({"figure.dpi": 600}):
    sc.pl.umap(
        adata,
        color=overview_keys,
        ncols=3,
        use_raw=False,
        cmap='jet',
        frameon=False,
        colorbar_loc=None,
    )

In [None]:
# Compare the clusterings obtained at each resolution, labels drawn on the embedding.
cluster_keys = [
    f'integrated_snn_res.{resolution}'
    for resolution in ('0.5', '0.6', '0.7', '0.8', '1.2', '1.4')
]

with plt.rc_context({"figure.dpi": 250}):
    sc.pl.umap(
        adata,
        color=cluster_keys,
        legend_loc='on data',
        use_raw=False,
        size=10,
        frameon=False,
        ncols=3,
        cmap='jet',
        colorbar_loc=None,
        wspace=-0.05,
    )

In [None]:
# --- Reference dataset (tagged source 'ZA') ---------------------------------
# NOTE(review): this path has no '../' prefix and no 'SC/' folder, unlike every
# other path in the notebook -- confirm it is intentional.
adata10x = sc.read('Data/adata_d50_d70.h5ad')
adata10x.obs['batch'] = adata10x.obs['reactionID'].copy()
# Placeholder values for columns that only exist in the query dataset, plus the source tag.
for obs_key, obs_value in (('bmp_treatment', '-'), ('cell_line', '-'), ('source', 'ZA')):
    adata10x.obs[obs_key] = obs_value

# --- Query dataset (tagged source 'AM') --------------------------------------
adata.layers['log_transformed'] = adata.X.copy()
adata.obs['batch'] = adata.obs['cell_line'].copy()
for obs_key, obs_value in (('Cell_types_2', '-'), ('source', 'AM')):
    adata.obs[obs_key] = obs_value

# Short aliases for the two clusterings used downstream.
adata.obs['clusters'] = adata.obs["integrated_snn_res.0.7"]
adata.obs['clusters2'] = adata.obs["integrated_snn_res.1.2"]

# --- Concatenate -------------------------------------------------------------
# batch_key=None keeps the hand-made 'batch' column instead of adding a new one.
kept_obs_columns = [
    'bmp_treatment', 'cell_line', 'batch', 'Cell_types',
    'Cell_types_2', 'day', 'source', 'clusters',
]
adata_concat = adata.concatenate(adata10x, batch_key=None, join='outer')
adata_concat.obs = adata_concat.obs[kept_obs_columns]
# Drop the merged per-gene annotations before handing the object to R.
del adata_concat.var

In [None]:
%%R -i adata_concat -o umap_emb -o pca_emb

# Seurat reciprocal-PCA integration of the concatenated object, split by `batch`.
# Input : `adata_concat` (raw counts expected in the "counts" layer).
# Output: `pca_emb` / `umap_emb`, rows in the SAME order as the input cells.

# NOTE(review): presumably a workaround for a Matrix / SeuratObject version
# mismatch around `Csparse_validate` -- confirm it is still required.
Csparse_validate = "CsparseMatrix_validate"
library(Seurat)
library(dplyr)

sobj <- as.Seurat(adata_concat, counts = "counts", data = NULL)

# Remember the input cell order. The integrated object is assembled from the
# per-batch objects, so its cell order is not guaranteed to match the input.
# Only bare matrices are exported here and Python assigns them to
# `adata_concat.obsm` by position, so any reordering would silently attach
# embeddings to the wrong cells.
input_cells <- colnames(sobj)

sobj_list <- SplitObject(sobj, split.by = 'batch')

# Per-batch normalisation and variable-feature selection.
sobj_list <- lapply(X = sobj_list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 3000)
})

features <- SelectIntegrationFeatures(object.list = sobj_list, nfeatures = 3000)

# rPCA anchors need each object scaled and PCA-reduced on the shared features.
sobj_list <- lapply(X = sobj_list, FUN = function(x) {
    x <- ScaleData(x, features = features, verbose = FALSE)
    x <- RunPCA(x, features = features, verbose = FALSE)
})

anchors <- FindIntegrationAnchors(object.list = sobj_list, anchor.features = features, reduction = "rpca")
sobj <- IntegrateData(anchorset = anchors, k.weight = 60)
DefaultAssay(sobj) <- "integrated"
sobj <- ScaleData(sobj) %>% RunPCA(.)
sobj <- RunUMAP(sobj, dims = 1:50)

# Fail loudly if integration changed the set of cell names.
stopifnot(setequal(input_cells, colnames(sobj)))

# Export embeddings with rows reordered to the input cell order.
pca_emb = Embeddings(object = sobj, reduction = "pca")[input_cells, , drop = FALSE]
# Kept on the R side only; not listed in the -o outputs of this cell.
hvg_list = rownames(sobj)

DefaultAssay(sobj) <- "originalexp"

umap_emb = Embeddings(object = sobj, reduction = "umap")[input_cells, , drop = FALSE]
#adata_integrated = as.SingleCellExperiment(sobj)

saveRDS(sobj,'../Data/SC/late_stage_parse_10x_integration.rds')

In [None]:
# kNN label transfer: annotate the query cells (source 'AM') with the
# `Cell_types_2` labels of the reference cells (source 'ZA'), using the shared
# integrated PCA space. Cells with a weak vote or far from any reference cell
# are labelled "Unassigned".
from sklearn.metrics.pairwise import pairwise_distances, pairwise_distances_argmin_min

# Tunable parameters of the transfer.
N_NEIGHBORS = 100          # neighbours that vote for each query cell
MIN_VOTE_FRACTION = 0.5    # minimum share of votes required for the winning label
DISTANCE_PERCENTILE = 70   # query cells beyond this percentile of nearest-reference distance are rejected

adata_concat.obsm['X_umap'] = umap_emb
adata_concat.obsm['X_pca'] = pca_emb

ref_idx = adata_concat.obs["source"] == "ZA"
query_idx = adata_concat.obs["source"] == "AM"

train_X = adata_concat[ref_idx].obsm["X_pca"]
query_X = adata_concat[query_idx].obsm["X_pca"]
train_Y = adata_concat[ref_idx].obs['Cell_types_2'].to_numpy()

knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
knn.fit(train_X, train_Y)

# Vote fractions per reference label for every query cell.
knn_probs = knn.predict_proba(query_X)
knn_pred = knn.classes_[np.argmax(knn_probs, axis=1)]
max_probs = np.max(knn_probs, axis=1)

# Distance from each query cell to its nearest reference cell. This is computed
# in chunks, so the full (n_query x n_reference) matrix is never held in memory.
_, min_distances = pairwise_distances_argmin_min(query_X, train_X, metric='euclidean')
distance_threshold = np.percentile(min_distances, DISTANCE_PERCENTILE)

confident = (max_probs >= MIN_VOTE_FRACTION) & (min_distances <= distance_threshold)
predicted_cell_types = np.where(confident, knn_pred, "Unassigned")

# Values are assigned by position: this relies on the query cells appearing in
# the same order in `adata_concat` and in `adata` (the concatenation put `adata` first).
adata_concat.obs.loc[query_idx, "predicted_cell_types"] = predicted_cell_types
adata.obs.loc[adata.obs["source"] == "AM", "predicted_cell_types"] = predicted_cell_types


In [None]:
# Joint embedding coloured by dataset of origin, to eyeball how well the two sources mix.
with plt.rc_context({"figure.dpi": 250}):
    sc.pl.umap(
        adata_concat,
        color=['source'],
        use_raw=False,
        size=10,
        frameon=False,
        ncols=3,
        cmap='jet',
        colorbar_loc=None,
    )

In [None]:

# Three panels on the joint embedding:
#   query cells by cluster | query cells by transferred label | reference cells by label.
# Each entry is (source subset, obs column to colour by, panel title).
panels = [
    ('AM', 'clusters', 'AM'),
    ('AM', 'predicted_cell_types', 'AM'),
    ('ZA', 'Cell_types_2', 'ZA'),
]

with plt.rc_context({"figure.dpi": 400}):
    # A figure takes its dpi from rcParams when it is created, so it must be
    # created inside the context for the setting to have any effect.
    fig, axes = plt.subplots(1, 3, figsize=(20, 4))

    for ax, (source, color_key, panel_title) in zip(axes, panels):
        sc.pl.umap(adata_concat[adata_concat.obs.source == source], color=[color_key],
                   frameon=False, ax=ax, size=15, title=panel_title, show=False)

fig.tight_layout()
plt.show()

In [None]:
# Query-only embedding: transferred labels next to the two clusterings used for annotation.
# Each entry is (obs column to colour by, extra keyword arguments for that panel).
panels = [
    ('predicted_cell_types', {}),
    ('integrated_snn_res.0.7', {'legend_loc': 'on data'}),
    ('integrated_snn_res.1.2', {'legend_loc': 'on data'}),
]

with plt.rc_context({"figure.dpi": 400}):
    # A figure takes its dpi from rcParams when it is created, so it must be
    # created inside the context for the setting to have any effect.
    fig, axes = plt.subplots(1, 3, figsize=(20, 4))

    for ax, (color_key, extra_kwargs) in zip(axes, panels):
        sc.pl.umap(adata, color=[color_key],
                   frameon=False, ax=ax, size=15, show=False, **extra_kwargs)

fig.tight_layout()
plt.show()

In [None]:
# NOTE(review): the write below is commented out, so on a fresh Restart & Run All
# the read loads whatever file is already on disk (or fails if there is none),
# replacing the `adata_concat` computed above -- confirm this is intended.
#adata_concat.write('../Data/SC/parse_10x_integration.h5ad')
adata_concat = sc.read('../Data/SC/parse_10x_integration.h5ad')

In [None]:
# Manual annotation: map cluster IDs to cell-type labels.
#
# Each rule is (clustering column, cluster IDs, label). Rules are applied in
# order and a later rule overwrites an earlier one for the cells it matches,
# so the sequence is significant. Cells matched by no rule keep the default
# label "Unassigned".
RES_07 = "integrated_snn_res.0.7"
RES_12 = "integrated_snn_res.1.2"

cell_type_rules = [
    (RES_07, ['2', '20'], "Tanycytes"),
    # Reset the cells of res-1.2 cluster 35; any res-0.7 rule below that
    # matches the same cells still takes precedence.
    (RES_12, ['35'], "Unassigned"),
    (RES_07, ['26', '27'], "Optic area neurons"),
    (RES_07, ['1', '15'], "Immature ARC neurons"),
    (RES_07, ['6', '8', '5', '18', '23', '9', '16'], "Telencephalic neurons"),
    (RES_07, ['0', '21'], "POMC+/SOX14+/NR5A1+"),
    (RES_07, ['4'], "POMC+/TBX3+/NR5A2+"),
    (RES_07, ['7'], "OTP+/SST+/BNC2+"),
    (RES_07, ['11'], "DLX6-AS1+/FOXP2+"),
    (RES_07, ['12'], "AGRP+/OTP+"),
    (RES_07, ['13'], "Astrocytes"),
    (RES_07, ['14'], "PNOC+/NPFFR2+"),
    (RES_07, ['17'], "LHX1+/ARX+"),
    (RES_07, ['19'], "GPR149+/LHX8+"),
    (RES_07, ['22'], "FEZF1+/SOX14+"),
    (RES_07, ['24'], "GHRH+/PNOC+"),
    (RES_07, ['10'], "NR5A2+/ONECUT1/3+"),
]

adata.obs['Cell_types'] = 'Unassigned'

for cluster_column, cluster_ids, cell_type in cell_type_rules:
    # One .loc call on the frame itself. The chained form
    # `adata.obs[col].loc[rows] = value` writes to a temporary Series and is
    # silently lost under pandas Copy-on-Write.
    adata.obs.loc[adata.obs[cluster_column].isin(cluster_ids), 'Cell_types'] = cell_type


In [None]:
# Annotated cell types next to the resolution-0.7 clustering they were derived from.
# One colour per `Cell_types` category.
palette_cell_types = ['#3b89bf','#bb9c8a','#ed9892','#d0a9b7','#b15a27','#8B67AD','#fa8016','#8690fc','#e85b3d','#71B09B','#a4cde0','#85c668','#a11d02','#eddb7e','#4f9e46','#faaa4e','#b281e6']

with plt.rc_context({"figure.dpi": 400}):
    # A figure takes its dpi from rcParams when it is created, so it must be
    # created inside the context for the setting to have any effect.
    fig, axes = plt.subplots(1, 2, figsize=(24, 8))

    sc.pl.umap(adata, color=['Cell_types'],
               frameon=False, ax=axes[0], size=25, show=False, legend_fontsize='x-large', palette=palette_cell_types)

    sc.pl.umap(adata, color=['integrated_snn_res.0.7'],
               frameon=False, ax=axes[1], size=25, show=False, legend_loc='on data', legend_fontsize='xx-large')

fig.tight_layout()
plt.show()

Reference colours for the main cell-type groups:

- POMC = '#eddb7e'
- OTP/AGRP = '#3586bd'
- Tele = '#d0a9b7'
- Tany = '#faaa4e'

In [None]:
# Final cell-type UMAP, written to disk through scanpy's `save=` option.
# This palette replaces the one defined in the side-by-side figure above.
palette_cell_types = [
    '#3b89bf', '#8B67AD', '#bb9c8a', '#fa8016', '#b15a27', '#71B09B',
    '#e85b3d', '#8690fc', '#f4a989', '#89969c', '#4f9e46', '#85c668',
    '#a11d02', '#eddb7e', '#faaa4e', '#d0a9b7', '#a4cde0',
]

# NOTE(review): unlike the cell-line figure ('_d50_cell_line.pdf'), this `save`
# suffix has no leading underscore -- confirm the resulting file name is intended.
with plt.rc_context({"figure.dpi": 400}):
    sc.pl.umap(
        adata,
        color=['Cell_types'],
        frameon=False,
        size=15,
        show=False,
        palette=palette_cell_types,
        save='d50_parse_annotations.pdf',
    )


In [None]:
# UMAP coloured by BMP treatment, one colour per treatment category; saved via `save=`.
palette = ['#32A02D', '#B2DF8A', '#2078B4', '#A6CEE3']

with plt.rc_context({"figure.dpi": 300}):
    sc.pl.umap(
        adata,
        color=['bmp_treatment'],
        palette=palette,
        use_raw=False,
        size=15,
        frameon=False,
        ncols=3,
        cmap='jet',
        colorbar_loc=None,
        na_in_legend=False,
        save='d50_bmp_treatment.pdf',
    )

In [None]:
# UMAP coloured by cell line. Fix the category order first so each line is
# paired with its colour in `palette` and the legend follows this order.
cell_line_order = ['KOLF', 'RC17', 'Bio-N']
adata.obs['cell_line'] = adata.obs['cell_line'].cat.reorder_categories(cell_line_order)
palette = ['#CDBD6F', '#9CA99F', '#B58379']

with plt.rc_context({"figure.dpi": 300}):
    sc.pl.umap(
        adata,
        color=['cell_line'],
        palette=palette,
        use_raw=False,
        size=15,
        frameon=False,
        ncols=3,
        cmap='jet',
        colorbar_loc=None,
        save='_d50_cell_line.pdf',
    )

In [None]:
# Checkpoint: persist the annotated object (cell types, transferred labels, plot colours).
adata.write('../Data/SC/parse_annotated_late_stage.h5ad')

In [None]:
# Reload the annotated checkpoint written by the previous cell (replaces the in-memory `adata`).
adata = sc.read('../Data/SC/parse_annotated_late_stage.h5ad')