# Embedding

In [None]:
# --- Imports and global figure defaults for the embedding cell ---
import scanpy as sc
from scipy.stats import pearsonr
from copy import deepcopy
import numpy as np
# scanpy plotting defaults: white background, 8x8 figures, then screen/save DPI.
sc.set_figure_params(facecolor="white", figsize=(8, 8))
sc.set_figure_params(scanpy=True, dpi_save=400,dpi=150)
import matplotlib.pyplot as plt
import matplotlib as mpl
# matplotlib defaults, set after the scanpy calls above.
plt.rcParams["figure.figsize"] = [8, 8]
plt.rcParams["figure.autolayout"] = True
plt.rcParams["font.family"] = "Arial"
# Font type 42 = TrueType fonts in PDF output (see matplotlib rcParams docs).
plt.rcParams['pdf.fonttype'] = 42
# Custom 500-level colormap interpolated through the listed hex colours
# (light grey -> yellows -> orange -> magenta -> purple), used as the default image colormap.
SUPER_MAGMA = mpl.colors.LinearSegmentedColormap.from_list('super_magma', colors=['#e0e0e0', '#dedede', '#fff68f', '#ffec8b', '#ffc125', '#ee7600', '#ee5c42', '#cd3278', '#c71585', '#68228b'], N=500)
mpl.rc('image', cmap = SUPER_MAGMA)

# Load the fully processed dataset and keep only the AFE and PFE samples.
adata = sc.read_h5ad('END_OF_ANALYSIS.h5ad')

# Subsetting returns a view; copy it so the in-place steps below act on a real
# AnnData object instead of triggering an implicit view-to-copy conversion.
adata = adata[adata.obs['Type'].isin(['AFE', 'PFE'])].copy()

# Select 1000 highly variable genes, then drop mitochondrial, ribosomal and
# cell-cycle genes from that selection.
sc.pp.highly_variable_genes(adata, n_top_genes=1000)
is_mt_or_ribo = adata.var[['mt', 'ribo']].any(axis=1).to_numpy(dtype=bool)
adata.var['highly_variable'] = adata.var['highly_variable'].to_numpy(dtype=bool) & ~is_mt_or_ribo

# One gene symbol per line; the context manager closes the file handle.
with open('./regev_lab_cell_cycle_genes.txt') as cell_cycle_file:
    cell_cycle_genes = [line.strip() for line in cell_cycle_file]
is_cell_cycle = np.isin(adata.var.index, cell_cycle_genes)
adata.var['highly_variable'] = adata.var['highly_variable'].to_numpy(dtype=bool) & ~is_cell_cycle

# Keep the uncorrected matrix; the plotting cells read expression from this layer.
adata.layers['pre_combat'] = deepcopy(adata.X)

# ComBat with 'phase' as the batch key and 'Experiment' as covariate, then PCA
# on the remaining highly variable genes and Harmony integration across
# 'Experiment' (stored by scanpy in obsm['X_pca_harmony']).
sc.pp.combat(adata, key='phase', covariates=['Experiment'])
sc.pp.pca(adata, use_highly_variable=True, n_comps=150)
sc.external.pp.harmony_integrate(adata, 'Experiment')


In [None]:
# Neighbour graph on the Harmony-corrected PCs, followed by a diffusion map.
sc.pp.neighbors(adata, use_rep='X_pca_harmony', method='gauss', n_neighbors=10)
sc.tl.diffmap(adata, n_comps=15)

# Store every diffusion component in obs and correlate it with sequencing depth.
rp = []
for component, values in enumerate(adata.obsm['X_diffmap'].T):
    column = f'diffmap{component}'
    adata.obs[column] = values
    rp.append(pearsonr(adata.obs['log1p_total_counts'], adata.obs[column]))
r, p = list(zip(*rp))

# A component counts as a "count effect" when the correlation is both
# significant (p < 0.01) and strong (|r| > 0.5).
count_effects = (np.array(p) < 0.01) & (np.abs(np.array(r)) > 0.5)
not_count_effects = ~count_effects
print(f'Count Effect Dimentions: {count_effects}')

# Keep the unfiltered embedding and eigenvalues under new keys, then drop the
# depth-driven components from the working copies.
adata.obsm['X_diffmapWithCountEffects'] = adata.obsm['X_diffmap']
adata.uns['diffmap_evalsWithCountEffects'] = adata.uns['diffmap_evals']
adata.obsm['X_diffmap'] = adata.obsm['X_diffmapWithCountEffects'][:, not_count_effects]
adata.uns['diffmap_evals'] = adata.uns['diffmap_evalsWithCountEffects'][not_count_effects]
print(f"Remaining diffmap components: {adata.obsm['X_diffmap'].shape[1]}")

covariates = [
    'Type', 'Experiment', 'log1p_total_counts', 'doublet_score',
    'pct_counts_mt', 'pct_counts_ribo', 'phase',
]
sc.pl.diffmap(adata, color=covariates, save='Covariates', show=False)

# Rebuild the neighbour graph on the filtered diffusion components.
sc.pp.neighbors(adata, use_rep='X_diffmap', method='gauss', n_neighbors=10)

# Leiden resolutions to run; each result is stored in its own
# `leiden_res<resolution>` obs column.
leids = [0.5,1,]

# Base RGB colour per sample type; every cluster is drawn as a shade of the
# colour of its majority type.
type2color = {
    'ESC':np.array((1,0,0)),
    'DE':np.array((1,1,0)),
    'AFE':np.array((0,0,1)),
    'PFE':np.array((1,0,1)),
}
# Lower bound of the brightness factor applied to the base colour.
cmin = 0.3

for resolution in leids:
    leid = f'leiden_res{resolution}'
    sc.tl.leiden(adata, resolution=resolution, key_added=leid)

    # Tag every cluster with the sample type contributing most of its cells,
    # giving intermediate labels of the form '<Type>-<cluster id>'.
    name = adata.obs[[leid,'Type']].value_counts().unstack(level=1).idxmax(axis=1)
    adata.obs[leid] = adata.obs[leid].apply(lambda x: name.loc[x]+'-'+x).astype(str)

    namemapper = {}
    donesofar = {}
    # value_counts() orders clusters by decreasing size, so within a type the
    # largest cluster becomes '-1', the next '-2', and so on.
    cats = adata.obs[leid].value_counts().index
    if len(cats) > 1:
        for label in cats:
            sample_type = label[:label.find('-')]
            donesofar[sample_type] = donesofar.get(sample_type, 0) + 1
            namemapper[label] = f's{sample_type}-{donesofar[sample_type]}'
        adata.obs[leid] = adata.obs[leid].map(namemapper).astype(str).astype('category')
        # Order categories by sample type, then by the 1-based per-type number.
        # Enumerating exactly 1..count per type keeps this list identical to the
        # existing category set even when all clusters share one type.
        ordered_names = [
            f's{sample_type}-{number}'
            for sample_type in adata.obs['Type'].cat.categories
            for number in range(1, donesofar.get(sample_type, 0) + 1)
        ]
        adata.obs[leid] = adata.obs[leid].cat.reorder_categories(ordered_names, ordered=True)
    else:
        # Single cluster: it is simply '<s + type>-1'.
        adata.obs[leid] = adata.obs[leid].apply(lambda x: 's'+x[:x.find('-')+1]+'1').astype(str)
        adata.obs[leid] = adata.obs[leid].astype('category')

    # Shade = base colour * (rank / clusters_of_same_type * (1 - cmin) + cmin),
    # so the last cluster of a type gets the full base colour.
    categories = adata.obs[leid].cat.categories
    cluster_colors = []
    for label in categories:
        prefix, _, rank = label.partition('-')  # e.g. 'sAFE', '2'
        same_type_total = categories.str.startswith(prefix + '-').sum()
        brightness = int(rank) / same_type_total * (1 - cmin) + cmin
        cluster_colors.append(mpl.colors.to_hex(type2color[prefix[1:]] * brightness))
    adata.uns[f'{leid}_colors'] = cluster_colors

# From here on `leids` holds the obs column names rather than the resolutions.
leids = [f'leiden_res{resolution}' for resolution in leids]





# Root cell: the cell with the largest value on column 1 of the filtered
# diffusion map.
adata.uns['iroot'] = adata.obsm['X_diffmap'][:,1].argmax()
root_position = adata.uns['iroot']

# Build the 0/1 indicator as an array and assign it in one step; writing through
# `adata.obs['root'].iloc[...]` is chained assignment, which pandas may apply to
# a temporary copy and silently discard.
root_flag = np.zeros(adata.n_obs, dtype=int)
root_flag[root_position] = 1
adata.obs['root'] = root_flag

sc.external.tl.wishbone(adata, start_cell=adata.obs_names[root_position], components=[0,1], branch=False) # https://www.sciencedirect.com/science/article/pii/S0092867414004711

# Per-cell column sums of the connectivity graph. ravel() yields a 1-D vector
# whether the sparse container returns a (1, n) matrix or a flat array.
adata.obs['neighbors'] = np.asarray(adata.obsp['connectivities'].sum(axis=0)).ravel()
# float32 so the column is left numeric by the dtype filter applied before writing.
adata.obs['trajectory_wishbone'] = adata.obs['trajectory_wishbone'].astype(np.float32)

# Before writing, convert every obs column to strings unless its dtype is
# exactly float32 or the platform default int.
# NOTE(review): columns of any other numeric dtype (e.g. float64) are also
# stringified by this filter - confirm that is intended.
numeric_dtypes_kept = (np.dtype('float32'), np.dtype('int'))
for column in adata.obs.columns:
    if adata.obs[column].dtype not in numeric_dtypes_kept:
        adata.obs[column] = adata.obs[column].astype(str)
adata.write_h5ad('AFE.PFE.diffmap.h5ad')

# Plotting

In [1]:
# --- Imports and global figure defaults for the plotting cells ---
import scanpy as sc
import pandas as pd
import seaborn as sns
from scipy.stats import pearsonr
import numpy as np
import random
import os
from scipy.stats import norm
# scanpy plotting defaults: white background, 8x8 figures, then screen/save DPI.
sc.set_figure_params(facecolor="white", figsize=(8, 8))
sc.set_figure_params(scanpy=True, dpi_save=400,dpi=150)
import matplotlib.pyplot as plt
import matplotlib as mpl
# matplotlib defaults, set after the scanpy calls above.
plt.rcParams["figure.figsize"] = [8, 8]
plt.rcParams["figure.autolayout"] = True
plt.rcParams["font.family"] = "Arial"
# Font type 42 = TrueType fonts in PDF output (see matplotlib rcParams docs).
plt.rcParams['pdf.fonttype'] = 42
# Custom 500-level colormap interpolated through the listed hex colours,
# installed as the default image colormap.
SUPER_MAGMA = mpl.colors.LinearSegmentedColormap.from_list('super_magma', colors=['#e0e0e0', '#dedede', '#fff68f', '#ffec8b', '#ffc125', '#ee7600', '#ee5c42', '#cd3278', '#c71585', '#68228b'], N=500)
mpl.rc('image', cmap = SUPER_MAGMA)

# Load the object written at the end of the embedding cell and make sure the
# output directory used by the fig.savefig(...) calls exists.
adata = sc.read_h5ad('AFE.PFE.diffmap.h5ad')
os.makedirs('figures',exist_ok=True)

In [2]:
# Marker genes shown in the pseudotime heatmap: de-duplicated and sorted alphabetically.
gene_names = sorted({
    'VGLL2', 'EYA1', 'IRX1', 'IRX2', 'IRX3', 'ISL1', 'NKX2-3', 'OTX2',
    'PRRX2', 'RIPPLY3', 'SIX1', 'SOX2', 'FOXA2', 'TBX1', 'PAX1', 'PAX9',
    'NKX2-5', 'FOXE1', 'PBX1', 'NKX2-6',
})

# Cell names ordered by increasing Wishbone trajectory value.
wishbone_sorted = adata.obs['trajectory_wishbone'].sort_values()
newindex = wishbone_sorted.index
# 100-point moving-average window (equal weights summing to one); 'valid'
# convolution keeps only positions where the window fits entirely.
kernal = np.full(100, 1.0 / 100)
mode = 'valid'




def plotboth(df,nm,figheight=0.5,cmap=SUPER_MAGMA):
    """Save two PDF heatmaps of *df*: a bare colour strip and an annotated version.

    Parameters
    ----------
    df
        Table passed to ``sns.heatmap``; colours are mapped over the fixed
        range 0..1.
    nm
        Tag inserted into both output file names.
    figheight
        Height in inches of the bare strip (its width is fixed at 5 inches).
    cmap
        Colormap handed to ``sns.heatmap``.

    Writes ``figures/PseudotimeHeatmap.<nm>.COLORBOX.pdf`` (no tick labels, no
    colour bar) and ``figures/PseudotimeHeatmap.<nm>.pdf`` (row labels and
    colour bar, default figure size). The ``figures`` directory must exist.
    """
    # Bare strip: no tick labels and no colour bar.
    fig, ax = plt.subplots()
    sns.heatmap(df, xticklabels=False, vmin=0, vmax=1, cmap=cmap, ax=ax, cbar=False, yticklabels=False)
    fig.set_figwidth(5)
    fig.set_figheight(figheight)
    fig.savefig(f'figures/PseudotimeHeatmap.{nm}.COLORBOX.pdf')
    # Close rather than clear: a cleared figure stays registered with pyplot,
    # so repeated calls would otherwise accumulate open figures.
    plt.close(fig)

    # Annotated version with row labels and colour bar.
    fig, ax = plt.subplots()
    sns.heatmap(df, xticklabels=False, vmin=0, vmax=1, cmap=cmap, ax=ax)
    fig.savefig(f'figures/PseudotimeHeatmap.{nm}.pdf')
    plt.close(fig)

# Smooth each marker gene's uncorrected expression along pseudotime.
smoothed_by_gene = {}
for gene in gene_names:
    ordered_counts = adata[newindex, gene].layers['pre_combat'].toarray()
    smoothed_by_gene[gene] = np.convolve(np.array(ordered_counts)[:, 0], kernal, mode=mode)
genedf = pd.DataFrame(smoothed_by_gene)

# Rows are ordered by where each gene's smoothed signal peaks.
natural_order = genedf.idxmax().sort_values().index

# Min-max scale every gene to the 0..1 range expected by plotboth.
shifted = genedf - genedf.min()
genedf = (shifted / shifted.max())[natural_order].T
plotboth(genedf, 'Genes', figheight=10, cmap='viridis')



# Pseudotime smoothed with the same window, so it lines up column-for-column
# with the smoothed gene and cluster tracks.
pseudotime = pd.DataFrame({
    column: np.convolve(adata.obs.loc[newindex, column], kernal, mode=mode)
    for column in ['trajectory_wishbone']
}).T
plotboth(pseudotime, 'trajectory_wishbone', cmap='cividis')


# leidendf = {}
# for c in adata.obs['leiden'].cat.categories:
#     signal = (adata.obs.loc[newindex,'leiden'] == c).astype(float)
#     leidendf[c] = np.convolve(signal,kernal,mode=mode)
# leidendf = pd.DataFrame(leidendf)
# natural_order = leidendf.idxmax().sort_values().index
# leidendf = leidendf[natural_order].T
# plotboth(leidendf,'LeidenProbs',figheight=5)

# majority_vote = {}
# boxcolor = dict(zip(adata.obs['leiden'].cat.categories,adata.uns['leiden_colors']))
# for c in adata.obs['leiden'].cat.categories:
#     signal = (adata.obs.loc[newindex,'leiden'] == c).astype(float)
#     majority_vote[c] = np.convolve(signal,kernal,mode=mode)
# majority_vote = pd.DataFrame(majority_vote).idxmax(axis=1).apply(boxcolor.get)

# cmap_dict = dict(enumerate(boxcolor.values()))
# get_num = dict(zip(cmap_dict.values(),cmap_dict.keys()))
# majority_vote = majority_vote.apply(get_num.get)
# cmap = mpl.colors.ListedColormap([cmap_dict[i] for i in range(6)])

# fig, ax = plt.subplots()
# sns.heatmap(pd.DataFrame({'leiden':majority_vote}).T, cmap=cmap, vmin=-0.5, vmax=len(cmap_dict)-0.5, yticklabels=False, xticklabels=False, cbar=False, ax=ax)
# fig.set_figwidth(5)
# fig.set_figheight(0.5)
# fig.savefig(f'figures/PseudotimeHeatmap.leidenVote.COLORBOX.pdf')
# plt.clf()

# fig, ax = plt.subplots()
# sns.heatmap(pd.DataFrame({'leiden':majority_vote}).T, cmap=cmap, vmin=-0.5, vmax=len(cmap_dict)-0.5, xticklabels=False, ax=ax)
# fig.savefig(f'figures/PseudotimeHeatmap.leidenVote.pdf')
# plt.clf()



<Figure size 750x1500 with 0 Axes>

<Figure size 1200x1200 with 0 Axes>

<Figure size 750x75 with 0 Axes>

<Figure size 1200x1200 with 0 Axes>

In [3]:
# Colour of every Leiden cluster, as stored alongside the clustering.
clusters = adata.obs['leiden'].cat.categories
linecolor = dict(zip(clusters, adata.uns['leiden_colors']))

# Moving-window fraction of cells belonging to each cluster along pseudotime.
leiden_by_pseudotime = adata.obs.loc[newindex, 'leiden']
leidendf = pd.DataFrame({
    c: np.convolve((leiden_by_pseudotime == c).astype(float), kernal, mode=mode)
    for c in clusters
})

# Rows sorted by peak position, latest-peaking cluster first.
natural_order = leidendf.idxmax().sort_values().index[::-1]
leidendf = leidendf[natural_order].T


# Bootstrap the occupancy curves: resample cells with replacement 1000 times,
# put each resample back in pseudotime order and smooth it as above.
random.seed(0)
smpkey = {cell: position for position, cell in enumerate(newindex)}
population = list(newindex)
dfs = []
for _ in range(1000):
    smp = sorted(random.choices(population, k=len(newindex)), key=smpkey.get)
    resampled_leiden = adata.obs.loc[smp, 'leiden']
    leidendfSAMP = pd.DataFrame({
        c: np.convolve((resampled_leiden == c).astype(float), kernal, mode=mode)
        for c in natural_order
    }).T
    dfs.append(leidendfSAMP.values)
dfs = np.array(dfs)

# 1st / 99th percentile over the resamples, kept in leidendf's layout.
leidendfUP = leidendf*0 + np.percentile(dfs, 99, axis=0)
leidendfDOWN = leidendf*0 + np.percentile(dfs, 1, axis=0)

# One horizontal track per cluster: the smoothed occupancy curve plus the
# bootstrap 1st-99th percentile band, stacked with a vertical offset of one.
fig, ax = plt.subplots()
smoothed_time = pseudotime.iloc[0]
n_tracks = len(leidendf.index)
for n, cluster in enumerate(leidendf.index):
    track_color = linecolor[cluster]
    print(cluster, track_color)
    ax.plot(smoothed_time, n + leidendf.loc[cluster], color=track_color)
    # Keep both band edges inside this track's own [n, n+1] slot.
    band_low = (n + leidendfDOWN.loc[cluster]).clip(lower=n, upper=n + 1)
    band_high = (n + leidendfUP.loc[cluster]).clip(lower=n, upper=n + 1)
    ax.fill_between(smoothed_time, band_low, band_high, alpha=0.5, color=track_color)
    if n != 0:
        # Separator between neighbouring tracks.
        ax.plot([0,1],[n,n],'k')
ax.set_ylim(ymin=0, ymax=n_tracks)
ax.set_xlim(xmin=smoothed_time.min(), xmax=smoothed_time.max())
ax.set_xticks([])
ax.set_yticks([])
fig.set_figwidth(5)
fig.savefig('figures/PseudotimeHeatmap.LeidenProbs.pdf')
# Close the figure so it is released instead of lingering as an empty canvas.
plt.close(fig)

<Figure size 750x1200 with 0 Axes>

In [None]:
# leids = list(adata.obs.columns[adata.obs.columns.str.startswith('leiden_res')])
# sc.pl.diffmap(adata, color=['root','trajectory_wishbone','neighbors','Experiment','Type','leiden']+leids,save='NewLeidens',show=False)
# sc.pl.diffmap(adata, color='Type',save='Type',show=False)
# sc.pl.diffmap(adata, color='leiden',save='leiden',show=False)


In [7]:
# Marker genes for the MAGIC plots: the heatmap set plus CER1, de-duplicated and sorted.
gene_names = sorted({
    'VGLL2', 'EYA1', 'IRX1', 'IRX2', 'IRX3', 'ISL1', 'NKX2-3', 'OTX2',
    'PRRX2', 'RIPPLY3', 'SIX1', 'SOX2', 'FOXA2', 'TBX1', 'PAX1', 'PAX9',
    'NKX2-5', 'FOXE1', 'PBX1', 'NKX2-6', 'CER1',
})

# Run MAGIC for the marker genes only, on a copy so `adata` itself is untouched.
adata_magic = sc.external.pp.magic(adata, gene_names, t=3, random_state=0, copy=True)
del adata_magic.raw

# Carry the cell annotations and both embeddings over to the imputed copy.
adata_magic.obs = adata.obs
for embedding_key in ('X_umap', 'X_diffmap'):
    adata_magic.obsm[embedding_key] = adata.obsm[embedding_key]

# Shared styling for both diffusion-map panels.
panel_style = dict(show=False, add_outline=True, frameon=False)
sc.pl.diffmap(adata_magic, color=gene_names, save='MAGIC.png', **panel_style)
selected_panels = 'CER1, EYA1, VGLL2, OTX2, PAX1, PAX9, Type, trajectory_wishbone'.split(', ')
sc.pl.diffmap(adata_magic, color=selected_panels, save='MAGIC.reubttle.png', **panel_style)

# Gene set used for the 'pfe_score' below (presumably PFE marker genes - confirm).
pfe = 'HOXA3','SOX2','FOXA2','PAX1','PAX9','HOXB3','NKX2-5','DMRT2','FOXE1','PBX1','NKX2-6'
# Per-cell score for the gene set, stored in adata.obs['pfe_score'].
sc.tl.score_genes(adata,gene_list=pfe,score_name='pfe_score')

# Wrap the score column in a one-variable AnnData and run MAGIC on it (t=3, fixed seed).
# NOTE(review): the object passed to MAGIC contains only the score column, so the
# smoothing is presumably based on the score itself rather than on gene
# expression - confirm this is intended.
adata_magic = sc.external.pp.magic(sc.AnnData(adata.obs[['pfe_score']]),t=3,random_state=0,copy=True)
# Attach the embeddings so the smoothed score can be drawn on the diffusion map.
adata_magic.obsm['X_umap'] = adata.obsm['X_umap']
del adata_magic.raw
adata_magic.obsm['X_diffmap'] = adata.obsm['X_diffmap']
# 'pfe_score' is the single variable of adata_magic; plotted with the diverging 'bwr' colormap.
sc.pl.diffmap(adata_magic, color=['pfe_score'],save='pfe_score',show=False,cmap='bwr')

