In [1]:
import os
# Switch the working directory to the parent folder; the relative paths used
# in this notebook ("./{trial_name}/...") are resolved against it.
# NOTE(review): not idempotent - re-running this cell moves up one more level.
os.chdir(path='../')

import numpy as np
import scanpy as sc
import pandas as pd
# NOTE(review): imported after the chdir above - presumably so that the
# STForte package resolves from the parent directory; confirm.
import STForte.helper as stfhelper
import plotly.express as px
# Name of the trial folder; used below to build input and output paths.
trial_name = "trial-mouse_brain_coronal_Xenium"

import plotly.io as pio   
# NOTE(review): presumably turns off MathJax for kaleido-based static export;
# confirm against the installed plotly/kaleido versions.
pio.kaleido.scope.mathjax = None

Global seed set to 0


In [2]:
# Read the AnnData file stored under this trial's outputs folder and display
# its summary as the cell output.
stforte_h5ad_path = f"./{trial_name}/outputs/stforte.h5ad"
adata = sc.read_h5ad(stforte_h5ad_path)
adata

AnnData object with n_obs × n_vars = 130557 × 248
    obs: 'cell_id', 'x_centroid', 'y_centroid', 'transcript_counts', 'control_probe_counts', 'control_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area', 'n_counts', 'STForte_Mask', 'leiden', 'label_curated'
    var: 'gene_ids', 'feature_types', 'genome', 'n_cells'
    uns: 'leiden', 'log1p', 'neighbors', 'umap'
    obsm: 'STForte_ATTR', 'STForte_COMB', 'STForte_TOPO', 'X_umap', 'spatial'
    layers: 'count'
    obsp: 'connectivities', 'distances'

In [3]:
import plotly.express as px
from bokeh.themes import Theme
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.layouts import column, row
from bokeh.plotting import curdoc, figure, show
from bokeh.transform import factor_cmap
from bokeh.io import output_notebook, export_svg, export_png
# Render bokeh figures inline in the notebook.
output_notebook()

# Folder that receives the exported figures. `exist_ok=True` makes the cell
# safe to re-run and avoids the check-then-create race of testing for the
# folder with `os.path.exists` before calling `os.makedirs`.
plot_dir = f"./{trial_name}/plots"
os.makedirs(plot_dir, exist_ok=True)

In [4]:
# Leiden cluster ids, listed in the order in which they are paired with the
# palette entries below.
curated_cluster_order = [
    "1", "2", "4", "5", "8", "13", "14", "15", "17", "28", "30",
    "9", "10", "12", "20", "21", "0", "11", "16", "23", "26",
    "3", "7", "22", "24", "25", "6", "18", "19", "27", "29",
]

# Concatenation of three plotly qualitative palettes. Bold and Vivid are passed
# through the STForte `rgb2hex` helper; Plotly is used as-is, and only the
# first ten Vivid entries are kept.
curated_palette = (
    stfhelper.pl.rgb2hex(px.colors.qualitative.Bold)
    + px.colors.qualitative.Plotly
    + stfhelper.pl.rgb2hex(px.colors.qualitative.Vivid)[:10]
)

# Mapping from cluster id to colour; `zip` pairs the two sequences positionally.
color_curated = dict(zip(curated_cluster_order, curated_palette))

In [5]:
# Leiden cluster ids selected for the hippocampal analysis and the curated
# names given to them. The two lists are parallel: entry i of one belongs to
# entry i of the other.
leiden_hippocampal = ["11", "23", "26", "16",]
cat_hippocampal = ['Hipp. Stratum', 'CA1sp', 'CA2sp/CA3sp', 'Dg-sg',]

# Keep only the observations whose Leiden cluster is in the selection.
# `.copy()` materialises the subset as a standalone AnnData: adding an `.obs`
# column to a view would otherwise force an implicit copy and emit the
# "Trying to modify attribute `.obs` of view" warning.
adata_hippocampal = adata[adata.obs['leiden'].isin(leiden_hippocampal), :].copy()

# Rename the cluster ids to their curated names and store them as a
# categorical column whose category order follows `cat_hippocampal`.
label_hippocampal = adata_hippocampal.obs['leiden'].cat.rename_categories(dict(zip(leiden_hippocampal, cat_hippocampal)))
adata_hippocampal.obs['label_hippocampal'] = pd.Categorical(label_hippocampal, categories=cat_hippocampal)


Trying to modify attribute `.obs` of view, initializing view as actual.



In [6]:
# Rectangular crop of the section: keep observations whose second spatial
# coordinate lies in (1800, 3200) and whose first lies in (2400, 7400).
coords_all = adata.obsm['spatial']
in_y_band = (coords_all[:, 1] > 1800) & (coords_all[:, 1] < 3200)
in_x_band = (coords_all[:, 0] > 2400) & (coords_all[:, 0] < 7400)
ind_corr = in_y_band & in_x_band

adata_new = adata[ind_corr, :]

# Legend text per selected cluster, "<curated name> (<leiden id>)"; every
# other cluster is collapsed into "others".
name_dict = dict(zip(leiden_hippocampal, cat_hippocampal))
legend_labels = {leiden_id: f"{name_dict[leiden_id]} ({leiden_id})" for leiden_id in leiden_hippocampal}
cluster = adata_new.obs["leiden"].apply(lambda x: legend_labels.get(x, "others"))

# The second coordinate is negated for plotting; the radius is that of a
# circle with the cell's area, enlarged by a factor of 1.2.
coords_roi = adata_new.obsm['spatial']
source = ColumnDataSource(dict(
    x=coords_roi[:, 0],
    y=-coords_roi[:, 1],
    rad=np.sqrt(adata_new.obs['cell_area'] / np.pi)*1.2,
    label=cluster,
))

# Selected clusters take their curated colour, "others" is drawn in grey.
label_cmap = factor_cmap(
    'label',
    palette=[color_curated[leiden_id] for leiden_id in leiden_hippocampal] + ["#bcbcbc"],
    factors=[legend_labels[leiden_id] for leiden_id in leiden_hippocampal] + ["others"],
)

p = figure(width=400, height=240, toolbar_location=None, match_aspect=True)
p.circle(
    'x', 'y', source=source,
    fill_color=label_cmap,
    radius='rad', line_color=None,
    legend_group='label',
)

# Strip the frame, axes and grid so that only the cells remain.
p.grid.visible = False
p.axis.visible = False
p.outline_line_width = 0
p.title.text_font = "Arial"

# Legend appearance; applied before the legend is moved below the plot area.
legend_style = {
    "border_line_width": 2.,
    "border_line_color": "#030303",
    "label_text_font": "Arial",
    "label_text_font_size": '18px',
    "glyph_height": 25,
    "glyph_width": 25,
    "label_height": 25,
    "nrows": 3,
    "location": (10, 10),
}
for legend_prop, legend_value in legend_style.items():
    setattr(p.legend, legend_prop, legend_value)
p.add_layout(p.legend[0], 'below')

p.match_aspect = True
show(p)

In [7]:
# Write the figure as PNG first, then switch its backend to SVG and write the
# vector version. The final call's return value is shown as the cell output.
hippocampal_png = f"{trial_name}/plots/show_hippocampal.png"
hippocampal_svg = f"{trial_name}/plots/show_hippocampal.svg"

export_png(p, filename=hippocampal_png)
p.output_backend = "svg"
export_svg(p, filename=hippocampal_svg)

['trial-mouse_brain_coronal_Xenium_raw/plots/show_hippocampal.svg']

In [8]:
# Same rectangular crop as the labelled figure: second spatial coordinate in
# (1800, 3200), first spatial coordinate in (2400, 7400).
spatial_all = adata.obsm['spatial']
ind_corr = np.logical_and.reduce([
    spatial_all[:, 1] > 1800,
    spatial_all[:, 1] < 3200,
    spatial_all[:, 0] > 2400,
    spatial_all[:, 0] < 7400,
])

adata_new = adata[ind_corr, :]

# "<curated name> (<leiden id>)" for the selected clusters, "others" otherwise.
name_dict = dict(zip(leiden_hippocampal, cat_hippocampal))
hippocampal_factors = [f"{name_dict[x]} ({x})" for x in leiden_hippocampal]
factor_of = dict(zip(leiden_hippocampal, hippocampal_factors))
cluster = adata_new.obs["leiden"].apply(lambda x: factor_of[x] if x in factor_of else "others")

# Plot columns: x, negated y, a radius derived from the cell area (equal-area
# circle scaled by 1.2) and the colouring label.
spatial_roi = adata_new.obsm['spatial']
plot_columns = dict(
    x=spatial_roi[:, 0],
    y=-spatial_roi[:, 1],
    rad=np.sqrt(adata_new.obs['cell_area'] / np.pi)*1.2,
    label=cluster,
)
source = ColumnDataSource(plot_columns)

hippocampal_palette = [color_curated[x] for x in leiden_hippocampal] + ["#bcbcbc"]

# This variant is drawn without a legend.
p = figure(width=400, height=200, toolbar_location=None, match_aspect=True)
p.circle(
    'x', 'y', source=source,
    fill_color=factor_cmap('label', palette=hippocampal_palette,
                           factors=hippocampal_factors + ["others"]),
    radius='rad', line_color=None,
)

# Hide frame, axes and grid.
p.grid.visible = False
p.axis.visible = False
p.outline_line_width = 0
p.title.text_font = "Arial"
p.match_aspect = True
show(p)

In [9]:
# Export the legend-free figure: PNG first, then SVG after switching the
# figure's output backend. The last call's return value is the cell output.
pure_png = f"{trial_name}/plots/show_hippocampal_pure.png"
pure_svg = f"{trial_name}/plots/show_hippocampal_pure.svg"

export_png(p, filename=pure_png)
p.output_backend = "svg"
export_svg(p, filename=pure_svg)

['trial-mouse_brain_coronal_Xenium_raw/plots/show_hippocampal_pure.svg']

In [10]:
from bokeh.transform import linear_cmap
from bokeh.models import ColorBar

# Genes shown in the expression panels (one panel per gene).
genes = ["Prox1", "Neurod6", "Wfs1", "Cpne4"]

# Rectangular crop: second spatial coordinate in (1800, 3200), first spatial
# coordinate in (2400, 7400).
ind_corr = (adata.obsm['spatial'][:, 1] > 1800) & (adata.obsm['spatial'][:, 1] < 3200) &\
  (adata.obsm['spatial'][:, 0] > 2400) & (adata.obsm['spatial'][:, 0] < 7400)

adata_new = adata[ind_corr, :]

name_dict = dict(zip(leiden_hippocampal, cat_hippocampal))

# Coordinates and radii are the same for every gene, so compute them once.
# The second coordinate is negated for plotting; the radius is that of a
# circle with the cell's area, scaled by 1.2.
roi_coords = adata_new.obsm['spatial']
roi_x, roi_y = roi_coords[:, 0], -roi_coords[:, 1]
roi_rad = np.sqrt(adata_new.obs['cell_area'] / np.pi)*1.2

# One data source and one figure per gene, keyed by gene name.
source, p = dict(), dict()
for gg in genes:
    # `.toarray()` replaces the `.A` shorthand, which is deprecated on SciPy
    # sparse matrices and no longer available in recent releases.
    gene_expr = adata_new[:, gg].X.toarray().ravel()
    source[gg] = ColumnDataSource(dict(x=roi_x, y=roi_y, rad=roi_rad, value=gene_expr))
    # Colour scale spans this gene's own expression range.
    color_mapper = linear_cmap(field_name='value', palette=px.colors.sequential.Peach,
                               low=gene_expr.min(), high=gene_expr.max())

    p[gg] = figure(width=400, height=160, toolbar_location=None, match_aspect=True)
    p[gg].circle(
        'x', 'y', source=source[gg],
        color=color_mapper,
        radius='rad', line_color=None,
    )
    # Remove side padding, frame, axes and grid so panels tile tightly.
    p[gg].min_border_left = 0
    p[gg].min_border_right = 0
    p[gg].outline_line_width = 0
    p[gg].axis.visible = False
    p[gg].grid.visible = False
    p[gg].title.text_font = "Arial"
    p[gg].match_aspect = True

# Lay the panels out column by column, two panels per column, following the
# order of `genes`.
panels_per_column = 2
grid = row(*[
    column(*[p[gg] for gg in genes[start:start + panels_per_column]])
    for start in range(0, len(genes), panels_per_column)
])
show(grid)

In [11]:
# Export the gene-panel grid: PNG first, then SVG once every panel has been
# switched to the SVG backend. The last call's return value is the cell output.
gene_grid_png = f"{trial_name}/plots/show_hippocampal_gene.png"
gene_grid_svg = f"{trial_name}/plots/show_hippocampal_gene.svg"

export_png(grid, filename=gene_grid_png)
for gene_panel in p.values():
    gene_panel.output_backend = "svg"
export_svg(grid, filename=gene_grid_svg)

['trial-mouse_brain_coronal_Xenium_raw/plots/show_hippocampal_gene.svg']

In [12]:
import scipy.stats as stats

# One-sided Wilcoxon rank-sum test per (gene, cluster): is the gene's
# expression in the cluster greater than in the remaining hippocampal cells?
# Rows of the resulting table are genes, columns are the curated cluster names
# (same order as `leiden_hippocampal`).
pval_floor = 1e-6  # p-values below this are reported as the string "<1e-6"
leiden_of_cell = adata_hippocampal.obs['leiden']

greater_arr = []
for gg in genes:
    # Dense 1-D expression vector of this gene, extracted once per gene.
    # `.toarray()` replaces the `.A` shorthand, which is deprecated on SciPy
    # sparse matrices and no longer available in recent releases.
    gene_values = adata_hippocampal[:, gg].X.toarray().ravel().astype(np.float64)
    ggarr = []
    for ll in leiden_hippocampal:
        in_cluster = (leiden_of_cell == ll).to_numpy()
        _, pval = stats.ranksums(gene_values[in_cluster], gene_values[~in_cluster], alternative="greater")
        # Compare with `<` so that a NaN p-value is kept as NaN instead of
        # being reported as "<1e-6".
        ggarr.append("<1e-6" if pval < pval_floor else pval)
    greater_arr.append(ggarr)

greater_arr = pd.DataFrame(greater_arr, columns=cat_hippocampal, index=genes)
greater_arr.to_excel(f"./{trial_name}/outputs/geneexp_hippocampal_ranksum_pval.xlsx")
greater_arr.to_latex(f"./{trial_name}/outputs/geneexp_hippocampal_ranksum_pval.tex")
greater_arr


In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.



Unnamed: 0,Hipp. Stratum,CA1sp,CA2sp/CA3sp,Dg-sg
Prox1,1.0,1.0,1.0,<1e-6
Neurod6,1.0,<1e-6,<1e-6,1.0
Wfs1,0.001695,<1e-6,1.0,1.0
Cpne4,1.0,1.0,<1e-6,<1e-6
