# Spatial region identification for human dorsolateral prefrontal cortex (DLPFC) data

First, load the necessary packages.

In [1]:
import os
# Move the working directory two levels up BEFORE the project imports below.
# NOTE(review): presumably this is the repository root, so that `STForte` is
# importable and the relative `./data/...` paths used later resolve - confirm.
os.chdir(path='../../')
import scanpy as sc
import plotly.express as px
from STForte import STGraph
from STForte import STForteModel
from STForte.helper import save_gdata
# Relative directory under which this trial's checkpoints, data, models and
# outputs are written by the cells below.
trial_name = "trial-DLPFC-pca/151673"
# Plotly's default qualitative colour sequence (not used in the cells shown here).
color_pallette = px.colors.qualitative.Plotly

Global seed set to 0


## Data preprocessing and generation
We apply simple preprocessing to the original data to remove empty count data, and then generate the graph data from the original AnnData object.

In [2]:
# Read the DLPFC slice 151673 from its .h5ad file and display a summary of it.
raw_h5ad_path = './data/DLPFC/151673_10xvisium.h5ad'
adata = sc.read_h5ad(raw_h5ad_path)
adata

AnnData object with n_obs × n_vars = 3639 × 33538
    obs: 'in_tissue', 'array_row', 'array_col', 'spatialLIBD'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial', 'spatialLIBD_colors'
    obsm: 'spatial'

In [3]:
# Take the fiducial diameter stored with the slide's scale factors as the
# distance parameter `d` for graph construction.
# NOTE(review): presumably `d` is a neighbourhood radius in full-resolution
# pixel units - confirm against STGraph.graphFromAnndata.
spatial_meta = adata.uns['spatial']['stomic']
d = spatial_meta['scalefactors']['fiducial_diameter_fullres']

# Build the spatial graph from the AnnData object.
stgraph = STGraph.graphFromAnndata(adata=adata, d=d)

# Compress the expression features with PCA (300 dimensions in the recorded run).
stgraph.pca()

# Pad the graph with additional nodes and rebuild its kNN structure with k=18.
# In the recorded run this grows the graph from 3639 observed spots to 14201 nodes.
stgraph.padding(reconstruct_knn=True, reconstruct_k=18)

# Export the graph as the data object consumed by the model and display it.
# NOTE(review): presumably a PyTorch Geometric `Data` object - confirm.
gdata = stgraph.topyg()
gdata

d-based initialize:   0%|          | 0/3639 [00:00<?, ?it/s]

PCA pre-compression for data, from 21842 onto 300-dim.
Scaling data: None; SVD solver: full; random_state=42.
Start compression...	Done! Elapsed time: 25.11s.


brute-force knn initialize:   0%|          | 0/14201 [00:00<?, ?it/s]

Data(x=[14201, 300], xfp=[14201, 300], adj_t=[14201, 14201, nnz=255618], inv_dist=[255618], value_idx=[3639], infer_idx=[10562], coord=[14201, 2], x_id=[14201], mask_idx=[0])

## STForte Model

First, we set up the model for DLPFC analysis using the AnnData object and the graph data.

In [4]:
# Checkpoints for this trial are written below the trial directory.
ckpt_dir = './{:s}/pl_ckpts/'.format(trial_name)

# Instantiate the model on the AnnData object and the graph data; train for 450 epochs.
model = STForteModel(
    adata=adata,
    gdata=gdata,
    epochs=450,
    output_dir=ckpt_dir,
)
model

<STForte._model.STForteModel at 0x7f0a17152700>

Fit the model.

In [5]:
# Train the model; with the configuration above, training stops once
# max_epochs (450) is reached.
model.fit()

Global seed set to 42
  rank_zero_deprecation(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type              | Params
----------------------------------------------------
0 | attr_encoder  | Sequential        | 71.9 K
1 | strc_encoder  | Sequential_f91146 | 71.9 K
2 | attr_decoder  | Sequential        | 72.2 K
3 | strc_decoder  | Sequential        | 2.1 K 
4 | discriminator | Sequential        | 1.1 K 
----------------------------------------------------
219 K     Trainable params
0         Non-trainable params
219 K     Total params
0.877     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=450` reached.


Save the anndata, gdata and model parameters for reproducibility.

In [6]:
# Create the data directory if it is missing. `exist_ok=True` replaces the
# check-then-create pattern, so re-running the cell is safe and there is no
# window between the existence check and the creation.
os.makedirs(f"{trial_name}/data", exist_ok=True)

# Persist the graph data and the AnnData object used for this trial.
save_gdata(gdata, path=f"{trial_name}/data/gdata.pkl")
adata.write_h5ad(f"{trial_name}/data/trial.h5ad")

# Persist the trained model parameters.
# NOTE(review): the models/ directory is not created in this cell; presumably
# `save_state_dict` creates it itself - confirm.
model.save_state_dict(f'./{trial_name}/models/', alias="STForteModel_DLPFC_151673")

Save the STForte output results

In [7]:
# NOTE(review): the return value is discarded, so this presumably stores the
# latent representation of the original spots on `adata` in place - confirm.
model.get_latent_original(adata)

# Create the output directory if it is missing. `exist_ok=True` replaces the
# check-then-create pattern, so re-running the cell is safe.
os.makedirs(f"./{trial_name}/outputs", exist_ok=True)
adata.write(f"./{trial_name}/outputs/stforte.h5ad")

# For analysis in padding resolution
adata_sp = model.get_result_anndata()
adata_sp.write_h5ad("./{:s}/outputs/sp.h5ad".format(trial_name))