# Spatial region identification for mouse olfactory (10x Visium) data

First, load the necessary packages.

In [1]:
import os
# Switch the working directory to the parent folder; every relative path used
# below is resolved from there.
# NOTE(review): presumably this also makes the local STForte package importable,
# which is why it sits ahead of the STForte imports — confirm before reordering.
os.chdir(path='../')
import numpy as np
import scanpy as sc
from STForte import STGraph
from STForte import STForteModel
from STForte.helper import save_gdata
from STForte.helper import annotation_propagate
# Name of this trial; used as the root folder for checkpoints, data, models and outputs.
trial_name = "trial-mouse_olfa_Visium"

  rank_zero_deprecation(
Global seed set to 0


## Data preprocessing and generation
We perform simple preprocessing on the original data to remove spots with empty counts. Then we generate the graph data from the original AnnData object.

In [2]:
# Location of the 10x Visium AnnData file, relative to the current working directory.
h5ad_path = '../stforte/data/olfa-10x/GSM4656181_10x_Visium.h5ad'
adata = sc.read_h5ad(h5ad_path)
adata  # show the AnnData summary as the cell output

AnnData object with n_obs × n_vars = 918 × 31053
    obs: 'in_tissue', 'array_row', 'array_col', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes'
    var: 'gene_ids', 'feature_types', 'genome', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'
    uns: 'spatial'
    obsm: 'spatial'
    layers: 'raw_count'

In [3]:
# Fiducial diameter (full-resolution scale factor) stored with the spatial metadata.
# NOTE(review): presumably used as the distance threshold for the spot graph — confirm.
d = adata.uns['spatial']['stomic']['scalefactors']['fiducial_diameter_fullres']
stgraph = STGraph.graphFromAnndata(adata=adata, d=d)

# With inplace=False, filter_cells does not modify adata; the first returned
# element is a boolean array marking the spots that pass the min_genes cutoff.
cells_subset = sc.pp.filter_cells(adata, min_genes=200, inplace=False)[0]
# Positions of the spots that FAIL the cutoff, i.e. the low-quality ones to mask.
mask_idx = np.flatnonzero(~cells_subset)

stgraph.mask_nodes(mask_idx=mask_idx)
stgraph.pca()
stgraph.padding(reconstruct_knn=True, reconstruct_k=18)
# Convert the graph to the data object that is handed to the model.
gdata = stgraph.topyg()

d-based initialize:   0%|          | 0/918 [00:00<?, ?it/s]

PCA pre-compression for data, from 18110 onto 300-dim.
Scaling data: None; SVD solver: full; random_state=42.
Start compression...	Done! Elapsed time: 2.97s.


brute-force knn initialize:   0%|          | 0/3535 [00:00<?, ?it/s]

FP


## STForte Model


In [4]:
# Build the STForte model from the AnnData and graph data; its output directory
# is placed under the trial folder.
model = STForteModel(
    adata=adata,
    gdata=gdata,
    output_dir=f'./{trial_name}/pl_ckpts/',
)
model  # show the model repr as the cell output

<STForte._model.STForteModel at 0x14ba4d26ce80>

Fit the model.

In [5]:
# Train the model; no arguments are passed, so the model's own defaults apply.
model.fit()

Global seed set to 42
  rank_zero_deprecation(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type              | Params
----------------------------------------------------
0 | attr_encoder  | Sequential        | 71.9 K
1 | strc_encoder  | Sequential_034289 | 71.9 K
2 | attr_decoder  | Sequential        | 72.2 K
3 | strc_decoder  | Sequential        | 2.1 K 
4 | discriminator | Sequential        | 1.1 K 
----------------------------------------------------
219 K     Trainable params
0         Non-trainable params
219 K     Total params
0.877     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=450` reached.


Save the AnnData object, the graph data (gdata), and the model parameters for reproducibility.

In [6]:
# Create the data folder for this trial. exist_ok=True makes the call safe to
# re-run and avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(f"{trial_name}/data", exist_ok=True)
# Persist the graph data and the AnnData object used for training.
save_gdata(gdata, path=f"{trial_name}/data/gdata.pkl")
adata.write_h5ad(f"{trial_name}/data/trial.h5ad")
# Persist the trained model parameters under the trial's models folder.
# NOTE(review): the models folder is not created here; presumably
# save_state_dict creates it — confirm.
model.save_state_dict(f'./{trial_name}/models/', alias="STForteModel_mouse_olfa_10x")

Save the STForte output results.

In [7]:
# For analysis in original resolution
# NOTE(review): the return value is discarded, so this presumably stores the
# latent results in `adata` in place — confirm.
model.get_latent_original(adata)
# Create the outputs folder. exist_ok=True makes the call safe to re-run and
# avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(f"./{trial_name}/outputs", exist_ok=True)
adata.write(f"./{trial_name}/outputs/stforte.h5ad")
# For analysis in padding resolution
adata_sp = model.get_result_anndata()
adata_sp.write_h5ad(f"./{trial_name}/outputs/sp.h5ad")