In [1]:
import scanpy as sc
import squidpy as sq
import numpy as np
import pandas as pd
from anndata import AnnData
import pathlib
import matplotlib.pyplot as plt
import matplotlib as mpl
import skimage
import seaborn as sns
import tangram as tg

%load_ext autoreload
%autoreload 2
%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


In [1]:
# Print the working directory: the relative '../lucas_data/...' paths used to load the data resolve against it.
!pwd

/home/apon/thesis/EM_approach


# Load data

In [2]:
# Spatial (Visium) dataset; used below as the spatial target of the mapping.
adata_st = sc.read(
    '../lucas_data/Visium_Mouse_Brain_SPAPROS_filtered_celltypes_annotated.h5ad'
)

# Single-cell reference; used below as the source of the cells being mapped.
adata_sc = sc.read(
    '../lucas_data/SC_REF_for_VISIUM_preprocessed.h5ad'
)

In [3]:
# Prepare both AnnData objects for mapping. Nothing is returned; the objects are annotated in place:
# per the log output, the training/overlap gene lists are stored in `.uns` of both objects and the
# uniform and RNA-count-based density priors in `.obs` of the spatial object.
# genes=None supplies no gene list; the log shows all 762 overlapping genes being used for training.
tg.pp_adatas(adata_sc, adata_st, genes=None)

INFO:root:762 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:762 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.


# Run Tangram and get candidates for EM

In [5]:
# One Tangram mapping run on CPU for 350 epochs; the result is kept in `ad_map`.
# These are the same settings that create_candidates applies to each of its runs.
ad_map = tg.map_cells_to_space(
    adata_sc,
    adata_st,
    mode="cells",
    device="cpu",
    num_epochs=350,
    density_prior='rna_count_based',
    lambda_d=0.89,
    lambda_g2=0.99,
)

INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 762 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.653, VG reg: 0.152, KL reg: 0.086
Score: 0.844, VG reg: 0.198, KL reg: 0.003
Score: 0.851, VG reg: 0.204, KL reg: 0.002
Score: 0.852, VG reg: 0.205, KL reg: 0.002


INFO:root:Saving results..


In [14]:
def create_candidates(adata_sc, adata_st, n_iter, p_thresh, *,
                      mode="cells",
                      density_prior='rna_count_based',
                      lambda_d=0.89,
                      lambda_g2=0.99,
                      num_epochs=350,
                      device="cpu",
                      **map_kwargs):
    """Collect candidate (cell, spot) pairs from repeated Tangram mappings.

    Runs ``tg.map_cells_to_space`` ``n_iter`` times and flags every entry of
    the returned mapping matrix (``ad_map.X``) that is strictly greater than
    ``p_thresh`` in at least one of the runs.

    Parameters
    ----------
    adata_sc, adata_st
        Passed unchanged as the first two arguments of
        ``tg.map_cells_to_space``. Their ``shape[0]`` give the number of rows
        and columns of the result, respectively.
    n_iter : int
        Number of mapping runs. With ``n_iter <= 0`` no run is performed and
        an all-zero matrix is returned.
    p_thresh : float
        Threshold applied to ``ad_map.X``; the comparison is strict (``>``).
    mode, density_prior, lambda_d, lambda_g2, num_epochs, device
        Forwarded to ``tg.map_cells_to_space``. The defaults are the values
        that used to be hard-coded in this function.
    **map_kwargs
        Any further keyword arguments, forwarded verbatim to
        ``tg.map_cells_to_space``.

    Returns
    -------
    numpy.ndarray
        Integer matrix of shape ``(adata_sc.shape[0], adata_st.shape[0])``
        holding 1 where a pair exceeded ``p_thresh`` in any run, else 0.
    """
    # Only "was the threshold exceeded at least once" is needed, so keep a
    # boolean mask instead of an integer count matrix of the same shape.
    hits = np.zeros((adata_sc.shape[0], adata_st.shape[0]), dtype=bool)

    for _ in range(n_iter):
        ad_map = tg.map_cells_to_space(
            adata_sc,
            adata_st,
            mode=mode,
            density_prior=density_prior,
            lambda_d=lambda_d,
            lambda_g2=lambda_g2,
            num_epochs=num_epochs,
            device=device,
            **map_kwargs,
        )
        # Union of the above-threshold entries across runs.
        hits |= ad_map.X > p_thresh

    # Same 0/1 integer encoding the function has always returned.
    return hits.astype(int)

    

In [15]:
# Build the 0/1 candidate matrix from two Tangram runs, keeping entries above 0.9.
candidates = create_candidates(
    adata_sc,
    adata_st,
    n_iter=2,
    p_thresh=0.9,
)

INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 762 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.653, VG reg: 0.152, KL reg: 0.086
Score: 0.844, VG reg: 0.198, KL reg: 0.003
Score: 0.851, VG reg: 0.204, KL reg: 0.002
Score: 0.852, VG reg: 0.205, KL reg: 0.002


INFO:root:Saving results..
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 762 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.653, VG reg: 0.152, KL reg: 0.086
Score: 0.844, VG reg: 0.198, KL reg: 0.003
Score: 0.851, VG reg: 0.204, KL reg: 0.002
Score: 0.852, VG reg: 0.205, KL reg: 0.002


INFO:root:Saving results..


In [23]:
# Total number of flagged (cell, spot) pairs across the whole candidate matrix.
print(candidates.sum())

86


In [None]:
# Persist the candidate matrix computed above so it can be reloaded later with np.load.
np.save("output.npy", candidates)