# Imports

In [1]:
import scanpy as sc
import squidpy as sq
import numpy as np
import pandas as pd
from anndata import AnnData
import pathlib
import matplotlib.pyplot as plt
import matplotlib as mpl
import skimage
import seaborn as sns
import tangram as tg
from collections import Counter
from collections import defaultdict
import json

%load_ext autoreload
%autoreload 2
%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


# Load data  

### Tangram Tutorial Data

In [2]:
# Spatial data: Visium fluorescence crop, restricted to the four cortex clusters.
cortex_clusters = [f"Cortex_{layer}" for layer in range(1, 5)]
adata_st = sq.datasets.visium_fluo_adata_crop()
in_cortex = adata_st.obs["cluster"].isin(cortex_clusters)
adata_st = adata_st[in_cortex].copy()  # copy so we hold real data, not a view

# Matching tissue image for the same crop.
img = sq.datasets.visium_fluo_image_crop()

# Single-cell reference: mouse cortex scRNA-seq.
adata_sc = sq.datasets.sc_mouse_cortex()

# Get training genes

In [3]:
# Rank genes per cell subclass, then pool the top 100 of every subclass
# into one sorted, de-duplicated list of training genes.
sc.tl.rank_genes_groups(adata_sc, groupby="cell_subclass", use_raw=False)

ranked_gene_names = pd.DataFrame(adata_sc.uns["rank_genes_groups"]["names"])
markers_df = ranked_gene_names.head(100)  # one column per subclass, rows ordered by rank

pooled_genes = markers_df.to_numpy().ravel()
markers = list(np.unique(pooled_genes))
len(markers)



1401

# Prepare and run mapping

In [5]:
# Restrict both AnnData objects to the shared training genes (modifies them in place).
tg.pp_adatas(adata_sc, adata_st, genes=markers)

# Hyper-parameters of the Tangram mapping, gathered in one place for easy tuning.
mapping_kwargs = dict(
    mode="cells",
    density_prior='rna_count_based',
    lambda_d=0.89,
    lambda_g2=0.99,
    num_epochs=350,
    device='cpu',
)

# ad_map.X holds the cell-by-voxel mapping matrix learned by Tangram.
ad_map = tg.map_cells_to_space(adata_sc, adata_st, **mapping_kwargs)

INFO:root:1280 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:14785 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 1280 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.613, VG reg: 0.798, KL reg: 0.001
Score: 0.733, VG reg: 0.807, KL reg: 0.000
Score: 0.736, VG reg: 0.807, KL reg: 0.000
Score: 0.736, VG reg: 0.807, KL reg: 0.000


INFO:root:Saving results..


# Cell-type distribution for every voxel (no threshold)

In [6]:
# Build a (n_cell_types x n_voxels) matrix whose column i is the cell-type
# distribution of voxel i: every cell votes for its own cell type in every
# voxel, weighted by its mapping probability to that voxel, and each column
# is normalised to sum to 1.
#
# NOTE: the votes are accumulated independently per voxel. The earlier loop
# shared one running `weighted_counts` dict across all voxels, so column i
# contained the cumulative distribution of voxels 0..i instead of voxel i.

cell_types = list(adata_sc.obs["cell_subclass"].unique())  # cell types in the sc data
num_cell_types = len(cell_types)
num_voxels = ad_map.n_vars

# Output row of every cell type (same order as `cell_types`).
row_of_type = {cell_type: row for row, cell_type in enumerate(cell_types)}

# Mapping matrix, cells x voxels; densify if it happens to be stored sparse.
mapping = ad_map.X.toarray() if hasattr(ad_map.X, "toarray") else np.asarray(ad_map.X)
# Only strictly positive mapping weights count as votes.
mapping = np.where(mapping > 0.0, mapping, 0.0)

# For each cell (row of the mapping matrix), the output row of its cell type.
cell_rows = np.array(
    [row_of_type[label] for label in ad_map.obs["cell_subclass"]],
    dtype=np.intp,
)

# Sum the mapping weights of all cells of the same type, separately per voxel.
# np.add.at is used because several cells share the same target row.
weighted_counts = np.zeros((num_cell_types, num_voxels))
np.add.at(weighted_counts, cell_rows, mapping)

# Normalise each voxel column; voxels without any positive weight stay all-zero.
totals = weighted_counts.sum(axis=0)
ct_dist_matrix = np.divide(
    weighted_counts,
    totals,
    out=np.zeros_like(weighted_counts),
    where=totals > 0,
)

# Make sure the output directory exists before saving.
results_path = pathlib.Path("results") / "TG.npy"
results_path.parent.mkdir(parents=True, exist_ok=True)
np.save(results_path, ct_dist_matrix)