In [1]:
import tangram as tg
import pandas as pd
import scanpy as sc
import anndata as ad
import numpy as np
import torch
from scipy import stats
from sklearn.metrics import r2_score

from time import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Extra head-room applied by the min-max scaling: scaled values lie in
# [0, 1/(1+pct_offset)]. With 0 the scaling is a plain min-max to [0, 1].
pct_offset = 0

def MinMaxNorm(Y):
    """Column-wise min-max scale ``Y``, then divide by ``1 + pct_offset``.

    Parameters
    ----------
    Y : array-like supporting ``.min(axis=0)`` / ``.max(axis=0)``
        Values to scale; statistics are taken along axis 0.

    Returns
    -------
    Scaled values with the same shape as ``Y``.

    Notes
    -----
    A column whose minimum equals its maximum makes the denominator zero.
    """
    return (Y-Y.min(axis = 0))/(Y.max(axis = 0)-Y.min(axis = 0))/(1+pct_offset)

def ReMMNorm(Y, Y_pred):
    """Undo ``MinMaxNorm``: map scaled values back to the original range of ``Y``.

    Parameters
    ----------
    Y : array-like
        Reference data whose column-wise min/max define the scaling.
    Y_pred : array-like
        Values on the ``MinMaxNorm`` scale.

    Returns
    -------
    ``Y_pred`` expressed in the units of ``Y``.
    """
    # Exact inverse of MinMaxNorm: the (1 + pct_offset) factor multiplies the
    # whole range (max - min), not just the minimum.
    Y_min = Y.min(axis = 0)
    Y_range = Y.max(axis = 0) - Y_min
    return Y_pred*Y_range*(1+pct_offset) + Y_min

def PolarTrans(Y):
    """Convert Cartesian coordinates to polar coordinates.

    Parameters
    ----------
    Y : array indexed as ``Y[:, 0]`` (x) and ``Y[:, 1]`` (y)

    Returns
    -------
    Array with two columns: radius ``R`` and angle ``Theta`` in radians.

    Notes
    -----
    ``Theta`` is computed with ``np.arctan2`` so the quadrant is kept and
    ``x == 0`` does not cause a division by zero. For ``x > 0`` the value is
    identical to ``arctan(y / x)``.
    """
    x, y = Y[:,0], Y[:,1]
    R = np.hypot(x, y)
    # arctan2 returns angles in (-pi, pi], which round-trip through
    # R*cos(Theta), R*sin(Theta) for every quadrant.
    Theta = np.arctan2(y, x)
    RTheta = np.concatenate([R.reshape(-1,1), Theta.reshape(-1,1)], axis = 1)

    return RTheta

def RePolarTrans(RTheta):
    """Convert polar coordinates back to Cartesian coordinates.

    ``RTheta[:, 0]`` is read as the radius and ``RTheta[:, 1]`` as the angle
    in radians; the result has the columns ``x = r*cos(theta)`` and
    ``y = r*sin(theta)``.
    """
    radius, angle = RTheta[:,0], RTheta[:,1]
    return np.stack([radius * np.cos(angle), radius * np.sin(angle)], axis = 1)

In [3]:
# Load the dataset and make sure gene names are unique.
adata = ad.read_h5ad('../Dataset/AdataEmbryo1.h5ad')
adata.var_names_make_unique()

# Coordinates of every cell, taken from the `xcoord` / `ycoord` obs columns.
Y = adata.obs[['xcoord', 'ycoord']].to_numpy()

# Expression preprocessing: normalise each cell to 1e4 total, then log1p.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

In [4]:
# Read the five pre-computed cross-validation splits. The files on disk are
# numbered 1..5 while the lists built here are 0-based.
train_indices, test_indices = [], []
for i in range(5):
    fold_no = str(i+1)

    train_frame = pd.read_csv('CV_groups/index_train_' + fold_no + '.csv',
                              header=None, index_col=0)
    test_frame = pd.read_csv('CV_groups/index_test_' + fold_no + '.csv',
                             header=None, index_col=0)

    # The first CSV column is used as the index; the remaining values are
    # flattened into a 1-D array.
    train_index = train_frame.to_numpy().flatten()
    test_index = test_frame.to_numpy().flatten()

    train_indices.append(train_index)
    test_indices.append(test_index)

# Running code

In [5]:
# Run Tangram once per cross-validation fold: the training cells play the role
# of the spatial data (`ad_sp`) and the held-out cells are the ones being
# mapped (`ad_sc`). The mapping returned for fold i is kept in ad_maps[i].
ad_maps = []
sta = time()

# Use the GPU when one is present; otherwise fall back to the CPU instead of
# failing on the hard-coded 'cuda' device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for i in range(5):

    print(i)
    # tg.pp_adatas writes into both objects (see its log output), so hand it
    # independent copies rather than views of `adata`.
    ad_sp = adata[train_indices[i]].copy()
    ad_sc = adata[test_indices[i]].copy()
    tg.pp_adatas(ad_sc, ad_sp, genes=None)

    ad_map = tg.map_cells_to_space(ad_sc, ad_sp, device = device)
    ad_maps.append(ad_map)
    # Release cached GPU memory between folds.
    if device == 'cuda':
        torch.cuda.empty_cache()

end = time()
print((end - sta)/60.0, 'mins consumed')

0


INFO:root:19198 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:19198 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 19198 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.120, KL reg: 0.040
Score: 0.375, KL reg: 0.001
Score: 0.399, KL reg: 0.000
Score: 0.409, KL reg: 0.000
Score: 0.414, KL reg: 0.000
Score: 0.418, KL reg: 0.000
Score: 0.421, KL reg: 0.000
Score: 0.423, KL reg: 0.000
Score: 0.424, KL reg: 0.000
Score: 0.426, KL reg: 0.000


INFO:root:Saving results..


1


INFO:root:19208 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:19208 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 19208 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.120, KL reg: 0.040
Score: 0.374, KL reg: 0.000
Score: 0.399, KL reg: 0.000
Score: 0.409, KL reg: 0.000
Score: 0.414, KL reg: 0.000
Score: 0.418, KL reg: 0.000
Score: 0.421, KL reg: 0.000
Score: 0.423, KL reg: 0.000
Score: 0.424, KL reg: 0.000
Score: 0.426, KL reg: 0.000


INFO:root:Saving results..


2


INFO:root:19225 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:19225 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 19225 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.120, KL reg: 0.040
Score: 0.374, KL reg: 0.001
Score: 0.399, KL reg: 0.000
Score: 0.408, KL reg: 0.000
Score: 0.414, KL reg: 0.000
Score: 0.418, KL reg: 0.000
Score: 0.420, KL reg: 0.000
Score: 0.422, KL reg: 0.000
Score: 0.424, KL reg: 0.000
Score: 0.425, KL reg: 0.000


INFO:root:Saving results..


3


INFO:root:19165 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:19165 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 19165 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.120, KL reg: 0.040
Score: 0.374, KL reg: 0.001
Score: 0.398, KL reg: 0.000
Score: 0.408, KL reg: 0.000
Score: 0.414, KL reg: 0.000
Score: 0.417, KL reg: 0.000
Score: 0.420, KL reg: 0.000
Score: 0.422, KL reg: 0.000
Score: 0.424, KL reg: 0.000
Score: 0.425, KL reg: 0.000


INFO:root:Saving results..


4


INFO:root:19153 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:19153 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 19153 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.120, KL reg: 0.041
Score: 0.374, KL reg: 0.001
Score: 0.399, KL reg: 0.000
Score: 0.409, KL reg: 0.000
Score: 0.414, KL reg: 0.000
Score: 0.418, KL reg: 0.000
Score: 0.421, KL reg: 0.000
Score: 0.423, KL reg: 0.000
Score: 0.424, KL reg: 0.000
Score: 0.426, KL reg: 0.000


INFO:root:Saving results..


221.8124224503835 mins consumed


In [6]:
# `ad_sc` is whatever the mapping loop left behind, i.e. the held-out cells of
# the last fold, so this is the number of training genes for that fold only.
len(ad_sc.uns['training_genes'])

19153

In [10]:
# For every fold, multiply that fold's mapping matrix (ad_maps[i].X) by the
# coordinates of the fold's training cells and write the result to CSV.
# Note: output files are numbered 0..4, whereas the CV index files are 1..5.
for i in range(5):
    Y_pred = np.matmul(ad_maps[i].X, Y[train_indices[i]])
    # index=False is the documented way to omit the row index.
    pd.DataFrame(Y_pred).to_csv('SI_Benchmarking/Tangram/pred_' + str(i) + '.csv', index = False)

In [11]:
# `Y_pred` holds the value from the final loop iteration above (fold index 4).
Y_pred

array([[3068.2813475 , 3178.29499347],
       [3106.76857675, 3546.43507294],
       [3319.11089687, 3169.63701816],
       ...,
       [3307.95322683, 3180.64387685],
       [3165.1240784 , 3451.13542726],
       [3479.2197775 , 3201.63575843]])