In [1]:
import anndata
import torch
import stPlus

import squidpy as sq
import numpy as np
import scanpy as sc
import pandas as pd

from sklearn.model_selection import KFold
from transpa.eval_util import calc_corr
from transpa.util import expTransImp, leiden_cluster, compute_autocorr
from benchmark import SpaGE_impute, Tangram_impute
import warnings

# Suppress every warning category for the rest of the session to keep cell output readable.
warnings.filterwarnings('ignore')

# Seed handed to the imputation calls later in the notebook.
seed = 10
# Use CUDA device index 2 when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")

Global seed set to 0


In [2]:
import os
# Root folder of the SpaGE benchmark downloads; every raw input read below lives underneath it.
spage_root = '/data/users/cqiao/data/stPlus/data/SpaGE Datasets'

# ---- Spatial data (MERFISH): reuse the cached AnnData, otherwise build it from the raw CSV ----
merfish_path = '../data/merfish.h5ad'
if os.path.exists(merfish_path):
    spa_adata = sc.read(merfish_path)
else:
    merfish = pd.read_csv(os.path.join(spage_root, 'Spatial/MERFISH/Moffitt_and_Bambah-Mukku_et_al_merfish_all_cells.csv'))
    # Keep animal 1 only and drop cells whose class is 'Ambiguous'.
    merfish_1 = merfish.loc[merfish['Animal_ID'] == 1, :]
    merfish_1 = merfish_1.loc[merfish_1['Cell_class'] != 'Ambiguous',:]
    # Columns 0-8 become per-cell metadata, columns 9-170 the expression matrix.
    # The metadata slice is copied because it is installed as `spa_adata.obs` and then
    # receives new columns; without the copy those writes target a view of `merfish_1`.
    merfish_meta = merfish_1.iloc[:,0:9].copy()
    merfish_data = merfish_1.iloc[:,9:171]
    merfish_data = merfish_data.drop(columns = ['Blank_1','Blank_2','Blank_3','Blank_4','Blank_5','Fos'])

    spa_adata = anndata.AnnData(merfish_data.values)
    spa_adata.var_names = merfish_data.columns.values
    spa_adata.var_names_make_unique()
    spa_adata.obs = merfish_meta
    spa_adata.obs['X'] = merfish_1.Centroid_X.values
    spa_adata.obs['Y'] = merfish_1.Centroid_Y.values
    sc.pp.normalize_total(spa_adata)
    sc.pp.log1p(spa_adata)
    # Write to the same path that the existence check above reads from.
    spa_adata.write(merfish_path)


# ---- scRNA-seq reference (Moffitt): reuse the cached AnnData, otherwise build it ----
Moffit_path = '../data/moffit_adata.h5ad'

if os.path.exists(Moffit_path):
    Moffit_adata = sc.read(Moffit_path)
else:
    moffit_dir = os.path.join(spage_root, 'scRNAseq/Moffit_RNA/GSE113576')
    # The matrix is transposed right after reading (presumably stored genes x cells -- confirm).
    Moffit_adata = sc.read_mtx(os.path.join(moffit_dir, 'matrix.mtx')).T
    genes = pd.read_csv(os.path.join(moffit_dir, 'genes.tsv'),sep='\t',header=None).loc[:, 1].values
    barcodes = pd.read_csv(os.path.join(moffit_dir, 'barcodes.tsv'),sep='\t',header=None).loc[:, 0].values

    Moffit_adata.var_names = genes
    Moffit_adata.obs_names = barcodes
    Moffit_adata.var_names_make_unique()
    # Cluster labels are computed before gene filtering / normalisation and cached in `obs`.
    classes, ct_list = leiden_cluster(Moffit_adata)
    cls_key = 'leiden'
    Moffit_adata.obs[cls_key] = classes
    sc.pp.filter_genes(Moffit_adata, min_cells=10)
    sc.pp.normalize_total(Moffit_adata)
    sc.pp.log1p(Moffit_adata)
    # Write to the same path that the existence check above reads from.
    Moffit_adata.write(Moffit_path)
    
# Re-derive the cluster variables from `obs` so both branches (cache hit or rebuild)
# leave the notebook with identically defined `ct_list`, `classes` and `cls_key`.
spa_adata.var_names_make_unique()
Moffit_adata.var_names_make_unique()
ct_list = np.unique(Moffit_adata.obs.leiden)
classes = Moffit_adata.obs.leiden.values
cls_key = 'leiden'

In [3]:
# Gene-labelled frames for both modalities. The scRNA matrix is densified with
# `.toarray()`, so the full cells x genes array is materialised in memory.
raw_spatial_df  = pd.DataFrame(spa_adata.X, columns=spa_adata.var_names)
raw_scrna_df    = pd.DataFrame(Moffit_adata.X.toarray(), columns=Moffit_adata.var_names)
# Genes present in both frames; these are the genes split into folds later.
raw_shared_gene = np.intersect1d(raw_spatial_df.columns, raw_scrna_df.columns)

# `to_csv` raises if the target folder is missing, so create it on a fresh checkout.
os.makedirs('output', exist_ok=True)
raw_spatial_df.to_csv('output/merfish_raw.csv')
raw_spatial_df.shape, raw_scrna_df.shape, raw_shared_gene.shape

((64373, 155), (31299, 18646), (153,))

In [4]:
# Stack the per-cell X and Y columns of `obs` into an (n_cells, 2) coordinate array.
spa_adata.obsm['spatial'] = np.column_stack([spa_adata.obs.X.values, spa_adata.obs.Y.values])
np.save('output/merfish_locations.npy', spa_adata.obsm['spatial'])
# Build the spatial neighbour graph; later cells read it from
# `spa_adata.obsp['spatial_connectivities']`.
sq.gr.spatial_neighbors(spa_adata)

In [5]:
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score, normalized_mutual_info_score, homogeneity_score
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import StandardScaler

# Number of distinct values in the `Cell_class` annotation (a missing value, if present,
# counts as one class, exactly as `len(Series.unique())` would).
n_clusters = spa_adata.obs.Cell_class.nunique(dropna=False)
print(f"{n_clusters} clusters")
# Reference labels for the clustering comparison are the annotated cell classes.
# A disabled earlier variant instead clustered the measured expression with
# AgglomerativeClustering on the spatial connectivity graph.
real_clss = spa_adata.obs.Cell_class.values

15 clusters


In [6]:
def spa_agglomerative(true_imp_X):
    """Cluster cells on an expression matrix and score the clusters against `real_clss`.

    The matrix is standardised with `StandardScaler`, then clustered into `n_clusters`
    groups by `AgglomerativeClustering` constrained to the graph stored in
    `spa_adata.obsp['spatial_connectivities']`. The predicted labels are compared with
    the notebook-level `real_clss`, and the scores are printed and returned.

    Note: the printed header still reads "Cluster on Raw Expression", although the
    notebook assigns `real_clss` from the `Cell_class` annotation. The text is kept
    unchanged so new logs stay comparable with earlier runs.

    Args:
        true_imp_X: expression matrix to cluster. Rows presumably follow the cell
            order of `spa_adata`, since the connectivity graph comes from it -- confirm
            against callers.

    Returns:
        dict with keys "ARS" (adjusted Rand), "AMIS" (adjusted mutual information),
        "HOMO" (homogeneity) and "NMI" (normalised mutual information).
    """
    scaled_X = StandardScaler().fit_transform(true_imp_X)
    clusterer = AgglomerativeClustering(
        n_clusters=n_clusters,
        connectivity=spa_adata.obsp['spatial_connectivities'],
    )
    pred_clss = clusterer.fit_predict(scaled_X)

    ars = adjusted_rand_score(real_clss, pred_clss)
    amis = adjusted_mutual_info_score(real_clss, pred_clss)
    homo = homogeneity_score(real_clss, pred_clss)
    nmi = normalized_mutual_info_score(real_clss, pred_clss)
    print(f"Cluster on Raw Expression vs  on Predicted Expression\n - ARS: {ars:.6f}, AMIS: {amis:.6f}, HOMO: {homo:.6f}, NMI: {nmi:.6f}")
    # Built once; the original assembled this identical dict twice.
    return {"ARS":ars, "AMIS":amis, 'HOMO': homo, 'NMI':nmi}

In [7]:
# 5-fold cross-validation over the shared genes: each fold imputes the held-out genes
# from the remaining ones with every method, then scores the imputation.
kf = KFold(n_splits=5, shuffle=True, random_state=0)


def _new_result_frame():
    """Return a zero-filled (cells x shared genes) frame that the folds fill column by column."""
    return pd.DataFrame(np.zeros((spa_adata.n_obs, len(raw_shared_gene))), columns=raw_shared_gene)


def _evaluate_fold(method_name, df_imputed, train_gene, test_gene, report_prefix, report_label):
    """Score one method on the current fold and print its median correlation.

    Clusters on [imputed test genes | measured train genes], appends the clustering
    scores to `spa_cluster_metrics` and `method_name` to `method_names`, then prints the
    median of `calc_corr` over the fold's test genes.

    Args:
        method_name: label stored alongside the clustering scores.
        df_imputed: result frame already holding this fold's imputed `test_gene` columns.
        train_gene: genes of this fold whose measured values are used as-is.
        test_gene: genes of this fold that were imputed.
        report_prefix: text printed before the median correlation.
        report_label: method label printed in parentheses after the median correlation.
    """
    features = np.hstack([df_imputed[test_gene].values, raw_spatial_df[train_gene].values])
    spa_cluster_metrics.append(spa_agglomerative(features))
    method_names.append(method_name)
    fold_corr = calc_corr(raw_spatial_df, df_imputed, test_gene)
    print(f'{report_prefix}{np.median(fold_corr)} ({report_label})')


df_transImpSpa = _new_result_frame()
df_transImpCls = _new_result_frame()
df_transImpClsSpa = _new_result_frame()
df_transImp = _new_result_frame()
df_stplus_res = _new_result_frame()
df_spaGE_res = _new_result_frame()
df_tangram_res = _new_result_frame()


spa_cluster_metrics = []
method_names = []
for idx, (train_ind, test_ind) in enumerate(kf.split(raw_shared_gene)):
    print(f"\n===== Fold {idx+1} =====\nNumber of train genes: {len(train_ind)}, Number of test genes: {len(test_ind)}")
    train_gene = raw_shared_gene[train_ind]
    test_gene  = raw_shared_gene[test_ind]

    spatial_df = raw_spatial_df[train_gene]
    scrna_df   = raw_scrna_df

    # Keyword arguments shared by all four TransImp configurations.
    transimp_common = dict(
        df_ref=raw_scrna_df,
        df_tgt=raw_spatial_df,
        train_gene=train_gene,
        test_gene=test_gene,
        seed=seed,
        device=device)
    # The fold header above is 1-based while these lines print the 0-based index;
    # both are kept so new logs match earlier runs.
    transimp_prefix = f'fold {idx}, median correlation: '
    baseline_prefix = '\t\t\t'

    # TransImp, cell-level signatures, low-rank mapping, spatial term enabled.
    df_transImpSpa[test_gene] = expTransImp(
            signature_mode='cell',
            mapping_mode='lowrank',
            mapping_lowdim=128,
            wt_spa=1,
            spa_adj=spa_adata.obsp['spatial_connectivities'].tocoo(),
            **transimp_common)
    _evaluate_fold("transImpSpa", df_transImpSpa, train_gene, test_gene, transimp_prefix, "TransImpSpa")

    # TransImp, cluster-level signatures, full mapping.
    df_transImpCls[test_gene] = expTransImp(
            ct_list=ct_list,
            classes=classes,
            signature_mode='cluster',
            mapping_mode='full',
            **transimp_common)
    _evaluate_fold('transImpCls', df_transImpCls, train_gene, test_gene, transimp_prefix, "TransImpCls")

    # TransImp, cell-level signatures, low-rank mapping, no spatial arguments.
    df_transImp[test_gene] = expTransImp(
            signature_mode='cell',
            mapping_mode='lowrank',
            mapping_lowdim=128,
            **transimp_common)
    _evaluate_fold("transImp", df_transImp, train_gene, test_gene, transimp_prefix, "TransImp")

    # TransImp, cluster-level signatures, full mapping, spatial term enabled.
    df_transImpClsSpa[test_gene] = expTransImp(
            ct_list=ct_list,
            classes=classes,
            spa_adj=spa_adata.obsp['spatial_connectivities'].tocoo(),
            signature_mode='cluster',
            mapping_mode='full',
            wt_spa=1,
            **transimp_common)
    _evaluate_fold("transImpClsSpa", df_transImpClsSpa, train_gene, test_gene, transimp_prefix, "TransImpClsSpa")

    # Baselines: stPlus, SpaGE, Tangram.
    df_stplus_res[test_gene] = stPlus.stPlus(spatial_df, scrna_df, test_gene, "tmp_mm", verbose=False, random_seed=seed, device=device)
    _evaluate_fold("stPlus", df_stplus_res, train_gene, test_gene, baseline_prefix, "stPlus")

    df_spaGE_res[test_gene]  = SpaGE_impute(scrna_df, spatial_df, train_gene, test_gene)
    _evaluate_fold("spaGE", df_spaGE_res, train_gene, test_gene, baseline_prefix, "spaGE")

    df_tangram_res[test_gene] = Tangram_impute(Moffit_adata, spa_adata, train_gene, test_gene, device, cls_key)
    _evaluate_fold("tangram", df_tangram_res, train_gene, test_gene, baseline_prefix, "Tangram")

# Correlations over all shared genes, computed once every fold has filled its columns.
corr_transImpSpa_res = calc_corr(raw_spatial_df, df_transImpSpa, raw_shared_gene)
corr_transImp_res = calc_corr(raw_spatial_df, df_transImp, raw_shared_gene)
corr_transImpCls_res = calc_corr(raw_spatial_df, df_transImpCls, raw_shared_gene)
corr_transImpClsSpa_res = calc_corr(raw_spatial_df, df_transImpClsSpa, raw_shared_gene)
corr_res_stplus = calc_corr(raw_spatial_df, df_stplus_res, raw_shared_gene)
corr_res_spaGE = calc_corr(raw_spatial_df, df_spaGE_res, raw_shared_gene)
corr_res_tangram = calc_corr(raw_spatial_df, df_tangram_res, raw_shared_gene)

print(np.median(corr_transImpSpa_res), "(TransImpSpa)", 
      np.median(corr_transImp_res), "(TransImp)", 
      np.median(corr_transImpCls_res), "(TransImpCls)", 
      np.median(corr_transImpClsSpa_res), "(TransImpClsSpa)", 
      np.median(corr_res_stplus), "(stPlus)", 
      np.median(corr_res_spaGE), "(spaGE)",
      np.median(corr_res_tangram), "(Tangram)"
      )


===== Fold 1 =====
Number of train genes: 122, Number of test genes: 31


[TransImp] Epoch: 1000/1000, loss: 0.562223, (IMP) 0.560559, (SPA) 1 x 0.001664: 100%|██████████| 1000/1000 [00:18<00:00, 55.00it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.048491, AMIS: 0.088764, HOMO: 0.095563, NMI: 0.089448
fold 0, median correlation: 0.33793506902181053 (TransImpSpa)


[TransImp] Epoch: 1000/1000, loss: 0.843272, (IMP) 0.843272, (SPA) 1.0 x 0.000000: 100%|██████████| 1000/1000 [00:07<00:00, 133.62it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.051081, AMIS: 0.081695, HOMO: 0.091842, NMI: 0.082358
fold 0, median correlation: 0.3571415145959039 (TransImpCls)


[TransImp] Epoch: 1000/1000, loss: 0.538523, (IMP) 0.538523, (SPA) 1.0 x 0.000000: 100%|██████████| 1000/1000 [00:11<00:00, 85.49it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.060038, AMIS: 0.085900, HOMO: 0.093728, NMI: 0.086579
fold 0, median correlation: 0.3250004917567962 (TransImp)


[TransImp] Epoch: 1000/1000, loss: 0.848029, (IMP) 0.844415, (SPA) 1 x 0.003614: 100%|██████████| 1000/1000 [00:14<00:00, 68.43it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.038987, AMIS: 0.077493, HOMO: 0.086845, NMI: 0.078159
fold 0, median correlation: 0.35256432756907835 (TransImpClsSpa)
Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.046201, AMIS: 0.086966, HOMO: 0.099382, NMI: 0.087613
			0.34134172014811964 (stPlus)
Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.034193, AMIS: 0.074373, HOMO: 0.083434, NMI: 0.075042
			0.33262536612669313 (spaGE)


INFO:root:122 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:122 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 122 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.426, KL reg: 0.088
Score: 0.651, KL reg: 0.001
Score: 0.653, KL reg: 0.001
Score: 0.653, KL reg: 0.001
Score: 0.654, KL reg: 0.001
Score: 0.654, KL reg: 0.001
Score: 0.654, KL reg: 0.001
Score: 0.654, KL reg: 0.001
Score: 0.654, KL reg: 0.001
Score: 0.654, KL reg: 0.001


INFO:root:Saving results..


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.043703, AMIS: 0.075433, HOMO: 0.083456, NMI: 0.076112
			0.3418914246181449 (Tangram)

===== Fold 2 =====
Number of train genes: 122, Number of test genes: 31


[TransImp] Epoch: 1000/1000, loss: 0.572696, (IMP) 0.570908, (SPA) 1 x 0.001788: 100%|██████████| 1000/1000 [00:18<00:00, 54.87it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.038915, AMIS: 0.081296, HOMO: 0.092194, NMI: 0.081953
fold 1, median correlation: 0.17477798270082537 (TransImpSpa)


[TransImp] Epoch: 1000/1000, loss: 0.825958, (IMP) 0.825958, (SPA) 1.0 x 0.000000: 100%|██████████| 1000/1000 [00:07<00:00, 130.78it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.052650, AMIS: 0.078848, HOMO: 0.086393, NMI: 0.079531
fold 1, median correlation: 0.23239398400406225 (TransImpCls)


[TransImp] Epoch: 1000/1000, loss: 0.550727, (IMP) 0.550727, (SPA) 1.0 x 0.000000: 100%|██████████| 1000/1000 [00:11<00:00, 85.38it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.039018, AMIS: 0.070793, HOMO: 0.081530, NMI: 0.071449
fold 1, median correlation: 0.1742707618499262 (TransImp)


[TransImp] Epoch: 1000/1000, loss: 0.830989, (IMP) 0.827212, (SPA) 1 x 0.003777: 100%|██████████| 1000/1000 [00:14<00:00, 69.54it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.069526, AMIS: 0.086940, HOMO: 0.088207, NMI: 0.087668
fold 1, median correlation: 0.23028945243878232 (TransImpClsSpa)
Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.040658, AMIS: 0.073583, HOMO: 0.080152, NMI: 0.074272
			0.1796437873369204 (stPlus)
Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.029962, AMIS: 0.074457, HOMO: 0.084112, NMI: 0.075120
			0.17251574396986058 (spaGE)


INFO:root:122 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:122 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 122 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.436, KL reg: 0.107
Score: 0.657, KL reg: 0.002
Score: 0.658, KL reg: 0.002
Score: 0.659, KL reg: 0.001
Score: 0.659, KL reg: 0.001
Score: 0.659, KL reg: 0.001
Score: 0.659, KL reg: 0.001
Score: 0.659, KL reg: 0.001
Score: 0.659, KL reg: 0.001
Score: 0.659, KL reg: 0.001


INFO:root:Saving results..


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.040401, AMIS: 0.075272, HOMO: 0.086224, NMI: 0.075927
			0.2407029898738709 (Tangram)

===== Fold 3 =====
Number of train genes: 122, Number of test genes: 31


[TransImp] Epoch: 1000/1000, loss: 0.489253, (IMP) 0.488233, (SPA) 1 x 0.001020: 100%|██████████| 1000/1000 [00:18<00:00, 54.56it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.045054, AMIS: 0.080561, HOMO: 0.090472, NMI: 0.081226
fold 2, median correlation: 0.3130948853994587 (TransImpSpa)


[TransImp] Epoch: 1000/1000, loss: 0.730507, (IMP) 0.730507, (SPA) 1.0 x 0.000000: 100%|██████████| 1000/1000 [00:07<00:00, 132.40it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.038468, AMIS: 0.075808, HOMO: 0.084756, NMI: 0.076477
fold 2, median correlation: 0.3050761280350354 (TransImpCls)


[TransImp] Epoch: 1000/1000, loss: 0.475512, (IMP) 0.475512, (SPA) 1.0 x 0.000000: 100%|██████████| 1000/1000 [00:11<00:00, 85.42it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.043700, AMIS: 0.075901, HOMO: 0.084021, NMI: 0.076577
fold 2, median correlation: 0.31650081506612593 (TransImp)


[TransImp] Epoch: 1000/1000, loss: 0.734084, (IMP) 0.731797, (SPA) 1 x 0.002288: 100%|██████████| 1000/1000 [00:14<00:00, 70.23it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.044321, AMIS: 0.074628, HOMO: 0.084183, NMI: 0.075292
fold 2, median correlation: 0.3053089050131768 (TransImpClsSpa)
Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.033369, AMIS: 0.075477, HOMO: 0.083363, NMI: 0.076155
			0.31492769801645554 (stPlus)
Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.034461, AMIS: 0.069719, HOMO: 0.077659, NMI: 0.070396
			0.28014813333488997 (spaGE)


INFO:root:122 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:122 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 122 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.436, KL reg: 0.104
Score: 0.653, KL reg: 0.001
Score: 0.655, KL reg: 0.001
Score: 0.655, KL reg: 0.001
Score: 0.655, KL reg: 0.001
Score: 0.655, KL reg: 0.001
Score: 0.655, KL reg: 0.001
Score: 0.656, KL reg: 0.001
Score: 0.656, KL reg: 0.001
Score: 0.656, KL reg: 0.001


INFO:root:Saving results..


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.035205, AMIS: 0.070577, HOMO: 0.079336, NMI: 0.071247
			0.31214147210834764 (Tangram)

===== Fold 4 =====
Number of train genes: 123, Number of test genes: 30


[TransImp] Epoch: 1000/1000, loss: 0.570753, (IMP) 0.569009, (SPA) 1 x 0.001744: 100%|██████████| 1000/1000 [00:18<00:00, 54.47it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.045354, AMIS: 0.084928, HOMO: 0.089524, NMI: 0.085630
fold 3, median correlation: 0.21166814314684462 (TransImpSpa)


[TransImp] Epoch: 1000/1000, loss: 0.831245, (IMP) 0.831245, (SPA) 1.0 x 0.000000: 100%|██████████| 1000/1000 [00:07<00:00, 132.23it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.063373, AMIS: 0.087507, HOMO: 0.093890, NMI: 0.088195
fold 3, median correlation: 0.25244367023921 (TransImpCls)


[TransImp] Epoch: 1000/1000, loss: 0.551349, (IMP) 0.551349, (SPA) 1.0 x 0.000000: 100%|██████████| 1000/1000 [00:11<00:00, 86.84it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.039472, AMIS: 0.078732, HOMO: 0.088581, NMI: 0.079395
fold 3, median correlation: 0.21959700544295122 (TransImp)


[TransImp] Epoch: 1000/1000, loss: 0.836673, (IMP) 0.832668, (SPA) 1 x 0.004005: 100%|██████████| 1000/1000 [00:14<00:00, 67.13it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.039109, AMIS: 0.076805, HOMO: 0.087486, NMI: 0.077463
fold 3, median correlation: 0.25171255681861576 (TransImpClsSpa)
Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.035342, AMIS: 0.079897, HOMO: 0.090297, NMI: 0.080557
			0.2531887229414372 (stPlus)
Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.051088, AMIS: 0.074132, HOMO: 0.077115, NMI: 0.074853
			0.22008229426829667 (spaGE)


INFO:root:123 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:123 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 123 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.438, KL reg: 0.100
Score: 0.655, KL reg: 0.001
Score: 0.657, KL reg: 0.001
Score: 0.657, KL reg: 0.001
Score: 0.658, KL reg: 0.001
Score: 0.658, KL reg: 0.001
Score: 0.658, KL reg: 0.001
Score: 0.658, KL reg: 0.001
Score: 0.658, KL reg: 0.001
Score: 0.658, KL reg: 0.001


INFO:root:Saving results..


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.044109, AMIS: 0.079925, HOMO: 0.087618, NMI: 0.080604
			0.2415767890000491 (Tangram)

===== Fold 5 =====
Number of train genes: 123, Number of test genes: 30


[TransImp] Epoch: 1000/1000, loss: 0.553660, (IMP) 0.552105, (SPA) 1 x 0.001555: 100%|██████████| 1000/1000 [00:18<00:00, 54.08it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.049363, AMIS: 0.084277, HOMO: 0.092731, NMI: 0.084950
fold 4, median correlation: 0.2531674596593373 (TransImpSpa)


[TransImp] Epoch: 1000/1000, loss: 0.790528, (IMP) 0.790528, (SPA) 1.0 x 0.000000: 100%|██████████| 1000/1000 [00:07<00:00, 130.93it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.042767, AMIS: 0.077879, HOMO: 0.089842, NMI: 0.078529
fold 4, median correlation: 0.2729298579520554 (TransImpCls)


[TransImp] Epoch: 1000/1000, loss: 0.529110, (IMP) 0.529110, (SPA) 1.0 x 0.000000: 100%|██████████| 1000/1000 [00:11<00:00, 86.48it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.044559, AMIS: 0.083446, HOMO: 0.091366, NMI: 0.084122
fold 4, median correlation: 0.25130109140326035 (TransImp)


[TransImp] Epoch: 1000/1000, loss: 0.795890, (IMP) 0.791930, (SPA) 1 x 0.003960: 100%|██████████| 1000/1000 [00:14<00:00, 68.10it/s]


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.041138, AMIS: 0.078593, HOMO: 0.089842, NMI: 0.079246
fold 4, median correlation: 0.27403045692845285 (TransImpClsSpa)
Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.072289, AMIS: 0.094932, HOMO: 0.093392, NMI: 0.095677
			0.2356610237098042 (stPlus)
Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.048119, AMIS: 0.080144, HOMO: 0.089574, NMI: 0.080812
			0.22733023498071797 (spaGE)


INFO:root:123 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:123 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 123 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.437, KL reg: 0.101
Score: 0.661, KL reg: 0.002
Score: 0.662, KL reg: 0.002
Score: 0.663, KL reg: 0.002
Score: 0.663, KL reg: 0.002
Score: 0.663, KL reg: 0.002
Score: 0.663, KL reg: 0.002
Score: 0.663, KL reg: 0.002
Score: 0.663, KL reg: 0.002
Score: 0.663, KL reg: 0.002


INFO:root:Saving results..


Cluster on Raw Expression vs  on Predicted Expression
 - ARS: 0.044704, AMIS: 0.077923, HOMO: 0.088828, NMI: 0.078581
			0.2830945919730508 (Tangram)
0.27322788572156775 (TransImpSpa) 0.2783631214199657 (TransImp) 0.28280039215836916 (TransImpCls) 0.2801339827156346 (TransImpClsSpa) 0.26358982611465287 (stPlus) 0.25811461426728405 (spaGE) 0.2893536322874469 (Tangram)


In [8]:
# `to_csv` does not create missing folders; creating the nested folder also creates `output/`.
os.makedirs("output/clustering_5fold", exist_ok=True)

# Imputed expression, one CSV per method.
df_transImp.to_csv('output/merfish_moffit_transImpute.csv')
df_transImpSpa.to_csv('output/merfish_moffit_transImpSpa.csv')
df_transImpCls.to_csv('output/merfish_moffit_transImpCls.csv')
df_transImpClsSpa.to_csv('output/merfish_moffit_transImpClsSpa.csv')
df_spaGE_res.to_csv('output/merfish_moffit_spaGE.csv')
df_stplus_res.to_csv('output/merfish_moffit_stPlus.csv')
df_tangram_res.to_csv('output/merfish_moffit_Tangram.csv')

# Clustering scores: one row per (fold, method) in the order they were appended,
# labelled with the matching entry of `method_names`.
df = pd.DataFrame(spa_cluster_metrics)
df['method'] = method_names
df.to_csv("output/clustering_5fold/merfish_moffit.csv")


In [2]:
# Reload the saved clustering scores and average every score column per method.
metrics_csv = "output/clustering_5fold/merfish_moffit.csv"
df = pd.read_csv(metrics_csv, index_col=0)
df.groupby(by='method').mean()

Unnamed: 0_level_0,ARS,AMIS,HOMO,NMI
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
spaGE,0.039564,0.074565,0.082379,0.075245
stPlus,0.045572,0.082171,0.089317,0.082855
tangram,0.041625,0.075826,0.085092,0.076494
transImp,0.045357,0.078954,0.087845,0.079624
transImpCls,0.049668,0.080347,0.089345,0.081018
transImpClsSpa,0.046616,0.078892,0.087313,0.079566
transImpSpa,0.045435,0.083965,0.092097,0.084641


In [10]:
# Imputed expression frames keyed by the method label used when reporting.
dict_df = dict(
    TransImp=df_transImp,
    TransImpSpa=df_transImpSpa,
    TransImpCls=df_transImpCls,
    TransImpClsSpa=df_transImpClsSpa,
    spaGE=df_spaGE_res,
    stPlus=df_stplus_res,
    Tangram=df_tangram_res,
)

# Spatial autocorrelation of the measured expression: the default mode ('moran') first,
# then 'geary'.
for autocorr_mode in ('moran', 'geary'):
    sq.gr.spatial_autocorr(spa_adata, n_jobs=10, mode=autocorr_mode)

# One AnnData per method, produced by `compute_autocorr` from a copy of the measured
# data restricted to the shared genes and that method's imputed frame.
dict_adata = {
    method: compute_autocorr(spa_adata[:, raw_shared_gene].copy(), imputed_df)
    for method, imputed_df in dict_df.items()
}


In [11]:
from sklearn.metrics import mean_squared_error
def _autocorr_mse(uns_key, stat_column):
    """Return {method: MSE} between measured and imputed autocorrelation statistics.

    Reads column `stat_column` of the `uns[uns_key]` table, restricted to
    `raw_shared_gene`, from `spa_adata` and from each AnnData in `dict_adata`.
    """
    measured = spa_adata.uns[uns_key].loc[raw_shared_gene, stat_column]
    return {
        method: mean_squared_error(measured, imp_adata.uns[uns_key].loc[raw_shared_gene, stat_column])
        for method, imp_adata in dict_adata.items()
    }


moranIs = _autocorr_mse('moranI', 'I')
gearyCs = _autocorr_mse('gearyC', 'C')

print("Mean Squared Error\nMoran's I:\n")
print("\n".join(f"\tTrue vs {method}: {score:.6f}" for method, score in moranIs.items()))
print("Geary's C:\n")
print("\n".join(f"\tTrue vs {method}: {score:.6f}" for method, score in gearyCs.items()))


Mean Squared Error
Moran's I:

	True vs TransImp: 0.008554
	True vs TransImpSpa: 0.005353
	True vs TransImpCls: 0.007834
	True vs TransImpClsSpa: 0.004871
	True vs spaGE: 0.003916
	True vs stPlus: 0.005143
	True vs Tangram: 0.008349
Geary's C:

	True vs TransImp: 0.008628
	True vs TransImpSpa: 0.005370
	True vs TransImpCls: 0.007751
	True vs TransImpClsSpa: 0.004840
	True vs spaGE: 0.003970
	True vs stPlus: 0.005206
	True vs Tangram: 0.008244
