In [74]:
import os
import torch
import pandas as pd
import scanpy as sc
import numpy as np
from sklearn import metrics
import multiprocessing as mp
import warnings
warnings.filterwarnings("ignore")

from scipy.stats import pearsonr
from skimage.metrics import structural_similarity as ssim
from sklearn.metrics import mean_squared_error as mse
from scipy.sparse import csr_matrix

from libpysal.weights import KNN
from esda.moran import Moran

from GraphST import GraphST

In [75]:
# Preferred GPU for this run. `torch.cuda.is_available()` alone does not
# guarantee that this particular index exists, so fall back to the first
# visible GPU when it does not, and to the CPU when CUDA is unavailable.
preferred_gpu = 5
if not torch.cuda.is_available():
    device = torch.device("cpu")
elif torch.cuda.device_count() > preferred_gpu:
    device = torch.device(f"cuda:{preferred_gpu}")
else:
    device = torch.device("cuda:0")

In [76]:
# Run-level settings.
dataset = "Breast Cancer"  # label written to the 'Dataset' field of the results
n_clusters = 20            # not referenced by any cell visible in this section

In [77]:
# Read the Visium section (filtered count matrix plus tissue images) from disk.
file_fold = '/home/lytq/Spatial-Transcriptomics-Benchmark/data/BRCA1/V1_Human_Breast_Cancer_Block_A_Section_1'
adata = sc.read_visium(
    file_fold,
    count_file='filtered_feature_bc_matrix.h5',
    load_images=True,
)
# De-duplicate the gene names in place so they can be used as column labels later.
adata.var_names_make_unique()

In [78]:
# Keep an independent snapshot of the loaded data for the before/after
# comparison. A plain alias (`adata_before = adata`) would only stay pristine
# as long as nothing downstream modifies the object in place.
adata_before = adata.copy()

In [79]:
# Build the GraphST model on the loaded AnnData, targeting `device`.
model = GraphST.GraphST(adata, device=device)
# NOTE: `adata` is rebound to whatever train() returns; later cells read
# obsm['emb'] and var['highly_variable'] from it. Re-running this cell alone
# therefore feeds the returned object back into GraphST instead of the freshly
# loaded data -- re-run from the loading cell to get a clean state.
adata = model.train()

AnnData object with n_obs × n_vars = 3798 × 36601
    obs: 'in_tissue', 'array_row', 'array_col'
    var: 'gene_ids', 'feature_types', 'genome', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std'
    uns: 'spatial', 'hvg', 'log1p'
    obsm: 'spatial'
    layers: 'count'
Begin to train ST data...
Debugging


100%|██████████| 600/600 [00:09<00:00, 66.38it/s]


Optimization finished for ST data!


In [80]:
# Display the pre-training snapshot to check which fields it carries after training ran.
adata_before

AnnData object with n_obs × n_vars = 3798 × 36601
    obs: 'in_tissue', 'array_row', 'array_col'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial'
    obsm: 'spatial'

In [81]:
# Display the gene names of the pre-training snapshot (all genes, before any subsetting).
adata_before.var_names

Index(['MIR1302-2HG', 'FAM138A', 'OR4F5', 'AL627309.1', 'AL627309.3',
       'AL627309.2', 'AL627309.5', 'AL627309.4', 'AP006222.2', 'AL732372.1',
       ...
       'AC133551.1', 'AC136612.1', 'AC136616.1', 'AC136616.3', 'AC136616.2',
       'AC141272.1', 'AC023491.2', 'AC007325.1', 'AC007325.4', 'AC007325.2'],
      dtype='object', length=36601)

In [124]:
# GraphST output stored under obsm['emb']. It is wrapped below with one column
# per highly variable gene, so it must have exactly that many columns.
adata_after = adata.obsm['emb']
orig_genes = adata_before.var_names.copy()  # full gene list before subsetting
highly_variable_genes = adata.var_names[adata.var['highly_variable']]

# gene_df1 is labelled with adata's spot index below, so both objects must list
# the same spots in the same order; otherwise every element-wise metric would
# silently compare mismatched rows.
assert adata_before.obs_names.equals(adata.obs_names), \
    "adata_before and adata do not contain the same spots in the same order"

# Restrict the input data to the highly variable genes, in the same order as
# the columns assigned to the GraphST output.
adata_before = adata_before[:, highly_variable_genes].copy()

# The original code called .toarray() unconditionally (sparse .X); a dense .X
# is accepted as well.
before_values = adata_before.X
if hasattr(before_values, 'toarray'):
    before_values = before_values.toarray()

# NOTE(review): gene_df1 holds adata_before.X exactly as loaded, while gene_df2
# holds GraphST's output. Presumably these are on different scales (counts vs.
# processed values) -- confirm before interpreting RMSE / SSIM magnitudes.
gene_df2 = pd.DataFrame(adata_after,
                        index=adata.obs.index,
                        columns=highly_variable_genes)

gene_df1 = pd.DataFrame(before_values,
                        index=adata.obs.index,
                        columns=adata_before.var.index)

In [125]:
# Display both shapes (GraphST output first, input data second) as a visual sanity check.
gene_df2.shape, gene_df1.shape

((3798, 3000), (3798, 3000))

In [126]:
# The element-wise metrics below require both tables to have identical shape;
# report the two shapes when they do not.
assert gene_df1.shape == gene_df2.shape, (
    f"shape mismatch: gene_df1 {gene_df1.shape} vs gene_df2 {gene_df2.shape}"
)

In [127]:
# Pearson correlation over all (spot, gene) entries: both tables are unrolled
# row by row into 1-D vectors and correlated as a whole.
vec1 = np.ravel(gene_df1.to_numpy())
vec2 = np.ravel(gene_df2.to_numpy())

# pearsonr returns (statistic, p-value); only the statistic is kept.
pcc = pearsonr(vec1, vec2)[0]
print(f"Pearson Correlation Coefficient: {pcc:.4f}")

Pearson Correlation Coefficient: 0.4220


In [128]:
# Structural similarity, treating each spots-by-genes table as a 2-D image.
arr1 = gene_df1.to_numpy()
arr2 = gene_df2.to_numpy()

# NOTE(review): the dynamic range passed to SSIM is taken from arr2 (the GraphST
# output) only -- confirm this is intended if arr1 spans a different range.
ssim_range = arr2.max() - arr2.min()
ssim_value = ssim(arr1, arr2, data_range=ssim_range)
print(f"Structural Similarity Index: {ssim_value:.4f}")

Structural Similarity Index: 0.3818


In [129]:
# Root mean squared error between the two tables (square root of sklearn's MSE).
mean_sq_err = mse(arr1, arr2)
rmse_value = np.sqrt(mean_sq_err)
print(f"Root Mean Squared Error: {rmse_value:.4f}")

Root Mean Squared Error: 11.1802


In [130]:
def compute_morans_i(exp_matrix, w, verbose=False):
    """Return the mean Moran's I over the columns of ``exp_matrix``.

    Parameters
    ----------
    exp_matrix : 2-D array
        One column per gene; column ``i`` is passed to ``Moran`` as the
        observed values.
    w : spatial weights object
        Passed unchanged to ``Moran`` for every column.
    verbose : bool, default False
        When True, print the column index every 100 columns as a progress
        indicator.

    Returns
    -------
    float
        Arithmetic mean of the per-column Moran's I values; NaN when
        ``exp_matrix`` has no columns.
    """
    n_genes = exp_matrix.shape[1]
    if n_genes == 0:
        # Nothing to average; make the empty case explicit.
        return np.nan

    morans_i_list = []
    for i in range(n_genes):  # Iterate over genes
        if verbose and i % 100 == 0:
            print(i)
        # Only the statistic `I` is used, so skip the permutation-based
        # inference Moran would otherwise run for every gene.
        moran = Moran(exp_matrix[:, i], w, permutations=0)
        morans_i_list.append(moran.I)
    return np.mean(morans_i_list)  # Average Moran's I across genes


# Spatial weights for Moran's I: each spot is linked to its 5 nearest
# neighbours in the 'spatial' coordinates, then the transform is set to 'r'.
knn_neighbors = 5
spatial_coords = adata.obsm['spatial']
w = KNN.from_array(spatial_coords, k=knn_neighbors)
w.transform = 'r'

In [131]:
# Mean Moran's I of the input data vs. the GraphST output, using the same weights.
before_matrix = gene_df1.to_numpy()
after_matrix = gene_df2.to_numpy()
morans_i_before = compute_morans_i(before_matrix, w)
morans_i_after = compute_morans_i(after_matrix, w)

print(f"Moran's I before: {morans_i_before:.4f}")
print(f"Moran's I after: {morans_i_after:.4f}")

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
Moran's I before: 0.2228
Moran's I after: 0.7267


In [132]:
# Gather every metric of this run into one record ...
results = {
    'Model': 'GraphST',
    'Dataset': dataset,
    "Moran's I before": morans_i_before,
    "Moran's I after": morans_i_after,
    'PCC': pcc,
    'SSIM': ssim_value,
    'RMSE': rmse_value
}
df_results = pd.DataFrame([results])  # single-row table, one column per key

# ... and write it as metrics.csv under the output directory, creating the
# directory first if it does not exist yet.
output_path = '/home/lytq/Spatial-Transcriptomics-Benchmark/Results/Imputation/GraphST/BRCA1'
os.makedirs(output_path, exist_ok=True)
metrics_csv = os.path.join(output_path, 'metrics.csv')
df_results.to_csv(metrics_csv, index=False)