In [1]:
import os,sys
sys.path.append(os.path.realpath(os.path.join(os.getcwd(), '..')))
import warnings
warnings.filterwarnings("ignore")

In [2]:
import scanpy as sc
import pandas as pd
import CPS

In [3]:
# Build the CPS argument namespace with a fixed seed, then override a few
# fields for this experiment.
opt = CPS.config()
args = opt.parse_args(['--seed', '2026'])
args.prep_scale = False
args.sigma = 0.8
# Dataset root: defaults to the original local path but can be redirected with
# the DLPFC_DATASET_PATH environment variable so the notebook runs on other
# machines. os.path.join(..., '') guarantees a trailing separator, which the
# string-concatenated paths used later in the notebook rely on.
args.dataset_path = os.path.join(
    os.environ.get('DLPFC_DATASET_PATH', '/mnt/d/Dataset/SRT_Dataset/1-DLPFC/'),
    '',
)
args

Namespace(batch_size=256, clusters=7, coord_dim=2, dataset_path='/mnt/d/Dataset/SRT_Dataset/1-DLPFC/', decoder_latent=[256, 512, 1024], distill=1.0, dropout=0.2, er_w=0.05, flow='source_to_target', freq=32, gpu=0, hvgs=3000, inr_latent=[256, 256, 256], k_list=[0, 1, 2, 3, 4, 5, 6, 7], latent_dim=64, lr=0.001, max_epoch=1000, max_neighbors=6, n_spot=0, num_heads=4, prep_scale=False, radius=150, seed=2026, self_loops=True, sigma=0.8, weight_decay=0.0001)

In [4]:
# Section IDs to process, expanded from their two consecutive ID ranges
# (151507-151510 and 151669-151676).
_section_id_ranges = (range(151507, 151511), range(151669, 151677))
sections = [str(section_id)
            for id_range in _section_id_ranges
            for section_id in id_range]

# Masking ratio used for the gene-imputation experiment.
mask_ratio = 0.5

In [5]:
# Per-section gene-imputation experiment: load a Visium section, select HVGs,
# mask a fraction of gene entries, train CPS on the masked data and evaluate
# the imputation on the masked entries.
#
# Metrics for every section are kept in `all_metrics` (keyed by section ID);
# previously only the last iteration's `metrics` survived in the kernel.
all_metrics = {}

for section in sections:
    # Re-seed before every section so each run is independently reproducible.
    CPS.set_random_seed(args.seed)

    # Join path components properly instead of relying on `dataset_path`
    # ending with a separator.
    section_dir = os.path.join(args.dataset_path, section)
    truth_path = os.path.join(args.dataset_path, '1-DLPFC_annotations',
                              section + '_truth.txt')

    adata = sc.read_visium(section_dir)
    Ann_df = pd.read_csv(truth_path, sep='\t', header=None, index_col=0)
    Ann_df.columns = ['Ground Truth']
    adata.obs['Ground Truth'] = Ann_df.loc[adata.obs_names, 'Ground Truth']
    adata.var_names_make_unique()

    # Keep an untouched copy of X before normalisation; it is read back below
    # as `hvg_counts`.
    adata.layers["counts"] = adata.X.copy()

    # HVG selection runs on X before any normalisation (seurat_v3 flavor).
    sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=args.hvgs)
    adata_hvg = adata[:, adata.var['highly_variable']].copy()
    sc.pp.normalize_total(adata_hvg, target_sum=1e4)
    sc.pp.log1p(adata_hvg)
    # NOTE(review): scaling is applied unconditionally even though
    # args.prep_scale is False -- confirm this is intended.
    sc.pp.scale(adata_hvg)

    counts_layer = adata_hvg.layers['counts']
    adata_hvg.obsm['hvg_features'] = adata_hvg.X
    # `.toarray()` assumes the counts layer is a sparse matrix.
    adata_hvg.obsm['hvg_counts'] = counts_layer.toarray()

    spatial_data = CPS.SpatialGraphBuilder(args)
    train_data, test_data, mask_pattern = spatial_data.genes_perturb(
        adata_hvg,
        mask_ratio=mask_ratio,
        method='rknn',
        mask_value=0.0,
    )

    cps_model = CPS.CPSTrainer(args)
    cps_model.fit(train_data, verbose=False)

    metrics = cps_model.evaluate_genes_imputation(
        test_data,
        mask_pattern=mask_pattern,
        use_log1p=True,
        experiment_name=f'DLPFC_GI_{section}',
    )
    all_metrics[section] = metrics
    

  0%|          | 0/1000 [00:00<?, ?it/s]

Performing genes imputation evaluation...
Total entries: 12678000, Masked entries: 6339000 (50.0%)

Computing GENE metrics on **Log1p Transformed** data (Masked only)...

Spatial imputation metrics [Log1p Transformed]

[global flatten metrics]
MSE:           0.042671
RMSE:          0.206570
MAE:           0.095860
R²:            0.605398
Pearson:       0.779662
Spearman:      0.406730

[sample mean metrics]
CS:               0.770469
CS nonzero:       0.857919
Pearson:          0.751988
Pearson nonzero:  0.792941
Spearman:         0.393109
Spearman nonzero: 0.584425
SAM:              39.270324°
SAM nonzero:      30.624411°
MSE:              0.042671
MSE nonzero:      0.323159
MAE:              0.095860
MAE nonzero:      0.500782

 num of sample: 4226.0
Metrics saved to: ./results/DLPFC_GI_151507_metrics.json


  0%|          | 0/1000 [00:00<?, ?it/s]

Performing genes imputation evaluation...
Total entries: 13152000, Masked entries: 6576000 (50.0%)

Computing GENE metrics on **Log1p Transformed** data (Masked only)...

Spatial imputation metrics [Log1p Transformed]

[global flatten metrics]
MSE:           0.038837
RMSE:          0.197070
MAE:           0.090246
R²:            0.602416
Pearson:       0.777620
Spearman:      0.380097

[sample mean metrics]
CS:               0.770143
CS nonzero:       0.856530
Pearson:          0.754282
Pearson nonzero:  0.824281
Spearman:         0.364645
Spearman nonzero: 0.576552
SAM:              39.362642°
SAM nonzero:      30.786021°
MSE:              0.038837
MSE nonzero:      0.331404
MAE:              0.090246
MAE nonzero:      0.511973

 num of sample: 4384.0
Metrics saved to: ./results/DLPFC_GI_151508_metrics.json


  0%|          | 0/1000 [00:00<?, ?it/s]

Performing genes imputation evaluation...
Total entries: 14367000, Masked entries: 7183500 (50.0%)

Computing GENE metrics on **Log1p Transformed** data (Masked only)...

Spatial imputation metrics [Log1p Transformed]

[global flatten metrics]
MSE:           0.043551
RMSE:          0.208688
MAE:           0.098318
R²:            0.596997
Pearson:       0.774155
Spearman:      0.408025

[sample mean metrics]
CS:               0.764041
CS nonzero:       0.852577
Pearson:          0.744487
Pearson nonzero:  0.786743
Spearman:         0.392908
Spearman nonzero: 0.584537
SAM:              39.852441°
SAM nonzero:      31.200706°
MSE:              0.043551
MSE nonzero:      0.326679
MAE:              0.098318
MAE nonzero:      0.504496

 num of sample: 4789.0
Metrics saved to: ./results/DLPFC_GI_151509_metrics.json


  0%|          | 0/1000 [00:00<?, ?it/s]

Performing genes imputation evaluation...
Total entries: 13902000, Masked entries: 6951000 (50.0%)

Computing GENE metrics on **Log1p Transformed** data (Masked only)...

Spatial imputation metrics [Log1p Transformed]

[global flatten metrics]
MSE:           0.040206
RMSE:          0.200513
MAE:           0.092713
R²:            0.591610
Pearson:       0.771309
Spearman:      0.389318

[sample mean metrics]
CS:               0.763565
CS nonzero:       0.853383
Pearson:          0.746100
Pearson nonzero:  0.793384
Spearman:         0.377157
Spearman nonzero: 0.582014
SAM:              39.925748°
SAM nonzero:      31.172402°
MSE:              0.040206
MSE nonzero:      0.326890
MAE:              0.092713
MAE nonzero:      0.506347

 num of sample: 4634.0
Metrics saved to: ./results/DLPFC_GI_151510_metrics.json


  0%|          | 0/1000 [00:00<?, ?it/s]

Performing genes imputation evaluation...
Total entries: 10983000, Masked entries: 5491500 (50.0%)

Computing GENE metrics on **Log1p Transformed** data (Masked only)...

Spatial imputation metrics [Log1p Transformed]

[global flatten metrics]
MSE:           0.053504
RMSE:          0.231310
MAE:           0.116624
R²:            0.641503
Pearson:       0.801767
Spearman:      0.458771

[sample mean metrics]
CS:               0.792843
CS nonzero:       0.874992
Pearson:          0.771340
Pearson nonzero:  0.784753
Spearman:         0.443361
Spearman nonzero: 0.632807
SAM:              37.174929°
SAM nonzero:      28.650369°
MSE:              0.053504
MSE nonzero:      0.308506
MAE:              0.116624
MAE nonzero:      0.483600

 num of sample: 3661.0
Metrics saved to: ./results/DLPFC_GI_151669_metrics.json


  0%|          | 0/1000 [00:00<?, ?it/s]

Performing genes imputation evaluation...
Total entries: 10494000, Masked entries: 5247000 (50.0%)

Computing GENE metrics on **Log1p Transformed** data (Masked only)...

Spatial imputation metrics [Log1p Transformed]

[global flatten metrics]
MSE:           0.053509
RMSE:          0.231320
MAE:           0.114545
R²:            0.617326
Pearson:       0.786584
Spearman:      0.440790

[sample mean metrics]
CS:               0.778672
CS nonzero:       0.863862
Pearson:          0.757169
Pearson nonzero:  0.780677
Spearman:         0.423192
Spearman nonzero: 0.603926
SAM:              38.501605°
SAM nonzero:      29.951070°
MSE:              0.053509
MSE nonzero:      0.334047
MAE:              0.114545
MAE nonzero:      0.502369

 num of sample: 3498.0
Metrics saved to: ./results/DLPFC_GI_151670_metrics.json


  0%|          | 0/1000 [00:00<?, ?it/s]

Performing genes imputation evaluation...
Total entries: 12330000, Masked entries: 6165000 (50.0%)

Computing GENE metrics on **Log1p Transformed** data (Masked only)...

Spatial imputation metrics [Log1p Transformed]

[global flatten metrics]
MSE:           0.053440
RMSE:          0.231172
MAE:           0.115626
R²:            0.620493
Pearson:       0.788716
Spearman:      0.460036

[sample mean metrics]
CS:               0.775578
CS nonzero:       0.870944
Pearson:          0.751759
Pearson nonzero:  0.755627
Spearman:         0.442686
Spearman nonzero: 0.619127
SAM:              38.676502°
SAM nonzero:      29.103368°
MSE:              0.053440
MSE nonzero:      0.302162
MAE:              0.115626
MAE nonzero:      0.479703

 num of sample: 4110.0
Metrics saved to: ./results/DLPFC_GI_151671_metrics.json


  0%|          | 0/1000 [00:00<?, ?it/s]

Performing genes imputation evaluation...
Total entries: 12045000, Masked entries: 6022500 (50.0%)

Computing GENE metrics on **Log1p Transformed** data (Masked only)...

Spatial imputation metrics [Log1p Transformed]

[global flatten metrics]
MSE:           0.052694
RMSE:          0.229552
MAE:           0.113141
R²:            0.622888
Pearson:       0.789514
Spearman:      0.458609

[sample mean metrics]
CS:               0.775725
CS nonzero:       0.863016
Pearson:          0.751460
Pearson nonzero:  0.754509
Spearman:         0.441519
Spearman nonzero: 0.619768
SAM:              38.649294°
SAM nonzero:      29.948991°
MSE:              0.052694
MSE nonzero:      0.314682
MAE:              0.113140
MAE nonzero:      0.492356

 num of sample: 4015.0
Metrics saved to: ./results/DLPFC_GI_151672_metrics.json


  0%|          | 0/1000 [00:00<?, ?it/s]

Performing genes imputation evaluation...
Total entries: 10917000, Masked entries: 5458500 (50.0%)

Computing GENE metrics on **Log1p Transformed** data (Masked only)...

Spatial imputation metrics [Log1p Transformed]

[global flatten metrics]
MSE:           0.063955
RMSE:          0.252893
MAE:           0.136957
R²:            0.623284
Pearson:       0.792498
Spearman:      0.498769

[sample mean metrics]
CS:               0.784771
CS nonzero:       0.873549
Pearson:          0.755163
Pearson nonzero:  0.738012
Spearman:         0.483608
Spearman nonzero: 0.629669
SAM:              37.838537°
SAM nonzero:      28.759388°
MSE:              0.063955
MSE nonzero:      0.287344
MAE:              0.136957
MAE nonzero:      0.463558

 num of sample: 3639.0
Metrics saved to: ./results/DLPFC_GI_151673_metrics.json


  0%|          | 0/1000 [00:00<?, ?it/s]

Performing genes imputation evaluation...
Total entries: 11019000, Masked entries: 5509500 (50.0%)

Computing GENE metrics on **Log1p Transformed** data (Masked only)...

Spatial imputation metrics [Log1p Transformed]

[global flatten metrics]
MSE:           0.074598
RMSE:          0.273126
MAE:           0.154778
R²:            0.642298
Pearson:       0.803355
Spearman:      0.537540

[sample mean metrics]
CS:               0.806874
CS nonzero:       0.893459
Pearson:          0.775752
Pearson nonzero:  0.760245
Spearman:         0.524494
Spearman nonzero: 0.648961
SAM:              35.858162°
SAM nonzero:      26.399866°
MSE:              0.074598
MSE nonzero:      0.274736
MAE:              0.154778
MAE nonzero:      0.447390

 num of sample: 3673.0
Metrics saved to: ./results/DLPFC_GI_151674_metrics.json


  0%|          | 0/1000 [00:00<?, ?it/s]

Performing genes imputation evaluation...
Total entries: 10776000, Masked entries: 5388000 (50.0%)

Computing GENE metrics on **Log1p Transformed** data (Masked only)...

Spatial imputation metrics [Log1p Transformed]

[global flatten metrics]
MSE:           0.055884
RMSE:          0.236398
MAE:           0.123314
R²:            0.571488
Pearson:       0.761124
Spearman:      0.457993

[sample mean metrics]
CS:               0.745760
CS nonzero:       0.855110
Pearson:          0.715306
Pearson nonzero:  0.702752
Spearman:         0.440559
Spearman nonzero: 0.588349
SAM:              41.326215°
SAM nonzero:      30.856143°
MSE:              0.055884
MSE nonzero:      0.292209
MAE:              0.123314
MAE nonzero:      0.471952

 num of sample: 3592.0
Metrics saved to: ./results/DLPFC_GI_151675_metrics.json


  0%|          | 0/1000 [00:00<?, ?it/s]

Performing genes imputation evaluation...
Total entries: 10380000, Masked entries: 5190000 (50.0%)

Computing GENE metrics on **Log1p Transformed** data (Masked only)...

Spatial imputation metrics [Log1p Transformed]

[global flatten metrics]
MSE:           0.059315
RMSE:          0.243548
MAE:           0.130201
R²:            0.589718
Pearson:       0.772431
Spearman:      0.475646

[sample mean metrics]
CS:               0.765660
CS nonzero:       0.867820
Pearson:          0.735653
Pearson nonzero:  0.713903
Spearman:         0.461812
Spearman nonzero: 0.608759
SAM:              39.656988°
SAM nonzero:      29.492031°
MSE:              0.059315
MSE nonzero:      0.285970
MAE:              0.130201
MAE nonzero:      0.463039

 num of sample: 3460.0
Metrics saved to: ./results/DLPFC_GI_151676_metrics.json
