# 2024-08-20-Analysis: Final GEARS evaluation on test split

In [None]:
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np
import optuna

from gears_helpers import run_gears

%load_ext autoreload
%autoreload 2

## Load best-trial hyperparameters

In [2]:
# Hyperparameters for the final GEARS runs. Every entry is forwarded to
# run_gears as a keyword argument by the evaluation loop further down.
# NOTE(review): the values are hard-coded here; presumably they were copied
# from the best trial of the Optuna study -- confirm against the study.
best_params = dict(
    hidden_size=384,
    num_go_gnn_layers=1,
    num_gene_gnn_layers=2,
    decoder_hidden_size=48,
    num_similar_genes_go_graph=25,
    num_similar_genes_co_express_graph=15,
    coexpress_threshold=0.31,
    lr=3.863459970955458e-05,
    wd=0.0009967351093758648,
)

[I 2024-08-20 21:07:31,666] Using an existing study with name 'norman19_gears_optuna' instead of creating a new one.


## Run the best-trial configuration on the test split (4 seeds)

In [None]:
# Evaluate the fixed hyperparameters on the test split once per seed and
# collect whatever run_gears returns for each run, in seed order.
summary_metrics_list = []
for seed in (301, 302, 303, 304):
    # Build a fresh dict per run so best_params itself is never mutated.
    run_params = {**best_params, 'seed': seed, 'eval_split': 'test'}
    summary_metrics = run_gears(**run_params)
    summary_metrics_list.append(summary_metrics)

In [10]:
# Show the raw per-seed results (one entry per seed, in loop order) before
# they are tabulated.
summary_metrics_list

[rmse_average         0.068426
 rmse_rank_average    0.043478
 cosine_logfc         0.435436
 cosine_rank_logfc    0.052930
 dtype: float64,
 rmse_average         0.067806
 rmse_rank_average    0.059074
 cosine_logfc         0.439017
 cosine_rank_logfc    0.057656
 dtype: float64,
 rmse_average         0.067963
 rmse_rank_average    0.045369
 cosine_logfc         0.447089
 cosine_rank_logfc    0.043478
 dtype: float64,
 rmse_average         0.070721
 rmse_rank_average    0.072779
 cosine_logfc         0.442385
 cosine_rank_logfc    0.052457
 dtype: float64]

In [11]:
# Stack the per-seed results into one table: one row per run (same order as
# the seed loop) and one column per metric.
summary_metrics_df = pd.DataFrame(summary_metrics_list)
summary_metrics_df

Unnamed: 0,rmse_average,rmse_rank_average,cosine_logfc,cosine_rank_logfc
0,0.068426,0.043478,0.435436,0.05293
1,0.067806,0.059074,0.439017,0.057656
2,0.067963,0.045369,0.447089,0.043478
3,0.070721,0.072779,0.442385,0.052457


In [14]:
# Average each metric over the runs (axis=0 reduces down the rows).
summary_metrics_df.mean(axis=0)

rmse_average         0.068729
rmse_rank_average    0.055175
cosine_logfc         0.440982
cosine_rank_logfc    0.051630
dtype: float64

In [16]:
# Spread of each metric over the runs. pandas defaults to ddof=1, so this is
# the sample standard deviation (divides by n - 1), not the population one.
summary_metrics_df.std(axis=0)

rmse_average         0.001354
rmse_rank_average    0.013639
cosine_logfc         0.004963
cosine_rank_logfc    0.005920
dtype: float64