## Sim Demo

### Imports

In [25]:
from imports import *

In [26]:
# overall imports
import importlib
import data

# data load
from data.data_load import load_transcriptome, load_connectome
importlib.reload(data.data_load)

# data utils
from data.data_utils import (
    reconstruct_connectome,
    reconstruct_upper_triangle,
    make_symmetric,
    expand_X_symmetric,
    expand_Y_symmetric,
    expand_X_symmetric_shared,
    expand_X_Y_symmetric_conn_only,
    expand_shared_matrices,
    expand_X_symmetric_w_conn, 
    process_cv_splits, 
    process_cv_splits_conn_only_model, 
    expanded_inner_folds_combined_plus_indices
)

import data.data_utils
importlib.reload(data.data_utils)

# cross-validation classes
import data.cv_split
from data.cv_split import RandomCVSplit, SchaeferCVSplit, CommunityCVSplit, SubnetworkCVSplit
importlib.reload(data.cv_split)

# prebuilt model classes
import models.base_models   
from models.base_models import ModelBuild
importlib.reload(models.base_models)

# metric classes
import models.metrics.distance_FC
import models.metrics.eval
from models.metrics.eval import ModelEvaluator
from models.metrics.eval import pearson_numpy, mse_numpy, r2_numpy, pearson_cupy, mse_cupy, r2_cupy
importlib.reload(models.metrics.eval)

# sim class
importlib.reload(sim.sim)
importlib.reload(sim.sim_run)

import sim.sim 
import sim.sim_utils
from sim.sim_utils import bytes2human, print_system_usage
from sim.sim import Simulation
from sim.sim_run import single_sim_run, open_pickled_results



#### Check job specs

In [21]:
print_system_usage()

total = psutil.disk_usage('/').total
print(bytes2human(total))

CPU Usage: 15.1%
RAM Usage: 34.7%
Available RAM: 246.1G
Total RAM: 377.1G
52.4G


In [17]:
print("XGBoost version:", xgboost.__version__)
print("cupy version:", cp.__version__)

XGBoost version: 2.0.3
cupy version: 13.1.0


In [22]:
GPUtil.getAvailable()
# if a number is seen a GPU is available

[]

In [23]:
GPUtil.getGPUs()

DEVICE_ID_LIST = GPUtil.getFirstAvailable()
DEVICE_ID = DEVICE_ID_LIST[0] # grab first element from list
if DEVICE_ID != None: 
    print('GPU found', DEVICE_ID)
    use_gpu = True

RuntimeError: Could not find an available GPU after 1 attempts with 900 seconds interval.

In [24]:
GPUtil.showUtilization()


| ID | GPU | MEM |
------------------


In [25]:
torch.cuda.is_available()


False

#### Simulation tests <a id="sims"></a>

In [27]:
single_sim_run(
              cv_type='random',
              random_seed=42,
              model_type='ridge',
              feature_type=[{'structural': None}],
              connectome_target='FC',
              use_gpu=False,
              use_shared_regions=False,
              test_shared_regions=False,
              save_sim=True,
              search_method=('grid', 'mse')
              )

computing eig of laplacian
computing eig of adjacency
feature_type [{'structural': None}]
feature_name structural
processing_type None
features ['structural']
feature structural
X shape (114, 114)

 Test fold num: 1
(7140, 228) (7140,) (812, 228) (812,)
SEARCH METHOD ('grid', 'mse')
2
3
4
Fitting 3 folds for each of 8 candidates, totalling 24 fits
[CV 1/3] END alpha=0, solver=auto;, score=-3434954870325470301735878656.000 total time=   0.2s
[CV 2/3] END alpha=0, solver=auto;, score=-28160248021761073589025505280.000 total time=   0.1s
[CV 3/3] END alpha=0, solver=auto;, score=-7626964028110262880037765120.000 total time=   0.2s
[CV 1/3] END .........alpha=0.001, solver=auto;, score=-0.031 total time=   0.0s
[CV 2/3] END .........alpha=0.001, solver=auto;, score=-0.033 total time=   0.0s
[CV 3/3] END .........alpha=0.001, solver=auto;, score=-0.032 total time=   0.0s
[CV 1/3] END ..........alpha=0.01, solver=auto;, score=-0.031 total time=   0.0s
[CV 2/3] END ..........alpha=0.01, solve

[[{'model_parameters': {'alpha': 1000,
    'copy_X': True,
    'fit_intercept': True,
    'max_iter': None,
    'positive': False,
    'random_state': None,
    'solver': 'auto',
    'tol': 0.0001},
   'train_metrics': {'mse': 0.020278882393201227,
    'mae': 0.10268155683003194,
    'r2': -1.2951947192552453,
    'pearson_corr': 0.6488312728967608,
    'connectome_corr': 0.4720206263782361,
    'connectome_r2': -1.4197676573704183,
    'geodesic_distance': 14.506735990596791},
   'best_val_score': -0.03132316020996637,
   'test_metrics': {'mse': 0.036058105787796654,
    'mae': 0.14322012822528069,
    'r2': -3.031753003372747,
    'pearson_corr': 0.20495868827945796,
    'connectome_corr': 0.09884151549399994,
    'connectome_r2': -3.6483979437240537,
    'geodesic_distance': 8.083194963994085},
   'y_true': array([ 0.25718  ,  0.25718  ,  0.21614  ,  0.21614  ,  0.17879  ,
           0.17879  ,  0.20003  ,  0.20003  ,  0.18572  ,  0.18572  ,
           0.25026  ,  0.25026  ,  0.1166

In [None]:
single_sim_run(
              cv_type='random',
              random_seed=42,
              model_type='ridge',
              feature_type=[
                            {'structural': 'spectral_A_20'}
                            ],
              use_gpu=False,
              use_shared_regions=False,
              test_shared_regions=False,
              save_sim=True,
              connectome_target='SC',
              search_method=('grid', 'mse')
              )

In [None]:
single_sim_run(
              cv_type='random',
              random_seed=42,
              model_type='ridge',
              feature_type=[{'structural': None}, 
                            {'structural': 'spectral_A_20'}
                            ],
              use_gpu=True,
              use_shared_regions=False,
              test_shared_regions=False,
              save_sim=True,
              connectome_target='SC',
              search_method=('grid', 'mse')
              )