## Sim Demo

### Imports

In [2]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# reloaded before each cell runs, so edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [55]:
from imports import *

In [56]:
# overall imports
import importlib
import data

import sim.sim 
import sim.sim_utils
from sim.sim_utils import bytes2human, print_system_usage
from sim.sim import Simulation
from sim.sim_run import single_sim_run, open_pickled_results



#### Check job specs

In [57]:
# Report CPU / RAM usage for this job.
print_system_usage()

# Then report the total capacity of the root filesystem in human-readable form.
root_disk = psutil.disk_usage('/')
total = root_disk.total
print(bytes2human(total))

CPU Usage: 41.5%
RAM Usage: 36.0%
Available RAM: 644.5G
Total RAM: 1007.0G
52.4G


In [58]:
# Show the versions of the GPU-related libraries in use.
for label, module in (("XGBoost version:", xgboost), ("cupy version:", cp)):
    print(label, module.__version__)

XGBoost version: 2.0.3
cupy version: 13.1.0


In [59]:
GPUtil.getAvailable()
# The displayed value is a list of GPU device IDs; a non-empty list
# (e.g. [0]) means a GPU is available, an empty list means none is.

[0]

In [60]:
# Detect a usable GPU and record the outcome in `use_gpu`.
#
# GPUtil.getFirstAvailable() raises RuntimeError when no GPU meets its
# load/memory limits, so the lookup is guarded. `use_gpu` and `DEVICE_ID`
# are always defined afterwards, whether or not a GPU was found.
use_gpu = False
DEVICE_ID = None

try:
    DEVICE_ID_LIST = GPUtil.getFirstAvailable()
except RuntimeError:
    # No GPU available (or none could be queried): fall back to CPU.
    DEVICE_ID_LIST = []

if DEVICE_ID_LIST:
    DEVICE_ID = DEVICE_ID_LIST[0]  # grab first element from list

if DEVICE_ID is not None:
    print('GPU found', DEVICE_ID)
    use_gpu = True
else:
    print('No GPU found')

GPU found 0


In [61]:
# Print a table with the current load and memory utilisation of each GPU.
GPUtil.showUtilization()


| ID | GPU | MEM |
------------------
|  0 |  0% | 38% |


In [62]:
# Displays True when PyTorch reports that CUDA is available.
torch.cuda.is_available()


True

#### Simulation tests <a id="sims"></a>

In [63]:
# Single run: xgboost model on the transcriptome feature with the FC target,
# S456 parcellation and the 'all_abagen' gene list (omit_subcortical=False).
# Nothing is saved (save_sim=False) and wandb tracking is off.
xgb_all_abagen_args = dict(
    cv_type='random',
    random_seed=42,
    model_type='xgboost',
    feature_type=[{'transcriptome': None}],
    connectome_target='FC',
    use_gpu=True,
    use_shared_regions=False,
    test_shared_regions=False,
    omit_subcortical=False,
    parcellation='S456',
    gene_list='all_abagen',
    save_sim=False,
    search_method=('bayes', 'mse'),
    track_wandb=False,
)
single_sim_run(**xgb_all_abagen_args)

Number of components for PCA: 36
X shape: (390, 107)
X_pca shape: (456, 36)
Y_sc shape: (390, 390)
Y_sc_spectralL shape: (390, 455)
Y_sc_spectralA shape: (390, 456)
Y_fc shape: (390, 390)
Coordinates shape: (390, 3)
Y shape (390, 390)
feature_name transcriptome
processing_type None
X shape (390, 107)
Fold 0 shapes - X_train: (84972, 214), X_test: (9506, 214), Y_train: (84972,), Y_test: (9506,)
Fold 1 shapes - X_train: (84972, 214), X_test: (9506, 214), Y_train: (84972,), Y_test: (9506,)
Fold 2 shapes - X_train: (85556, 214), X_test: (9312, 214), Y_train: (85556,), Y_test: (9312,)
Fold 3 shapes - X_train: (85556, 214), X_test: (9312, 214), Y_train: (85556,), Y_test: (9312,)

 Test fold num: 1
(84972, 214) (84972,) (9506, 214) (9506,)
SEARCH METHOD ('bayes', 'mse')
2
3
4
Fold 0 shapes - X_train: (37442, 214), X_test: (9506, 214), Y_train: (37442,), Y_test: (9506,)
Fold 1 shapes - X_train: (37830, 214), X_test: (9312, 214), Y_train: (37830,), Y_test: (9312,)
Fold 2 shapes - X_train: (3783

[[{'model_parameters': {'objective': 'reg:squarederror',
    'base_score': None,
    'booster': None,
    'callbacks': None,
    'colsample_bylevel': None,
    'colsample_bynode': None,
    'colsample_bytree': 0.8,
    'device': 'cuda',
    'early_stopping_rounds': None,
    'enable_categorical': False,
    'eval_metric': None,
    'feature_types': None,
    'gamma': None,
    'grow_policy': None,
    'importance_type': None,
    'interaction_constraints': None,
    'learning_rate': 0.3,
    'max_bin': None,
    'max_cat_threshold': None,
    'max_cat_to_onehot': None,
    'max_delta_step': None,
    'max_depth': 3,
    'max_leaves': None,
    'min_child_weight': None,
    'missing': nan,
    'monotone_constraints': None,
    'multi_strategy': None,
    'n_estimators': 250,
    'n_jobs': None,
    'num_parallel_tree': None,
    'random_state': 42,
    'reg_alpha': 0.1,
    'reg_lambda': 0.0001,
    'sampling_method': None,
    'scale_pos_weight': None,
    'subsample': 0.8,
    'tree_m

In [12]:
# Single run: xgboost model on the transcriptome feature with the FC target,
# S456 parcellation and the 'richiardi2015' gene list (omit_subcortical=True).
# save_sim=True for this run; wandb tracking is off.
xgb_richiardi_args = dict(
    cv_type='random',
    random_seed=42,
    model_type='xgboost',
    feature_type=[{'transcriptome': None}],
    connectome_target='FC',
    use_gpu=True,
    use_shared_regions=False,
    test_shared_regions=False,
    omit_subcortical=True,
    parcellation='S456',
    gene_list='richiardi2015',
    save_sim=True,
    search_method=('bayes', 'mse'),
    track_wandb=False,
)
single_sim_run(**xgb_richiardi_args)

feature_name transcriptome
processing_type None
X shape (100, 1906)

 Test fold num: 1
(5550, 3812) (5550,) (600, 3812) (600,)
SEARCH METHOD ('bayes', 'mse')
2
3
4
ACCELERATING
Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV 1/3] END colsample_bytree=0.8, device=cuda, learning_rate=0.3, max_depth=3, n_estimators=250, n_gpus=-1, random_state=42, reg_alpha=0.1, reg_lambda=0.0001, subsample=0.8, tree_method=gpu_hist, verbosity=0;, score=(train=-0.001, test=-0.031) total time=   0.5s
[CV 2/3] END colsample_bytree=0.8, device=cuda, learning_rate=0.3, max_depth=3, n_estimators=250, n_gpus=-1, random_state=42, reg_alpha=0.1, reg_lambda=0.0001, subsample=0.8, tree_method=gpu_hist, verbosity=0;, score=(train=-0.001, test=-0.025) total time=   0.4s
[CV 3/3] END colsample_bytree=0.8, device=cuda, learning_rate=0.3, max_depth=3, n_estimators=250, n_gpus=-1, random_state=42, reg_alpha=0.1, reg_lambda=0.0001, subsample=0.8, tree_method=gpu_hist, verbosity=0;, score=(train=-0.001, te

[[{'model_parameters': {'objective': 'reg:squarederror',
    'base_score': None,
    'booster': None,
    'callbacks': None,
    'colsample_bylevel': None,
    'colsample_bynode': None,
    'colsample_bytree': 0.6,
    'device': 'cuda',
    'early_stopping_rounds': None,
    'enable_categorical': False,
    'eval_metric': None,
    'feature_types': None,
    'gamma': None,
    'grow_policy': None,
    'importance_type': None,
    'interaction_constraints': None,
    'learning_rate': 0.01,
    'max_bin': None,
    'max_cat_threshold': None,
    'max_cat_to_onehot': None,
    'max_delta_step': None,
    'max_depth': 5,
    'max_leaves': None,
    'min_child_weight': None,
    'missing': nan,
    'monotone_constraints': None,
    'multi_strategy': None,
    'n_estimators': 150,
    'n_jobs': None,
    'num_parallel_tree': None,
    'random_state': 42,
    'reg_alpha': 0,
    'reg_lambda': 0,
    'sampling_method': None,
    'scale_pos_weight': None,
    'subsample': 0.8,
    'tree_method'

In [None]:
# Single run: ridge model on the euclidean feature with the FC target,
# no GPU, shared regions enabled, ('grid', 'mse') search, wandb tracking on.
ridge_euclidean_args = dict(
    cv_type='random',
    random_seed=42,
    model_type='ridge',
    feature_type=[{'euclidean': None}],
    connectome_target='FC',
    use_gpu=False,
    use_shared_regions=True,
    test_shared_regions=False,
    save_sim=False,
    search_method=('grid', 'mse'),
    track_wandb=True,
)
single_sim_run(**ridge_euclidean_args)

In [None]:
# Single run: dynamic_nn model on the euclidean feature with the FC target,
# GPU enabled, shared regions enabled, ('grid', 'mse') search, wandb tracking on.
nn_euclidean_args = dict(
    cv_type='random',
    random_seed=42,
    model_type='dynamic_nn',
    feature_type=[{'euclidean': None}],
    connectome_target='FC',
    use_gpu=True,
    use_shared_regions=True,
    test_shared_regions=False,
    save_sim=False,
    search_method=('grid', 'mse'),
    track_wandb=True,
)
single_sim_run(**nn_euclidean_args)

In [None]:
# Single run: xgboost model on the euclidean feature with the FC target,
# GPU enabled, shared regions enabled, ('bayes', 'pearson') search,
# wandb tracking on.
xgb_euclidean_args = dict(
    cv_type='random',
    random_seed=42,
    model_type='xgboost',
    feature_type=[{'euclidean': None}],
    connectome_target='FC',
    use_gpu=True,
    use_shared_regions=True,
    test_shared_regions=False,
    save_sim=False,
    search_method=('bayes', 'pearson'),
    track_wandb=True,
)
single_sim_run(**xgb_euclidean_args)

In [None]:
# Single run: xgboost model on the structural feature ('spectral_A_20'
# processing) with the FC target, GPU enabled, shared regions enabled,
# ('bayes', 'pearson') search, wandb tracking on.
xgb_structural_args = dict(
    cv_type='random',
    random_seed=42,
    model_type='xgboost',
    feature_type=[{'structural': 'spectral_A_20'}],
    connectome_target='FC',
    use_gpu=True,
    use_shared_regions=True,
    test_shared_regions=False,
    save_sim=False,
    search_method=('bayes', 'pearson'),
    track_wandb=True,
)
single_sim_run(**xgb_structural_args)

In [None]:
# Single run: xgboost model on the transcriptome feature with the FC target,
# GPU enabled, shared regions enabled, ('bayes', 'pearson') search,
# wandb tracking on.
xgb_transcriptome_args = dict(
    cv_type='random',
    random_seed=42,
    model_type='xgboost',
    feature_type=[{'transcriptome': None}],
    connectome_target='FC',
    use_gpu=True,
    use_shared_regions=True,
    test_shared_regions=False,
    save_sim=False,
    search_method=('bayes', 'pearson'),
    track_wandb=True,
)
single_sim_run(**xgb_transcriptome_args)

In [None]:
# Single run: xgboost model on two features together (transcriptome and
# structural with 'spectral_A_20' processing) with the FC target, GPU enabled,
# shared regions enabled, ('bayes', 'pearson') search, wandb tracking on.
combined_features = [
    {'transcriptome': None},
    {'structural': 'spectral_A_20'},
]
xgb_combined_args = dict(
    cv_type='random',
    random_seed=42,
    model_type='xgboost',
    feature_type=combined_features,
    connectome_target='FC',
    use_gpu=True,
    use_shared_regions=True,
    test_shared_regions=False,
    save_sim=False,
    search_method=('bayes', 'pearson'),
    track_wandb=True,
)
single_sim_run(**xgb_combined_args)

In [None]:
# Single run: dynamic_nn model on the transcriptome feature with the FC
# target, GPU enabled, shared regions enabled, ('grid', 'mse') search,
# wandb tracking on.
nn_transcriptome_args = dict(
    cv_type='random',
    random_seed=42,
    model_type='dynamic_nn',
    feature_type=[{'transcriptome': None}],
    connectome_target='FC',
    use_gpu=True,
    use_shared_regions=True,
    test_shared_regions=False,
    save_sim=False,
    search_method=('grid', 'mse'),
    track_wandb=True,
)
single_sim_run(**nn_transcriptome_args)

In [None]:
# Single run: ridge model on two features together (transcriptome and
# structural with 'spectral_A_20' processing) with the FC target, no GPU,
# shared regions disabled, ('grid', 'mse') search; save_sim=True.
# track_wandb is not passed, so the function's own default applies.
ridge_features = [
    {'transcriptome': None},
    {'structural': 'spectral_A_20'},
]
ridge_combined_args = dict(
    cv_type='random',
    random_seed=42,
    model_type='ridge',
    feature_type=ridge_features,
    use_gpu=False,
    use_shared_regions=False,
    test_shared_regions=False,
    save_sim=True,
    connectome_target='FC',
    search_method=('grid', 'mse'),
)
single_sim_run(**ridge_combined_args)