## MICCAI Test Pipeline (transformer)

### Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from imports import *

In [3]:
# overall imports
import importlib
import data

# importlib.reload(data.data_utils)
# importlib.reload(data.data_load)
#importlib.reload(sim.sim_utils)
# importlib.reload(models)

import models
import sim.sim 
import sim.sim_utils
from sim.sim_utils import bytes2human, print_system_usage
from sim.sim import Simulation
from sim.sim_run import single_sim_run, open_pickled_results


#### Check job specs

In [4]:
# Report current CPU/RAM usage for this job, then the total size of the root filesystem.
print_system_usage()

root_disk = psutil.disk_usage('/')
total = root_disk.total
print(bytes2human(total))

CPU Usage: 16.3%
RAM Usage: 4.7%
Available RAM: 959.8G
Total RAM: 1007.0G
52.4G


In [5]:
# Record the library versions this run was executed with.
print(f"XGBoost version: {xgboost.__version__}")
print(f"cupy version: {cp.__version__}")

XGBoost version: 2.0.3
cupy version: 13.1.0


In [6]:
# A non-empty list means at least one GPU is currently available;
# the numbers shown are the IDs of the available GPUs (e.g. [0]).
GPUtil.getAvailable()

[0]

In [7]:
# Pick the first GPU that GPUtil reports as available and record whether a GPU
# can be used. GPUtil.getFirstAvailable() raises RuntimeError (it does not
# return None or an empty list) when no GPU is available, so that case is
# handled explicitly and `use_gpu` is always defined for later cells.
try:
    DEVICE_ID_LIST = GPUtil.getFirstAvailable()
except RuntimeError:
    DEVICE_ID_LIST = []

DEVICE_ID = DEVICE_ID_LIST[0] if DEVICE_ID_LIST else None  # first element, or None
use_gpu = DEVICE_ID is not None

if use_gpu:
    print('GPU found', DEVICE_ID)
else:
    print('No available GPU found; use_gpu set to False')

GPU found 0


In [8]:
# Print a per-GPU table of current load and memory utilization.
GPUtil.showUtilization()


| ID | GPU | MEM |
------------------
|  0 |  0% |  0% |


In [9]:
# Release cached, unused GPU memory held by PyTorch before starting the runs.
# Guarded so the cell does nothing on machines without CUDA.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [10]:
# Clear the PyTorch CUDA cache again (repeats the guarded call in the previous cell).
torch.cuda.empty_cache()

## Start test pipeline <a id="sims"></a>

In [None]:
def run_simulation_set(model_types=['dynamic_mlp'],
                      cv_types=['spatial', 'random'],
                      parcellations=['S100', 'S400'],
                      connectome_targets=['FC', 'SC'],
                      feature_types=['transcriptome', 'euclidean'],
                      hemisphere='left',
                      random_seeds=[42, 123, 456],
                      use_gpu=True):
    """
    Run one simulation for every combination of the given parameters.

    Combinations are visited in the order seed -> model -> cv -> parcellation
    -> target -> feature (the last one varies fastest). Each combination is
    passed to ``single_sim_run`` with a wandb hyperparameter search
    (``search_method=('wandb', 'mse', 10)``), wandb tracking enabled and
    ``save_sim=False``. After each run the CUDA cache is emptied (when CUDA is
    available) and the garbage collector is invoked.

    Args:
        model_types (list): List of model types to test
        cv_types (list): List of cross-validation types
        parcellations (list): List of parcellation schemes
        connectome_targets (list): List of connectome targets
        feature_types (list): List of feature names; each name ``f`` is passed
            to ``single_sim_run`` as ``feature_type=[{f: None}]``
        hemisphere (str): NOTE: currently not used. The hemisphere is derived
            from the parcellation instead ('both' for 'S100', 'left' for every
            other parcellation). The parameter is kept so existing calls keep
            working.
        random_seeds (list): List of random seeds for multiple runs
        use_gpu (bool): Forwarded to ``single_sim_run``; defaults to True,
            which matches the previously hard-coded value.

    Returns:
        None
    """
    import itertools  # local import keeps the cell self-contained

    combinations = itertools.product(
        random_seeds, model_types, cv_types,
        parcellations, connectome_targets, feature_types,
    )

    for seed, model, cv, parc, target, feat in combinations:
        # Set hemisphere based on parcellation (the `hemisphere` argument is ignored)
        current_hemisphere = 'both' if parc == 'S100' else 'left'

        # Pass the requested feature name through unchanged, so the run always
        # matches the name printed in the log line below.
        feat_dict = [{feat: None}]

        print(f"Running simulation with: {model}, {cv}, {parc}, {target}, {feat}, seed={seed}")

        # Run single simulation
        single_sim_run(
            cv_type=cv,
            random_seed=seed,
            model_type=model,
            feature_type=feat_dict,
            connectome_target=target,
            use_gpu=use_gpu,
            use_shared_regions=False,
            test_shared_regions=False,
            omit_subcortical=False,
            parcellation=parc,
            gene_list='0.2',
            hemisphere=current_hemisphere,
            search_method=('wandb', 'mse', 10),
            save_sim=False,
            track_wandb=True,
            skip_cv=False
        )

        # Clear GPU memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        # Clear CPU memory
        gc.collect()

# Example usage: sweep the shared transformer over both CV schemes, both
# parcellations and both connectome targets, using transcriptome features only.
# Note: run_simulation_set derives the hemisphere from the parcellation, so the
# `hemisphere` value passed here does not affect the runs.
example_sweep = dict(
    model_types=['shared_transformer'],
    cv_types=['random', 'spatial'],
    parcellations=['S100', 'S400'],
    connectome_targets=['FC', 'SC'],
    feature_types=['transcriptome'],
    hemisphere='both',
    random_seeds=[1, 2],
)
run_simulation_set(**example_sweep)

Running simulation with: shared_transformer, random, S100, FC, transcriptome, seed=1
Number of components for 95% variance PCA: 34
X shape: (114, 10760)
X_pca shape: (114, 34)
Y_sc shape: (114, 114)
Y_sc_spectralL shape: (114, 113)
Y_sc_spectralA shape: (114, 114)
Y_fc shape: (114, 114)
Coordinates shape: (114, 3)
Y shape (114, 114)
feature_name:  transcriptome
processing_type:  None
X shape (114, 10760)

 Test fold num: 1 X_train shape: (7140, 21520) Y_train shape: (7140,) X_test shape: (812, 21520) Y_test shape: (812,)


  return LooseVersion(v) >= LooseVersion(check)

ERROR: Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33masratzan[0m ([33malexander-ratzan-new-york-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


2
3
4




Create sweep with ID: vv7aubvm
Sweep URL: https://wandb.ai/alexander-ratzan-new-york-university/gx2conn/sweeps/vv7aubvm


[34m[1mwandb[0m: Agent Starting Run: 24ts0981 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	deep_hidden_dims: [512, 256, 128]
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	encoder_output_dim: 1
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	input_dim: 21520
[34m[1mwandb[0m: 	lambda_reg: 0
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	nhead: 2
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	token_encoder_dim: 10
[34m[1mwandb[0m: 	transformer_dropout: 0.3
[34m[1mwandb[0m: 	use_positional_encoding: True
[34m[1mwandb[0m: 	weight_decay: 0.002
ERROR: Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
  self._sweep_config = config_util.dict_from_config_file(

  self.comm = Comm(**args)

  from IPython.core.display import HTML, display  # type: ignore



Processing inner fold 0





Epoch 5/100, Train Loss: 0.3196, Val Loss: 0.0260
Epoch 10/100, Train Loss: 0.2565, Val Loss: 0.0265
Epoch 15/100, Train Loss: 0.2307, Val Loss: 0.0373
Epoch 20/100, Train Loss: 0.1961, Val Loss: 0.0228
Epoch 25/100, Train Loss: 0.1842, Val Loss: 0.0298
