In [1]:
import sys
import os
import hydra
from omegaconf import OmegaConf

# Put the parent of the current working directory on the import path.
# NOTE(review): presumably this is the repository root that holds the
# packages named by the config `_target_` entries — confirm.
# The membership check keeps re-runs of this cell from appending the same
# directory to sys.path over and over.
parent_dir = os.path.abspath('..')
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Hydra itself is initialized in the next cell.

In [2]:
# Hydra keeps process-wide state, and hydra.initialize() raises if it is
# called while an earlier initialization is still active. Clearing that state
# first makes this cell safe to re-run without restarting the kernel.
from hydra.core.global_hydra import GlobalHydra

GlobalHydra.instance().clear()
hydra.initialize(config_path="../config", version_base="1.1")

# Choose which config to load
config_name = "essential_genes_exp"  # Change this to use a different config
print(f"Loading config: {config_name}")

# Compose the config, selecting the `ddpm` option for the `generator` group
# and the `diffusion_gnn` option for the `model` group.
cfg = hydra.compose(
    config_name=config_name,
    overrides=["generator=ddpm", "model=diffusion_gnn"],
)

# Show the composed config as YAML; ${...} interpolations are printed
# unresolved.
print(OmegaConf.to_yaml(cfg))

Loading config: essential_genes_exp
dataset:
  _target_: datasets.essential_genes_dataset.EssentialGenesDataset
  h5ad_file: /orcd/data/omarabu/001/njwfish/DistributionEmbeddings/data/essential_genes/essential_gene_knockouts_raw.h5ad
  set_size: 100
  data_shape:
  - 11907
  seed: ${seed}
encoder:
  _target_: encoder.encoders.DistributionEncoderGNN
  in_dim: ${dataset.data_shape[0]}
  latent_dim: ${experiment.latent_dim}
  hidden_dim: ${experiment.hidden_dim}
  set_size: ${experiment.set_size}
  layers: 2
  fc_layers: 2
model:
  _target_: model.gnn.GNN
  gnn_dim: ${dataset.data_shape[0]}
  in_dims:
  - ${experiment.latent_dim}
  - 1
  hidden_dim: ${experiment.hidden_dim}
  embedding_dim: 64
  shared_embedding_dim: 128
generator:
  _target_: generator.ddpm.DDPM
  model: ${model}
  betas:
  - 0.0001
  - 0.02
  n_T: 400
  drop_prob: 0.1
  noise_shape: ${dataset.data_shape}
optimizer:
  _target_: torch.optim.Adam
  _partial_: true
  lr: ${experiment.lr}
  betas:
  - 0.9
  - 0.999
  eps: 1.

In [3]:
from torch.utils.data import DataLoader

# Build the dataset object from the `dataset` section of the composed config;
# Hydra constructs whatever class that section's `_target_` names.
dataset = hydra.utils.instantiate(config=cfg.dataset)
        


(966728, 11907)
Loaded 9240 sets (cell type x perturbation combinations)


In [4]:
# Wrap the dataset in a shuffling loader that yields batches of 8 items.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=8,
    shuffle=True,
)

In [5]:
# Instantiate the encoder from the `encoder` section of the composed config.
encoder = hydra.utils.instantiate(config=cfg.encoder)

In [6]:
# Instantiate the generator from the `generator` section of the composed
# config. That section references `${model}`, so presumably the model is
# built as part of this call; it is read back later as `generator.model`.
generator = hydra.utils.instantiate(config=cfg.generator)

In [7]:
# One flat list holding the encoder's parameters followed by those of the
# generator's inner model; this is what the optimizer will update.
model_parameters = [*encoder.parameters(), *generator.model.parameters()]

# cfg.optimizer sets `_partial_: true`, so instantiate() returns a factory
# that still has to be called with the parameters.
make_optimizer = hydra.utils.instantiate(cfg.optimizer)
optimizer = make_optimizer(params=model_parameters)

# The scheduler is built the same two-step way and bound to the optimizer.
# NOTE(review): cfg.scheduler is not visible in the printed config above —
# presumably it is also a partial; confirm.
make_scheduler = hydra.utils.instantiate(cfg.scheduler)
scheduler = make_scheduler(optimizer=optimizer)

# The trainer comes straight from the `training` section of the config.
trainer = hydra.utils.instantiate(cfg.training)

In [8]:
# Run training with the objects built in the cells above; results are written
# under ../outputs (resolved to an absolute path first).
# NOTE(review): the saved output of this cell is a CUDA OutOfMemoryError
# (a 36.34 GiB allocation) printed right after "sampling timestep 400", so the
# recorded run did not finish. The cause is not visible from this notebook —
# investigate the sampling path before relying on this cell.
outputs_root = os.path.abspath('../outputs')
output_dir, stats = trainer.train(
    encoder=encoder,
    generator=generator,
    dataloader=dataloader,
    optimizer=optimizer,
    scheduler=scheduler,
    output_dir=outputs_root,
    config=cfg,
)

sampling timestep 400

OutOfMemoryError: CUDA out of memory. Tried to allocate 36.34 GiB. GPU 0 has a total capacity of 79.10 GiB of which 17.42 GiB is free. Including non-PyTorch memory, this process has 61.67 GiB memory in use. Of the allocated memory 59.98 GiB is allocated by PyTorch, and 994.54 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)