Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions _toc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ parts:
- caption: Tutorials
chapters:
- file: tutorials/brain-disorder-diagnosis/notebook
- file: tutorials/drug-target-interaction/notebook-cross-domain
109 changes: 109 additions & 0 deletions tutorials/drug-target-interaction/configs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from yacs.config import CfgNode

# Root node of the default configuration; each section below is attached to it.
_C = CfgNode()

# ---------------------------------------------------------------------------- #
# DATA setting
# ---------------------------------------------------------------------------- #
_C.DATA = CfgNode()
# Name of the dataset to use. None by default; the experiment YAML files set it
# (e.g. "bindingdb").
_C.DATA.DATASET = None
# Data splitting strategy. None by default; the experiment YAML files set it
# (e.g. "random" or "cluster").
_C.DATA.SPLIT = None

# ---------------------------------------------------------------------------- #
# Drug feature extractor
# ---------------------------------------------------------------------------- #
_C.DRUG = CfgNode()
# Number of input node features
_C.DRUG.NODE_IN_FEATS = 7
# Dimensionality of the input node features after the linear transformation
_C.DRUG.NODE_IN_EMBEDDING = 128
# Whether to apply padding
_C.DRUG.PADDING = True
# Sizes of the hidden layers in the GCN feature extractor
_C.DRUG.HIDDEN_LAYERS = [128, 128, 128]
# Maximum number of nodes to pad to (used when PADDING=True)
_C.DRUG.MAX_NODES = 290

# ---------------------------------------------------------------------------- #
# Protein feature extractor
# ---------------------------------------------------------------------------- #
_C.PROTEIN = CfgNode()
# Number of filters in each convolutional layer
_C.PROTEIN.NUM_FILTERS = [128, 128, 128]
# Kernel size for each convolutional layer
_C.PROTEIN.KERNEL_SIZE = [3, 6, 9]
# Dimension of the character embedding for amino acids
_C.PROTEIN.EMBEDDING_DIM = 128
# Whether to apply zero-padding to the embedding
_C.PROTEIN.PADDING = True

# ---------------------------------------------------------------------------- #
# BCN setting
# ---------------------------------------------------------------------------- #
# NOTE(review): the section is named BCN while the comment below refers to the
# Bilinear Attention Network (BAN) — confirm the naming is intentional.
_C.BCN = CfgNode()
_C.BCN.HEADS = 2  # Number of attention heads in the Bilinear Attention Network

# ---------------------------------------------------------------------------- #
# MLP decoder
# ---------------------------------------------------------------------------- #
_C.DECODER = CfgNode()
_C.DECODER.NAME = "MLP"  # Decoder type
# Input dimension to the MLP (typically the fused BAN feature size)
_C.DECODER.IN_DIM = 256
_C.DECODER.HIDDEN_DIM = 512  # Hidden layer size in the MLP
# Output dimension before the final classification layer
_C.DECODER.OUT_DIM = 128
# Number of output classes (the experiment YAML files override this with 1 or 2)
_C.DECODER.BINARY = 1

# ---------------------------------------------------------------------------- #
# SOLVER
# ---------------------------------------------------------------------------- #
_C.SOLVER = CfgNode()
# Total number of training epochs
_C.SOLVER.MAX_EPOCH = 100
# Batch size for training and evaluation
_C.SOLVER.BATCH_SIZE = 64
# Number of subprocesses for data loading
_C.SOLVER.NUM_WORKERS = 0
# Learning rate for the main model
_C.SOLVER.LEARNING_RATE = 5e-5
# Learning rate for the domain adaptation part (if DA is enabled)
_C.SOLVER.DA_LEARNING_RATE = 1e-3
# Random seed for reproducibility
_C.SOLVER.SEED = 2048

# ---------------------------------------------------------------------------- #
# RESULT
# ---------------------------------------------------------------------------- #
_C.RESULT = CfgNode()
# Whether to save model checkpoints during training
_C.RESULT.SAVE_MODEL = True

# ---------------------------------------------------------------------------- #
# Domain adaptation
# ---------------------------------------------------------------------------- #
_C.DA = CfgNode()
# Task type: False = in-domain splitting task, True = cross-domain splitting task
_C.DA.TASK = False
# Domain adaptation method to use
_C.DA.METHOD = "CDAN"
# Whether to enable domain adaptation
_C.DA.USE = False
# Number of epochs to wait before applying domain adaptation
_C.DA.INIT_EPOCH = 10
# Initial value of λ (lambda), the weight of the domain adaptation loss:
#   total loss = model loss + λ * domain loss
_C.DA.LAMB_DA = 1
# Whether to use a random projection layer in CDAN
_C.DA.RANDOM_LAYER = False
# True: use the original RandomLayer from the CDAN paper (multi-input form).
# False: use a simplified linear layer implementation.
_C.DA.ORIGINAL_RANDOM = False
# Output dimensionality of the random layer (only used if RANDOM_LAYER is True)
_C.DA.RANDOM_DIM = None
# Whether to use entropy-based weighting when computing the domain adversarial loss
_C.DA.USE_ENTROPY = True

# ---------------------------------------------------------------------------- #
# Comet config; ignore it if Comet is not installed.
# ---------------------------------------------------------------------------- #
_C.COMET = CfgNode()
# Enable Comet logging (set True if Comet is installed and configured).
# NOTE(review): this defaults to True while API_KEY below is empty — confirm
# the default is intended for users without Comet.
_C.COMET.USE = True
# Comet project name (if applicable)
_C.COMET.PROJECT_NAME = "drugban-23-May"
# Optional experiment name (e.g., 'drugban-run-1')
_C.COMET.EXPERIMENT_NAME = None
# Comet tags (optional)
_C.COMET.TAG = None
# Comet API key (leave blank if unused)
_C.COMET.API_KEY = ""


# ---------------------------------------------------------------------------- #
# Function to return a clone of the default config
# ---------------------------------------------------------------------------- #
def get_cfg_defaults():
    """Return a clone of the module-level default configuration.

    Returns:
        A copy of ``_C`` produced by ``_C.clone()``, so the caller receives
        its own config object rather than the shared defaults.
    """
    defaults = _C.clone()
    return defaults
30 changes: 30 additions & 0 deletions tutorials/drug-target-interaction/experiments/DA_cross_domain.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Cross-domain experiment configuration: DrugBAN with domain adaptation.
# Values given here override the defaults of the same name in configs.py.

DATA:
DATASET: "bindingdb" # Dataset name, e.g. bindingdb or biosnap
SPLIT: "cluster" # Data splitting strategy

SOLVER:
BATCH_SIZE: 32
MAX_EPOCH: 100
LEARNING_RATE: 1e-4 # Learning rate for the main model
DA_LEARNING_RATE: 5e-5 # Learning rate for the domain adaptation part
SEED: 20

DA:
TASK: True # True = cross-domain splitting task
USE: True # Domain adaptation enabled
METHOD: "CDAN"
USE_ENTROPY: False
RANDOM_LAYER: True
ORIGINAL_RANDOM: True
RANDOM_DIM: 256 # Output dimensionality of the random layer
INIT_EPOCH: 10 # Epochs to wait before applying domain adaptation

DECODER:
BINARY: 2 # Number of output classes

# The settings below only matter when you use Comet.
COMET:
EXPERIMENT_NAME: "DA_cross_domain"
TAG: "DrugBAN_CDAN"
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Cross-domain experiment configuration: DrugBAN without domain adaptation.
# Values given here override the defaults of the same name in configs.py.


DATA:
DATASET: "bindingdb" # Dataset name, e.g. bindingdb or biosnap
SPLIT: "cluster" # Data splitting strategy

SOLVER:
BATCH_SIZE: 32
MAX_EPOCH: 100
LEARNING_RATE: 5e-5 # Learning rate for the main model
SEED: 20

DA:
TASK: True # True = cross-domain splitting task
USE: False # Domain adaptation disabled

DECODER:
BINARY: 2 # Number of output classes

# The settings below only matter when you use Comet.
COMET:
EXPERIMENT_NAME: "Non_DA_cross_domain"
TAG: "DrugBAN_Vanilla"
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# In-domain experiment configuration: DrugBAN without domain adaptation.
# Values given here override the defaults of the same name in configs.py.

DATA:
DATASET: "bindingdb" # Dataset name, e.g. bindingdb or biosnap
SPLIT: "random" # Data splitting strategy

SOLVER:
BATCH_SIZE: 64
MAX_EPOCH: 100
LEARNING_RATE: 5e-5 # Learning rate for the main model
SEED: 20

DA:
TASK: False # False = in-domain splitting task
USE: False # Domain adaptation disabled

DECODER:
BINARY: 1 # Number of output classes

# The settings below only matter when you use Comet.
COMET:
EXPERIMENT_NAME: "Non_DA_in_domain"
TAG: "DrugBAN_Vanilla"
Loading