# 1. Setup Environment & Configuration

In [None]:
import os
import torch

# Report whether PyTorch can see a CUDA device and, if so, which one.
cuda_available = torch.cuda.is_available()
print(f"CUDA Available: {cuda_available}")
if cuda_available:
    print(f"Device: {torch.cuda.get_device_name(0)}")

# Project layout. Every path is anchored to the absolute form of the current
# working directory, so adjust PROJECT_ROOT if the notebook is not started
# from the repository root.
PROJECT_ROOT = os.path.abspath(".")
WORKSPACE = os.path.join(PROJECT_ROOT, "workspace")
DATA_DIR = os.path.join(WORKSPACE, "data")
LOGS_DIR = os.path.join(WORKSPACE, "logs")
CONFIGS_DIR = os.path.join(PROJECT_ROOT, "config")

# Only the data and logs directories are created here; the models/storage
# sub-directories named in the .env file below are not.
for directory in (DATA_DIR, LOGS_DIR):
    os.makedirs(directory, exist_ok=True)

# Contents of the .env file required by src/utils/path_utils.py.
# The text deliberately starts with a newline (first line of the file is blank).
env_content = f"""
SOURCE_DATA_DIR={DATA_DIR}
LOGS_DIR={LOGS_DIR}
CONFIGS_DIR={CONFIGS_DIR}
MODELS_DIR={os.path.join(LOGS_DIR, "models")}
STORAGE_DIR={os.path.join(LOGS_DIR, "storage")}
"""

# The file is written relative to the current working directory and
# overwrites any existing .env there.
with open(".env", "w") as env_file:
    env_file.write(env_content)

print("Created .env file with the following configuration:")
print(env_content)

CUDA Available: True
Device: NVIDIA GeForce RTX 3070 Ti Laptop GPU
Created .env file with the following configuration:

SOURCE_DATA_DIR=/home/prg/GitHub/semi-supervised-gnn-drug-discovery/workspace/data
LOGS_DIR=/home/prg/GitHub/semi-supervised-gnn-drug-discovery/workspace/logs
CONFIGS_DIR=/home/prg/GitHub/semi-supervised-gnn-drug-discovery/config
MODELS_DIR=/home/prg/GitHub/semi-supervised-gnn-drug-discovery/workspace/logs/models
STORAGE_DIR=/home/prg/GitHub/semi-supervised-gnn-drug-discovery/workspace/logs/storage



## Experiment Runner Helper

In [13]:
import subprocess
import sys
import os


def run_experiment(module_path, overrides):
    """
    Run a training script as a module (``python -m <module_path>``) with
    Hydra-style overrides, streaming its output into the notebook.

    The child's stderr is merged into stdout so that a single pipe is read.
    Capturing stderr in a separate pipe that is only read after the process
    exits can deadlock: once the child fills the stderr pipe buffer it blocks,
    while the parent is still waiting for stdout to reach EOF.

    Args:
        module_path (str): Python module path (e.g. 'src.trainers.baseline_trainer').
            Surrounding whitespace is stripped.
        overrides (list | tuple | None): Override strings appended to the
            command line. ``None`` or an empty sequence means no overrides.

    Returns:
        None. Success or failure is reported by printing a status message.
    """
    module_path = module_path.strip()

    # Unpacking accepts any iterable of strings (list, tuple, ...) and None.
    cmd = [sys.executable, "-m", module_path, *(overrides or ())]

    print(f"Running: {' '.join(cmd)}\n" + "=" * 50)

    # The context manager closes the pipe and waits for the child on exit,
    # including when the cell is interrupted part-way through.
    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # single stream: no unread pipe to fill up
        text=True,
        bufsize=1,  # line-buffered on the parent side
        cwd=os.getcwd(),
    ) as process:
        # Echo the child's output line by line as it arrives.
        for line in process.stdout:
            print(line, end="")

    return_code = process.returncode

    if return_code != 0:
        # Any error output has already been streamed above with the rest.
        print(f"\nError running experiment. Return code: {return_code}")
    else:
        print("\nExperiment completed successfully.")


# Module paths of the trainer entry points, passed to run_experiment
# (which launches them via `python -m <module>`).
BASELINE_TRAINER, MEAN_TEACHER_TRAINER = (
    "src.trainers.baseline_trainer",
    "src.trainers.mean_teacher_trainer",
)

## Experiments

Dataset split fractions for each experiment, ordered [Unlabeled, Labeled, Val, Test]:

- Experiment 1: [0.35,0.35,0.1,0.2]
- Experiment 2: [0.56,0.14,0.1,0.2]
- Experiment 3: [0.63,0.07,0.1,0.2]
- Experiment 4: [0.67,0.03,0.1,0.2]

In [None]:
# Selection for the current experiment.
# Split fractions are ordered [Unlabeled, Labeled, Val, Test].
split = "[0.67,0.03,0.1,0.2]"
mode = "supervised"
# The original assignment had no value (a SyntaxError). SIDER is the dataset
# used by the experiment cells below; the alternatives are TOX21 and PCBA.
dataset = "SIDER"

### Supervised Baseline

In [None]:
# Split fractions for semi-supervised learning, ordered
# [Unlabeled, Labeled, Val, Test]. Configurations under study:
#   Experiment 1: [0.35,0.35,0.1,0.2]
#   Experiment 2: [0.56,0.14,0.1,0.2]
#   Experiment 3: [0.63,0.07,0.1,0.2]
#   Experiment 4: [0.67,0.03,0.1,0.2]
SSL_SPLITS = "[0.67,0.03,0.1,0.2]"

# --- Supervised baseline on SIDER ---
# Each entry is one command-line override handed to the baseline trainer.
sider_baseline_overrides = [
    "dataset=moleculenet",
    "dataset.init.name=SIDER",  # alternatives: TOX21, PCBA
    f"dataset.init.splits={SSL_SPLITS}",
    "dataset.init.batch_size_train=32",
    "dataset.init.mu=1",
    "trainer.init.max_epochs=100",  # reduced for demo purposes
    "logger.wandb.project=SIDER_Experiments",
]

run_experiment(
    module_path=BASELINE_TRAINER,
    overrides=sider_baseline_overrides,
)

Running: /home/prg/miniconda3/envs/monai_env/bin/python -m src.trainers.baseline_trainer dataset=moleculenet dataset.init.name=sider dataset.init.splits=[0.67,0.03,0.1,0.2] model=ginev2 trainer.init.max_epochs=100 logger.wandb.project=SIDER_Experiments
dataset:
  name: moleculenet
  init:
    _target_: src.data.moleculenet.MoleculeNetDataModule
    target: null
    batch_size_train: 256
    batch_size_inference: 64
    num_workers: 4
    splits:
    - 0.67
    - 0.03
    - 0.1
    - 0.2
    seed: 42
    mu: 22
    subset_size: null
    data_augmentation: false
    mode: supervised
    name: sider
model:
  name: GINEv2
  init:
    _target_: src.models.nets.gine_v2.GINEv2
    _recursive_: false
    embedding_dim: 16
    hidden_channels: 256
    encoder_num_heads: 4
    encoder_dropout: 0.1
    num_gnn_layers: 4
    gnn_mlp_layers: 2
    readout_mlp_layers: 2
    dropout: 0.5
    activation: relu
    pooling_type: mean
    use_residual: true
    learn_eps: true
lightning_module:
  init:
 

### Mean Teacher

In [None]:
# --- 3b. Mean Teacher (SIDER) ---
# Split fractions for semi-supervised learning, ordered
# [Unlabeled, Labeled, Val, Test]. Configurations under study:
#   Experiment 1: [0.35,0.35,0.1,0.2]
#   Experiment 2: [0.56,0.14,0.1,0.2]
#   Experiment 3: [0.63,0.07,0.1,0.2]
#   Experiment 4: [0.67,0.03,0.1,0.2]
SSL_SPLITS = "[0.67,0.03,0.1,0.2]"

# Each entry is one command-line override handed to the mean teacher trainer.
sider_mt_overrides = [
    "dataset=moleculenet",
    "dataset.init.name=SIDER",  # alternatives: TOX21, PCBA
    f"dataset.init.splits={SSL_SPLITS}",
    "dataset.init.batch_size_train=32",
    "dataset.init.mu=1",
    "trainer.init.max_epochs=100",  # reduced for demo purposes
    "logger.wandb.project=SIDER_Experiments",
]

run_experiment(
    module_path=MEAN_TEACHER_TRAINER,
    overrides=sider_mt_overrides,
)