In [1]:
import os
import shutil
import contextlib
from pathlib import Path

import numpy as np
import nbformat
import papermill as pm

#################################
# --- Experiment parameters ---
#################################
# One seed of each kind per run: model seeds are 0..9, data seeds are 10..19.
model_seeds = np.arange(10)
data_seeds = np.arange(10, 20)
print(model_seeds)

#################################
# --- Generation Parameters ---
#################################
# Folder that holds the template notebook together with its code files.
TEMPLATE_FOLDER = Path("base folder")
# Notebook inside that folder whose parameters cell is rewritten for each run.
TEMPLATE_NOTEBOOK = TEMPLATE_FOLDER.joinpath("Train Low Test Models.ipynb")
# Cell tag used to locate the code cell that holds the run parameters.
PARAM_CELL_TAG = "parameters"

#################################
# --- Helper Functions ---
#################################
@contextlib.contextmanager
def working_dir(path: Path):
    """Make *path* the current working directory for the body of a ``with`` block.

    The directory that was current on entry is restored when the block exits,
    whether it finishes normally or raises.
    """
    origin = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Always return to where we started, even if the body failed.
        os.chdir(origin)
        
def copy_code_files(src_folder: Path, dest_folder: Path, exclude: "Path | None" = None):
    """
    Copy all notebooks (*.ipynb) and Python scripts (*.py) from src_folder to dest_folder.

    Only regular files directly inside src_folder are considered; the search is
    not recursive. Files are copied under their original name, overwriting any
    file of the same name already present in dest_folder.

    Args:
        src_folder: Folder to scan for code files.
        dest_folder: Existing folder that receives the copies.
        exclude: Optional path to skip. It is compared by equality with the
            paths produced by ``src_folder.glob``, so it should be spelled
            relative to the same base as src_folder (e.g. ``src_folder / name``).
    """
    for file_path in src_folder.glob("*"):
        if file_path.suffix not in {".ipynb", ".py"}:
            continue
        # glob("*") also yields directories, and a directory may carry a
        # matching suffix (e.g. a folder named "utils.py"). shutil.copy cannot
        # copy a directory, so skip anything that is not a regular file.
        if not file_path.is_file():
            continue
        if exclude is not None and file_path == exclude:
            continue
        shutil.copy(file_path, dest_folder / file_path.name)

def update_parameter_cell(nb_path: Path, model_seed: int, data_seed: int) -> nbformat.NotebookNode:
    """Load a notebook and replace the source of its parameters cell.

    The first code cell tagged ``PARAM_CELL_TAG`` has its source overwritten
    with a fixed configuration block in which only the two seeds vary. The
    notebook is modified in memory only; nothing is written to disk here.

    Args:
        nb_path: Notebook file to read.
        model_seed: Value written to ``model_seed`` in the parameters cell.
        data_seed: Value written to ``data_seed`` in the parameters cell.

    Returns:
        The loaded notebook with its parameters cell rewritten.

    Raises:
        ValueError: If no code cell carries the ``PARAM_CELL_TAG`` tag. Without
            this check the notebook would be returned unchanged and executed
            with whatever parameters the template happens to contain.
    """
    nb = nbformat.read(nb_path, as_version=nbformat.NO_CONVERT)
    for cell in nb.cells:
        if cell.cell_type == "code" and PARAM_CELL_TAG in cell.metadata.get("tags", []):
            cell.source = f"""
# ==============================
# Base Input Parameters
# ==============================
# --- SEEDS ---
data_seed = {data_seed}            
model_seed = {model_seed}           

# --- Training configuration ---
epochs = 200            

# --- Dataset configuration ---
base_data_size = 50      
dataset_type = "data"   
dataset_quantities = [0, 500-50, 2000 - 50, 5000 - 50, 20000 - 50, 50000 - 50]

# --- Output configuration ---
base_output_dir = ""     
save_generated_dataset = True   
save_generated_models = True    
"""
            # Only the first tagged cell is rewritten.
            break
    else:
        # The loop finished without a break: no tagged code cell exists.
        raise ValueError(
            f"No code cell tagged {PARAM_CELL_TAG!r} found in {nb_path}; "
            "cannot set model_seed/data_seed."
        )
    return nb

def run_notebook_in_folder(folder: Path, notebook_name: str):
    """Execute ``notebook_name`` with papermill while ``folder`` is the working directory.

    The same name is passed as both input and output, so the executed notebook
    is written back over the file it was read from. ``notebook_name`` is
    resolved relative to ``folder`` because the working directory is switched
    before papermill is called.
    """
    with working_dir(folder):
        pm.execute_notebook(
            input_path=notebook_name,
            output_path=notebook_name,
            kernel_name="python3",
        )

#################################
# --- Main Loop ---
#################################
# Run one experiment per (model_seed, data_seed) pair. zip() pairs the two seed
# arrays element by element (not a cross product) and stops at the shorter one.
for model_seed, data_seed in zip(model_seeds, data_seeds):
    # Each pair gets its own folder, named after the two seeds and created
    # relative to the current working directory.
    folder = Path(f"model_{model_seed}_data_{data_seed}")
    folder.mkdir(exist_ok=True, parents=True)

    # Copy the template folder's code files, skipping the template notebook
    # itself: a re-parameterised copy of it is written below.
    copy_code_files(TEMPLATE_FOLDER, folder, TEMPLATE_NOTEBOOK)

    # Build the notebook for this pair and save it under the template's file
    # name inside the run folder.
    nb = update_parameter_cell(TEMPLATE_NOTEBOOK, model_seed, data_seed)
    output_notebook = folder / TEMPLATE_NOTEBOOK.name
    nbformat.write(nb, output_notebook)

    # Execute the saved notebook from inside its folder. Only the bare file
    # name is passed because the helper receives the folder separately.
    run_notebook_in_folder(folder, output_notebook.name)

    print(f"✅ Ran notebook with model_seed={model_seed}, data_seed={data_seed} in folder {folder}")

print("Done!")

[0 1 2 3 4 5 6 7 8 9]


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

✅ Ran notebook with model_seed=0, data_seed=10 in folder model_0_data_10


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

✅ Ran notebook with model_seed=1, data_seed=11 in folder model_1_data_11


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

✅ Ran notebook with model_seed=2, data_seed=12 in folder model_2_data_12


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

✅ Ran notebook with model_seed=3, data_seed=13 in folder model_3_data_13


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

✅ Ran notebook with model_seed=4, data_seed=14 in folder model_4_data_14


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

✅ Ran notebook with model_seed=5, data_seed=15 in folder model_5_data_15


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

✅ Ran notebook with model_seed=6, data_seed=16 in folder model_6_data_16


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

✅ Ran notebook with model_seed=7, data_seed=17 in folder model_7_data_17


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

✅ Ran notebook with model_seed=8, data_seed=18 in folder model_8_data_18


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

✅ Ran notebook with model_seed=9, data_seed=19 in folder model_9_data_19
Done!
