In [2]:
import GPTNeoXColab
import os
from pathlib import Path
# Ask the project's Colab helper for the project root (presumably the
# repository checkout directory -- verify against GPTNeoXColab.utils.colab).
ROOT_DIR = GPTNeoXColab.utils.colab.find_project_root()
# Same location expressed relative to the current working directory; the
# Hydra `initialize(config_path=...)` call in a later cell is built from it.
RELATIVE_ROOT_DIR = os.path.relpath(ROOT_DIR, Path.cwd())

In [None]:
# Experiment selector. The value is used twice further down: as the Hydra
# override `experiments=<experiment_name>` when composing the config, and as
# the MLflow experiment name. Change it to load a different experiment.
experiment_name = "experiment1"

In [22]:
import os
import dagshub
import mlflow
from omegaconf import DictConfig, OmegaConf
from hydra.core.global_hydra import GlobalHydra
from hydra import initialize, compose

# Clear Hydra's global state if it's already initialized, so this cell can be
# re-run in the same kernel without `initialize` being called on live state.
if GlobalHydra.instance().is_initialized():
    GlobalHydra.instance().clear()

# Point Hydra at <project root>/configs, using the relative path computed in
# the first cell.
initialize(config_path=f"{RELATIVE_ROOT_DIR}/configs", version_base="1.1")

# Compose the primary config named "hydra" and select the option of the
# `experiments` config group that matches `experiment_name`.
cfg = compose(config_name="hydra", overrides=[f"experiments={experiment_name}"])

# Set MLflow tracking URI for DagsHub (owner and repo name come from the
# composed config's `dagshub` section).
tracking_uri = f"https://dagshub.com/{cfg.dagshub.repo_owner}/{cfg.dagshub.repo_name}.mlflow"
mlflow.set_tracking_uri(tracking_uri)

# Initialize DagsHub logging. This is best-effort: a failure is printed and
# NOT re-raised, so the rest of the cell still executes.
# NOTE(review): the tracking URI above stays pointed at DagsHub even when
# init fails -- confirm that later MLflow calls behave acceptably in that case.
try:
    dagshub.init(repo_owner=cfg.dagshub.repo_owner, repo_name=cfg.dagshub.repo_name, mlflow=True)
except Exception as e:
    print(f"Failed to initialize DagsHub logging: {e}")

def _flatten_params(params, prefix=""):
    """Flatten a nested dict into a single-level dict with dot-joined keys.

    Example: {"dagshub": {"repo_owner": "x"}} -> {"dagshub.repo_owner": "x"}.
    Non-dict values (including lists) are kept as-is; an empty nested dict is
    kept as a value so the key is not silently dropped.
    """
    flat = {}
    for key, value in params.items():
        dotted_key = f"{prefix}.{key}" if prefix else str(key)
        if isinstance(value, dict) and value:
            flat.update(_flatten_params(value, dotted_key))
        else:
            flat[dotted_key] = value
    return flat


def train_model(cfg: DictConfig):
    """Run the placeholder training loop and log it to MLflow.

    Prints the experiment name and the full config as YAML, logs every config
    value as an MLflow parameter, then logs ten epochs of synthetic
    `train_loss` / `train_accuracy` metrics.

    Args:
        cfg: Composed Hydra/OmegaConf config. Must expose `experiment_name`
            at the top level.

    Returns:
        None. All results are emitted through `print` and MLflow logging
        calls; in this notebook the caller wraps the call in
        `mlflow.start_run()`.
    """
    # NOTE(review): the recorded output of this cell prints `base_experiment`
    # here even though the `experiments=experiment1` override was applied;
    # the override's values show up nested under `experiments` instead.
    # Confirm whether the experiment config is meant to merge at the root.
    print("Running experiment:", cfg.experiment_name)
    print(OmegaConf.to_yaml(cfg))

    # Log parameters. The resolved config is nested (e.g. `dagshub`,
    # `experiments`); flatten it to dotted keys so each leaf becomes its own
    # MLflow parameter instead of one stringified dict per config group.
    resolved_cfg = OmegaConf.to_container(cfg, resolve=True)
    mlflow.log_params(_flatten_params(resolved_cfg))

    # Example dummy training loop
    for epoch in range(10):
        loss = 0.4 - epoch * 0.01  # Dummy decreasing loss
        accuracy = epoch * 0.1     # Dummy increasing accuracy
        mlflow.log_metric("train_loss", loss, step=epoch)
        mlflow.log_metric("train_accuracy", accuracy, step=epoch)

    print("Training complete.")

# Set the experiment name and start the run. The MLflow experiment is named
# after `experiment_name` (the recorded output shows MLflow creating it when
# it does not exist yet), and the training function is executed inside the
# `mlflow.start_run()` context so its params and metrics belong to that run.
mlflow.set_experiment(experiment_name)
with mlflow.start_run():
    train_model(cfg)


Tracking URI: https://dagshub.com/MarkNZed/GPT-NeoX-Colab.mlflow


2024/11/10 13:35:17 INFO mlflow.tracking.fluent: Experiment with name 'experiment1' does not exist. Creating a new experiment.


Running experiment: base_experiment
output_dir: ../outputs
experiment_name: base_experiment
seed: 42
dagshub:
  repo_owner: MarkNZed
  repo_name: GPT-NeoX-Colab
experiments:
  experiment_name: experiment_1
  seed: 123

Training complete.


2024/11/10 13:35:30 INFO mlflow.tracking._tracking_service.client: 🏃 View run awesome-gnat-811 at: https://dagshub.com/MarkNZed/GPT-NeoX-Colab.mlflow/#/experiments/2/runs/f96c7424acb44f34a69f3c723905f2d6.
2024/11/10 13:35:30 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/MarkNZed/GPT-NeoX-Colab.mlflow/#/experiments/2.
