In [6]:
import mlflow
from mlflow.tracking import MlflowClient
from concurrent.futures import ThreadPoolExecutor

In [None]:
#!docker compose up mlflow

In [8]:
import mlflow
import random
import time
import functools
from concurrent.futures import ThreadPoolExecutor

# --- Configuration ---
# 1. Set the number of concurrent runs
NUM_CONCURRENT_RUNS = 4
# 2. Set the total duration each run should log for (in seconds)
LOGGING_DURATION_SECONDS = 20 # e.g., log metrics for 20 seconds
# 3. Set the interval between logging metrics (in seconds)
LOG_INTERVAL_SECONDS = 2 # e.g., log a new metric every 2 seconds
# 4. Set the MLflow tracking URI and the MLflow Experiment Name
MLFLOW_TRACKING_URI = "http://localhost:5000"
MLFLOW_EXPERIMENT_NAME = "Test"

# Apply the MLflow settings once, before any run is started.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

# --- The function for a single MLflow Run (now logs over time) ---
def log_metrics_for_duration(run_index, duration_sec, interval_sec):
    """
    Start one MLflow run and log a random metric repeatedly for a fixed time.

    A run named ``continuous_run_<run_index + 1>`` is opened. While fewer than
    ``duration_sec`` seconds have passed since the function was entered, a
    ``random_value_stream`` metric is logged with an increasing ``step`` and
    the thread sleeps until roughly the next multiple of ``interval_sec``.
    After the loop, the measured duration and the number of logged steps are
    stored as ``total_logging_duration_actual`` and ``total_steps_logged``.

    Errors are reported with ``print`` and are not re-raised to the caller.
    When an error occurs while the run is open, the run is tagged
    ``status=FAILED`` and the error message is stored in the ``error`` param
    before the run is closed.

    Args:
        run_index: Zero-based index of this run; used for the run name and in
            the printed progress messages.
        duration_sec: How long, in seconds, to keep logging.
        interval_sec: Target spacing, in seconds, between logged values. Must
            be greater than zero because it is used as a modulus when the
            sleep time is computed.

    Returns:
        None.
    """
    run_name = f"continuous_run_{run_index + 1}"
    start_time = time.time()
    step = 0
    print(f"Starting Run {run_index + 1} (Name: {run_name}) - will log for {duration_sec} seconds.")
    try:
        # The whole logging loop happens inside this single run.
        with mlflow.start_run(run_name=run_name):
            try:
                while (time.time() - start_time) < duration_sec:
                    random_metric = random.random()  # Random float 0.0 to 1.0
                    # Logging with a step lets MLflow show the values as a series.
                    mlflow.log_metric("random_value_stream", random_metric, step=step)
                    print(f"  Run {run_index + 1}: Step {step}, Logged random_value_stream = {random_metric:.4f}")
                    step += 1
                    # Time left until the next multiple of interval_sec, counted
                    # from start_time, so the cadence does not drift.
                    time_to_wait = max(0, interval_sec - (time.time() - start_time) % interval_sec)
                    # If the next tick is less than 0.1 s away, sleep a full interval instead.
                    time.sleep(time_to_wait if time_to_wait > 0.1 else interval_sec)
                end_time = time.time()
                mlflow.log_metric("total_logging_duration_actual", end_time - start_time)
                mlflow.log_metric("total_steps_logged", step)
                print(f"Finished Run {run_index + 1} (Name: {run_name}) after ~{end_time - start_time:.2f} seconds.")
            except Exception as e:
                # Record the failure here, while the run is still open: once the
                # exception leaves the `with` block the run has been ended and
                # there is no active run left to annotate.
                try:
                    mlflow.set_tag("status", "FAILED")
                    mlflow.log_param("error", str(e))
                except Exception as annotate_error:
                    # Best effort only; do not let this hide the original error.
                    print(f"!!! Run {run_index + 1} (Name: {run_name}) could not record failure details: {annotate_error}")
                # Re-raise so the context manager sees the failure when it closes the run.
                raise
    except Exception as e:
        print(f"!!! Run {run_index + 1} (Name: {run_name}) failed during logging: {e}")


# --- Parallel Execution ---
print(f"\nStarting {NUM_CONCURRENT_RUNS} MLflow runs concurrently...")
print(f"Each run will log metrics every {LOG_INTERVAL_SECONDS} seconds for a total of {LOGGING_DURATION_SECONDS} seconds.")

# Pre-fill the timing arguments so that each worker only needs its run index:
# task_with_args(i) is log_metrics_for_duration(i, duration_sec=..., interval_sec=...).
task_with_args = functools.partial(
    log_metrics_for_duration,
    duration_sec=LOGGING_DURATION_SECONDS,
    interval_sec=LOG_INTERVAL_SECONDS,
)

# One worker thread per run, so every run can log at the same time.
with ThreadPoolExecutor(max_workers=NUM_CONCURRENT_RUNS) as executor:
    # map() hands one run index to each call; draining its iterator with
    # list() blocks until every run has returned.
    list(executor.map(task_with_args, range(NUM_CONCURRENT_RUNS)))


Starting 4 MLflow runs concurrently...
Each run will log metrics every 2 seconds for a total of 20 seconds.
Starting Run 1 (Name: continuous_run_1) - will log for 20 seconds.
Starting Run 2 (Name: continuous_run_2) - will log for 20 seconds.
Starting Run 3 (Name: continuous_run_3) - will log for 20 seconds.
Starting Run 4 (Name: continuous_run_4) - will log for 20 seconds.
  Run 2: Step 0, Logged random_value_stream = 0.3977
  Run 3: Step 0, Logged random_value_stream = 0.6441
  Run 1: Step 0, Logged random_value_stream = 0.5169
  Run 4: Step 0, Logged random_value_stream = 0.8061
  Run 2: Step 1, Logged random_value_stream = 0.4131
  Run 1: Step 1, Logged random_value_stream = 0.0742
  Run 3: Step 1, Logged random_value_stream = 0.3946
  Run 4: Step 1, Logged random_value_stream = 0.8674
  Run 2: Step 2, Logged random_value_stream = 0.0243
  Run 1: Step 2, Logged random_value_stream = 0.9829
  Run 3: Step 2, Logged random_value_stream = 0.4561
  Run 4: Step 2, Logged random_value_str

In [None]:
import mlflow
import random
import time
from concurrent.futures import ThreadPoolExecutor

# --- Configuration ---
# 1. Set the number of concurrent runs you want
NUM_CONCURRENT_RUNS = 4

# 2. Set the MLflow tracking URI and the MLflow Experiment Name
MLFLOW_TRACKING_URI = "http://localhost:5000"
MLFLOW_EXPERIMENT_NAME = "Test"

# Apply the MLflow settings once, before any run is started.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

# --- The function for a single MLflow Run ---
def log_single_random_metric(run_index):
    """
    Open one MLflow run, log a single random value to it, then pause briefly.

    This is the unit of work handed to each worker thread. The run is named
    ``simple_run_<run_index + 1>``. Any exception raised while the run is
    being created or used is printed instead of being propagated.

    Args:
        run_index: Zero-based index of the run; used for the run name and in
            the printed messages.

    Returns:
        None.
    """
    run_number = run_index + 1
    run_name = f"simple_run_{run_number}"
    try:
        with mlflow.start_run(run_name=run_name):
            # Draw a float in [0.0, 1.0) and store it as the run's only metric.
            sampled_value = random.random()
            mlflow.log_metric("random_value", sampled_value)
            print(f"Run {run_number} (Name: {run_name}): Logged random_value = {sampled_value:.4f}")
            # Keep the run open for 0.5-1.5 s so the overlap between runs is visible.
            pause_seconds = random.uniform(0.5, 1.5)
            time.sleep(pause_seconds)
    except Exception as error:
        print(f"!!! Run {run_number} (Name: {run_name}) failed: {error}")


# --- Parallel Execution ---
print(f"\nStarting {NUM_CONCURRENT_RUNS} MLflow runs concurrently...")

# One worker thread per run, so every run can be in flight at the same time.
with ThreadPoolExecutor(max_workers=NUM_CONCURRENT_RUNS) as executor:
    # map() schedules log_single_random_metric(i) for each run index i.
    results_iter = executor.map(log_single_random_metric, range(NUM_CONCURRENT_RUNS))
    # Draining the iterator blocks until every call has returned.
    list(results_iter)


Starting 4 MLflow runs concurrently...
Run 4 (Name: simple_run_4): Logged random_value = 0.9255
Run 2 (Name: simple_run_2): Logged random_value = 0.0744
Run 3 (Name: simple_run_3): Logged random_value = 0.9227
Run 1 (Name: simple_run_1): Logged random_value = 0.4973
🏃 View run simple_run_4 at: http://localhost:5000/#/experiments/363844526062362550/runs/8c18e448f86d486a87487a0a1afa307d
🧪 View experiment at: http://localhost:5000/#/experiments/363844526062362550
🏃 View run simple_run_1 at: http://localhost:5000/#/experiments/363844526062362550/runs/1350d7a775bb49959b115266f7e6891e
🧪 View experiment at: http://localhost:5000/#/experiments/363844526062362550
🏃 View run simple_run_2 at: http://localhost:5000/#/experiments/363844526062362550/runs/c815a58fa93247d3b849b91fdc2843c9
🧪 View experiment at: http://localhost:5000/#/experiments/363844526062362550
🏃 View run simple_run_3 at: http://localhost:5000/#/experiments/363844526062362550/runs/ebe09409781f404d9fcca25212c39