In [None]:
"""Collect metrics from failed comet-ml uploads and sent them back to comet-ml cloud."""
# pylint: disable=import-error,redefined-outer-name

In [None]:
from __future__ import annotations

import re
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List

from comet_ml.api import API  # type: ignore

The log files containing failed uploads were found with:

```bash
[rabyj@narval4 logs-dfreeze-2.1]$ find . -type f -name "*.e" -exec grep -lc "COMET WARNING: Failed to log run in comet.ml" {} +
```

In [None]:
def collect_experiment_data(file_path: str | Path) -> Dict[str, Dict[str, Any]]:
    """
    Group the lines of a log file by Comet experiment and flag failed experiments.

    The file is read line by line. Each time a line contains an experiment URL
    (``https://www.comet.com/rabyj/epilap/<key>``), ``<key>`` becomes the current
    experiment. Lines read before the first URL are dropped.

    Args:
    - file_path (str | Path): The path to the log file.

    Returns:
    - Dict[str, Dict[str, Any]]: A mapping (a ``defaultdict``) from experiment key to a
      record with two entries:
        - "log": the stripped lines attributed to the experiment, excluding any line
          containing "COMET WARNING" or "COMET ERROR".
        - "status": "failed" if at least one such warning/error line was seen,
          "success" otherwise.
    """
    url_pattern = re.compile(r"https://www\.comet\.com/rabyj/epilap/([\w\d]+)")

    def _new_record() -> Dict[str, Any]:
        # Every experiment starts with an empty log and is assumed successful.
        return {"log": [], "status": "success"}

    records = defaultdict(_new_record)
    current_key = None  # Key of the experiment the current lines belong to.

    with open(file_path, "r", encoding="utf8") as log_file:
        for raw_line in log_file:
            text = raw_line.strip()

            # A line holding an experiment URL switches the current experiment.
            url_match = url_pattern.search(text)
            if url_match is not None:
                current_key = url_match.group(1)

            # Nothing to attribute the line to until a first URL has been seen.
            if not current_key:
                continue

            record = records[current_key]
            if "COMET WARNING" in text or "COMET ERROR" in text:
                # Warning/error lines only flip the status; they are not stored.
                record["status"] = "failed"
            else:
                record["log"].append(text)

    return records

In [None]:
def extract_final_metrics_to_dict(
    experiment_data: Dict[str, Dict[str, List[str]]]
) -> Dict[str, Dict[str, Any]]:
    """
    Pull the summary entries of failed experiments out of their collected log lines.

    Only experiments whose "status" is "failed" are considered. Within their log,
    parsing starts after the first line containing
    "COMET INFO: Comet.ml ExistingExperiment Summary". From there on, every line of
    the form ``COMET INFO:<label>:<value containing a digit>`` is kept, unless the
    line contains "asset", "Experiment" or "display".

    Args:
    - experiment_data (Dict[str, Dict[str, List[str]]]): The experiment data collected
      from the log file (records with "log" and "status" entries).

    Returns:
    - Dict[str, Dict[str, Any]]: A mapping from experiment key to a dictionary of
      stripped labels and their stripped values. Values are kept as the strings found
      in the log. Experiments without any extracted entry are left out.
    """
    summary_header = "COMET INFO: Comet.ml ExistingExperiment Summary"
    skipped_words = ("asset", "Experiment", "display")
    entry_pattern = re.compile(r"COMET INFO:([^:]+):(.*\d+.*)")

    metrics_by_experiment = {}

    for key, record in experiment_data.items():
        if record["status"] != "failed":
            continue

        entries = {}
        in_summary = False  # Becomes True once the summary header has been passed.

        for line in record["log"]:
            if summary_header in line:
                in_summary = True
                continue
            if not in_summary:
                continue
            # Skip summary lines that describe assets, experiment info or display settings.
            if any(word in line for word in skipped_words):
                continue

            found = entry_pattern.search(line)
            if found is None:
                continue

            label, value = found.groups()
            entries[label.strip()] = value.strip()

        # Experiments with no usable summary entry are not reported.
        if entries:
            metrics_by_experiment[key] = entries

    return metrics_by_experiment

In [None]:
def upload_data_to_existing_experiment(
    api: API, final_metrics: Dict[str, Dict[str, Any]]
):
    """
    Uploads metrics to existing experiments in Comet.ml.

    For each experiment key in `final_metrics`, the experiment is fetched with
    `api.get("rabyj/epilap/<key>")` and the entries that are not already present
    in its metrics summary are logged with `log_metrics`. Existing metrics are not
    overwritten, and the "url" and "Name" entries are never uploaded.

    Experiments are fetched and processed one at a time. If an experiment cannot
    be found (`api.get` returns None), it is reported and skipped instead of
    aborting the whole batch.

    Args:
        api (API): The Comet.ml API object to interact with the Comet.ml service.
        final_metrics (Dict[str, Dict[str, Any]]): A nested dictionary where the
            outer dictionary's keys are experiment keys and the inner dictionaries
            contain metric labels and their corresponding values. Values are
            forwarded to `log_metrics` unchanged.

    Prints:
        Information about which metrics were uploaded for which experiments,
        if there were no new data to upload, or if an experiment was not found.
    """
    for key, metrics in final_metrics.items():
        experiment = api.get(f"rabyj/epilap/{key}")
        if experiment is None:
            # A missing/deleted experiment must not prevent the other uploads.
            print(f"Experiment {key} not found, skipping upload.")
            continue

        # Get the old data labels to avoid overwriting them.
        old_data_labels = {
            summary["name"] for summary in experiment.get_metrics_summary()
        }
        no_write = {"url", "Name"} | old_data_labels

        # Create a dictionary with the new data to be uploaded.
        new_data = {
            metric_label: metric_value
            for metric_label, metric_value in metrics.items()
            if metric_label not in no_write
        }

        # Upload the new data.
        if new_data:
            experiment.log_metrics(new_data)
            print(f"Uploaded data to experiment {experiment.key}.")
            print(f"New data: {new_data}")
        else:
            print(f"No new data to upload to experiment {experiment.key}.")

In [None]:
# Comet.ml API client, reused for every upload in the loop below.
# NOTE(review): no credentials are passed here; presumably they come from the
# local Comet configuration or environment — confirm before running.
api = API()

In [None]:
# Directory holding the "*.e" log files in which a failed comet-ml upload was found.
FAILED_LOGS_DIR = Path.home() / "downloads" / "temp" / "failed_comet_upload"

# For each log file: group lines per experiment, extract the summary metrics of
# the failed experiments, then send those metrics back to Comet.ml.
for log_file in list(FAILED_LOGS_DIR.glob("*.e")):
    data = collect_experiment_data(log_file)
    failed_upload_metrics = extract_final_metrics_to_dict(data)
    upload_data_to_existing_experiment(api, failed_upload_metrics)