In [None]:
"""Some results from two different groups_second_level_name runs were mixed up, so I need to unmix them."""
# pylint: disable=import-error,redefined-outer-name,consider-using-f-string

In [None]:
from __future__ import annotations

import os
import shutil
import urllib.request
from pathlib import Path
from typing import Dict, List

from comet_ml.api import API

In [None]:
def find_smallest_md5_files(
    root_dir: str | Path, n_splits: int = 10
) -> List[Dict[str, int | str | Path]]:
    """
    Traverse the directory structure and identify the smallest .md5 file for each split and set pair.

    Only files named ``split<N>_training_<rest>.md5`` or ``split<N>_validation_<rest>.md5``
    with ``0 <= N < n_splits`` are considered. Any other file is ignored instead of
    raising or being attributed to the wrong split.

    Args:
        root_dir (str | Path): The root directory to start the (recursive) search.
        n_splits (int): Number of splits to track. Defaults to 10.

    Returns:
        List[Dict]: One dictionary per split, in split order, of the form
            ``{"split": i, "training": (size, path), "validation": (size, path)}``.
            A split/set pair without any matching file keeps the placeholder
            ``(float("inf"), None)``.
    """
    set_types = ("training", "validation")
    smallest_files = [
        {"split": i, "training": (float("inf"), None), "validation": (float("inf"), None)}
        for i in range(n_splits)
    ]
    for file_path in Path(root_dir).rglob("*.md5"):
        # Expected layout: split<N>_<set_type>_<rest>.md5
        name_parts = file_path.name.split("_")
        if len(name_parts) < 3:
            continue
        split, set_type = name_parts[:2]
        if not split.startswith("split") or set_type not in set_types:
            continue
        # Parse the whole numeric suffix, not just the last character ("split12" -> 12).
        try:
            split_index = int(split[len("split") :])
        except ValueError:
            continue
        # The range check also rejects negative values, which would index from the end.
        if not 0 <= split_index < n_splits:
            continue
        file_size = file_path.stat().st_size
        # Strict comparison: on equal sizes the first file encountered is kept.
        if file_size < smallest_files[split_index][set_type][0]:
            smallest_files[split_index][set_type] = (file_size, file_path)
    return smallest_files

In [None]:
# Directory holding the 10-fold results in which two runs got mixed together.
root_directory = Path.home().joinpath(
    "mounts/narval-mount/project-rabyj/epilap/output/logs/epiatlas-dfreeze-v2.1/hg38_100kb_all_none/groups_second_level_name_1l_3000n/10fold/"
)
# Sibling directory used as the destination root for the un-mixed files.
destination_root_directory = root_directory.parent / "10fold-nomix"

In [None]:
# Scan the mixed results directory: one entry per split, each holding the smallest
# training and validation .md5 file found (size, path).
smallest_md5_files = find_smallest_md5_files(root_directory)
print(smallest_md5_files)

In [None]:
# Show the selected path of every split: training first, then validation.
for split_entry in smallest_md5_files:
    for set_type in ("training", "validation"):
        print(split_entry[set_type][1])  # type: ignore

In [None]:
def transfer_files(
    smallest_files: List[Dict[str, int | str | Path]], destination_root: str | Path
):
    """
    Move the selected .md5 files into a `<destination_root>/split<N>/<set_type>/` layout.

    This is a move, not a copy: each source file no longer exists at its original
    location afterwards. Split/set pairs with no selected file are skipped.

    Args:
        smallest_files (List[Dict]): One dictionary per split, of the form
            ``{"split": i, "training": (size, path), "validation": (size, path)}``,
            where ``path`` is None when no file was selected.
        destination_root (str | Path): The root directory where the files will be transferred to.
    """
    for file_info in smallest_files:
        split_dir = Path(destination_root) / f"split{file_info['split']}"
        for set_type in ("training", "validation"):
            _, file_path = file_info[set_type]  # type: ignore
            if not file_path:  # No file was selected for this split/set pair
                continue
            source_path = Path(file_path)
            destination_path = split_dir / set_type / source_path.name
            destination_path.parent.mkdir(parents=True, exist_ok=True)
            # shutil.move renames in place when possible and otherwise falls back
            # to copy-then-delete, replacing the hand-rolled copy + unlink.
            shutil.move(str(source_path), str(destination_path))

In [None]:
# Destructive step: the selected files are moved, so they no longer exist under
# root_directory afterwards.
transfer_files(smallest_md5_files, destination_root_directory)

In [None]:
# Identifiers searched for in file paths to pick out the files of the non-mixed run.
# NOTE(review): these look like Comet experiment keys, one per split — confirm.
nomix_experiments = [
    "238b03e3bdb548569a988c4b9a9c402c",
    "9025d53d51ed4851b8e72600c3c3cdaf",
    "78ca2264ffd040308552adc75c712ff1",
    "f432bbfbcf8c4a6081cb26ca13ce8789",
    "09f9f3a95b5a44e9b003f6c1e5c0d0db",
    "b416e619a95c4f5ba95fa42c64f9ec0f",
    "d153f94f07874f4d83a93445fab31da5",
    "0f1372d81f0b4416814b401a2c3b8e31",
    "f855c7b162284e9287c95ba8baabeef7",
    "4ab263a806f443dbaec6d809f9f1cbc2",
]

In [None]:
def copy_files_with_strings(
    root_dir: str | Path, destination_root: str | Path, strings_list: list
):
    """
    Print the shell commands that move matching files into a mirror of the source tree.

    Every regular file under `root_dir` whose full path contains at least one entry of
    `strings_list` is reported as a `\\mv <source> <destination>` line. No file is
    moved or copied here, but the destination directories are created as a side effect.

    Args:
        root_dir (str | Path): The root directory to start the (recursive) search.
        destination_root (str | Path): The root of the mirrored directory structure.
        strings_list (list): Substrings to look for in the file paths.
    """
    source_root = Path(root_dir)
    command_template = r"\mv {} {}"
    for candidate in source_root.rglob("*"):
        if not candidate.is_file():
            continue
        candidate_text = str(candidate)
        if not any(needle in candidate_text for needle in strings_list):
            continue

        # Same relative location, but under the destination root.
        target = Path(destination_root) / candidate.relative_to(source_root)
        os.makedirs(target.parent, exist_ok=True)

        print(command_template.format(candidate, target))
In [None]:
# Only prints the `\mv` commands (and creates the destination directories);
# no file is moved by this call.
copy_files_with_strings(root_directory, destination_root_directory, nomix_experiments)

## Retrieving saved result files through the Comet ML API

In [None]:
api = API()

# Keys of every experiment in "rabyj/epilap" tagged "groups_second_level_name".
correct_list = [
    experiment.key
    for experiment in api.get("rabyj/epilap")
    if "groups_second_level_name" in experiment.get_tags()
]

In [None]:
# Download the .csv/.png/.tsv assets of every retained experiment into the matching
# split folder of the destination tree.
for experiment in [api.get(f"rabyj/epilap/{key}") for key in correct_list]:
    # An experiment is rejected when one of its "mapping" parameters holds "mixed.mixed".
    wanted_experiment = not any(
        "mixed.mixed" in param_dict.values() and "mapping" in param_dict["name"]
        for param_dict in experiment.get_parameters_summary()
    )
    if not wanted_experiment:
        continue

    # NOTE(review): assumes the experiment name ends with "-<split folder name>" — confirm.
    exp_name = experiment.get_name()
    split_name = exp_name.split("-")[-1]
    for asset_dict in experiment.get_asset_list(asset_type="all"):
        filename = asset_dict["fileName"]
        if not filename.endswith((".csv", ".png", ".tsv")):
            continue

        url = asset_dict["link"]
        new_path = destination_root_directory / split_name / filename
        # The split folder may not exist yet; saving into a missing folder would fail.
        new_path.parent.mkdir(parents=True, exist_ok=True)
        # Download straight to the final location rather than into the current
        # working directory, where same-named assets would collide.
        urllib.request.urlretrieve(url, str(new_path))