In [None]:
"""Find sample input size for neural network trainings logged on comet.ml"""
# pylint: disable=import-error, redefined-outer-name

In [None]:
from __future__ import annotations

from datetime import datetime, timezone
from pathlib import Path
from typing import List

import pandas as pd
from comet_ml.api import API

In [None]:
# comet.ml API client shared by the cells below. No credentials are passed
# here, so the key presumably comes from the comet config file or environment
# (TODO confirm).
api = API()

In [None]:
def select_time_slice(
    api: API, date1: str, date2: str, project: str = "rabyj/epilap"
) -> List[str]:
    """Select experiments whose start time lies strictly between two dates.

    Experiment start times are read from the "startTimeMillis" metadata field
    and interpreted as UTC.

    Args:
        api: comet.ml API object
        date1 (str): start date (exclusive), ISO format. A value without a UTC
            offset is interpreted as UTC.
        date2 (str): end date (exclusive), ISO format. Same convention as date1.
        project (str): "workspace/project" path handed to ``api.get``.
    Returns:
        List[str]: List of experiment keys, in the order returned by ``api.get``
    Raises:
        ValueError: if date1 or date2 is not a valid ISO format string.
    """

    def _parse_utc(date: str) -> datetime:
        """Parse an ISO date string; naive values are taken to be UTC."""
        parsed = datetime.fromisoformat(date)
        if parsed.tzinfo is None:
            return parsed.replace(tzinfo=timezone.utc)
        return parsed

    # Parse the bounds once, up front, rather than on every loop iteration.
    start = _parse_utc(date1)
    end = _parse_utc(date2)

    valid_list = []
    for experiment in api.get(project):
        meta = experiment.get_metadata()
        # Milliseconds since the epoch -> timezone-aware UTC datetime.
        # (datetime.utcfromtimestamp is deprecated and returns a naive value.)
        start_time = datetime.fromtimestamp(
            int(meta["startTimeMillis"]) / 1000, tz=timezone.utc
        )
        if start < start_time < end:
            valid_list.append(experiment.key)

    return valid_list

In [None]:
# Bounds of the time window (ISO dates); select_time_slice compares with strict
# inequalities, so both ends are exclusive.
date1 = "2024-01-01"
date2 = "2024-03-01"
# NOTE(review): `experiments` is not read by any later cell visible in this
# notebook — confirm whether the input-size collection was meant to use it.
experiments = select_time_slice(api, date1, date2)

In [None]:
# Map experiment key -> logged "input_size" hyperparameter, for every
# experiment of the project that has such a parameter.
input_sizes = {}
for experiment in api.get("rabyj/epilap"):
    for hparam_dict in experiment.get_parameters_summary():
        # Skip every parameter summary entry except "input_size".
        if hparam_dict["name"] != "input_size":
            continue
        # The summary's "valueMax" field is taken as the input size.
        size = int(hparam_dict["valueMax"])
        exp_key = experiment.get_metadata()["experimentKey"]
        input_sizes[exp_key] = size

In [None]:
# One row per experiment: the dict keys (experiment keys) become the index and
# the values fill a single "input_size" column.
input_sizes_df = pd.DataFrame.from_dict(
    data=input_sizes, orient="index", columns=["input_size"]
)

In [None]:
# Save the experiment key -> input size table; the relative path resolves
# against the kernel's current working directory.
input_sizes_df.to_csv("input_sizes.csv")

In [None]:
# Load a previously saved log table (presumably a comet.ml export, judging by
# the file name — confirm). header=0 takes column names from the first row;
# index_col=False keeps pandas from using the first column as the index.
# NOTE(review): the path is hard-coded under the user's home directory.
global_log_df = pd.read_csv(
    Path.home() / "Downloads/temp/cometml_dfreeze2.1_logs.csv", index_col=False, header=0
)

In [None]:
# Attach the input size to each log row by matching the "Experiment key" column
# against the index of input_sizes_df. The left join keeps every log row; rows
# whose key has no recorded input size get a missing value.
joined_df = global_log_df.merge(
    input_sizes_df, left_on="Experiment key", right_index=True, how="left"
)

In [None]:
# Keep only the key and input size columns, indexed and sorted by experiment key.
new_in_s_df = (
    joined_df[["Experiment key", "input_size"]].set_index("Experiment key").sort_index()
)

In [None]:
# Write the per-log-row input sizes next to the source log file.
# NOTE(review): the output path is hard-coded under the user's home directory.
new_in_s_df.to_csv(Path.home() / "Downloads/temp/new_input_sizes.csv")