In [1]:
from proposal import *

In [2]:
class Trace(Generic[ASpecificScen, ASpecificKB]):
    NodeType = tuple[Experiment, ExperimentFeedback]  # Define NodeType as a new type representing the tuple
    NEW_ROOT: tuple = ()

    def __init__(self, scen: ASpecificScen, knowledge_base: ASpecificKB | None = None) -> None:
        self.scen: ASpecificScen = scen

        # BEGIN: graph structure -------------------------
        self.hist: list[Trace.NodeType] = (
            []
        )  # List of tuples containing experiments and their feedback, organized over time.
        self.dag_parent: list[tuple[int, ...]] = []  # List of tuples representing parent indices in the DAG structure.
        # Definition:
        # - (,) represents no parent (root node in one tree);
        # - (1,) presents one parent;
        # - (1, 2) represents two parents (Multiple parent is not implemented yet).
        # Syntax sugar for the parent relationship:
        # - Only for selection:
        #    - (-1,) indicates that select the last record node as parent.

        # NOTE: the sequence of hist and dag_parent is organized by the order to record the experiment.
        # So it may be different from the order of the loop_id.
        # So we need an extra mapping to map the enqueue id back to the loop id.
        self.idx2loop_id: dict[int, int] = {}

        # Design discussion:
        # - If we unifiy the loop_id and the enqueue id, we will have less recognition burden.
        # - If we use different id for loop and enqueue, we don't have to handle the placeholder logic.
        # END: graph structure -------------------------

        # TODO: self.hist is 2-tuple now, remove hypothesis from it, change old code for this later.
        self.knowledge_base: ASpecificKB | None = knowledge_base
        self.current_selection: tuple[int, ...] = (-1,)

    def get_sota_hypothesis_and_experiment(self) -> tuple[Hypothesis | None, Experiment | None]:
        """Access the last experiment result, sub-task, and the corresponding hypothesis."""
        # TODO: The return value does not align with the signature.
        for experiment, feedback in self.hist[::-1]:
            if feedback.decision:
                return experiment.hypothesis, experiment

        return None, None

    def is_selection_new_tree(self, selection: tuple[int, ...] | None = None) -> bool:
        """
        Check if the current trace is a new tree.
        - selection maybe (-1,) when the dag_parent is empty.
        """
        if selection is None:
            selection = self.get_current_selection()

        return selection == self.NEW_ROOT or len(self.dag_parent) == 0

    def get_current_selection(self) -> tuple[int, ...]:
        return self.current_selection

    def set_current_selection(self, selection: tuple[int, ...]) -> None:
        self.current_selection = selection

    def get_parent_exps(
        self,
        selection: tuple[int, ...] | None = None,
    ) -> list[Trace.NodeType]:
        """
        Collect all ancestors of the given selection.
        The return list follows the order of [root->...->parent->current_node].
        """
        if selection is None:
            selection = self.get_current_selection()

        if self.is_selection_new_tree(selection):
            return []

        return [self.hist[i] for i in self.get_parents(selection[0])]

    def exp2idx(self, exp: Experiment | list[Experiment]) -> int | list[int] | None:
        if isinstance(exp, list):
            exps: list[Experiment] = exp

            # keep the order
            exp_to_index: dict[Experiment, int] = {_exp: i for i, (_exp, _) in enumerate(self.hist)}
            return [exp_to_index[_exp] for _exp in exps]
        for i, (_exp, _) in enumerate(self.hist):
            if _exp == exp:
                return i
        return None

    def idx2exp(self, idx: int | list[int]) -> Experiment | list[Experiment]:
        if isinstance(idx, list):
            idxs: list[int] = idx
            return [self.hist[_idx][0] for _idx in idxs]
        return self.hist[idx][0]

    def is_parent(self, parent_idx: int, child_idx: int) -> bool:
        ancestors = self.get_parents(child_idx)
        return parent_idx in ancestors

    def get_parents(self, child_idx: int) -> list[int]:
        if self.is_selection_new_tree((child_idx,)):
            return []

        ancestors: list[int] = []
        curr = child_idx
        while True:
            ancestors.insert(0, curr)
            parent_tuple = self.dag_parent[curr]
            if not parent_tuple or parent_tuple[0] == curr:
                break
            curr = parent_tuple[0]

        return ancestors
    
    def get_reply_tree_dict(self) -> dict[int, dict]:
        tree_dict: dict[int, dict] = {}
        for idx, (experiment, feedback) in enumerate(self.hist):
            tree_dict[idx] = {
                'hypothesis': experiment.hypothesis,
                'feedback': feedback.decision,
                'children': []
            }
        for child_idx, parents in enumerate(self.dag_parent):
            for parent_idx in parents:
                if parent_idx != child_idx:  
                    tree_dict[parent_idx]['children'].append(child_idx)

        return tree_dict


In [2]:
from rdagent.log.storage import FileStorage
from pathlib import Path
from rdagent.scenarios.data_science.experiment.experiment import DSExperiment

from rdagent.log.utils import extract_loopid_func_name
log_path = Path("/home/bowen/workspace/JobAndExp/amlt_project/amlt/dynamic-prawn/combined_logs/aerial-cactus-identification.1")

traces = []
for msg in FileStorage(log_path).iter_msg(tag="trace"):
    loop_id, fn = extract_loopid_func_name(msg.tag)
    traces.append(msg.content)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
Tr = traces[-1]

In [4]:
len(Tr.hist)

51

In [6]:
Tr.current_selection

(-1,)

In [8]:
Tr.idx2loop_id

AttributeError: 'DSTrace' object has no attribute 'idx2loop_id'

In [19]:
Tr.idx2loop_id[30]

30

In [13]:
Tr.hist[30]

(<rdagent.scenarios.data_science.experiment.experiment.DSExperiment at 0x7f4924407850>,
 <rdagent.core.proposal.HypothesisFeedback at 0x7f4924407d30>)

In [8]:
Tr.get_current_selection()

(22,)

In [9]:
Tr.hist[-1]

(<rdagent.scenarios.data_science.experiment.experiment.DSExperiment at 0x7f684ce421a0>,
 <rdagent.core.proposal.HypothesisFeedback at 0x7f684ce42680>)

In [10]:
Tr.exp2idx(Tr.hist[-1][0])

50

In [11]:
Tr.idx2loop_id

AttributeError: 'DSTrace' object has no attribute 'idx2loop_id'

In [53]:
tree_dict: dict[int, dict] = {}        
for idx, (experiment, feedback) in enumerate(Tr.hist):
    tree_dict[idx] = {
                'hypothesis': experiment.hypothesis.hypothesis,
                'feedback': feedback.decision,
                'children': []
            }
for child_idx, parents in enumerate(Tr.dag_parent):
    for parent_idx in parents:
        if parent_idx != child_idx:  
            tree_dict[parent_idx]['children'].append(child_idx)
            tree_dict[parent_idx]['hist_node']= Tr.get_parents(parent_idx)

In [23]:
Tr.get_parents(6)

[1, 6]

In [54]:
tree_dict

{0: {'hypothesis': 'Fine-tune an ImageNet-pretrained WideResNet-28-10 on the 32×32 cactus dataset by (i) freezing all layers except the final block for the first 5 epochs, (ii) then unfreezing and training the entire network for 25 additional epochs with SGD (initial LR = 0.05, momentum = 0.9, weight-decay = 5e-4) under a cosine-annealing schedule, while applying CutOut (p = 1.0, size = 8) and CutMix (β = 1.0, p = 0.5) augmentations.',
  'feedback': True,
  'children': [2],
  'hist_node': [0]},
 1: {'hypothesis': 'Train a ResNet-18 variant whose stem is a single 3×3, stride-1 convolution (no max-pool) and that inserts a Squeeze-and-Excitation (SE) block after every residual block, optimising with AdamW (lr = 3e-4, weight_decay = 1e-4) for 25 epochs and validating with stratified 5-fold cross-validation; success is defined as mean CV ROC-AUC ≥ 0.995.',
  'feedback': False,
  'children': [6],
  'hist_node': [1]},
 2: {'hypothesis': 'Augment each training image with the pipeline: Rotate(l

In [None]:
    def get_reply_tree_dict(self) -> dict[int, dict]:
        tree_dict: dict[int, dict] = {}
        for idx, (experiment, feedback) in enumerate(self.hist):
            tree_dict[idx] = {
                'hypothesis': experiment.hypothesis,
                'feedback': feedback.decision,
                'children': []
            }
        for child_idx, parents in enumerate(self.dag_parent):
            for parent_idx in parents:
                if parent_idx != child_idx:  
                    tree_dict[parent_idx]['children'].append(child_idx)
        
        return tree_dict
