In [None]:
"""
kentarrito/AI-Scientist-Fork
Add brainstorming mechanism in idea generation part
"""

# Step 1 : Inference

## Process Class

In [None]:
# Use Model from local environment

import asyncio
import json
import os
import os.path as osp
import random
import time, warnings

from transformers import AutoTokenizer
from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams

# Model identifier / local path; the same value is handed to the vLLM engine and
# to the Hugging Face tokenizer below.
model_name = "deepseek_r1_qwen14b"
# Tensor-parallel degree forwarded to vLLM's engine arguments.
tensor_parallel_size = 2

engine_args = AsyncEngineArgs(
    model = model_name,
    tensor_parallel_size = tensor_parallel_size,
    gpu_memory_utilization=0.95,
)
# Module-level async engine: AllRequests.process_requests calls `engine.generate`
# on this object, so it must exist before any request is processed.
engine = AsyncLLMEngine.from_engine_args(engine_args)
# NOTE(review): `tokenizer` is not referenced anywhere else in the visible notebook.
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left')


class AllRequests:
    """Queue of prompt requests that are run concurrently on the module-level ``engine``.

    Requests are dicts holding at least a ``"prompt"`` key.  Each request gets a
    sequential integer id when it is added.  ``process`` starts ``max_request``
    worker coroutines that drain the queue, stores the generated text under
    ``request["output"]`` and checkpoints progress to ``save_dir`` after every
    finished request so that an interrupted run can be resumed.

    Resuming relies on the ids being reproducible: the requests must be added
    in the same order as in the interrupted run before calling
    ``process(restart=True)``.
    """

    def __init__(self, max_request):
        self.max_request = max_request   # number of concurrent worker coroutines
        self.requests = []               # pending request dicts (FIFO)
        self.request_ids = []            # ids of the pending requests, parallel to `requests`
        self.request_id = 0              # next id to hand out
        self.results = []                # finished request dicts (with "output")
        self.finished_ids = []           # ids of the finished requests

    def add(self, request):
        """Append ``request`` to the queue and assign it the next sequential id."""
        self.requests.append(request)
        self.request_ids.append(self.request_id)
        self.request_id += 1

    @staticmethod
    def _write_json(obj, path):
        """Write ``obj`` as JSON to ``path`` via a temp file + rename.

        The rename keeps the previous checkpoint intact if the process dies
        in the middle of a write.
        """
        tmp_path = f"{path}.tmp"
        with open(tmp_path, "w") as f:
            json.dump(obj, f)
        os.replace(tmp_path, path)

    async def process(self, model=model_name, max_tokens = 3000, temperature=0.4, save_dir = "progress_log", restart = False):
        """Run every queued request and return the list of finished request dicts.

        Args:
            model: kept for interface compatibility; generation always uses the
                module-level ``engine``.
            max_tokens: generation length limit passed to ``SamplingParams``.
            temperature: sampling temperature passed to ``SamplingParams``.
            save_dir: directory receiving ``results.json`` / ``finished_ids.json``.
            restart: when True and both checkpoint files exist, previously
                finished requests are loaded and removed from the queue.

        Returns:
            ``self.results`` (finished requests in completion order).
        """
        os.makedirs(save_dir, exist_ok=True)

        if restart:
            finished_path = f"{save_dir}/finished_ids.json"
            results_path = f"{save_dir}/results.json"
            if os.path.exists(finished_path) and os.path.exists(results_path):
                with open(finished_path) as f:
                    finished_ids = json.load(f)
                with open(results_path) as f:
                    self.results = json.load(f)
                for finished_id in finished_ids:
                    if finished_id not in self.finished_ids:
                        self.finished_ids.append(finished_id)
                    # A checkpointed id may be absent from the current queue
                    # (e.g. fewer requests were added this time); skip it
                    # instead of failing on list.index().
                    if finished_id in self.request_ids:
                        position = self.request_ids.index(finished_id)
                        self.request_ids.pop(position)
                        self.requests.pop(position)

        await asyncio.gather(
            *[self.process_requests(temperature = temperature, max_tokens = max_tokens, restart = restart, save_dir=save_dir) for _ in range(self.max_request)]
        )

        return self.results


    async def process_requests(self, max_tokens = 3000, temperature=0.4, save_dir = "progress_log", restart = False):
        """Worker loop: pop requests until the queue is empty, generate, checkpoint.

        ``restart`` is accepted for signature compatibility and is not used here.

        Raises:
            RuntimeError: if the engine yields no output for a request.
        """
        os.makedirs(save_dir, exist_ok=True)

        while self.requests:
            request_dict = self.requests.pop(0)
            request_id = self.request_ids.pop(0)

            final_output = None
            results_generator = engine.generate(
                request_dict["prompt"],
                SamplingParams(temperature=temperature, max_tokens=max_tokens),
                str(request_id),  # vLLM request ids are strings
            )
            async for request_output in results_generator:
                # Intermediate outputs are streamed; only the last one is kept.
                final_output = request_output

            if final_output is None or not final_output.outputs:
                raise RuntimeError(f"Engine returned no output for request {request_id}")

            # The caller's dict is annotated in place, as before.
            request_dict["output"] = final_output.outputs[0].text
            self.results.append(request_dict)
            self.finished_ids.append(request_id)

            # Checkpoint after every request; the writes are synchronous, so
            # worker coroutines cannot interleave inside them.
            self._write_json(self.results, f"{save_dir}/results.json")
            self._write_json(self.finished_ids, f"{save_dir}/finished_ids.json")
    


## Download Dataset

## Solve Problems

In [None]:
# GENERATE IDEAS
def generate_bs_agents_dataset(
        base_dir,
        agents,
        client,
        model,
        skip_generation=False,
        max_num_generations=20,
        num_reflections=5,
        num_depth=2,
        num_branch=2,
):
    """Build a tree of brainstorming agents and generate one idea per tree node.

    Every non-root node is assigned an agent that does not occur among its
    ancestors.  For each node the LLM is first asked to brainstorm in the role
    of that agent (on top of the ancestors' brainstorming history), then asked
    for an idea, which is refined for up to ``num_reflections`` rounds.  The
    finished tree is written to ``<base_dir>/bs_agent_tree.json``.

    Args:
        base_dir: directory containing ``experiment.py`` and ``prompt.json``
            (keys ``"system"`` and ``"task_description"`` are read).
        agents: sequence of agent descriptions; needs at least
            ``num_branch + num_depth - 1`` entries.
        client, model: forwarded to ``get_response_from_llm``.
        skip_generation: when True, return the tree previously saved in
            ``bs_agent_tree.json`` if it can be loaded; otherwise generate.
        max_num_generations: unused; kept for interface compatibility.
        num_reflections: total number of idea rounds (first draft included).
        num_depth: number of tree levels below the root.
        num_branch: number of children per node.

    Returns:
        The root node dict (``agent_id`` is None) with nested ``children``.

    Raises:
        ValueError: if ``agents`` is empty or ``num_depth``/``num_branch`` < 1.
        RuntimeError: if there are too few agents to keep every root-to-leaf
            path free of repeated agents.
    """
    if num_depth < 1 or num_branch < 1:
        raise ValueError("num_depth and num_branch must both be >= 1.")

    tree_path = osp.join(base_dir, "bs_agent_tree.json")

    if skip_generation:
        # Load the tree this function itself saves, so the cached value has
        # the same shape as a freshly generated one.
        try:
            with open(tree_path, "r") as f:
                cached_tree = json.load(f)
            print(f"Loaded existing brainstorming tree from {tree_path}")
            return cached_tree
        except FileNotFoundError:
            print("No existing ideas found. Generating new ideas.")
        except json.JSONDecodeError:
            print("Error decoding existing ideas. Generating new ideas.")

    with open(osp.join(base_dir, "experiment.py"), "r") as f:
        code = f.read()

    with open(osp.join(base_dir, "prompt.json"), "r") as f:
        prompt = json.load(f)

    idea_system_prompt = prompt["system"]
    n_agents = len(agents)

    def build_bs_agent_tree(agents, *, num_depth=3, num_branch=2, seed=None):
        """Return the root of an agent tree with ``num_branch`` children per node."""
        if not agents:
            raise ValueError("agents list is empty.")

        # A private generator gives reproducible picks for a given seed
        # without reseeding the process-wide `random` state.
        rng = random.Random(seed) if seed is not None else random
        n_agents = len(agents)

        def _grow(node, depth, forbidden):
            if depth == num_depth:
                return
            # Sorted so the candidate order (and thus the sample) is deterministic.
            available = sorted(set(range(n_agents)) - forbidden)
            if len(available) < num_branch:
                raise RuntimeError("Not enough distinct agents for the depth requested.")

            for b, a_idx in enumerate(rng.sample(available, num_branch)):
                child = {
                    "agent"   : agents[a_idx],
                    "agent_ids": node["agent_ids"] + [a_idx],
                    "agent_id": a_idx,
                    "node_ids" : node["node_ids"] + [b],
                    "bs_msg"  : [],
                    "ideas"   : [],
                    "children": [],
                }
                node["children"].append(child)

                # The chosen agent is excluded for the whole subtree below it.
                _grow(child, depth + 1, forbidden | {a_idx})

        root = {"agent_id": None, "agent_ids": [], "node_ids": [], "bs_msg": [], "ideas": [], "children": []}
        _grow(root, 0, set())
        return root

    def get_assistant_msg(
        node,
        *,                          # keyword-only
        history_so_far,             # ancestor conversation
        prompt, code,
        client, model,
    ):
        """Brainstorm and generate an idea for ``node``.

        Returns ``(bs_msg, [idea])`` where ``bs_msg`` is the history returned by
        the brainstorming call (ancestor history extended by this node's
        exchange) and ``idea`` is the parsed JSON of the last idea response,
        or a fallback dict with the raw text when no JSON could be extracted.
        """
        agent_id = node["agent_ids"][-1] if node["agent_ids"] else None

        # Copy so the ancestors' history list is never modified.
        bs_msg = list(history_so_far)

        bs_sys_msg = brainstorming_system_msg.format(
            task_description = prompt["task_description"],
            code             = code,
        )
        bs_prompt = brainstorming_prompt.format(
            agent = agents[agent_id],
        )
        _, bs_msg = get_response_from_llm(
            bs_prompt,
            client         = client,
            model          = model,
            system_message = bs_sys_msg,
            msg_history    = bs_msg,
        )

        idea_prompt = idea_first_prompt.format(
            task_description = prompt["task_description"],
            code             = code,
            num_reflections  = num_reflections,
        )

        # NOTE(review): the first idea call uses an empty system message while
        # the reflection rounds use prompt["system"] — confirm this is intended.
        idea_txt, msg_history = get_response_from_llm(
            idea_prompt,
            client         = client,
            model          = model,
            system_message = "",
            msg_history    = bs_msg,
        )

        # Iteratively improve the idea.
        if num_reflections > 1:
            # Reflection only sees the idea exchange, not the brainstorming turns.
            msg_history = msg_history[-2:]
            for j in range(num_reflections - 1):
                idea_txt, msg_history = get_response_from_llm(
                    idea_reflection_prompt.format(
                        current_round=j + 2, num_reflections=num_reflections
                    ),
                    client=client,
                    model=model,
                    system_message=idea_system_prompt,
                    msg_history=msg_history,
                )
                json_output = extract_json_between_markers(idea_txt)
                assert (
                        json_output is not None
                ), "Failed to extract JSON from LLM output"

                if "I am done" in idea_txt:
                    break

        idea_json = extract_json_between_markers(idea_txt) or {"idea": idea_txt,
                                                                    "agent": agents[agent_id]}
        return bs_msg, [idea_json]

    # Number of non-root nodes: b + b^2 + ... + b^depth.
    total_num_node = sum(num_branch ** d for d in range(1, num_depth + 1))

    # The progress counter stays a module-level global, as before, so it
    # remains inspectable from the notebook after the call.
    global populate_count
    populate_count = 0

    def populate_tree(node, history_so_far, **llm_kwargs):
        """Depth-first traversal filling ``bs_msg`` and ``ideas`` of every node.

        ``history_so_far`` is the brainstorming history of the node's ancestors;
        each child continues from its parent's history.
        """
        global populate_count
        if node["agent_id"] is not None:         # skip dummy root
            node["bs_msg"], node["ideas"] = get_assistant_msg(
                node,
                history_so_far = history_so_far,
                **llm_kwargs,
            )
            next_history = node["bs_msg"]
            populate_count += 1
            print(f"populate count: {populate_count}/{total_num_node}")
        else:
            next_history = history_so_far

        for child in node["children"]:
            populate_tree(child, next_history, **llm_kwargs)

    print()
    print("Making Brainstorming Tree...")
    print(f"num_depth:{num_depth}, num_branch:{num_branch}, n_agents:{n_agents}, ")

    bs_agent_tree = build_bs_agent_tree(
        agents, num_depth=num_depth, num_branch=num_branch, seed=42
    )

    print(bs_agent_tree)

    print()
    print("Brainstorming and Genrating Ideas...")

    populate_tree(
        bs_agent_tree,
        history_so_far = [],      # start empty
        prompt         = prompt,
        code           = code,
        client         = client,
        model          = model,
    )

    ## SAVE IDEAS
    with open(tree_path, "w") as f:
        json.dump(bs_agent_tree, f, indent=4)

    return bs_agent_tree


In [None]:
def check_idea_novelty_in_bs_agent_tree(
    bs_agent_tree,  # {"agent_ids":[],"bs_msg":[],"children":[{same structure}, ]}
    base_dir,
    client,
    model,
    max_num_iterations=10,
    engine="semanticscholar",
):
    """Annotate every non-root node of a brainstorming tree with a novelty verdict.

    For the first idea of each node the LLM is queried for up to
    ``max_num_iterations`` rounds; between rounds the query it proposes is sent
    to ``search_for_papers`` and the hits are fed back.  The idea counts as
    novel only if a response contains "decision made: novel".

    Args:
        bs_agent_tree: root node dict; modified in place.
        base_dir: directory containing ``experiment.py`` and ``prompt.json``
            (key ``"task_description"`` is read).
        client, model: forwarded to ``get_response_from_llm``.
        max_num_iterations: maximum number of LLM rounds per idea.
        engine: literature search backend forwarded to ``search_for_papers``.

    Returns:
        The same ``bs_agent_tree`` object.  Each non-root node gains
        ``"novelties"`` (one-element list of bool) and ``"novelty"`` (that
        bool), the latter being the key the dataset-building step reads.
    """
    with open(osp.join(base_dir, "experiment.py"), "r") as f:
        code = f.read()
    with open(osp.join(base_dir, "prompt.json"), "r") as f:
        prompt = json.load(f)
        task_description = prompt["task_description"]

    def check(ideas):
        """Return ``[novel]`` for the first idea in ``ideas``."""
        if not ideas:
            print("\nNo idea attached to this node; treating it as not novel.")
            return [False]

        idea = ideas[0]
        # Ideas whose JSON could not be parsed upstream are stored as
        # {"idea": ..., "agent": ...} and carry no "Name".
        idea_name = idea.get("Name", "<unnamed>") if isinstance(idea, dict) else "<unnamed>"

        print(f"\nChecking novelty of idea: {idea_name}")

        novel = False
        msg_history = []
        papers_str = ""
        thought_output = ""

        for j in range(max_num_iterations):
            try:
                text, msg_history = get_response_from_llm(
                    novelty_prompt.format(
                        current_round=j + 1,
                        num_rounds=max_num_iterations,
                        idea=idea,
                        last_query_results=papers_str,
                    ),
                    client=client,
                    model=model,
                    system_message=novelty_system_msg.format(
                        num_rounds=max_num_iterations,
                        task_description=task_description,
                        code=code,
                    ),
                    msg_history=msg_history,
                )
                thought_output = extract_text_inside_backticks(text, "thought") or thought_output

                lowered = text.lower()
                if "decision made: novel" in lowered:
                    print("Decision made: novel after round", j)
                    novel = True
                    break
                if "decision made: not novel" in lowered:
                    print("Decision made: not novel after round", j)
                    break

                # parse JSON
                json_output = extract_json_between_markers(text)
                assert json_output is not None, "Failed to extract JSON from LLM output"

                # search
                query = json_output.get("Query", "")
                papers = search_for_papers(query, result_limit=5, engine=engine)
                if not papers:
                    papers_str = "No papers found."
                else:
                    paper_strings = []
                    for i, paper in enumerate(papers):
                        paper_strings.append(
                                """{i}: {title}. {authors}. {venue}, {year}.\nNumber of citations: {cites}\nAbstract: {abstract}""".format(
                                    i=i,
                                    title=paper["title"],
                                    authors=paper["authors"],
                                    venue=paper["venue"],
                                    year=paper["year"],
                                    cites=paper["citationCount"],
                                    abstract=paper["abstract"],
                                )
                        )
                    papers_str = "\n\n".join(paper_strings)

            except Exception as e:
                # Best effort: a failed round (LLM, parsing or search error)
                # is reported and the next round is attempted.
                print(f"Error: {e}")
                continue

        print()
        print(f"novelty: {novel}")

        return [novel]


    def add_novelty_to_tree(node, depth):
        """Recursively store the novelty verdict on every descendant of ``node``."""
        for child in node["children"]:
            novelties = check(child["ideas"])
            child["novelties"] = novelties
            child["novelty"] = bool(novelties[0])

            add_novelty_to_tree(child, depth + 1)


    #bs_agent_tree = {"agent_id": None, "agent_ids": [], "node_ids": [], "bs_msg": [], "ideas": [], "children": []}
    add_novelty_to_tree(bs_agent_tree, 0)

    return bs_agent_tree



# Step 2 : Make dataset_list

In [None]:
import os

# Directory receiving every artifact of this experiment run.
exp_dir = "/workspace/exp12"
# bs_agent_tree = {"agent_id": None, "agent_ids": [], "node_ids": [], "task": "", "bs_msg": [], "ideas": [], "novelty": True or False, "children": [{}, ...]}
# Directory that is scanned recursively for saved brainstorming-tree JSON files.
bs_agent_tree_save_dir = "/workspace/AI-Scientist-Fork-dataset"
# Cache file for the preference-pair list built in the next cell.
dataset_list_save_path = f"{exp_dir}/dataset_list.json"

# exist_ok avoids the check-then-create race and makes the cell safely re-runnable.
os.makedirs(exp_dir, exist_ok=True)

In [None]:

import json
import pandas as pd

def list_files(root_dir):
    """Return the paths of all files below ``root_dir``, descending into subdirectories.

    Paths are built by joining each visited directory with its file names, in
    the order ``os.walk`` reports them.
    """
    return [
        os.path.join(parent, filename)
        for parent, _subdirs, filenames in os.walk(root_dir)
        for filename in filenames
    ]


# Build the preference-pair dataset once and cache it; later runs reload the cache.
if not os.path.exists(dataset_list_save_path):

    # Sorted so the order of the resulting pairs does not depend on the
    # directory listing order of the file system.
    bs_agent_trees = []
    all_files = sorted(list_files(bs_agent_tree_save_dir))

    for file_path in all_files:
        with open(file_path) as f:
            bs_agent_tree = json.load(f)

        bs_agent_trees.append(bs_agent_tree)


    def get_dataset_dict(task, chosen_msg, rejected_msg):
        """Turn two sibling message histories into one preference record.

        The record's query is the shared prefix of the history (everything
        except the last two messages) followed by a fixed user request; the
        chosen / rejected keys hold the content of each history's
        second-to-last message.

        NOTE(review): for histories that end in a user/assistant pair the
        second-to-last message is the user turn — confirm that this, and not
        the assistant reply at [-1], is the intended key text.

        Returns None when the histories hold fewer than two messages.

        Raises:
            Exception: if the two histories differ in length.
        """
        if len(chosen_msg) != len(rejected_msg):
            raise Exception("chosen_msg and rejected_msg must be same size")

        if len(chosen_msg) < 2:
            return None

        # For a two-message history the prefix is empty and [-2] is [0], so a
        # single expression covers both of the original branches.
        return {
            "query": chosen_msg[:-2] + [{'role': 'user', 'content':f"Give me a brainstorming sentence to solve the task below;\n\nTask:{task}"}],
            "chosen_key": [{'role': 'assistant', 'content': chosen_msg[-2]["content"]}],
            "rejected_key": [{'role': 'assistant', 'content': rejected_msg[-2]["content"]}],
        }


    dataset_list = []
    def walk(node, depth):
        """Score the subtree below ``node`` and collect preference pairs.

        A child's score is the score of its own subtree plus 1 if the child
        is novel.  Every pair of siblings with different scores yields one
        record (higher score = chosen) appended to ``dataset_list``.

        Returns 0 for a leaf, otherwise the mean score of the children.
        """
        children = node["children"]
        if not children:
            return 0

        total_score = 0
        seen_scores = []
        seen_msgs = []
        for child in children:
            bs_msg = child["bs_msg"]  # [{"role":}, ...]
            task = child["task"]
            # Accept both the "novelty" flag and the one-element "novelties"
            # list that the novelty-check step of this notebook writes.
            if "novelty" in child:
                novelty = child["novelty"]
            else:
                novelty = any(child["novelties"])

            score = walk(child, depth + 1)
            if novelty:
                score += 1
            total_score += score

            # Compare against the siblings visited so far.
            for other_score, other_msg in zip(seen_scores, seen_msgs):
                if score < other_score:
                    dataset_dict = get_dataset_dict(task, chosen_msg=other_msg, rejected_msg=bs_msg)
                elif score > other_score:
                    dataset_dict = get_dataset_dict(task, chosen_msg=bs_msg, rejected_msg=other_msg)
                else:
                    continue

                # Histories too short to form a pair produce None; skip them.
                if dataset_dict is not None:
                    dataset_list.append(dataset_dict)

            seen_msgs.append(bs_msg)
            seen_scores.append(score)

        return total_score / len(children)

    for bs_agent_tree in bs_agent_trees:
        walk(bs_agent_tree, 0)

    with open(dataset_list_save_path, "w") as f:
        json.dump(dataset_list, f)

else:
    with open(dataset_list_save_path) as f:
        dataset_list = json.load(f)

# Step 3 : Train Reward Model

In [None]:
from rmsearch import RMTrainer

# Path of the reward-model checkpoint handed to RMTrainer.
# NOTE: this rebinds the notebook-level `model_name` that the inference cell set
# to the generation model; AllRequests.process keeps the earlier value because
# its default argument was evaluated when the class was defined.
model_name = "/workspace/llama3b-rm"
num_gpus = 1

rmtrainer = RMTrainer(model_name = model_name, num_gpus = num_gpus)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Output locations for dataset preparation, all under the experiment directory.
dataset_save_path = f"{exp_dir}/dataset"
train_ids_save_path = f"{exp_dir}/train_ids.json"
test_ids_save_path = f"{exp_dir}/test_ids.json"
# presumably the number of preference pairs held out for evaluation —
# TODO confirm against RMTrainer.prepare_dataset
test_size = 48

# `dataset_list` is the list of {"query", "chosen_key", "rejected_key"} records
# built (or reloaded from cache) in the "Make dataset_list" step.
formatted_dataset = rmtrainer.prepare_dataset(dataset_list, dataset_save_path, test_size, train_ids_save_path, test_ids_save_path)

In [None]:
from trl import RewardConfig
from peft import LoraConfig, TaskType

batch_size_per_device = 4
eval_batch_size_per_device = 4

# Checkpoint directory for the reward model. This name was never defined in
# the notebook, so the cell failed on a fresh kernel; it now follows the other
# artifacts under `exp_dir`.
# NOTE(review): adjust if checkpoints are expected somewhere else.
model_save_dir = f"{exp_dir}/reward_model"

training_args = RewardConfig(
    output_dir=model_save_dir,
    per_device_train_batch_size=batch_size_per_device,
    per_device_eval_batch_size=eval_batch_size_per_device,
    # NOTE(review): with the evaluation strategy left at its default, the only
    # evaluation in the recorded run is the one at step 0 (eval_on_start);
    # `eval_steps` presumably has no effect until a step-based strategy is set.
    #evaluation_strategy="steps",
    eval_steps=20,
    eval_on_start=True,
    save_steps=20,
    logging_steps=1,
    num_train_epochs = 3,
    # "none" disables experiment trackers; passing None is resolved to "all"
    # installed integrations by transformers 4.x.
    report_to="none",
    # Keep dataset columns that the model's forward() does not take.
    remove_unused_columns=False,
)

peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    target_modules=["k_proj","q_proj","o_proj", "v_proj","down_proj","gate_proj","up_proj",],
    # LoRA adapters are attached only to these layer indices.
    layers_to_transform=[25,26,27],
    r=16,
    lora_alpha=16,
    lora_dropout=0.1,
)

rmtrainer.train(formatted_dataset, training_args = training_args, peft_config = peft_config)

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss,Accuracy
0,No log,0.339076,0.812500
1,0.382000,No Log,No Log
2,0.230500,No Log,No Log
3,0.418800,No Log,No Log
4,0.627400,No Log,No Log
5,0.531700,No Log,No Log
6,0.303600,No Log,No Log
7,0.060400,No Log,No Log
8,0.028500,No Log,No Log
9,0.242800,No Log,No Log
