In [15]:
import os
import json
import glob
import re
from openai import OpenAI
import time
from pydantic import BaseModel
from dotenv import load_dotenv
import json
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import os
import re
from collections import defaultdict
import pandas as pd

In [3]:
# Load variables from a .env file (python-dotenv) so that OPENAI_API_KEY can be
# supplied through the environment instead of being hard-coded in the notebook.
load_dotenv()
# Module-level OpenAI client; process_and_analyze_files() uses this global `client`
# for its chat-completion requests.
client = OpenAI(
    # This is the default and can be omitted
    api_key=os.getenv("OPENAI_API_KEY")
)

In [5]:
def get_last_agent_file(folder_path):
    """Return the path of the highest-numbered agent log inside ``folder_path``.

    Candidate files are named ``agent_<N>_<M>.json``. The file with the largest
    ``<N>`` wins; when several files share that ``<N>`` the one with the largest
    ``<M>`` is chosen, so the result does not depend on directory listing order.

    Args:
        folder_path (str): Directory to search (not recursive).

    Returns:
        str | None: Path of the selected file (as produced by ``glob``), or
        ``None`` when the folder holds no file matching the naming scheme.
    """
    best_key = None
    best_path = None

    for path in glob.glob(os.path.join(folder_path, "agent_*_*.json")):
        # Inspect only the file name: a parent directory that happens to contain
        # "agent_<digits>_<digits>.json" must not influence the ranking.
        match = re.search(r"agent_(\d+)_(\d+)\.json", os.path.basename(path))
        if match is None:
            continue

        key = (int(match.group(1)), int(match.group(2)))
        # Start from "no candidate yet" instead of 0 so that agent number 0 is
        # a valid winner when it is the only (or highest) one present.
        if best_key is None or key > best_key:
            best_key = key
            best_path = path

    return best_path

def extract_history_steps(file_path):
    """Load an agent log and return its history steps with bulky fields removed.

    From every step that is a dict the ``observation`` entry is dropped, and when
    the step's ``action`` is itself a dict its ``Fact Check`` entry is dropped
    too (``Research Plan and Status`` is deliberately kept). Steps that are not
    dicts are passed through untouched.

    Args:
        file_path (str): Path to an agent JSON log.

    Returns:
        list | None: The value stored under ``history_steps`` (``[]`` when the
        key is absent), or ``None`` if the file cannot be read or processed; in
        that case the error is printed.
    """
    try:
        # JSON is UTF-8 encoded; do not depend on the platform's locale default.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        history_steps = data.get('history_steps', [])

        for step in history_steps:
            # A malformed (non-dict) entry should not discard the whole trace.
            if not isinstance(step, dict):
                continue

            step.pop('observation', None)

            action = step.get('action')
            if isinstance(action, dict):
                action.pop('Fact Check', None)

        return history_steps
    except Exception as e:
        # Best effort: report the problem and let the caller skip this file.
        print(f"Error processing {file_path}: {e}")
        return None

def extract_task_name_and_id(folder_path):
    """Derive the task name and run ID from a log folder path.

    The path is split on ``os.sep`` and scanned from the left for the first
    component that looks like a run ID; the task name is the component two
    positions before it (in the paths this notebook processes, the component in
    between is the model name, e.g.
    ``data/<task>/<model>/<run_id>/agent_log``).

    A component counts as a run ID when it starts with ``'0'`` (the original
    rule) or consists solely of digit groups joined by underscores, such as
    ``1103205627_279760``. The second form makes IDs that do not begin with a
    zero recognisable as well.

    Args:
        folder_path (str): Folder path using ``os.sep`` as separator.

    Returns:
        tuple: ``(task_name, run_id)``, or ``(None, None)`` when no component
        qualifies.
    """
    parts = folder_path.split(os.sep)

    # Only positions that still have a component two places to the right.
    for i in range(len(parts) - 2):
        candidate = parts[i + 2]
        if candidate.startswith('0') or re.fullmatch(r"\d+(?:_\d+)*", candidate):
            return parts[i], candidate

    return None, None

def process_all_folders(base_path, output_base):
    """Extract the trimmed step history of every run found below ``base_path``.

    Each directory containing ``agent_*.json`` files is treated as one run: its
    last agent log is reduced with ``extract_history_steps`` and the result is
    written to ``<output_base>/<task_name>/<run_id>/output.json``. Folders
    without a usable log or with an empty history are skipped silently; folders
    whose task name / run ID cannot be determined are reported.

    Args:
        base_path (str): Root directory that is walked recursively.
        output_base (str): Root directory for the generated files.
    """
    for folder, _subdirs, filenames in os.walk(base_path):
        has_agent_logs = any(
            name.startswith("agent_") and name.endswith(".json") for name in filenames
        )
        if not has_agent_logs:
            continue

        newest_log = get_last_agent_file(folder)
        if not newest_log:
            continue

        steps = extract_history_steps(newest_log)
        if not steps:
            continue

        # Task name and run ID are encoded in the folder path itself.
        task_name, run_id = extract_task_name_and_id(folder)
        if not (task_name and run_id):
            print(f"Could not determine task name and run ID for {folder}")
            continue

        target_dir = os.path.join(output_base, task_name, run_id)
        os.makedirs(target_dir, exist_ok=True)

        target_file = os.path.join(target_dir, "output.json")
        with open(target_file, 'w') as handle:
            json.dump(steps, handle, indent=2)

        print(f"Extracted history from {folder} saved to {target_file}")

In [6]:
# Root folder that process_all_folders() walks recursively looking for agent_*.json logs.
base_directory = "data"
# Root folder for the extracted histories: <output_directory>/<task_name>/<run_id>/output.json
output_directory = "Steps_RP"

# Create the base output directory if it doesn't exist
os.makedirs(output_directory, exist_ok=True)

# Process all folders (prints one line per run that was extracted or could not be identified)
process_all_folders(base_directory, output_directory)

Extracted history from data/machine_unlearning/claude-3-5-sonnet-v2/0203205627_279760/agent_log saved to Steps_RP/machine_unlearning/0203205627_279760/output.json
Extracted history from data/machine_unlearning/claude-3-5-sonnet-v2/0203205627_2107738/agent_log saved to Steps_RP/machine_unlearning/0203205627_2107738/output.json
Extracted history from data/machine_unlearning/claude-3-5-sonnet-v2/0204004906_2156777/agent_log saved to Steps_RP/machine_unlearning/0204004906_2156777/output.json
Extracted history from data/machine_unlearning/claude-3-5-sonnet-v2/0204021048_1005508/agent_log saved to Steps_RP/machine_unlearning/0204021048_1005508/output.json
Extracted history from data/machine_unlearning/claude-3-5-sonnet-v2/0204000906_980483/agent_log saved to Steps_RP/machine_unlearning/0204000906_980483/output.json
Extracted history from data/machine_unlearning/claude-3-5-sonnet-v2/0204002434_497948/agent_log saved to Steps_RP/machine_unlearning/0204002434_497948/output.json
Extracted histor

In [11]:
def process_and_analyze_files(steps_dir):
    """Ask the chat model for a short summary of every extracted agent trace.

    Expects the layout ``<steps_dir>/<task>/<run_id>/output.json``. For each run
    whose ``output.json`` holds a non-empty JSON list, the trace is embedded in a
    prompt and sent through the module-level ``client``; the reply is written to
    ``summary.txt`` next to the trace and collected as one result row. A request
    that raises is retried (up to ``max_retries`` attempts, two seconds apart).

    Files written into ``steps_dir`` after all runs were visited:
    ``compiled_summaries.csv``, ``compiled_summaries.xlsx``,
    ``failed_folders.txt`` and ``retry_stats.txt``.

    Args:
        steps_dir (str): Directory holding one sub-folder per task.

    Returns:
        pd.DataFrame: Columns ``Task``, ``Run_ID`` and ``Summary`` with one row
        per run that was summarised successfully.
    """
    result_columns = ["Task", "Run_ID", "Summary"]
    # Rows are collected in a plain list and converted once at the end; growing
    # a DataFrame with pd.concat inside the loop copies all previous rows on
    # every iteration.
    summary_rows = []

    failed_folders = []
    retry_stats = []
    max_retries = 5  # Maximum number of attempts per run before giving up

    # Walk through the Steps directory
    for task_name in os.listdir(steps_dir):
        task_dir = os.path.join(steps_dir, task_name)
        if not os.path.isdir(task_dir):
            # Also skips the report files this function writes into steps_dir.
            continue

        for run_id in os.listdir(task_dir):
            run_dir = os.path.join(task_dir, run_id)
            if not os.path.isdir(run_dir):
                continue

            output_file = os.path.join(run_dir, "output.json")
            if not os.path.exists(output_file):
                print(f"No output.json found in {run_dir}")
                continue

            try:
                # JSON is UTF-8; do not depend on the platform's locale default.
                with open(output_file, 'r', encoding='utf-8') as f:
                    output_data = json.load(f)

                # Skip empty files or invalid data
                if not output_data or not isinstance(output_data, list):
                    print(f"Empty or invalid data in {run_dir}/output.json")
                    failed_folders.append(f"{task_name}/{run_id}")
                    continue

                # Convert output_data to pretty-printed string for the prompt
                output_json_str = json.dumps(output_data, indent=2)

                # Create prompt for the approach summary
                prompt = f"""
                You are a researcher, given the following trace of an AI agent doing ML research challenges:
                {output_json_str}
                
                Analyze what the agent did and provide a concise 50-word summary focusing on whether 
                the agent took an engineering approach (implementing known methods) or a scientific 
                research approach (developing novel theories).
                
                Just return the plain text summary without any JSON structure or additional formatting.
                Keep the summary EXACTLY 50 words or fewer.
                """

                print(f"Processing {task_name}/{run_id}...")

                # Retry loop for handling API errors
                retry_count = 0
                success = False
                response_content = None

                while not success and retry_count < max_retries:
                    try:
                        # NOTE(review): parse() is the structured-output helper and no
                        # response_format is passed here; a plain create() call would
                        # presumably behave the same for free text - confirm before changing.
                        completion = client.beta.chat.completions.parse(
                            model="gpt-4o-mini",
                            messages=[
                                {
                                    "role": "user",
                                    "content": prompt
                                }
                            ]
                        )

                        # Extract the response
                        response_content = completion.choices[0].message.content

                        # No need to validate word count - any response is accepted
                        success = True

                    except Exception as e:
                        retry_count += 1
                        print(f"Error processing {task_name}/{run_id} (attempt {retry_count}/{max_retries}): {str(e)}")
                        time.sleep(2)

                if success:
                    # Model output frequently contains non-ASCII characters, so
                    # write it as UTF-8 regardless of the platform default.
                    summary_file = os.path.join(run_dir, "summary.txt")
                    with open(summary_file, 'w', encoding='utf-8') as f:
                        f.write(response_content)

                    summary_rows.append(
                        {"Task": task_name, "Run_ID": run_id, "Summary": response_content}
                    )

                    # Record retry stats if we needed retries
                    if retry_count > 0:
                        retry_stats.append(f"{task_name}/{run_id}: Succeeded after {retry_count} retries")

                else:
                    # All retries failed
                    print(f"Failed to get valid summary for {task_name}/{run_id} after {max_retries} attempts")
                    failed_folders.append(f"{task_name}/{run_id}")
                    retry_stats.append(f"{task_name}/{run_id}: Failed after {max_retries} retries")

                # Add a small delay to avoid rate limiting
                time.sleep(1)

            except Exception as e:
                # Covers unreadable/invalid JSON as well as failures while
                # writing summary.txt for this run.
                print(f"Error reading file for {task_name}/{run_id}: {str(e)}")
                failed_folders.append(f"{task_name}/{run_id}")

    results_df = pd.DataFrame(summary_rows, columns=result_columns)

    # Save the compiled results as CSV
    compiled_results_file = os.path.join(steps_dir, "compiled_summaries.csv")
    results_df.to_csv(compiled_results_file, index=False)

    # Also save as Excel for easier viewing
    excel_file = os.path.join(steps_dir, "compiled_summaries.xlsx")
    results_df.to_excel(excel_file, index=False)

    # Save the list of failed folders
    failed_folders_file = os.path.join(steps_dir, "failed_folders.txt")
    with open(failed_folders_file, 'w', encoding='utf-8') as f:
        f.writelines(f"{folder}\n" for folder in failed_folders)

    # Save the retry statistics
    retry_stats_file = os.path.join(steps_dir, "retry_stats.txt")
    with open(retry_stats_file, 'w', encoding='utf-8') as f:
        f.writelines(f"{stat}\n" for stat in retry_stats)

    print(f"Compiled summaries saved to {compiled_results_file} and {excel_file}")
    print(f"Failed folders list saved to {failed_folders_file}")
    print(f"Retry statistics saved to {retry_stats_file}")
    print(f"Total failed folders: {len(failed_folders)}")
    print(f"Folders that needed retries: {len(retry_stats)}")

    return results_df

In [12]:
# Process all the files: issues one chat-completion request per run folder under
# Steps_RP (with a one-second pause after each), so this cell is slow and requires
# the `client` created above.
results = process_and_analyze_files("Steps_RP")

Processing machine_unlearning/0203205627_279760...
Processing machine_unlearning/0203205627_2107738...
Processing machine_unlearning/0204004906_2156777...
Processing machine_unlearning/0204021048_1005508...
Processing machine_unlearning/0204000906_980483...
Processing machine_unlearning/0204002434_497948...
Processing machine_unlearning/0203235437_311468...
Processing machine_unlearning/0203205627_931197...
Processing meta-learning/0211164506_234282...
Processing meta-learning/0212001115_3514169...
Processing meta-learning/0212223706_984095...
Processing meta-learning/0212230702_2432398...
Processing meta-learning/0212021042_3840295...
Processing meta-learning/0211142216_3241844...
Processing meta-learning/0212235435_1003034...
Processing meta-learning/0211192713_3421734...
Processing llm-merging/0121081654...
Processing llm-merging/0124071448...
Processing llm-merging/0124110854...
Processing llm-merging/0122121253...
Processing llm-merging/0122132158...
Processing llm-merging/0121200

In [13]:
def add_research_plan_to_csv(csv_path, steps_dir):
    """
    Reads an existing CSV, extracts the last research plan for each entry,
    and adds it as a new column to the CSV.

    For every row the file ``<steps_dir>/<Task>/<Run_ID>/output.json`` is read
    and the ``"Research Plan and Status"`` value of the last step's ``action``
    is stored in a ``Last_Research_Plan`` column. Rows whose file or field is
    missing get an empty string; rows that raise while being processed get the
    text ``"Error: Could not extract research plan"``. The result is written
    next to the input as ``<name>_with_research_plan.csv`` and ``.xlsx``.

    Args:
        csv_path (str): Path to the existing CSV file
        steps_dir (str): Directory containing the Steps folders

    Returns:
        pd.DataFrame: Updated DataFrame with the new column
    """
    # Read the identifiers as text. Run IDs such as "0121081654" consist only of
    # digits; default type inference would turn them into integers and drop the
    # leading zero, so the run folder could no longer be located.
    df = pd.read_csv(csv_path, dtype={"Task": str, "Run_ID": str})

    total = len(df)
    plans = []

    for position, (task_name, run_id) in enumerate(zip(df['Task'], df['Run_ID']), start=1):
        last_research_plan = ""
        try:
            # Built inside the try so that a missing Task/Run_ID cell (NaN) is
            # reported for that row instead of aborting the whole loop.
            output_file = os.path.join(steps_dir, task_name, str(run_id), "output.json")

            if os.path.exists(output_file):
                with open(output_file, 'r', encoding='utf-8') as f:
                    output_data = json.load(f)

                # Extract the research plan from the last step's action field
                if output_data and len(output_data) > 0:
                    last_step = output_data[-1]
                    if (isinstance(last_step, dict) and
                        "action" in last_step and
                        isinstance(last_step["action"], dict) and
                        "Research Plan and Status" in last_step["action"]):
                        last_research_plan = last_step["action"]["Research Plan and Status"]
                    else:
                        print(f"No 'Research Plan and Status' found in last step for {task_name}/{run_id}")
            else:
                print(f"Output file not found: {output_file}")

        except Exception as e:
            print(f"Error extracting research plan for {task_name}/{run_id}: {str(e)}")
            last_research_plan = "Error: Could not extract research plan"

        plans.append(last_research_plan)

        # Print progress every 10 rows
        if position % 10 == 0:
            print(f"Processed {position}/{total} entries...")

    # Add the new column
    df['Last_Research_Plan'] = plans

    # Derive the output names from the file extension only. A blanket
    # str.replace('.csv', ...) would also rewrite ".csv" inside directory names
    # and would leave the path unchanged (overwriting the input) when the file
    # does not end in ".csv".
    stem, extension = os.path.splitext(csv_path)
    if extension.lower() != '.csv':
        stem = csv_path

    # Save the updated CSV
    output_path = stem + '_with_research_plan.csv'
    df.to_csv(output_path, index=False)

    # Also save as Excel for easier viewing
    excel_path = stem + '_with_research_plan.xlsx'
    df.to_excel(excel_path, index=False)

    print(f"Updated CSV saved to {output_path}")
    print(f"Updated Excel saved to {excel_path}")

    return df

In [16]:
# CSV produced by process_and_analyze_files() (columns Task, Run_ID, Summary)
csv_path = "Steps_RP/compiled_summaries.csv"
    
# Path to the Steps directory
steps_dir = "Steps_RP"

# Add the research plan column (also writes *_with_research_plan.csv / .xlsx next to the input)
updated_df = add_research_plan_to_csv(csv_path, steps_dir)

# Display some sample data
print("\nSample data from updated DataFrame:")
print(updated_df[['Task', 'Run_ID', 'Summary', 'Last_Research_Plan']].head())

Processed 10/40 entries...
Processed 20/40 entries...
Processed 30/40 entries...
Processed 40/40 entries...
Updated CSV saved to Steps_RP/compiled_summaries_with_research_plan.csv
Updated Excel saved to Steps_RP/compiled_summaries_with_research_plan.xlsx

Sample data from updated DataFrame:
                 Task              Run_ID  \
0  machine_unlearning   0203205627_279760   
1  machine_unlearning  0203205627_2107738   
2  machine_unlearning  0204004906_2156777   
3  machine_unlearning  0204021048_1005508   
4  machine_unlearning   0204000906_980483   

                                             Summary  \
0  The agent employed a scientific research appro...   
1  The agent primarily employed a scientific rese...   
2  The agent employed a scientific research appro...   
3  The agent primarily employed a scientific rese...   
4  The agent primarily took a scientific research...   

                                  Last_Research_Plan  
0   \n1. Environment and code exploration [CO