In [None]:
import json
import re
import os
from openai import OpenAI
from langchain_openai import ChatOpenAI
from utils.code_extractor import extract_python_code
from utils.code_saver import save_code
from utils.code_executor import PythonCodeExecutor
from agents.code_verifier_agent import CodeVerifierAgent

class EDAAgent:
    def __init__(self, thread_id):

        with open("configs/config.json", "r") as f:
            config = json.load(f)

        # self.api_key = config["openai_api_key"]
        os.environ['OPENAI_API_KEY'] = config["openai_api_key"]
        self.model_name = config['openai_model_name']

        self.thread_id = thread_id
        self.info_json = f"./ml_task_memory/info_{self.thread_id}.json"
        self.agent_output_dir = f"./output/{self.thread_id}/"
        self.agent_output_filename = 'eda_agent.json'


    def get_response(self, prompt):
        client = OpenAI()
        response = client.responses.create(
            model=self.model_name,
            input=prompt
        )

        return response

    def get_planning_prompt(self):
        with open(self.info_json, 'r') as f:
            info_json = json.load(f)

        prompt = f"""
            You are an expert Data Scientist. Your task is to generate a step-by-step list of Exploratory Data Analysis (EDA) tasks tailored to the given data type, with a focus on supporting downstream agents for feature engineering, model training, and evaluation.

            - All information about the data like dataset path, task intent, target column, etc. can be found in {self.info_json}.

            Requirements:
            - Information about the data can be acessed from {info_json}.
            - All tasks should be designed so that their outputs (important textual or numeric summaries, statistics, or lists) are logged into a structured JSON file, not displayed.
            - Tasks must begin with data loading and proceed through all essential EDA steps, including identifying data types, missing values, statistical summaries, cardinality, outlier detection, and any domain-specific EDA needed for modeling.
            - Avoid tasks that only generate visualizations unless the underlying data/summary is also saved as JSON.
            - Each task should be written as a single string, achievable via Python, and focus on producing outputs that can be consumed programmatically by downstream agents.
            - Do not output code, explanations, or any text outside the Python list of task descriptions.

            Output format:
            A Python list of EDA task descriptions as strings, with each task specifically designed so its results are logged into a JSON file for use by downstream agents. 
            - Make sure to save all the results in the JSON file and all values to be logged are JSON serializable
            - the output should be saved at {self.agent_output_dir} and the output json file should be named as {self.agent_output_filename}.
            """
        return prompt

    def get_code_gen_prompt(self, text):
        prompt = f"""
            You are an expert in Data Science and Machine Learning. Your task is to write Python code that performs the following operations:

            Task: 
            - Write Python code for {text}, which is a Data Science, Machine Learning, or EDA task.
            
            Logging and Saving Results:
            - Ensure that all relevant results and outputs are saved in a JSON file.
            - The data you log should be JSON serializable. This means using data types like lists, dictionaries, numbers, and strings.
            - Ensure you include all relevant statistics, summaries, or results generated during the task in the JSON file. This includes intermediate results and any processed data.
            
            JSON Output Requirements:
            - The output should be saved at {self.agent_output_dir}.
            - The JSON file should be named {self.agent_output_filename}.
            - Make sure the data in the JSON file is structured logically, with clear keys and values for each result.

            File Handling:
            - Ensure that the file is properly written and closed after logging the results. The output file should be created in the specified directory, and it should be accessible without errors.
        """
        return prompt


    def run(self):
        # Plan the work
        planning_prompt = self.get_planning_prompt()
        plan_response = self.get_response(planning_prompt)
        
        # code generation
        list_text = plan_response.output_text
        code_gen_prompt = self.get_code_gen_prompt(list_text)
        code_gen_response = self.get_response(code_gen_prompt)

        extracted_code = extract_python_code(code_gen_response.output_text)

        # Execute code and verify
        for i in range(4):
            print(f"attempt: {i} ------>")
            executor = PythonCodeExecutor()
            code = extracted_code[0]
            result = executor.execute(code)
            success = result.success

            print('----------------')
            print(result.stderr)
            print('----------------')

            if not success:
                # verify code
                codevef = CodeVerifierAgent(self.thread_id, list_text, code, result.stderr)
                extracted_code = codevef.run()
            else:
                break

        file_path = f"./output/{self.thread_id}/eda.py"
        save_code(file_path, extracted_code[0])


In [115]:
model_train = EDAAgent(2)
model_train.run()

Ensured directory './output/2' exists.
Code successfully saved to './output/2/eda.py'



In [104]:
import json
import re
import os
from openai import OpenAI
from langchain_openai import ChatOpenAI
from utils.code_extractor import extract_python_code
from utils.code_saver import save_code
from utils.code_executor import PythonCodeExecutor

class FeatureEngineeringAgent:
    def __init__(self, thread_id):

        with open("configs/config.json", "r") as f:
            config = json.load(f)

        # self.api_key = config["openai_api_key"]
        os.environ['OPENAI_API_KEY'] = config["openai_api_key"]

        self.thread_id = thread_id
        self.info_json = f"./ml_task_memory/info_{self.thread_id}.json"
        self.eda_json_output = f"./output/{self.thread_id}/eda_agent.json"


    def get_response(self, prompt):
        client = OpenAI()
        response = client.responses.create(
            model="gpt-4.1-nano-2025-04-14",
            input=prompt
        )

        return response

    def get_planning_prompt(self):
        with open(self.eda_json_output, 'r') as f:
            eda_json_output = json.load(f)


        with open(self.info_json, 'r') as f:
            info_json = json.load(f)

        prompt = f"""
            You are an expert Machine Learning Engineer. Your task is to generate a step-by-step list of tasks for performing feature engineering.

            Context:
            - A previous agent (EDA agent) has already analyzed the dataset and generated a structured JSON file named {self.eda_json_output}. This file contains outputs such as data types, missing value statistics, cardinality, outlier info, and distribution summaries.

            Requirements:
            - Information about the data can be accessed from {info_json}.
            - All tasks should use insights from the EDA JSON file ({eda_json_output}) where appropriate.
            - task_intent from {info_json} indicates the type of machine learning task to be performed, which might influence feature engineering strategies (e.g., target encoding for classification).
            - Each task should produce outputs (such as engineered features, transformed datasets, or feature engineering choices) that are JSON-serializable and must be logged into a structured JSON file. This JSON will be consumed by downstream agents for model training, deployment, explanation, or monitoring.
            - Consider various feature engineering techniques including handling missing values, encoding categorical features, scaling numerical features, creating new features from existing ones (e.g., polynomial features, interaction terms, date-time features), and dimensionality reduction if necessary.
            - Tasks must begin with reading the EDA output and continue through various feature transformation steps, culminating in a ready-to-use dataset for model training.
            - Avoid any tasks that only generate visualizations unless their summaries or values are saved in structured form.
            - Each task must be expressed as a single string that could be executed in Python and designed to run sequentially.
            - Do not output code, explanations, or any text outside the Python list of task descriptions.

            Input:
            - Information about the data: JSON File
            - EDA results: structured JSON file

            Output format:
            A Python list of feature engineering task descriptions as strings, with each task specifically designed so its results are logged into a structured JSON file for use by downstream agents. Make sure to save all the results in the JSON file and all values to be logged are JSON serializable.
            """
        return prompt

    def get_code_gen_prompt(self, text):
        prompt = f"write python code for {text}"
        return prompt


    def run(self):
        # Plan the work
        planning_prompt = self.get_planning_prompt()
        plan_response = self.get_response(planning_prompt)
        
        # code generation
        list_text = plan_response.output_text
        code_gen_prompt = self.get_code_gen_prompt(list_text)
        code_gen_response = self.get_response(code_gen_prompt)

        extracted_code = extract_python_code(code_gen_response.output_text)

        file_path = f"./output/{self.thread_id}/feature_engineering.py"
        save_code(file_path, extracted_code[0])

        executor = PythonCodeExecutor()
        code = extracted_code[0]
        result = executor.execute(code)

        print(result.stderr)


In [107]:
model_train = FeatureEngineeringAgent(2)
model_train.run()

Ensured directory './output/2' exists.
Code successfully saved to './output/2/feature_engineering.py'
Traceback (most recent call last):
  File [35m"/var/folders/s1/s3f_rpyj6zv3xss6397vnq0m0000gn/T/tmp8wmz0a3x.py"[0m, line [35m102[0m, in [35m<module>[0m
    if [1;31mabs(corr_value) >= 0.9[0m:
       [1;31m^^^^^^^^^^^^^^^^^^^^^^[0m
  File [35m"/opt/anaconda3/envs/agentml/lib/python3.13/site-packages/pandas/core/generic.py"[0m, line [35m1577[0m, in [35m__nonzero__[0m
    raise ValueError(
    ...<2 lines>...
    )
[1;35mValueError[0m: [35mThe truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().[0m



In [None]:
import json
import re
import os
from openai import OpenAI
from langchain_openai import ChatOpenAI
from utils.code_extractor import extract_python_code
from utils.code_saver import save_code
from utils.code_executor import PythonCodeExecutor
from agents.code_verifier_agent import CodeVerifierAgent

class ModelTrainingAgent:
    def __init__(self, thread_id):

        with open("configs/config.json", "r") as f:
            config = json.load(f)

        # self.api_key = config["openai_api_key"]
        os.environ['OPENAI_API_KEY'] = config["openai_api_key"]
        self.model_name = config['openai_model_name']

        self.thread_id = thread_id
        self.info_json = f"./ml_task_memory/info_{self.thread_id}.json"
        self.eda_json_output = f"./output/{self.thread_id}/eda_agent.json"
        self.output_directory = f"./output/{self.thread_id}/"
        self.output_json = f"./output/{self.thread_id}/model_training.json"


    def get_response(self, prompt):
        client = OpenAI()
        response = client.responses.create(
            model=self.model_name,
            input=prompt
        )

        return response

    def get_planning_prompt(self):
        with open(self.eda_json_output, 'r') as f:
            eda_json_output = json.load(f)


        with open(self.info_json, 'r') as f:
            info_json = json.load(f)

        prompt = f"""
            You are an expert Machine Learning Engineer. Your task is to generate a step-by-step list of tasks for training and evaluating a machine learning model using structured tabular data.

            Context:
            - A previous agent (EDA agent) has already analyzed the dataset and generated a structured JSON file named {self.eda_json_output}. This file contains outputs such as data types, missing value statistics, cardinality, outlier info, and distribution summaries.

            Requirements:
            - Information about the data can be acessed from {info_json}.
            - All tasks should use insights from the EDA JSON file ({eda_json_output}) where appropriate.
            - task_intent from {info_json} indicates the type of machine learning task to be performed.
            - Each task should produce outputs (such as selected features, cleaned dataset paths, model hyperparameters, evaluation metrics, or model file paths) that are JSON-serializable and must be logged into a structured JSON file. This JSON will be consumed by downstream agents for deployment, explanation, or monitoring.
            - Multiple models should be trained and evaluated. Choose wisely.
            - Tasks must begin with reading the EDA output and continue through preprocessing, feature selection, train-test splitting, model training, evaluation, and saving of final artifacts.
            - Avoid any tasks that only generate visualizations unless their summaries or values are saved in structured form.
            - Each task must be expressed as a single string that could be executed in Python and designed to run sequentially.
            - Do not output code, explanations, or any text outside the Python list of task descriptions.

            Input:
            - Information about the data: JSON FIle
            - EDA results: structured JSON file 

            Output format:
            A Python list of model training task descriptions as strings, with each task specifically designed so its results are logged into a structured JSON file for use by downstream agents. Make sure to save all the results in the JSON file and all values to be logged are JSON serializable.
            - make sure to save all files at {self.output_directory} 
            - the name for output log should be saved with the name {self.output_json}
            """
        return prompt

    def get_code_gen_prompt(self, text):
        prompt = f"""

            You are an expert in Data Science and Machine Learning. Your task is to write Python code that performs the following operations:

            Task: 
            - Write Python code for {text}, which is a Data Science, Machine Learning, or EDA task.
            
            Logging and Saving Results:
            - Ensure that all relevant results and outputs are saved in a JSON file.
            - The data you log should be JSON serializable. This means using data types like lists, dictionaries, numbers, and strings.
            - The JSON file should contain all the logs and relevant results from the task.

            JSON Output Requirements:
            - The output should be saved at {self.output_directory}.
            - The JSON file should be named {self.output_json}.
            - Make sure the data in the JSON file is structured logically, with clear keys and values for each result.

            File Handling:
            - Ensure that the file is properly written and closed after logging the results. The output file should be created in the specified directory, and it should be accessible without errors.
        """
        return prompt


    def run(self):
        # Plan the work
        planning_prompt = self.get_planning_prompt()
        plan_response = self.get_response(planning_prompt)
        
        # code generation
        list_text = plan_response.output_text
        code_gen_prompt = self.get_code_gen_prompt(list_text)
        code_gen_response = self.get_response(code_gen_prompt)

        extracted_code = extract_python_code(code_gen_response.output_text)
        
        # Execute code and verify
        for i in range(4):
            print(f"attempt: {i} ------>")
            executor = PythonCodeExecutor()
            code = extracted_code[0]
            result = executor.execute(code)
            success = result.success

            print('----------------')
            print(result.stderr)
            print('----------------')

            if not success:
                # verify code
                codevef = CodeVerifierAgent(self.thread_id, list_text, code, result.stderr)
                extracted_code = codevef.run()
            else:
                break

                

        file_path = f"./output/{self.thread_id}/model_training.py"
        save_code(file_path, extracted_code[0])


In [2]:
model_train = ModelTrainingAgent(3)
extracted = model_train.run()

attempt: 0 ------>
----------------
Traceback (most recent call last):
  File [35m"/var/folders/s1/s3f_rpyj6zv3xss6397vnq0m0000gn/T/tmpf01uevv3.py"[0m, line [35m76[0m, in [35m<module>[0m
    feat: [31meda[0m[1;31m["feature_summary"][0m[feat].get("missing_count", None)
          [31m~~~[0m[1;31m^^^^^^^^^^^^^^^^^^^[0m
[1;35mKeyError[0m: [35m'feature_summary'[0m

----------------
attempt: 1 ------>
----------------

----------------
attempt: 2 ------>
----------------

----------------
attempt: 3 ------>
----------------

----------------


In [3]:
extracted



In [None]:
file_path = "./output/{thread_id}/model_training_gpt.py"
save_code(file_path, 'import os')

Ensured directory './output/thread_id' exists.
Code successfully saved to './output/thread_id/model_training_gpt.py'


In [36]:
import importlib
import utils.code_saver
importlib.reload(utils.code_saver)


<module 'utils.code_saver' from '/Users/prajwalchaudhary/Desktop/Uni/COMP8420/AutoAgentML/utils/code_saver.py'>

In [1]:
import subprocess
import tempfile
import os
from dataclasses import dataclass

@dataclass
class ExecutionResult:
    success: bool
    stdout: str
    stderr: str

class PythonCodeExecutor:
    def __init__(self, timeout: int = 10):
        self.timeout = timeout

    def execute(self, code: str) -> ExecutionResult:
        with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as temp_file:
            temp_file.write(code)
            temp_file_path = temp_file.name

        try:
            result = subprocess.run(
                ['python', temp_file_path],
                capture_output=True,
                text=True,
                timeout=self.timeout
            )
            return ExecutionResult(
                success=result.returncode == 0,
                stdout=result.stdout,
                stderr=result.stderr
            )
        except subprocess.TimeoutExpired as e:
            return ExecutionResult(success=False, stdout='', stderr='Execution timed out.')
        except Exception as e:
            return ExecutionResult(success=False, stdout='', stderr=str(e))
        finally:
            os.remove(temp_file_path)


In [7]:
exec = PythonCodeExecutor()
result = exec.execute("import ost")

In [9]:
result.stderr

'Traceback (most recent call last):\n  File \x1b[35m"/var/folders/s1/s3f_rpyj6zv3xss6397vnq0m0000gn/T/tmpbpi81xxp.py"\x1b[0m, line \x1b[35m1\x1b[0m, in \x1b[35m<module>\x1b[0m\n    import ost\n\x1b[1;35mModuleNotFoundError\x1b[0m: \x1b[35mNo module named \'ost\'\x1b[0m\n'

In [3]:
import json
import re
import os
from openai import OpenAI
from langchain_openai import ChatOpenAI
from utils.code_extractor import extract_python_code
from utils.code_saver import save_code
from utils.code_executor import PythonCodeExecutor

class CodeVerifierAgent:
    def __init__(self, thread_id, task_description, code, exec_error):

        with open("configs/config.json", "r") as f:
            config = json.load(f)

        # self.api_key = config["openai_api_key"]
        os.environ['OPENAI_API_KEY'] = config["openai_api_key"]

        self.thread_id = thread_id
        # self.info_json = f"./ml_task_memory/info_{self.thread_id}.json"
        # self.eda_json_output = f"./output/{self.thread_id}/eda_agent.json"

        self.task_description = task_description
        self.code = code
        self.exec_error = exec_error


    def get_response(self, prompt):
        client = OpenAI()
        response = client.responses.create(
            model="gpt-4.1-nano-2025-04-14",
            input=prompt
        )

        return response

    def get_planning_prompt(self):
        prompt = f"""
            You are an expert in debugging and code correction. Your task is to generate a comprehensive plan to handle execution errors in the provided code. The code has issues that prevent it from executing correctly. You are given the following information:

            Task Description:
            {self.task_description}
            
            Code:
            {self.code}
            Execution Error:
            {self.exec_error}

            Your job is to generate a clear and actionable plan to resolve the issues in the provided code. The plan should include the following:

            - Analyze the error: Review the execution error and determine which parts of the code are causing the issue.
            - Identify the root cause: Identify whether the error is due to logical mistakes, syntax issues, missing dependencies, or other causes.
            - Suggested steps to correct the issue: Provide a step-by-step plan for fixing the issue in the code. This can include:
            - Fixing syntax errors or handling exceptions
            - Adjusting logic or refactoring code
            - Adding missing imports or dependencies
            - Correcting variable scope or data type issues
            - Updating method calls or object handling
            - Verification: Include how the corrected code should be verified (e.g., through unit tests, debugging, or re-running the code).
            - Final suggestions: Provide any additional tips to prevent similar errors in the future.
            
            Output format:
            - Return a python list of steps to correct the code and address the execution error.
            """
        return prompt

    def get_code_gen_prompt(self, plan):
        prompt = f""""
        You are an expert in Python programming and debugging. Based on the detailed debugging plan you received, your task is to generate the corrected version of the provided code. The plan includes steps for resolving the execution error, fixing syntax issues, and improving the code structure. Please follow these guidelines:

            Task Description:
            {self.task_description}
            
            Code:
            {self.code}
            Execution Error:
            {self.exec_error}

            Debugging Plan:
            {plan}

            Follow the Debugging Plan: Use the steps outlined in the debugging plan to guide your corrections.
            

            Output: Provide the final corrected Python code that is ready to be executed without errors.
        
        """
        return prompt


    def run(self):
        # Plan the work
        planning_prompt = self.get_planning_prompt()
        plan_response = self.get_response(planning_prompt)

        # print(plan_response.output_text)
        
        # code generation
        list_text = plan_response.output_text
        code_gen_prompt = self.get_code_gen_prompt(list_text)
        code_gen_response = self.get_response(code_gen_prompt)

        extracted_code = extract_python_code(code_gen_response.output_text)

        return(extracted_code)

        # file_path = f"./output/{self.thread_id}/model_training.py"
        # save_code(file_path, extracted_code[0])

        # executor = PythonCodeExecutor()
        # code = extracted_code[0]
        # result = executor.execute(code)

        # print(result.stderr)


In [4]:
thread_id = 1
task_description = "write python code to print the shape of dataset iris.csv"
code = """
import os 
import pandas as pd

df = pd.read_csv('data/iris.csv')
df.shape()
"""
exec_error = """
File "/Users/prajwalchaudhary/Desktop/Uni/COMP8420/AutoAgentML/testing_codevef.py", line 5, in <module>
    df.shape()
    ~~~~~~~~^^
TypeError: 'tuple' object is not callable
"""
codevef = CodeVerifierAgent(thread_id, task_description, code, exec_error)
codevef.run()

['import os \nimport pandas as pd\n\ntry:\n    df = pd.read_csv(\'data/iris.csv\')\n    # Print the shape of the dataset\n    print(f"Dataset shape: {df.shape}")\nexcept FileNotFoundError:\n    print("The file \'data/iris.csv\' was not found. Please check the file path.")\nexcept pd.errors.EmptyDataError:\n    print("The file is empty. Please check the CSV file content.")\nexcept pd.errors.ParserError:\n    print("Error parsing the CSV file. Please ensure it is formatted correctly.")']