# A pair of agents to generate code for data analysis

## Prepare the environment

In [None]:
import tempfile
import shutil
import os
import subprocess

from autogen import ConversableAgent
from autogen.coding import LocalCommandLineCodeExecutor

# Load the OpenAI API key from a local credential file.
# Strip surrounding whitespace: text files commonly end with a newline, and a
# key carrying stray whitespace would presumably be rejected by the API.
with open('openai.credential', 'r', encoding='utf-8') as file:
    key = file.read().strip()
# Name of the model used by the LLM-backed agent.
MODEL = 'gpt-4o'

## Create a temporary folder to run the code
*Question:* Why not use the working directory, or a sub-folder in it?

In [None]:
# Scratch directory used as the code executor's working directory, so that
# generated scripts and outputs stay out of the project folder.
temp_dir = tempfile.TemporaryDirectory()
print(temp_dir.name)

# Copy the input data next to where the generated code will run.
# shutil.copy is portable (no dependency on an external `cp` binary) and
# raises if the source file is missing, instead of silently ignoring a
# non-zero exit status.
shutil.copy("population.csv", temp_dir.name)

# Show the directory content to confirm the data file is in place.
os.listdir(temp_dir.name)

## Code executor: a proxy for the command line, with no LLM

In [None]:
# Command-line executor that runs the suggested code inside the temporary
# directory.
executor = LocalCommandLineCodeExecutor(
    timeout=10,  # Timeout for each code execution in seconds.
    work_dir=temp_dir.name,  # Use the temporary directory to store the code files.
)

# Create an agent with code executor configuration.
code_executor = ConversableAgent(
    "code_executor_agent",
    llm_config=False,  # Turn off LLM for this agent.
    code_execution_config={"executor": executor},  # Use the local command line code executor.
    human_input_mode="ALWAYS",  # Always take human input for this agent for safety.
    # Stop when the other agent says "terminate" (case-insensitive).
    # A message may lack "content" or carry None; treat that as empty text
    # rather than raising on `.lower()`.
    is_termination_msg=lambda msg: "terminate" in (msg.get("content") or "").lower(),
)

## Data scientist - the code writer

In [None]:
# System prompt for the code-writing agent: it instructs the model to propose
# complete, executable python/sh code blocks, react to execution results, and
# reply 'TERMINATE' once the task is finished.
data_scientist_system_message = """You are a data scientist, and you can solve problems by writing python code. 
In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute.
1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself.
2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly.
Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill.
When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user.
If you want the user to save the code in a file before executing it, put # filename: <filename> inside the code block as the first line. Don't include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print' function for the output when relevant. Check the execution result returned by the user.
If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.
When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible.
Reply 'TERMINATE' in the end when everything is done.
Some of the tasks are about analyzing a csv file. If you don't know the structure of the csv file, you may provide code to the users to extract the csv structure, and use the feedback from the users to generate the csv analysis code. 
"""

# LLM settings for the code writer: a single model entry using the key loaded
# earlier in the notebook.
llm_settings = {"config_list": [{"model": MODEL, "api_key": key}]}

# The code-writing agent: LLM-backed, never executes code itself.
data_scientist = ConversableAgent(
    "data_scientist_agent",
    system_message=data_scientist_system_message,
    llm_config=llm_settings,
    code_execution_config=False,
)

### Now give the task

In [None]:
# Task description sent as the opening message of the conversation.
task_message = "There is a csv file called population.csv, can you please find out the number of newborns each year for Norway, and store it in a new file result.csv"

# The executor agent opens the chat with the data scientist; the returned
# object is kept for later inspection.
chat_result = code_executor.initiate_chat(data_scientist, message=task_message)

# List the temporary directory to see which files exist after the chat
# (e.g. whether result.csv was produced).
os.listdir(temp_dir.name)