In [14]:
import pandas as pd
from langchain import hub
from dotenv import load_dotenv
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langchain.agents.agent_types import AgentType
from langchain.agents import AgentExecutor, create_react_agent, Tool
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

In [15]:
# Load environment variables from the project-level .env file before any
# client is constructed (presumably this supplies the OpenAI API key — confirm).
load_dotenv("../.env")

# Chat model used by the pandas agent and the top-level ReAct agent;
# temperature=0 requests the least-random sampling.
LLM = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)

# ReAct prompt fetched from the LangChain hub under the "hwchase17/react" handle.
prompt = hub.pull("hwchase17/react")

In [16]:
# Show the raw template text of the pulled prompt, including its
# {tools}, {tool_names}, {input} and {agent_scratchpad} placeholders.
print(prompt.template)

Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}


In [17]:
# Read the dummy dataset into the DataFrame that is later handed to the pandas agent.
df = pd.read_csv(filepath_or_buffer="../data/dummy_data.csv")

In [18]:
# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,countries,year,month,revenue
0,malaysia,2013,1,1
1,malaysia,2013,2,2
2,malaysia,2013,3,3
3,malaysia,2013,4,4
4,malaysia,2013,5,5


In [19]:
@tool
def multiply(first_int: int, second_int: int) -> int:
    """Multiply two integers together."""
    # The docstring text is kept verbatim: @tool is expected to expose it as
    # the tool description shown to the LLM (confirm against LangChain docs).
    product = first_int * second_int
    return product


@tool
def add(first_int: int, second_int: int) -> int:
    """Add two integers."""
    # The docstring text is kept verbatim: @tool is expected to expose it as
    # the tool description shown to the LLM (confirm against LangChain docs).
    total = first_int + second_int
    return total


@tool
def divide(first_int: int, second_int: int) -> float:
    "Divide two integers."
    # `first_int` is the dividend and `second_int` the divisor.
    # `/` is true division, so the result is always a float (e.g. 4 / 2 == 2.0);
    # the return annotation now says so instead of claiming `int`.
    # A zero divisor raises ZeroDivisionError, which is left to propagate.
    # The docstring text is unchanged because @tool is expected to use it as
    # the tool description (confirm against LangChain docs).
    return first_int / second_int


@tool
def writer(input: str) -> str:
    "Write an answer to a file whenever user requested writing a file."
    # `input` is the text to persist; the parameter name shadows the builtin
    # but is kept because it is the tool's public argument name.
    # The return annotation now says `str`: the function returns a
    # confirmation message, not an int.
    output_path = "../data/result.txt"

    # Mode "w" truncates any previous result. The explicit UTF-8 encoding makes
    # the written bytes independent of the platform's default locale encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(input)

    return f"File saved to '{output_path}'"

In [56]:
from langchain_experimental.tools import PythonREPLTool
from langchain.agents import create_openai_functions_agent

# The reporter agent gets a single tool: a Python REPL.
# NOTE: the REPL executes arbitrary model-generated code — use with caution.
reporter_tools = [PythonREPLTool()]

# Instructions substituted into the hub template's `instructions` variable.
instructions = """You are an agent designed to write and execute python code to write a report in PDF to "../data/result.pdf" using ReportLab package.
    First you need to give a proper title based on the content, then write the content to a PDF file.
    You have access to a python REPL, which you can use to execute python code.
    If you get an error, debug your code and try again.
    Return "I have written a report to `filepath`" after you have successfully finished executing the code. 
    You might know the answer without running any code, but you should still run the code to get the answer.
    If it does not seem like you can write code to answer the question, just return "I don't know" as the answer.
    """

# Fetch the generic OpenAI-functions template and bind the instructions to it.
base_prompt = hub.pull("langchain-ai/openai-functions-template")
reporter_prompt = base_prompt.partial(instructions=instructions)

# The reporter uses its own ChatOpenAI instance (library-default model,
# temperature 0) rather than the shared `LLM`.
reporter_agent = create_openai_functions_agent(
    llm=ChatOpenAI(temperature=0),
    tools=reporter_tools,
    prompt=reporter_prompt,
)

# Executor that runs the agent loop; verbose=True prints each step.
reporter_agent_executor = AgentExecutor(
    tools=reporter_tools,
    agent=reporter_agent,
    verbose=True,
)


@tool
def write_to_file(input: str) -> dict:
    """useful when writing a report to a file."""
    # `input` is the report content, forwarded to the reporter agent under the
    # "input" key. The executor's result is returned unchanged; in the recorded
    # run it is a dict with "input" and "output" keys, so the annotation now
    # says `dict` rather than `int`.
    # The docstring text is unchanged because @tool is expected to use it as
    # the tool description (confirm against LangChain docs).
    return reporter_agent_executor.invoke({"input": input})

In [57]:
# Prompt prefix for the DataFrame agent. The last line nudges the model to
# call the tool by its real name, "python_repl_ast".
CSV_AGENT_PREFIX = """
    There will be multiple dfs, so access them with df1, df2 etc.
    Always call "df1.head()" to see what are the possible columns and values for you to query on.
    Process the whole df and provide accurate answer.
    if function_name == "python": function_name = "python_repl_ast"
    """

# ReAct-style pandas agent. Passing the frame inside a list matches the
# prefix, which tells the model to address frames as df1, df2, ...
csv_agent = create_pandas_dataframe_agent(
    llm=LLM,
    df=[df],
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    prefix=CSV_AGENT_PREFIX,
    include_df_in_prompt=False,  # the agent is told to call df1.head() itself instead
    return_intermediate_steps=True,
    verbose=True,
)

In [58]:
# Dump the agent executor's repr to inspect the prompt and tool wiring it was built with.
print(csv_agent)

verbose=True agent=RunnableAgent(runnable=RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_log_to_str(x['intermediate_steps']))
})
| PromptTemplate(input_variables=['agent_scratchpad', 'input'], partial_variables={'tools': 'python_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, sometimes output is abbreviated - make sure it does not look abbreviated before using it in your answer.', 'tool_names': 'python_repl_ast'}, template='\n    There will be multiple dfs, so access them with df1, df2 etc.\n    Always call "df1.head()" to see what are the possible columns and values for you to query on.\n    Process the whole df and provide accurate answer.\n    if function_name == "python": function_name = "python_repl_ast"\n    \n\n{tools}\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, s

In [59]:
# Tools available to the top-level ReAct agent.
tools = [
    # Wraps the pandas DataFrame agent; the outer agent passes it a
    # natural-language question as the tool input.
    Tool.from_function(
        func=csv_agent.invoke,
        name="csv_agent",
        description="useful when retrieving the dataframe to process on",
    ),
    # Delegates report writing to the reporter agent executor.
    write_to_file,
]

In [60]:
# Build the top-level ReAct agent from the shared LLM, the tool list and the hub prompt.
agent = create_react_agent(LLM, tools, prompt)

In [61]:
# Executor that runs the agent's Thought/Action loop; verbose=True prints each step.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [62]:
# Run the full pipeline end to end; the result dict is kept in `a` for inspection.
# NOTE(review): the prompt reads "Write much revenue" — probably meant
# "Write how much revenue". Left as is because the recorded output was produced with this text.
a = agent_executor.invoke(
    {"input": "Write much revenue malaysia made in 2013 into a file"}
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo answer this question, I need to retrieve the data that contains Malaysia's revenue for the year 2013. Once I have the data, I can write the specific revenue figure into a file.

Action: csv_agent
Action Input: What was Malaysia's revenue in 2013?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to check the dataframe df1 to see if it contains the revenue information for Malaysia in 2013.
Action: python_repl_ast
Action Input: df1.head()[0m[36;1m[1;3m  countries  year  month  revenue
0  malaysia  2013      1        1
1  malaysia  2013      2        2
2  malaysia  2013      3        3
3  malaysia  2013      4        4
4  malaysia  2013      5        5[0m[32;1m[1;3mThe dataframe df1 contains columns for 'countries', 'year', 'month', and 'revenue'. I need to filter the dataframe for Malaysia's revenue in the year 2013 and sum up the revenue across all months.
Action: python_repl_ast
Action I

Python REPL can execute arbitrary code. Use with caution.


[32;1m[1;3m
Invoking: `Python_REPL` with `from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter

c = canvas.Canvas('../data/result.pdf', pagesize=letter)

c.setFont('Helvetica-Bold', 16)
c.drawString(100, 750, 'Malaysia Revenue Report')

c.setFont('Helvetica', 12)
content = "Malaysia's revenue in 2013 was 78."
c.drawString(100, 700, content)

c.save()`


[0m[36;1m[1;3m[0m[32;1m[1;3mI have written a report to `../data/result.pdf`[0m

[1m> Finished chain.[0m
[33;1m[1;3m{'input': "Malaysia's revenue in 2013 was 78.", 'output': 'I have written a report to `../data/result.pdf`'}[0m[32;1m[1;3mI have successfully written the revenue figure for Malaysia in 2013 into a file.

Final Answer: Malaysia's revenue in 2013 has been written into a file located at `../data/result.pdf`.[0m

[1m> Finished chain.[0m


In [63]:
# Display the result dict returned by the top-level agent executor.
a

{'input': 'Write much revenue malaysia made in 2013 into a file',
 'output': "Malaysia's revenue in 2013 has been written into a file located at `../data/result.pdf`."}