In [2]:
!pip install langchain langgraph openai python-dotenv



In [3]:
from typing import Dict, TypedDict, List, Tuple
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema import BaseMessage
from langchain.tools import Tool
from langchain_experimental.agents.agent_toolkits import create_csv_agent
from langchain.agents.agent_types import AgentType
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import ToolInvocation
import os
from dotenv import load_dotenv

In [4]:
# Load environment variables
load_dotenv()

True

In [5]:

# Define the state
class State(TypedDict):
    messages: List[BaseMessage]
    query: str
    current_csv: str
    results: Dict[str, str]

In [6]:
# Function to create an agent for a single CSV file
def create_single_csv_agent(file_path):
    return create_csv_agent(
        ChatOpenAI(temperature=0, model="gpt-4o"),
        file_path,
        verbose=True,
        agent_type=AgentType.OPENAI_FUNCTIONS,
        allow_dangerous_code=True
    )

In [7]:
# Function to create tools for multiple CSV files
def create_csv_tools(csv_files):
    tools = []
    for file in csv_files:
        agent = create_single_csv_agent(file)
        tool = Tool(
            name=f"Query_{file}",
            func=lambda q, file=file, agent=agent: agent.run(q),
            description=f"Useful for querying the {file} dataset"
        )
        tools.append(tool)
    return tools

In [8]:
# Node to decide which CSV to query
def decide_csv(state):
    messages = state['messages']
    query = state['query']
    
    prompt = ChatPromptTemplate.from_messages([
        ("human", "Based on the following query, which CSV file should I query? Options are: {csv_files}. Query: {query}"),
        ("human", "Respond with just the name of the CSV file, nothing else.")
    ])
    
    model = ChatOpenAI(model="gpt-4o",temperature=0)
    response = model.invoke(prompt.format(csv_files=", ".join(csv_files), query=query))
    
    return {"current_csv": response.content}

In [9]:
# Node to query the selected CSV
def query_csv(state):
    current_csv = state['current_csv']
    query = state['query']
    
    tool = next(tool for tool in tools if tool.name == f"Query_{current_csv}")
    result = tool.func(query)
    
    return {"results": {current_csv: result}}

In [10]:
# Node to decide if more querying is needed
def decide_if_done(state):
    results = state['results']
    query = state['query']
    
    prompt = ChatPromptTemplate.from_messages([
        ("human", "Based on the following query and results, do we need to query more CSV files? Query: {query}\nResults: {results}"),
        ("human", "Respond with either 'YES' if we need to query more files, or 'NO' if we have sufficient information.")
    ])
    
    model = ChatOpenAI(model="gpt-4o",temperature=0)
    response = model.invoke(prompt.format(query=query, results=results))
    
    return "decide_csv" if response.content.strip().upper() == "YES" else END

In [11]:
# Node to compile final answer
def compile_answer(state):
    results = state['results']
    query = state['query']
    
    prompt = ChatPromptTemplate.from_messages([
        ("human", "Based on the following query and results from multiple CSV files, provide a comprehensive answer. Query: {query}\nResults: {results}"),
        ("human", "Provide a detailed answer that synthesizes information from all relevant CSV files.")
    ])
    
    model = ChatOpenAI(model="gpt-4o",temperature=0)
    response = model.invoke(prompt.format(query=query, results=results))
    
    return {"messages": state['messages'] + [response]}

In [12]:
# Create the graph
def create_graph():
    workflow = StateGraph(State)
    
    workflow.add_node("decide_csv", decide_csv)
    workflow.add_node("query_csv", query_csv)
    workflow.add_node("decide_if_done", decide_if_done)
    workflow.add_node("compile_answer", compile_answer)
    
    workflow.set_entry_point("decide_csv")
    workflow.add_edge("decide_csv", "query_csv")
    workflow.add_edge("query_csv", "decide_if_done")
    workflow.add_edge("decide_if_done", "decide_csv")
    workflow.add_edge("decide_if_done", "compile_answer")
    
    workflow.add_edge("compile_answer", END)
    
    return workflow.compile()

In [17]:
def main():
    global csv_files, tools
    
    # List of CSV files
    csv_files = ["./data/Obstructive Lung Diseases - Copy.csv","./data/Fib.csv"]  # Add more CSV files as needed
    
    # Create tools for each CSV file
    tools = create_csv_tools(csv_files)
    
    # Create the graph
    graph = create_graph()
    
    while True:
        # Get user query
        query = input("Enter your question (or 'quit' to exit): ")
        
        if query.lower() == 'quit':
            break
        
        # Process the query
        result = graph.invoke({"messages": [], "query": query, "current_csv": "", "results": {}})
        
        # Print the final answer
        print("\nFinal Answer:")
        print(result['messages'][-1].content)

if __name__ == "__main__":
    main()

ValueError: Already found path for node 'decide_if_done'.
For multiple edges, use StateGraph with an annotated state key.

In [19]:
# Define the state
class State(TypedDict):
    messages: List[BaseMessage]
    query: str
    current_csv: str
    results: Dict[str, str]
    continue_querying: str

# Function to create an agent for a single CSV file
def create_single_csv_agent(file_path):
    return create_csv_agent(
        ChatOpenAI(temperature=0, model='gpt-4o'),
        file_path,
        verbose=True,
        agent_type=AgentType.OPENAI_FUNCTIONS,
        allow_dangerous_code=True,
        handle_parsing_errors=True
    )

# Function to create tools for multiple CSV files
def create_csv_tools(csv_files):
    tools = []
    for file in csv_files:
        agent = create_single_csv_agent(file)
        tool = Tool(
            name=f"Query_{file}",
            func=lambda q, file=file, agent=agent: agent.run(q),
            description=f"Useful for querying the {file} dataset"
        )
        tools.append(tool)
    return tools

# Node to decide which CSV to query
def decide_csv(state):
    messages = state['messages']
    query = state['query']
    
    prompt = ChatPromptTemplate.from_messages([
        ("human", "Based on the following query, which CSV file should I query? Options are: {csv_files}. Query: {query}"),
        ("human", "Respond with just the name of the CSV file, nothing else.")
    ])
    
    model = ChatOpenAI(model='gpt-4o', temperature=0)
    response = model.invoke(prompt.format(csv_files=", ".join(csv_files), query=query))
    
    return {"current_csv": response.content}

# Node to query the selected CSV
def query_csv(state):
    current_csv = state['current_csv']
    query = state['query']
    
    tool = next(tool for tool in tools if tool.name == f"Query_{current_csv}")
    result = tool.func(query)
    
    return {"results": {**state['results'], current_csv: result}}

# Node to decide if more querying is needed
def decide_if_done(state):
    results = state['results']
    query = state['query']
    
    prompt = ChatPromptTemplate.from_messages([
        ("human", "Based on the following query and results, do we need to query more CSV files? Query: {query}\nResults: {results}"),
        ("human", "Respond with either 'YES' if we need to query more files, or 'NO' if we have sufficient information.")
    ])
    
    model = ChatOpenAI(model='gpt-4o',temperature=0)
    response = model.invoke(prompt.format(query=query, results=results))
    
    return {"continue_querying": "YES" if response.content.strip().upper() == "YES" else "NO"}


# Node to compile final answer
def compile_answer(state):
    results = state['results']
    query = state['query']
    
    prompt = ChatPromptTemplate.from_messages([
        ("human", "Based on the following query and results from multiple CSV files, provide a comprehensive answer. Query: {query}\nResults: {results}"),
        ("human", "Provide a detailed answer that synthesizes information from all relevant CSV files.")
    ])
    
    model = ChatOpenAI(model='gpt-4o',temperature=0)
    response = model.invoke(prompt.format(query=query, results=results))
    
    return {"messages": state['messages'] + [response]}

# Create the graph
def create_graph():
    workflow = StateGraph(State)
    
    workflow.add_node("decide_csv", decide_csv)
    workflow.add_node("query_csv", query_csv)
    workflow.add_node("decide_if_done", decide_if_done)
    workflow.add_node("compile_answer", compile_answer)
    
    workflow.set_entry_point("decide_csv")
    workflow.add_edge("decide_csv", "query_csv")
    workflow.add_edge("query_csv", "decide_if_done")
    
    workflow.add_conditional_edges(
        "decide_if_done",
        lambda x: x["continue_querying"],
        {
            "YES": "decide_csv",
            "NO": "compile_answer"
        }
    )
        
    workflow.add_edge("compile_answer", END)
    
    return workflow.compile()

# Main function
def main():
    global csv_files, tools
    
    # List of CSV files
    # csv_files = ['./data/Fib.csv', './data/Obstructive Lung Diseases.csv','./data/Distribution Companies.csv','./data/Supermarket companies.csv']  # Add more CSV files as needed
    csv_files = ["./data/Obstructive Lung Diseases - Copy.csv","./data/Fib.csv"] 
    # Create tools for each CSV file
    tools = create_csv_tools(csv_files)
    
    # Create the graph
    graph = create_graph()
    
    while True:
        # Get user query
        query = input("Enter your question (or 'quit' to exit): ")
        
        if query.lower() == 'quit':
            break
        
        # Process the query
        result = graph.invoke({
            "messages": [],
            "query": query,
            "current_csv": "",
            "results": {},
            "continue_querying": "YES"  # Initialize with YES to start the process
        })
        
        # Print the final answer
        print("\nFinal Answer:")
        print(result['messages'][-1].content)

if __name__ == "__main__":
    main()





[1m> Entering new AgentExecutor chain...[0m


ValueError: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Could not parse tool input: {'arguments': "import pandas as pd\n\n# Sample data based on the provided dataframe\ndata = {\n    'Country': ['United States', 'China', 'Japan', 'Russian Federation'],\n    'Patient Population': [3, 3, 2, 1],\n    'Competition': [3, 3, 3, 2]\n}\n\n# Create the dataframe\ndf = pd.DataFrame(data)\n\n# Calculate the weighted score\ndf['Weighted Score'] = 0.5 * df['Patient Population'] + 0.5 * df['Competition']\n\n# Sort the dataframe based on the weighted score in descending order\ndf_sorted = df.sort_values(by='Weighted Score', ascending=False)\n\n# Extract the country list\ncountry_list = df_sorted['Country'].tolist()\ncountry_list", 'name': 'python_repl_ast'} because the `arguments` is not valid JSON.

### Chatgp Adjusted code

In [22]:
# Define the state
class State(TypedDict):
    messages: List[BaseMessage]
    query: str
    current_csv: str
    results: Dict[str, str]
    continue_querying: str

# Function to create an agent for a single CSV file
def create_single_csv_agent(file_path):
    return create_csv_agent(
        ChatOpenAI(temperature=0, model='gpt-4o'),
        file_path,
        verbose=True,
        agent_type=AgentType.OPENAI_FUNCTIONS,
        allow_dangerous_code=True,
        handle_parsing_errors=True
    )

# Function to create tools for multiple CSV files
def create_csv_tools(csv_files):
    tools = []
    for file in csv_files:
        agent = create_single_csv_agent(file)
        tool = Tool(
            name=f"Query_{file}",
            func=lambda q, file=file, agent=agent: agent.run(q),
            description=f"Useful for querying the {file} dataset"
        )
        tools.append(tool)
    return tools

# Node to decide which CSV to query
def decide_csv(state):
    messages = state['messages']
    query = state['query']
    
    prompt = ChatPromptTemplate.from_messages([
        ("human", "Based on the following query, which CSV file should I query? Options are: {csv_files}. Query: {query}"),
        ("human", "Respond with just the name of the CSV file, nothing else.")
    ])
    
    model = ChatOpenAI(model='gpt-4o', temperature=0)
    response = model.invoke(prompt.format(csv_files=", ".join(csv_files), query=query))
    
    return {"current_csv": response.content.strip()}

# Node to query the selected CSV
def query_csv(state):
    current_csv = state['current_csv']
    query = state['query']
    
    tool = next(tool for tool in tools if tool.name == f"Query_{current_csv}")
    result = tool.func(query)
    
    return {"results": {**state['results'], current_csv: result}}

# Node to decide if more querying is needed
def decide_if_done(state):
    results = state['results']
    query = state['query']
    
    prompt = ChatPromptTemplate.from_messages([
        ("human", "Based on the following query and results, do we need to query more CSV files? Query: {query}\nResults: {results}"),
        ("human", "Respond with either 'YES' if we need to query more files, or 'NO' if we have sufficient information.")
    ])
    
    model = ChatOpenAI(model='gpt-4o',temperature=0)
    response = model.invoke(prompt.format(query=query, results=results))
    
    return {"continue_querying": "YES" if response.content.strip().upper() == "YES" else "NO"}

# Node to compile final answer
def compile_answer(state):
    results = state['results']
    query = state['query']
    
    prompt = ChatPromptTemplate.from_messages([
        ("human", "Based on the following query and results from multiple CSV files, provide a comprehensive answer. Query: {query}\nResults: {results}"),
        ("human", "Provide a detailed answer that synthesizes information from all relevant CSV files.")
    ])
    
    model = ChatOpenAI(model='gpt-4o',temperature=0)
    response = model.invoke(prompt.format(query=query, results=results))
    
    return {"messages": state['messages'] + [response]}

# Create the graph
def create_graph():
    workflow = StateGraph(State)
    
    workflow.add_node("decide_csv", decide_csv)
    workflow.add_node("query_csv", query_csv)
    workflow.add_node("decide_if_done", decide_if_done)
    workflow.add_node("compile_answer", compile_answer)
    
    workflow.set_entry_point("decide_csv")
    workflow.add_edge("decide_csv", "query_csv")
    workflow.add_edge("query_csv", "decide_if_done")
    
    workflow.add_conditional_edges(
        "decide_if_done",
        lambda x: x["continue_querying"],
        {
            "YES": "decide_csv",
            "NO": "compile_answer"
        }
    )
        
    workflow.add_edge("compile_answer", END)
    
    return workflow.compile()

# Main function
def main():
    global csv_files, tools
    
    # List of CSV files
    csv_files = ["./data/Obstructive Lung Diseases - Copy.csv","./data/Fib.csv"] 
    # Create tools for each CSV file
    tools = create_csv_tools(csv_files)
    
    # Create the graph
    graph = create_graph()
    
    while True:
        # Get user query
        query = input("Enter your question (or 'quit' to exit): ")
        
        if query.lower() == 'quit':
            break
        
        # Process the query
        result = graph.invoke({
            "messages": [],
            "query": query,
            "current_csv": "",
            "results": {},
            "continue_querying": "YES"  # Initialize with YES to start the process
        })
        
        # Print the final answer
        print ("\n************************************************************************************\n")
        print("\nFinal Answer:")
        print(result['messages'][-1].content)

if __name__ == "__main__":
    main()






[1m> Entering new AgentExecutor chain...[0m


ValueError: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Could not parse tool input: {'arguments': 'query = """\nimport pandas as pd\n\n# Creating the dataframe\ndata = {\n    \'Unnamed: 0\': [\'Country\', \'United States\', \'China\', \'Japan\', \'Russian Federation\'],\n    \'Unnamed: 1\': [\'Driver\', \'Core\', \'Core\', \'Core\', \'Core\'],\n    \'Key Drivers - Key Questions\': [\'Patient Population\', \'3\', \'3\', \'2\', \'1\'],\n    \'Unnamed: 3\': [\'Competition\', \'3\', \'3\', \'3\', \'2\'],\n    \'Unnamed: 4\': [\'TAE/Country\', \'3\', \'3\', \'3\', \'3\'],\n    \'Unnamed: 5\': [\'Country Operations (Historical)\', \'2\', \'1\', \'2\', \'3\']\n}\n\ndf = pd.DataFrame(data)\n\n# Renaming columns for easier access\ndf.columns = [\'Country\', \'Driver\', \'Patient Population\', \'Competition\', \'TAE/Country\', \'Country Operations (Historical)\']\n\n# Dropping the first row which is a header row\ndf = df.drop(0)\n\n# Converting relevant columns to numeric\ndf[\'Patient Population\'] = pd.to_numeric(df[\'Patient Population\'])\ndf[\'Competition\'] = pd.to_numeric(df[\'Competition\'])\n\n# Calculating the weighted score\ndf[\'Weighted Score\'] = 0.5 * df[\'Patient Population\'] + 0.5 * df[\'Competition\']\n\n# Sorting by the weighted score\ndf_sorted = df.sort_values(by=\'Weighted Score\', ascending=False)\n\n# Extracting the country list\ncountry_list = df_sorted[\'Country\'].tolist()\ncountry_list\n"""\n', 'name': 'python_repl_ast'} because the `arguments` is not valid JSON.