In [None]:
import os
import re
import json
import networkx as nx
import nx_arangodb as nxadb
import gradio as gr
from langchain.agents import AgentExecutor, create_react_agent
from adbnx_adapter import ADBNX_Adapter
from arango import ArangoClient
from langchain_openai import ChatOpenAI
from langchain_community.graphs import ArangoGraph
from langchain_community.chains.graph_qa.arangodb import ArangoGraphQAChain
from arango.exceptions import JWTAuthError
from langchain.tools import tool
from langchain_core.prompts import PromptTemplate

@tool
def use_aql(query: str) -> tuple[str, str]:
  """This tool calls the ArangoGraphQAChain object, which enables you to
  translate a Natural Language Query into AQL, execute
  the query, and translate the result back into Natural Language.
  """
  # NOTE: the docstring above is what the `@tool` decorator exposes to the
  # agent LLM as the tool description, so it is kept verbatim. Maintainer
  # documentation lives in these comments instead.
  #
  # Args:
  #   query: natural-language question about the graph.
  # Returns:
  #   A 2-tuple ("use_aql", answer_text). The first element labels the tool
  #   that produced the answer; the second is the chain's natural-language
  #   result converted to `str`.
  # Depends on the module-level `arango_graph` object.

  # Wrap the user question with the graph schema before handing it to the chain.
  arango_query = f"""
    I have a NetworkX Graph called `G_adb`. It has the following schema: {arango_graph.schema}
    I have the following graph analysis query: {query}.
    Please provide the AQL syntax to generate the solution to the query.
    """

  # Set up the QA Chain
  llm = ChatOpenAI(temperature=0, model_name="gpt-4o")

  chain = ArangoGraphQAChain.from_llm(
    llm=llm,
    graph=arango_graph,
    verbose=False,
    allow_dangerous_requests=True
  )

  # Ask the chain to include the generated AQL (but not the raw rows) in its output.
  chain.return_aql_query = True
  chain.return_aql_result = False

  result = chain.invoke(arango_query)

  # Surface the generated AQL in the notebook log (the original fetched it and
  # then dropped it); mirrors how use_networkx prints its generated code.
  print("AQL generated by ArangoGraphQAChain:")
  print(str(result.get("aql_query", "")))

  return "use_aql", str(result["result"])

@tool
def use_networkx(query: str) -> tuple[str, str]:
  """This tool is available to invoke a NetworkX Algorithm on
  the ArangoDB Graph. You are responsible for accepting the
  Natural Language Query, establishing which algorithm needs to
  be executed, executing the algorithm, and translating the results back
  to Natural Language, with respect to the original query.
  If the query (e.g traversals, shortest path, etc.) can be solved using the Arango Query Language, then do not use
  this tool.
  """
  # NOTE: the docstring above is what the `@tool` decorator exposes to the
  # agent LLM as the tool description, so it is kept verbatim. Maintainer
  # documentation lives in these comments instead.
  #
  # Flow: ask the LLM for NetworkX code answering `query`, execute it, and on
  # failure feed the error back to the LLM for up to MAX_ATTEMPTS re-attempts.
  #
  # Args:
  #   query: natural-language graph-analysis question.
  # Returns:
  #   ("use_networkx", "FINAL_RESULT: <value>") on success, otherwise
  #   ("use_networkx", <fixed failure message>).
  # Depends on the module-level `arango_graph` and `G_adb` objects.
  #
  # SECURITY(review): this executes LLM-generated Python with `exec()` and full
  # access to the database handle inside `G_adb`. Only run in a trusted,
  # sandboxed environment.

  MAX_ATTEMPTS = 3  # number of re-attempts allowed after the initial try

  llm = ChatOpenAI(temperature=0, model_name="gpt-4o")

  networkx_query = f"""
    I have a NetworkX Graph called `G_adb`. It has the following schema: {arango_graph.schema}

    I have the following graph analysis query: {query}.

    Generate the Python Code required to answer the query using the `G_adb` object.
    Be very precise on the NetworkX algorithm you select to answer this query. Think step by step.
    Only assume that networkx is installed, and other base python dependencies. Please do not write
    any code that causes the iteration over all the nodes. Do not filter the collections prior to
    executing code. Speed is very important. Use the predetermined NetworkX algorithms to perform the
    queries - try to avoid writing unnecessary code. If a NetworkX function performs a desired algorithm,
    please use that function rather than writing out the code.

    Always set the last variable as `FINAL_RESULT`, which represents the answer to the original query.
    Only provide python code that I can directly execute via `exec()`. Do not provide any instructions.
    Make sure that `FINAL_RESULT` stores a short & consice answer. Avoid setting this variable to a long sequence.
    Your code:
    """

  python_code = ""
  exception_message = ""

  # Attempt 0 is the initial try; attempts 1..MAX_ATTEMPTS are corrections.
  for attempt in range(MAX_ATTEMPTS + 1):
    if attempt > 0:
      # Show the LLM its previous (raw) answer and the error it produced.
      networkx_query = f"""
        I have a NetworkX Graph called `G_adb`. It has the following schema: {arango_graph.schema}
        I have the following graph analysis query: {query}.
        I just generated the following Python code:
        ---
        {python_code}
        ---
        But it generated the following error:
        {exception_message}
        Please generate the corrected code. Again, be very precise on the NetworkX algorithm you select to answer this query.
        Think step by step. Only assume that networkx is installed, and other base python dependencies.
        Please do not write any code that causes the iteration over all the nodes. Filter the collections as much as possible
        in order to create code that executes quickly. Speed is very important.Always set the last
        variable as `FINAL_RESULT`, which represents the answer to the original query. Only provide python code that I can
        directly execute via `exec()`. Do not provide any instructions. Make sure that `FINAL_RESULT` stores a short & consice answer.
        Avoid setting this variable to a long sequence.
        Your code:
        """

    # Get the code and strip any Markdown code fences around it.
    python_code = llm.invoke(networkx_query).content
    cleaned_up_code = re.sub(r"^```python\n|```$", "", python_code, flags=re.MULTILINE).strip()

    if attempt > 0:
      print("Re-attempt #" + str(attempt))
    print("Python code generated by NetworkX:")
    print(cleaned_up_code)

    # Use ONE namespace dict. Passing separate globals and locals makes exec()
    # run the code as if it were a class body, so helper functions and
    # comprehensions in the generated code cannot see its top-level variables.
    namespace = {"G_adb": G_adb, "nx": nx, "nxadb": nxadb}

    try:
      exec(cleaned_up_code, namespace)
    except Exception as e:
      exception_message = e
      print(f"EXEC ERROR: {e}")
      continue

    # Code that runs cleanly but never sets FINAL_RESULT is also a failure;
    # report it back to the LLM instead of raising KeyError.
    if "FINAL_RESULT" not in namespace:
      exception_message = "The code ran without errors but never assigned the `FINAL_RESULT` variable."
      print(f"EXEC ERROR: {exception_message}")
      continue

    FINAL_RESULT = namespace["FINAL_RESULT"]
    return "use_networkx", f"FINAL_RESULT: {FINAL_RESULT}"

  return "use_networkx", "The Python code produced by NetworkX could not be executed properly."

@tool
def use_both(query: str) -> tuple[str, str]:
  """This tool is available to invoke both AQL and a NetworkX Algorithm on
  the ArangoDB Graph. You are responsible for accepting the
  Natural Language Query, establishing which algorithm needs to
  be executed, executing the algorithm, and translating the results back
  to Natural Language, with respect to the original query.
  If the query (e.g traversals, shortest path, etc.) can be solved using the Arango Query Language, then do not use
  this tool. This tool should only be used when both AQL and NetworkX are necessary to solve the query.
  Please note that this tool can only solve queries that require ONE AQL call and ONE NetworkX call. It can have
  two, and only two, steps. If there are more steps necessary, then this tool cannot be used.
  """
  # NOTE: the docstring above is what the `@tool` decorator exposes to the
  # agent LLM as the tool description, so it is kept verbatim. Maintainer
  # documentation lives in these comments instead.
  #
  # Flow: ask the LLM to split `query` into an AQL part and a NetworkX part,
  # decide their order and whether the second depends on the first, then run
  # `use_aql` and `use_networkx` accordingly.
  #
  # Args:
  #   query: natural-language question needing one AQL and one NetworkX step.
  # Returns:
  #   (summary, response): `summary` describes the execution plan; `response`
  #   is the final answer text. When the plan cannot be parsed the result is
  #   ("use_both", <fixed failure message>).

  def response_text(tool_output):
    # Sibling tools return (tool_name, response); keep only the response text.
    if isinstance(tool_output, (tuple, list)) and tool_output:
      return str(tool_output[-1])
    return str(tool_output)

  def as_bool(value):
    # The LLM may emit JSON booleans or the strings "true"/"false";
    # bool("false") would be True, so strings are compared explicitly.
    if isinstance(value, str):
      return value.strip().lower() == "true"
    return bool(value)

  # Break down the query into two steps
  breakdown_prompt = f"""
  This query: {query}
  will use both AQL and NetworkX to complete. You will return the following four variables. Your response will only be
  these four variables. Please do not add anything else to your response. You will not include the thought process behind
  how you obtained the variable. You will only include the four variables in your answer.

  aql_first: boolean
  This boolean will be either true or false. It will be true if the AQL query should be executed first or false if the
  NetworkX call should be execute first. Please only return true or false for the variable aql_first.

  dependent_queries: boolean
  This boolean will be either true or false. It will be true if the second query is dependent on the result of the first query.
  An example would be if the second query needs a value obtained in the first query. It will be false if the second query is
  wholly independent on the first query. An example would be a count of items that is not based on any result obtained in the
  first query. Please only return true or false for the variable dependent_queries.

  aql_query: string
  This string will contain the portion of the query that will be executed with AQL. It will only contain the portion
  of the query pertaining to AQL. Please do not set this variable to the actual AQL to execute. I only need the portion
  of {query} that contains the portion that can be executed via AQL.

  networkx_query: string
  This string will contain the portion of the query that will be executed with NetworkX. It will only contain the portion
  of the query pertaining to NetworkX. Please do not set this variable to the actual NetworkX code to execute. I only need the portion
  of {query} that contains the portion that pertains to NetworkX.

  The response will be in this format, and only this format. It will be a JSON formatted like this:
  {{\"aql_first\": aql_first, \"dependent_queries\" : dependent_queries, \"aql_query\": aql_query, \"networkx_query\": networkx_query}}
  It is very important that your response is formatted as described above. Please do not deviate from this format.
  """

  llm = ChatOpenAI(temperature=0, model_name="gpt-4o")

  result = llm.invoke(breakdown_prompt).content
  # Strip Markdown code fences the model may wrap around the JSON.
  result = re.sub(r"^```json\n|```$", "", result, flags=re.MULTILINE).strip()
  print(result)

  failure_message = "The use_both tool could not obtain the correct answer to your query. Please try to formulate it in a different way."
  required_keys = ("aql_first", "dependent_queries", "aql_query", "networkx_query")

  # Find the four variables
  try:
    result_json = json.loads(result)
  except ValueError as e:
    # json.JSONDecodeError is a ValueError: the model did not return valid JSON.
    print(f"JSON ERROR: {e}")
    return "use_both", failure_message
  print(result_json)

  # Check for the keys actually needed rather than only counting entries.
  if not isinstance(result_json, dict) or any(key not in result_json for key in required_keys):
    return "use_both", failure_message

  aql_first = as_bool(result_json['aql_first'])
  dependent_queries = as_bool(result_json['dependent_queries'])
  aql_query = str(result_json['aql_query'])
  networkx_query = str(result_json['networkx_query'])

  use_both_tool_summary = "use_both\n"
  if aql_first:
    print("AQL will go first? YES")
    use_both_tool_summary = use_both_tool_summary + "AQL will go first? YES\n"
  else:
    print("AQL will go first? NO")
    use_both_tool_summary = use_both_tool_summary + "AQL will go first? NO\n"

  if dependent_queries:
    print("Dependent queries? YES")
    use_both_tool_summary = use_both_tool_summary + "Dependent queries? YES\n"
  else:
    print("Dependent queries? NO")
    use_both_tool_summary = use_both_tool_summary + "Dependent queries? NO\n"

  print("AQL query: " + aql_query)
  print("NetworkX query: " + networkx_query)

  use_both_tool_summary = use_both_tool_summary + "AQL query: " + aql_query + "\n"
  use_both_tool_summary = use_both_tool_summary + "NetworkX query: " + networkx_query

  if aql_first:
    aql_text = response_text(use_aql(aql_query))
    networkx_query_enhanced = networkx_query
    if dependent_queries:
      networkx_query_enhanced = f"""
        Use the result from the AQL query above: {aql_text}
        to run this query: {networkx_query}
        """
    networkx_text = response_text(use_networkx(networkx_query_enhanced))
    final_text = networkx_text
  else:
    networkx_text = response_text(use_networkx(networkx_query))
    aql_query_enhanced = aql_query
    if dependent_queries:
      aql_query_enhanced = f"""
        Use the result from the NetworkX query above: {networkx_text}
        to run this query: {aql_query}
        """
    aql_text = response_text(use_aql(aql_query_enhanced))
    final_text = aql_text

  if dependent_queries:
    # The second step already incorporates the first step's result.
    return use_both_tool_summary, final_text

  # Independent sub-queries: both answers are part of the response, so neither
  # may be dropped.
  return use_both_tool_summary, f"AQL result: {aql_text}\nNetworkX result: {networkx_text}"

@tool
def none_apply(query: str) -> str:
  """This tool is only used when the query does not apply to the Graph or its data, or when the query would
  require more than one AQL query and more than one NetworkX algorithm. These queries would be outside the
  realm of possibility for the program and the user must be made aware of this.
  """
  # The docstring above is the tool description exposed by `@tool`, so it is
  # left untouched. `query` is accepted for signature parity with the other
  # tools but is not used: the reply is always the same fixed pair
  # (tool label, explanation).
  tool_label = "none_apply"
  explanation = "The query either 1) does not pertain to the Graph or its data or 2) requires more than one AQL and more than one NetworkX call to complete. Please try again."
  return tool_label, explanation

# Main program
# Secrets are taken from the environment (or prompted for interactively)
# instead of being stored in the notebook.
# NOTE(review): a database password used to be hardcoded on this line; treat
# it as leaked and rotate it.
from getpass import getpass

# Keep an API key that is already configured; only prompt when none is set.
if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Connection settings: overridable through the environment, defaulting to the
# endpoint and user this notebook was written against.
arango_host = os.environ.get("ARANGO_HOST", "https://e9a126af6ccc.arangodb.cloud:8529")
arango_username = os.environ.get("ARANGO_USERNAME", "root")
arango_password = os.environ.get("ARANGO_PASSWORD") or getpass("ArangoDB password: ")

db = ArangoClient(hosts=arango_host).db(username=arango_username, password=arango_password, verify=True)
# LangChain wrapper around the database; its `.schema` is embedded in the tool prompts.
arango_graph = ArangoGraph(db)

adbnx_adapter = ADBNX_Adapter(db)
graph_name = "SYNTHEA_P100"
# ArangoDB-backed NetworkX graph handed to the generated NetworkX code.
G_adb = nxadb.Graph(name=graph_name, db=db)

# Tools offered to the routing agent, and their names.
tools = [use_aql, use_networkx, use_both, none_apply]
tool_names = ["use_aql", "use_networkx", "use_both", "none_apply"]

def query_graph(query: str):
    """Route a natural-language query to one of the tools and return its output.

    A ReAct agent is asked for a single planning step. Only the *name* of the
    tool it picks is used: the chosen tool is then called with the original
    ``query`` rather than with the agent's own tool input.

    Args:
        query: the user's question, as typed into the Gradio textbox.

    Returns:
        Whatever the selected tool returns (the tools in this file return a
        ``(tool label, response)`` pair). ``none_apply`` is used when the
        agent's step names no tool or names one that is not routed here.
    """

    template = """Answer the following questions as best you can. You have access to the following tools:
    {tools}
    Use the following format:
    Question: the input question you must answer
    Thought: you should always think about what to do
    Action: the action to take, should be one of [{tool_names}]
    Action Input: the input to the action
    Observation: the result of the action
    ... (this Thought/Action/Action Input/Observation can repeat N times)
    Thought: I now know the final answer
    Final Answer: the final answer to the original input question
    Begin!
    Question: {input}
    Thought:{agent_scratchpad}"""

    prompt = PromptTemplate.from_template(template)

    llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
    agent = create_react_agent(llm, tools, prompt)

    # Invoke the bare agent for one step only. The AgentExecutor that used to
    # be built here was never used, so it has been dropped.
    final_state = agent.invoke({
      "input" : query,
      "intermediate_steps": []
    })

    print(final_state)

    # Dispatch on the selected tool name. A step without a `.tool` attribute,
    # or with an unrecognised name, falls back to `none_apply`.
    routes = {
      "use_aql": use_aql,
      "use_networkx": use_networkx,
      "use_both": use_both,
    }
    handler = routes.get(getattr(final_state, "tool", None), none_apply)
    return handler(query)

# Build the Gradio UI: one query textbox in, two textboxes out (the tool label
# and the response returned by `query_graph`), then start the server.
query_box = gr.Textbox(label="Enter your query here based on the SYNTHEA_P100 dataset")
result_boxes = [gr.Textbox(label="Tool used"), gr.Textbox(label="Response")]
demo = gr.Interface(fn=query_graph, inputs=query_box, outputs=result_boxes, title="Arango RX")
demo.launch(debug=True, share=True)

[2025/02/19 23:17:43 +0000] [2514] [INFO] - adbnx_adapter: Instantiated ADBNX_Adapter with database '_system'
INFO:adbnx_adapter:Instantiated ADBNX_Adapter with database '_system'
[23:17:43 +0000] [INFO]: Graph 'SYNTHEA_P100' exists.
INFO:nx_arangodb:Graph 'SYNTHEA_P100' exists.
[23:17:44 +0000] [INFO]: Default node type set to 'allergies'
INFO:nx_arangodb:Default node type set to 'allergies'


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://276582ce6645b69e71.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


tool='use_aql' tool_input='FOR allergy IN allergies COLLECT name = allergy.name WITH COUNT INTO frequency SORT frequency DESC LIMIT 1 RETURN {name, frequency}' log='To determine the most common allergy, I need to analyze data that includes information about various allergies and their frequencies. This type of query can be addressed using the Arango Query Language (AQL) to aggregate and find the most frequent allergy. \n\nAction: use_aql\nAction Input: "FOR allergy IN allergies COLLECT name = allergy.name WITH COUNT INTO frequency SORT frequency DESC LIMIT 1 RETURN {name, frequency}"'
tool='use_both' tool_input="Find the shortest path from patient 'patients/01fd0320-1260-3613-95fb-7703f53e6a08' to patient 'patients/0dec9290-26b0-0bac-1ef0-f8b3c6cf18ba'. Also, find the total number of patients in the graph." log='To answer the question, I need to perform two tasks:\n\n1. Find the shortest path between two patients in the graph.\n2. Find the total number of patients in the graph.\n\nThe 

In [None]:
!pip install networkx nx_arangodb adbnx_adapter python-arango langchain langchain_community langchain_openai gradio

Collecting nx_arangodb
  Downloading nx_arangodb-1.3.0-py3-none-any.whl.metadata (9.3 kB)
Collecting adbnx_adapter
  Downloading adbnx_adapter-5.0.6-py3-none-any.whl.metadata (21 kB)
Collecting python-arango
  Downloading python_arango-8.1.4-py3-none-any.whl.metadata (8.3 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.17-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain_openai
  Downloading langchain_openai-0.3.6-py3-none-any.whl.metadata (2.3 kB)
Collecting gradio
  Downloading gradio-5.16.1-py3-none-any.whl.metadata (16 kB)
Collecting networkx
  Downloading networkx-3.4-py3-none-any.whl.metadata (6.3 kB)
Collecting phenolrs~=0.5 (from nx_arangodb)
  Downloading phenolrs-0.5.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_communi