<a href="https://colab.research.google.com/github/tomasonjo/blogs/blob/master/llm/graphreader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install --quiet neo4j langchain-community langchain-core langchain-openai langchain-text-splitters tiktoken wikipedia

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/52.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.0/52.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/362.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.9/362.9 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.9/318.9 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
import os
import asyncio
import getpass
from langchain_core.prompts import ChatPromptTemplate

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector

from typing import List, Dict

from hashlib import md5

In [3]:
# Local-development fallbacks for the Neo4j connection. `setdefault` keeps any
# value that is already exported in the environment, so real credentials are
# never overwritten by these placeholders.
os.environ.setdefault("NEO4J_URI", "bolt://localhost:7687")
os.environ.setdefault("NEO4J_USERNAME", "neo4j")
os.environ.setdefault("NEO4J_PASSWORD", "password")

# No connection arguments are passed here; presumably Neo4jGraph picks up the
# NEO4J_* variables set above.
graph = Neo4jGraph(refresh_schema=False)

# One uniqueness constraint on `id` for every node label used by the import.
for label in ("Chunk", "AtomicFact", "KeyElement"):
    graph.query(f"CREATE CONSTRAINT IF NOT EXISTS FOR (c:{label}) REQUIRE c.id IS UNIQUE")

[]

In [4]:
# Prompt for the OpenAI API key via getpass and store it in the
# OPENAI_API_KEY environment variable for this process.
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

OpenAI API Key: ········


In [5]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Wikipedia query tool whose wrapper is configured with a 10,000 character
# document content limit.
api_wrapper = WikipediaAPIWrapper(doc_content_chars_max=10000)
wikipedia = WikipediaQueryRun(api_wrapper=api_wrapper)

# Source text used throughout the rest of the notebook.
text = wikipedia.run("Joan of Arc")

In [6]:
# Number of characters in the retrieved text.
len(text)

6581

In [7]:
from langchain_text_splitters import TokenTextSplitter

In [8]:
# System prompt for graph construction: instructs the model to extract key
# elements and atomic facts from a text. The text is sent to the LLM verbatim,
# so every character of it is part of the program's behavior.
construction_system = """
You are now an intelligent assistant tasked with meticulously extracting both key elements and
atomic facts from a long text.
1. Key Elements: The essential nouns (e.g., characters, times, events, places, numbers), verbs (e.g.,
actions), and adjectives (e.g., states, feelings) that are pivotal to the text’s narrative.
2. Atomic Facts: The smallest, indivisible facts, presented as concise sentences. These include
propositions, theories, existences, concepts, and implicit elements like logic, causality, event
sequences, interpersonal relationships, timelines, etc.
Requirements:
#####
1. Ensure that all identified key elements are reflected within the corresponding atomic facts.
2. You should extract key elements and atomic facts comprehensively, especially those that are
important and potentially query-worthy and do not leave out details.
3. Whenever applicable, replace pronouns with their specific noun counterparts (e.g., change I, He,
She to actual names).
4. Ensure that the key elements and atomic facts you extract are presented in the same language as
the original text (e.g., English or Chinese).
"""

In [9]:
# Extraction prompt: the system message carries the extraction instructions,
# the human message injects the text to process through `{input}`.
construction_prompt = ChatPromptTemplate.from_messages([
    ("system", construction_system),
    ("human", "Use the given format to extract information from the following input: {input}"),
])

In [94]:
# Structured-output schema for one extracted fact: the fact sentence together
# with the key elements it involves. The Field descriptions closely follow the
# definitions given in `construction_system`.
class AtomicFact(BaseModel):
    # Key elements (nouns, verbs, adjectives) belonging to this atomic fact.
    key_elements: List[str] = Field(description="""The essential nouns (e.g., characters, times, events, places, numbers), verbs (e.g.,
actions), and adjectives (e.g., states, feelings) that are pivotal to the atomic fact's narrative.""")
    # The fact itself, as a concise sentence.
    atomic_fact: str = Field(description="""The smallest, indivisible facts, presented as concise sentences. These include
propositions, theories, existences, concepts, and implicit elements like logic, causality, event
sequences, interpersonal relationships, timelines, etc.""")

# Top-level structured-output schema: the list of atomic facts extracted from
# one input text.
class Extraction(BaseModel):
    # All atomic facts found in the input.
    atomic_facts: List[AtomicFact] = Field(description="List of atomic facts")


# Chat model shared by every chain in this notebook.
model = ChatOpenAI(model="gpt-4-turbo", temperature=0.3)
# Variant of the model configured for structured output using the Extraction schema.
structured_llm = model.with_structured_output(Extraction)

In [95]:
# Extraction pipeline: construction prompt piped into the structured-output model.
construction_chain = construction_prompt | structured_llm

In [96]:
# Smoke test: run the extraction chain once on the whole retrieved text.
result = construction_chain.invoke({"input":text})
# Uncomment to inspect the extraction result:
#print(result)

In [97]:
# Cypher import statement, run once per batch with `$data` as a list of rows.
# For every row it:
#   1. MERGEs a Chunk by `chunk_id` and sets its text, index and document_name;
#   2. MERGEs one AtomicFact per entry of `row.atomic_facts` (keyed by `af.id`)
#      and links it to the chunk with HAS_ATOMIC_FACT;
#   3. MERGEs one KeyElement per entry of `af.key_elements` (the element string
#      itself is the id) and links it to the fact with HAS_KEY_ELEMENT.
import_query = """
UNWIND $data AS row
MERGE (c:Chunk {id: row.chunk_id})
SET c.text = row.chunk_text,
    c.index = row.index,
    c.document_name = row.document_name
WITH c, row
UNWIND row.atomic_facts AS af
MERGE (a:AtomicFact {id: af.id})
SET a.text = af.atomic_fact
MERGE (c)-[:HAS_ATOMIC_FACT]->(a)
WITH c, a, af
UNWIND af.key_elements AS ke
MERGE (k:KeyElement {id: ke})
MERGE (a)-[:HAS_KEY_ELEMENT]->(k)
"""

def encode_md5(text):
    """Return the hexadecimal MD5 digest of `text` encoded as UTF-8."""
    digest = md5()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()

In [98]:
# Paper used 2k token size
async def process_document(text, document_name, chunk_size=2000, chunk_overlap=200):
    """Chunk a document, extract atomic facts per chunk, and import them into Neo4j.

    Args:
        text: Full document text.
        document_name: Name stored on every Chunk node of this document; the
            NEXT-linking query selects chunks by this value.
        chunk_size: Chunk size passed to TokenTextSplitter.
        chunk_overlap: Chunk overlap passed to TokenTextSplitter.

    Side effects:
        Prints the number of chunks, runs `import_query` with one row per
        chunk, then links the document's chunks with NEXT in `index` order.
    """
    text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = text_splitter.split_text(text)
    print(f"Total text chunks: {len(texts)}")
    # Extract from all chunks concurrently; gather returns results in input order.
    results = await asyncio.gather(
        *(construction_chain.ainvoke({"input": chunk_text}) for chunk_text in texts)
    )
    docs = [el.dict() for el in results]
    for index, (chunk_text, doc) in enumerate(zip(texts, docs)):
        doc['chunk_id'] = encode_md5(chunk_text)
        doc['chunk_text'] = chunk_text
        doc['index'] = index
        # import_query reads row.document_name; without it the property stays
        # null and the NEXT-linking query below finds no chunks to link.
        doc['document_name'] = document_name
        for af in doc["atomic_facts"]:
            af["id"] = encode_md5(af["atomic_fact"])
    graph.query(import_query,
            params={"data": docs})
    # NOTE(review): calling this again for the same document runs
    # apoc.nodes.link again — confirm whether that duplicates NEXT relationships.
    graph.query("""MATCH (c:Chunk) WHERE c.document_name = $document_name
WITH c ORDER BY c.index WITH collect(c) AS nodes CALL apoc.nodes.link(nodes, 'NEXT')""",
           params={"document_name":document_name})

In [99]:
# Import the retrieved text with 500-token chunks and 100 tokens of overlap.
await process_document(text, "Joan of Arc", chunk_size=500, chunk_overlap=100)

Total text chunks: 4


# Agent part

In [100]:
# System prompt for the planning step: asks the model for a step-by-step plan
# describing what information is needed to answer the question. Sent to the
# LLM verbatim.
rational_plan_system = """As an intelligent assistant, your primary objective is to answer the question by gathering
supporting facts from a given article. To facilitate this objective, the first step is to make
a rational plan based on the question. This plan should outline the step-by-step process to
resolve the question and specify the key information required to formulate a comprehensive answer.
Example:
#####
User: Who had a longer tennis career, Danny or Alice?
Assistant: In order to answer this question, we first need to find the length of Danny’s
and Alice’s tennis careers, such as the start and retirement of their careers, and then compare the
two.
#####
Please strictly follow the above format. Let’s begin."""

# Planning prompt: system instructions plus the raw question as the human turn.
rational_prompt = ChatPromptTemplate.from_messages([
    ("system", rational_plan_system),
    ("human", "{input}"),
])

# Planning pipeline: prompt -> chat model -> string output parser.
rational_chain = rational_prompt | model | StrOutputParser()

In [101]:
# System prompt for initial node selection: the model receives the question,
# the rational plan and a list of candidate key elements, and must score the
# most relevant starting nodes. Sent to the LLM verbatim.
initial_node_system = """
As an intelligent assistant, your primary objective is to answer questions based on information
contained within a text. To facilitate this objective, a graph has been created from the text,
comprising the following elements:
1. Text Chunks: Chunks of the original text.
2. Atomic Facts: Smallest, indivisible truths extracted from text chunks.
3. Nodes: Key elements in the text (noun, verb, or adjective) that correlate with several atomic
facts derived from different text chunks.
Your current task is to check a list of nodes, with the objective of selecting the most relevant initial nodes from the graph to efficiently answer the question. You are given the question, the
rational plan, and a list of node key elements. These initial nodes are crucial because they are the
starting point for searching for relevant information.
Requirements:
#####
1. Once you have selected a starting node, assess its relevance to the potential answer by assigning
a score between 0 and 100. A score of 100 implies a high likelihood of relevance to the answer,
whereas a score of 0 suggests minimal relevance.
2. Present each chosen starting node in a separate line, accompanied by its relevance score. Format
each line as follows: Node: [Key Element of Node], Score: [Relevance Score].
3. Please select at least 10 starting nodes, ensuring they are non-repetitive and diverse.
4. In the user’s input, each line constitutes a node. When selecting the starting node, please make
your choice from those provided, and refrain from fabricating your own. The nodes you output
must correspond exactly to the nodes given by the user, with identical wording.
Finally, I emphasize again that you need to select the starting node from the given Nodes, and
it must be consistent with the words of the node you selected. Please strictly follow the above
format. Let’s begin.
"""

# Node-selection prompt: the human turn supplies the question, the plan and the
# candidate node list on three labelled lines.
initial_node_prompt = ChatPromptTemplate.from_messages([
    ("system", initial_node_system),
    (
        "human",
        "Question: {question}\n"
        "Plan: {rational_plan}\n"
        "Nodes: {nodes}",
    ),
])

# Structured-output schema for one selected starting node and its relevance score.
class Node(BaseModel):
    # Key element text identifying the node.
    key_element: str = Field(description="""Key element or name of a relevant node""")
    # Relevance score; the description asks the model for a value between 0 and 100.
    score: int = Field(description="""Relevance to the potential answer by assigning
a score between 0 and 100. A score of 100 implies a high likelihood of relevance to the answer,
whereas a score of 0 suggests minimal relevance.""")

# Structured-output wrapper: the list of scored starting nodes.
class InitialNodes(BaseModel):
    # Scored nodes chosen by the model.
    initial_nodes: List[Node] = Field(description="List of relevant nodes to the question and plan")

# Node-selection pipeline: prompt piped into the model configured for InitialNodes output.
initial_nodes_chain = initial_node_prompt | model.with_structured_output(InitialNodes) 

In [102]:
# Passing every node to the LLM would not scale, so candidate nodes are
# retrieved through a vector index over the KeyElement nodes instead
# (the retrieval helper asks for the 50 most similar ones).
key_element_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

nodes_vector = Neo4jVector.from_existing_graph(
    embedding=key_element_embeddings,
    node_label="KeyElement",
    text_node_properties=["id"],
    embedding_node_property="embedding",
    index_name="keyelements",
    retrieval_query="RETURN node.id AS text, score, {} AS metadata",
)



In [103]:
def get_potential_nodes(question: str, k: int = 50) -> List[str]:
    """Return the key elements most similar to `question`.

    Args:
        question: Text used as the similarity-search query.
        k: Number of results requested from the vector store (default 50,
            the value that was previously hard-coded).

    Returns:
        The `page_content` of each search hit, in the order returned by
        `nodes_vector.similarity_search`.
    """
    data = nodes_vector.similarity_search(question, k=k)
    return [el.page_content for el in data]

In [104]:
# Example inputs for the step-by-step walkthrough. `question` and
# `rational_plan` are defined here so this cell and the next one run on a
# fresh kernel; the agent-loop cell further down assigns both names again.
question = "Did Joan of Arc lose any battles?"
rational_plan = rational_chain.invoke({"input": question})

nodes = get_potential_nodes(question)
print(nodes)

['Joan of Arc', 'The Passion of Joan of Arc', 'siege of Orléans', 'Loire Campaign', 'Hundred Years War', "Hundred Years' War", 'Joan', 'French army', 'Charles VII', 'King of France', 'Burgundian troops', 'French morale', 'French nationalists', 'burned at the stake', 'Archbishop of Paris', 'Saint Margaret', 'Burgundians', 'French Revolution', 'Saint Catherine', 'besieged', 'siege', 'northeast France', 'French', 'French nation', 'Bishop Pierre Cauchon', 'English domination', 'Compiègne', 'military leader', 'April 1429', 'Rouen prison', 'France', "court's faith", 'canonized', 'early feminist', 'martyr', 'Renée Jeanne Falconetti', 'Paris', 'patron saint', 'medieval architecture', 'Faye Dunaway', '30 May 1431', 'iègne', 'historical icons', 'inquisitorial court', 'Milla Jovovich', 'unsuccessful', 'Roman Catholic Church', 'La Charité', 'nineteen', 'silent historical film']


In [105]:
# Let the LLM score the candidate nodes, then keep the five highest-scoring
# ones (paper uses 5 initial nodes).
initial_nodes = initial_nodes_chain.invoke(
    {"question": question, "rational_plan": rational_plan, "nodes": nodes}
)
ranked_nodes = sorted(initial_nodes.initial_nodes, key=lambda node: node.score, reverse=True)
atomic_facts_check_queue = ranked_nodes[:5]
print(atomic_facts_check_queue)

[Node(key_element='Joan of Arc', score=100), Node(key_element='military leader', score=95), Node(key_element='siege of Orléans', score=90), Node(key_element='Loire Campaign', score=85), Node(key_element='unsuccessful', score=85)]


In [106]:
def get_atomic_facts(key_elements: List[str]) -> List[Dict[str, str]]:
    """Fetch the atomic facts linked to the given key elements.

    Returns the rows produced by the query: distinct pairs of the owning
    chunk's id (`chunk_id`) and the fact text (`text`).
    """
    cypher = """
    MATCH (k:KeyElement)<-[:HAS_KEY_ELEMENT]-(fact)<-[:HAS_ATOMIC_FACT]-(chunk)
    WHERE k.id IN $key_elements
    RETURN distinct chunk.id AS chunk_id, fact.text AS text
    """
    query_params = {"key_elements": key_elements}
    return graph.query(cypher, params=query_params)

In [107]:
# System prompt for the atomic-fact check: given the atomic facts of the
# current nodes, the model chooses between read_chunk(List[ID]) and
# stop_and_read_neighbor(). Sent to the LLM verbatim.
atomic_fact_check_system = """As an intelligent assistant, your primary objective is to answer questions based on information
contained within a text. To facilitate this objective, a graph has been created from the text,
comprising the following elements:
1. Text Chunks: Chunks of the original text.
2. Atomic Facts: Smallest, indivisible truths extracted from text chunks.
3. Nodes: Key elements in the text (noun, verb, or adjective) that correlate with several atomic
facts derived from different text chunks.
Your current task is to check a node and its associated atomic facts, with the objective of
determining whether to proceed with reviewing the text chunk corresponding to these atomic facts.
Given the question, the rational plan, previous actions, notebook content, and the current node’s
atomic facts and their corresponding chunk IDs, you have the following Action Options:
#####
1. read_chunk(List[ID]): Choose this action if you believe that a text chunk linked to an atomic
fact may hold the necessary information to answer the question. This will allow you to access
more complete and detailed information.
2. stop_and_read_neighbor(): Choose this action if you ascertain that all text chunks lack valuable
information.
#####
Strategy:
#####
1. Reflect on previous actions and prevent redundant revisiting nodes or chunks.
2. You can choose to read multiple text chunks at the same time.
3. Atomic facts only cover part of the information in the text chunk, so even if you feel that the
atomic facts are slightly relevant to the question, please try to read the text chunk to get more
complete information.
#####
Finally, it is emphasized again that even if the atomic fact is only slightly relevant to the
question, you should still look at the text chunk to avoid missing information. You should only
choose stop_and_read_neighbor() when you are very sure that the given text chunk is irrelevant to
the question. Please strictly follow the above format. Let’s begin.
"""

# Structured-output schema for the atomic-fact check step.
class AtomicFactOutput(BaseModel):
    # Notebook text after merging in findings from the current atomic facts.
    updated_notebook: str = Field(description="""First, combine your current notebook with new insights and findings about
the question from current atomic facts, creating a more complete version of the notebook that
contains more valid information.""")
    # The model's reasoning about which action to take next.
    rational_next_action: str = Field(description="""Based on the given question, the rational plan, previous actions, and
notebook content, analyze how to choose the next action.""")
    # Action string written as a function call, e.g. read_chunk([...]) or
    # stop_and_read_neighbor(); the agent loop passes it to parse_function.
    chosen_action: str = Field(description="""1. read_chunk(List[ID]): Choose this action if you believe that a text chunk linked to an atomic
fact may hold the necessary information to answer the question. This will allow you to access
more complete and detailed information.
2. stop_and_read_neighbor(): Choose this action if you ascertain that all text chunks lack valuable
information.""")

# Atomic-fact check prompt: the human turn lists the question, plan, action
# history, notebook and the atomic facts on labelled lines.
atomic_fact_check_prompt = ChatPromptTemplate.from_messages([
    ("system", atomic_fact_check_system),
    (
        "human",
        "Question: {question}\n"
        "Plan: {rational_plan}\n"
        "Previous actions: {previous_actions}\n"
        "Notebook: {notebook}\n"
        "Atomic facts: {atomic_facts}",
    ),
])

# Pipeline for the atomic-fact check, returning an AtomicFactOutput.
atomic_fact_chain = atomic_fact_check_prompt | model.with_structured_output(AtomicFactOutput)

In [108]:
import re
import ast

def parse_function(input_str):
    """Parse an action string such as ``read_chunk(['id'])`` into name and arguments.

    Args:
        input_str: Text of the form ``name`` or ``name(args)``. Surrounding
            whitespace is ignored and the argument list may span several lines.

    Returns:
        ``{'function_name': str, 'arguments': list}`` on success, where
        ``arguments`` holds the literal-evaluated arguments (or the raw,
        stripped argument text as a single element when it is not a valid
        Python literal). ``None`` when the input is not a string or does not
        start with an identifier.
    """
    if not isinstance(input_str, str):
        return None

    # Regular expression to capture the function name and arguments.
    # DOTALL lets the argument group span line breaks.
    pattern = r'(\w+)(?:\((.*)\))?'
    match = re.match(pattern, input_str.strip(), re.DOTALL)
    if not match:
        return None

    function_name, raw_arguments = match.groups()
    arguments = []
    if raw_arguments:
        try:
            # Wrap in parentheses so "a, b" evaluates to a tuple of arguments.
            parsed_args = ast.literal_eval(f'({raw_arguments})')
            # A single argument does not produce a tuple; normalize to a list.
            arguments = list(parsed_args) if isinstance(parsed_args, tuple) else [parsed_args]
        except (ValueError, SyntaxError, TypeError):
            # Not a Python literal (e.g. an unquoted node name): keep the raw text.
            arguments = [raw_arguments.strip()]

    return {
        'function_name': function_name,
        'arguments': arguments
    }

In [109]:
def get_chunks(chunk_ids: List[str]) -> List[Dict[str, str]]:
    """Fetch the Chunk nodes whose id is in `chunk_ids`.

    Returns the query rows: distinct pairs of `chunk_id` and chunk `text`.
    """
    cypher = """
    MATCH (c:Chunk)
    WHERE c.id IN $chunk_ids
    RETURN distinct c.id AS chunk_id, c.text AS text
    """
    query_params = {"chunk_ids": chunk_ids}
    return graph.query(cypher, params=query_params)

In [110]:
def get_neighbors(nodes):
    """Return key elements that share an atomic fact with any of `nodes`.

    Args:
        nodes: List of KeyElement ids to start from.

    Returns:
        The query rows, each with `key_element` (neighbor id) and `count`
        (number of matched paths to it), ordered by `count` descending and
        limited to 25. Elements already in `nodes` are excluded.
    """
    return graph.query("""
    MATCH (k:KeyElement)<-[:HAS_KEY_ELEMENT]-()-[:HAS_KEY_ELEMENT]->(neighbor)
    WHERE k.id IN $nodes AND NOT neighbor.id IN $nodes
    RETURN neighbor.id AS key_element, count(*) AS count
    ORDER BY count DESC LIMIT 25
    """, params={"nodes": nodes})

In [111]:
# System prompt for the chunk-reading step: after reading one chunk the model
# chooses among search_more(), read_previous_chunk(), read_subsequent_chunk()
# and termination(). Sent to the LLM verbatim.
chunk_read_system_prompt = """As an intelligent assistant, your primary objective is to answer questions based on information
within a text. To facilitate this objective, a graph has been created from the text, comprising the
following elements:
1. Text Chunks: Segments of the original text.
2. Atomic Facts: Smallest, indivisible truths extracted from text chunks.
3. Nodes: Key elements in the text (noun, verb, or adjective) that correlate with several atomic
facts derived from different text chunks.
Your current task is to assess a specific text chunk and determine whether the available information
suffices to answer the question. Given the question, rational plan, previous actions, notebook
content, and the current text chunk, you have the following Action Options:
#####
1. search_more(): Choose this action if you think that the essential information necessary to
answer the question is still lacking.
2. read_previous_chunk(): Choose this action if you feel that the previous text chunk contains
valuable information for answering the question.
3. read_subsequent_chunk(): Choose this action if you feel that the subsequent text chunk contains
valuable information for answering the question.
4. termination(): Choose this action if you believe that the information you have currently obtained
is enough to answer the question. This will allow you to summarize the gathered information and
provide a final answer.
#####
Strategy:
#####
1. Reflect on previous actions and prevent redundant revisiting of nodes or chunks.
2. You can only choose one action.
#####
Please strictly follow the above format. Let’s begin
"""

# Structured-output schema for the chunk-reading step.
class ChunkOutput(BaseModel):
    # Notebook text after merging in findings from the current chunk.
    updated_notebook: str = Field(description="""First, combine your previous notes with new insights and findings about the
question from current text chunks, creating a more complete version of the notebook that contains
more valid information.""")
    # The model's reasoning about the next action. Note the field is named
    # `rational_next_move` here, unlike `rational_next_action` on the other schemas.
    rational_next_move: str = Field(description="""Based on the given question, rational plan, previous actions, and
notebook content, analyze how to choose the next action.""")
    # Action string written as a function call; the agent loop passes it to parse_function.
    chosen_action: str = Field(description="""1. search_more(): Choose this action if you think that the essential information necessary to
answer the question is still lacking.
2. read_previous_chunk(): Choose this action if you feel that the previous text chunk contains
valuable information for answering the question.
3. read_subsequent_chunk(): Choose this action if you feel that the subsequent text chunk contains
valuable information for answering the question.
4. termination(): Choose this action if you believe that the information you have currently obtained
is enough to answer the question. This will allow you to summarize the gathered information and
provide a final answer.""")

# Chunk-reading prompt: the human turn lists the question, plan, action
# history, notebook and the chunk being read on labelled lines.
chunk_read_prompt = ChatPromptTemplate.from_messages([
    ("system", chunk_read_system_prompt),
    (
        "human",
        "Question: {question}\n"
        "Plan: {rational_plan}\n"
        "Previous actions: {previous_actions}\n"
        "Notebook: {notebook}\n"
        "Chunk: {chunk}",
    ),
])

# Pipeline for the chunk-reading step, returning a ChunkOutput.
chunk_read_chain = chunk_read_prompt | model.with_structured_output(ChunkOutput)

In [112]:
# System prompt for neighbor selection: given the neighbors of the current
# node, the model chooses between read_neighbor_node(key element) and
# termination(). Sent to the LLM verbatim.
neighbor_select_system_prompt = """
As an intelligent assistant, your primary objective is to answer questions based on information
within a text. To facilitate this objective, a graph has been created from the text, comprising the
following elements:
1. Text Chunks: Segments of the original text.
2. Atomic Facts: Smallest, indivisible truths extracted from text chunks.
3. Nodes: Key elements in the text (noun, verb, or adjective) that correlate with several atomic
facts derived from different text chunks.
Your current task is to assess all neighboring nodes of the current node, with the objective of determining whether to proceed to the next neighboring node. Given the question, rational
plan, previous actions, notebook content, and the neighbors of the current node, you have the
following Action Options:
#####
1. read_neighbor_node(key element of node): Choose this action if you believe that any of the
neighboring nodes may contain information relevant to the question. Note that you should focus
on one neighbor node at a time.
2. termination(): Choose this action if you believe that none of the neighboring nodes possess
information that could answer the question.
#####
Strategy:
#####
1. Reflect on previous actions and prevent redundant revisiting of nodes or chunks.
2. You can only choose one action. This means that you can choose to read only one neighbor
node or choose to terminate.
#####
Please strictly follow the above format. Let’s begin.
"""

# Structured-output schema for the neighbor-selection step. Unlike the other
# step schemas it has no `updated_notebook` field.
class NeighborOutput(BaseModel):
    # The model's reasoning about which action to take next.
    rational_next_action: str = Field(description="""Based on the given question, rational plan, previous actions, and
notebook content, analyze how to choose the next action.""")
    # Action string written as a function call; the agent loop passes it to parse_function.
    chosen_action: str = Field(description="""You have the following Action Options:
1. read_neighbor_node(key element of node): Choose this action if you believe that any of the
neighboring nodes may contain information relevant to the question. Note that you should focus
on one neighbor node at a time.
2. termination(): Choose this action if you believe that none of the neighboring nodes possess
information that could answer the question.""")

# Neighbor-selection prompt: the human turn lists the question, plan, action
# history, notebook and the neighbor nodes on labelled lines.
neighbor_select_prompt = ChatPromptTemplate.from_messages([
    ("system", neighbor_select_system_prompt),
    (
        "human",
        "Question: {question}\n"
        "Plan: {rational_plan}\n"
        "Previous actions: {previous_actions}\n"
        "Notebook: {notebook}\n"
        "Neighbor nodes: {nodes}",
    ),
])

# Pipeline for the neighbor-selection step, returning a NeighborOutput.
neighbor_select_chain = neighbor_select_prompt | model.with_structured_output(NeighborOutput)

In [113]:
def get_subsequent_chunk_id(chunk):
    """Return the id of the chunk that follows `chunk` via a NEXT relationship.

    Args:
        chunk: Id of the current Chunk node.

    Returns:
        The id of the next chunk, or None when the query returns no row.
    """
    # `$id` must be bound explicitly; the query cannot run without it.
    data = graph.query("""
    MATCH (c:Chunk)-[:NEXT]->(next)
    WHERE c.id = $id
    RETURN next.id AS next
    """, params={"id": chunk})
    return data[0]["next"] if data else None

def get_previous_chunk_id(chunk):
    """Return the id of the chunk that precedes `chunk` via a NEXT relationship.

    Args:
        chunk: Id of the current Chunk node.

    Returns:
        The id of the previous chunk, or None when the query returns no row.
    """
    # `$id` must be bound explicitly; the query cannot run without it.
    data = graph.query("""
    MATCH (c:Chunk)<-[:NEXT]-(previous)
    WHERE c.id = $id
    RETURN previous.id AS previous
    """, params={"id": chunk})
    return data[0]["previous"] if data else None

In [114]:
question = "Did Joan of Arc lose any battles?"

# Exploration state used by the loop below.
notebook = ""             # running notes, rewritten by the LLM at each step
previous_actions = []     # raw action strings chosen by the LLM so far
chunks_check_queue = []   # chunk ids waiting to be read

rational_plan = rational_chain.invoke({"input": question})
print(f"Rational plan is:{rational_plan}")
# Retrieve the candidate nodes for this question here, so the cell does not
# depend on a `nodes` value left in the kernel by another cell.
nodes = get_potential_nodes(question)
initial_nodes = initial_nodes_chain.invoke({"question":question, "rational_plan":rational_plan, "nodes":nodes})
# paper uses 5 initial nodes
atomic_facts_check_queue = [el.key_element for el in sorted(initial_nodes.initial_nodes, key=lambda node: node.score, reverse=True)][:5]
print(f"Initial node selection:{atomic_facts_check_queue}")

# Key elements whose atomic facts were inspected most recently. Kept separately
# because the queue itself is reset before the neighbor lookup needs them.
current_nodes = []

# Run until an explicit termination() or until there is nothing left to
# explore; an unconditional loop would spin forever once both queues are empty.
while atomic_facts_check_queue or chunks_check_queue:
    if atomic_facts_check_queue:
        # Atomic facts are processed in a single LLM call
        current_nodes = atomic_facts_check_queue
        atomic_facts = get_atomic_facts(current_nodes)
        print(f"Reading atomic facts about: {current_nodes}")
        atomic_facts_results = atomic_fact_chain.invoke({"question":question, "rational_plan": rational_plan,
                                    "notebook": notebook, "atomic_facts": atomic_facts,
                                   "previous_actions": previous_actions})
        # Reset atomic facts queue
        atomic_facts_check_queue = []
        notebook = atomic_facts_results.updated_notebook
        # Record the decision so later prompts can see what was already done.
        previous_actions.append(atomic_facts_results.chosen_action)
        print(f"Rational for next action after atomic check: {atomic_facts_results.rational_next_action}")
        # parse_function returns None for unparseable text; treat it as "no action".
        chosen_action = parse_function(atomic_facts_results.chosen_action) or {}
        print(f"Function signature: {chosen_action}")
        function_name = chosen_action.get("function_name")
        arguments = chosen_action.get("arguments") or []
        if function_name == "stop_and_read_neighbor":
            neighbors = get_neighbors(current_nodes)
            # Pass neighbors to chain
            read_neighbor_results = neighbor_select_chain.invoke({
                "question": question, "rational_plan": rational_plan,
                "notebook": notebook, "nodes": neighbors,
                "previous_actions": previous_actions,
            })
            previous_actions.append(read_neighbor_results.chosen_action)
            # NeighborOutput names this field `rational_next_action`.
            print(f"Rational for next action after reading neighbors: {read_neighbor_results.rational_next_action}")
            chosen_action = parse_function(read_neighbor_results.chosen_action) or {}
            print(f"Function signature: {chosen_action}")
            function_name = chosen_action.get("function_name")
            arguments = chosen_action.get("arguments") or []
            if function_name == 'termination':
                break
            elif function_name == 'read_neighbor_node' and arguments:
                atomic_facts_check_queue = [arguments[0]]
                continue

        elif function_name == "read_chunk":
            # Accept read_chunk(['a', 'b']), read_chunk('a') and
            # read_chunk('a', 'b') by flattening one level of nesting.
            chunks_check_queue = []
            for argument in arguments:
                if isinstance(argument, (list, tuple)):
                    chunks_check_queue.extend(argument)
                else:
                    chunks_check_queue.append(argument)
            continue
    if chunks_check_queue:
        # Read the most recently queued chunk (list.pop() takes from the end),
        # so a neighbouring chunk requested below is read right away.
        chunk_id = chunks_check_queue.pop()
        print(f"Reading chunk id: {chunk_id}")
        chunks_text = get_chunks([chunk_id])
        read_chunk_results = chunk_read_chain.invoke({"question":question, "rational_plan": rational_plan,
                                    "notebook": notebook, "chunk": chunks_text,
                                   "previous_actions": previous_actions})
        print(read_chunk_results)
        notebook = read_chunk_results.updated_notebook
        previous_actions.append(read_chunk_results.chosen_action)
        print(f"Rational for next action after reading chunks: {read_chunk_results.rational_next_move}")
        chosen_action = parse_function(read_chunk_results.chosen_action) or {}
        print(f"Function signature: {chosen_action}")
        function_name = chosen_action.get('function_name')
        if function_name == 'read_subsequent_chunk':
            subsequent_id = get_subsequent_chunk_id(chunk_id)
            # Nothing to queue when there is no following chunk.
            if subsequent_id:
                chunks_check_queue.append(subsequent_id)
        elif function_name == 'read_previous_chunk':
            previous_id = get_previous_chunk_id(chunk_id)
            # Nothing to queue when there is no preceding chunk.
            if previous_id:
                chunks_check_queue.append(previous_id)
        elif function_name == 'termination':
            break
        # 'search_more' (or an unrecognized action): move on to the next
        # queued chunk. When none is left the loop condition ends the
        # exploration. TODO: explore neighbor nodes in that case instead.


Rational plan is:In order to answer this question, we first need to identify the key battles that Joan of Arc participated in during her military career. Then, we need to determine the outcomes of these battles to see if there were any that she lost.
Initial node selection:['Joan of Arc', 'unsuccessful', 'siege of Orléans', 'Loire Campaign', 'Compiègne']
Reading atomic facts about: ['Joan of Arc', 'unsuccessful', 'siege of Orléans', 'Loire Campaign', 'Compiègne']
Rational for next action after atomic check: To answer the question about whether Joan of Arc lost any battles, the provided atomic facts indicate that she did experience defeats in some battles, such as the unsuccessful sieges of Paris and La Charité. Therefore, further exploration of these specific battles could provide more detailed information about the circumstances and outcomes of these defeats.
Function signature: {'function_name': 'read_chunk', 'arguments': [['3feaa3f3137a5f839bfc207bc5c2ffdb']]}
updated_notebook='Joan

In [115]:
# Show the notes accumulated during exploration.
print(notebook)

Joan of Arc participated in several battles during her military career. She played a significant role in the siege of Orléans, which was a victory for the French. She also encouraged the French to pursue the English during the Loire Campaign, leading to another victory at Patay. However, Joan of Arc also experienced defeats, such as the unsuccessful siege of Paris in September 1429 and the failed siege of La Charité in November. These defeats reduced the court's faith in her. Additionally, in early 1430, she organized a company of volunteers to relieve Compiègne, but she was captured by Burgundian troops in May 1430.


In [116]:
# System prompt for the final answer step: the model analyzes the notebook
# content and then states a final answer. Sent to the LLM verbatim.
answer_reasoning_system_prompt = """
As an intelligent assistant, your primary objective is to answer questions based on information
within a text. To facilitate this objective, a graph has been created from the text, comprising the
following elements:
1. Text Chunks: Segments of the original text.
2. Atomic Facts: Smallest, indivisible truths extracted from text chunks.
3. Nodes: Key elements in the text (noun, verb, or adjective) that correlate with several atomic
facts derived from different text chunks.
You have now explored multiple paths from various starting nodes on this graph, recording key information for each path in a notebook.
Your task now is to analyze these memories and reason to answer the question.
Strategy:
#####
1. You should first analyze each notebook content before providing a final answer.
2. During the analysis, consider complementary information from other notes and employ a
majority voting strategy to resolve any inconsistencies.
3. When generating the final answer, ensure that you take into account all available information.
#####
Example:
#####
User:
Question: Who had a longer tennis career, Danny or Alice?
Notebook of different exploration paths:
1. We only know that Danny’s tennis career started in 1972 and ended in 1990, but we don’t know
the length of Alice’s career.
2. ......
Assistant:
Analyze:
The summary of search path 1 points out that Danny’s tennis career is 1990-1972=18 years.
Although it does not indicate the length of Alice’s career, the summary of search path 2 finds this
information, that is, the length of Alice’s tennis career is 15 years. Then we can get the final
answer, that is, Danny’s tennis career is longer than Alice’s.
Final answer:
Danny’s tennis career is longer than Alice’s.
#####
Please strictly follow the above format. Let’s begin
"""

# Final-answer prompt: the human turn supplies the question and the notebook.
answer_reasoning_prompt = ChatPromptTemplate.from_messages([
    ("system", answer_reasoning_system_prompt),
    (
        "human",
        "Question: {question}\n"
        "Notebook: {notebook}",
    ),
])

# Structured-output schema for the final answer step.
class AnswerReasonOutput(BaseModel):
    # The model's analysis of the notebook content.
    analyze: str = Field(description="""You should first analyze each notebook content before providing a final answer.
    During the analysis, consider complementary information from other notes and employ a
majority voting strategy to resolve any inconsistencies.""")
    # The answer given after the analysis.
    final_answer: str = Field(description="""When generating the final answer, ensure that you take into account all available information.""")

# Pipeline for the final answer step, returning an AnswerReasonOutput.
answer_reasoning_chain = answer_reasoning_prompt | model.with_structured_output(AnswerReasonOutput)

In [117]:
# Produce the final answer from the question and the notebook gathered above.
answer_reasoning_chain.invoke({"question": question, "notebook": notebook})

AnswerReasonOutput(analyze='The notebook indicates that Joan of Arc participated in multiple battles, achieving victories in some, like the siege of Orléans and the battle of Patay. However, she also faced defeats, specifically during the siege of Paris in September 1429 and the siege of La Charité in November of the same year. Additionally, her capture by Burgundian troops in May 1430 during an attempt to relieve Compiègne further underscores her experiences with losses in battle.', final_answer='Yes, Joan of Arc did lose some battles, including the siege of Paris and the siege of La Charité.')