In [None]:
from typing import Tuple, List, Optional

In [None]:
from langchain_ollama import ChatOllama

# Name of the Ollama model used for every LLM call in this notebook.
MODEL = 'llama3.2'
# Alternative model names kept for quick switching (uncomment one to use it).
#MODEL = 'firefunction-v2'
#MODEL = 'mistral-nemo'


# Shared chat model instance; temperature=0 is requested to reduce sampling randomness.
llm = ChatOllama(
    model=MODEL,
    temperature=0,
)


In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from pydantic import BaseModel, Field

class Entities(BaseModel):
    """Identifying information about entities."""

    # Entity names pulled from the input text; defaults to an empty list.
    names: List[str] = Field(
        default_factory=list,
        description="All the entities that appear in the text",
    )
    
# Prompt for the entity-extraction step; {question} is filled with the user's text.
prompt = ChatPromptTemplate.from_messages([(
    "system",
    "You are extracting all names from the text.",
),
(
    "human",
    "Use the given format to extract information from the following input: {question}",
),
])

# Chain that runs the prompt and asks the model for output shaped like the Entities schema.
entity_chain = prompt | llm.with_structured_output(Entities)
# Example invocations kept for manual testing:
# entity_chain.invoke({"question": 'What is the school zone of GRACE KING HIGH SCHOOL?'})
# entity_chain.invoke({"question": 'What are the flood areas of Joshua Butler Elementary School, River Oaks Hospital, Westbank Community School, Paul J. Solis Elementary School, and Stella Worley Middle School?'})

In [None]:
import importlib
from langchain_core.documents import Document

import classes

# Reload the local `classes` module first so that edits to it are picked up
# without restarting the kernel, and only then bind the helper names. Names
# bound with `from ... import` before a reload keep pointing at the
# pre-reload objects, so the reload has to come first.
classes = importlib.reload(classes)
from classes import basic, formatted
from classes import lambda_basic

def structured_retriever(question: str) -> List[Document]:
    """
    Look up graph data for the entities named in a question.

    The question is run through ``entity_chain`` to extract entity names, and
    those names are handed to ``lambda_basic``; its result is returned as-is.
    """
    extracted = entity_chain.invoke({"question": question})
    entity_names = extracted.names

    # Alternative lookups imported from `classes`: formatted(...), basic(...)
    return lambda_basic(entity_names)

# Smoke test: run the structured retriever on a multi-entity question and print the raw result.
print(structured_retriever("What are the flood areas of Joshua Butler Elementary School, River Oaks Hospital, Westbank Community School, Paul J. Solis Elementary School, and Stella Worley Middle School?"))

In [None]:
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain_core.messages import AIMessage, HumanMessage

# Condense a chat history and follow-up question into a standalone question.
# The template has two input variables: {chat_history} and {question}.
_template = """
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}

Follow Up Input: {question}

Standalone question:"""  # noqa: E501

# Prompt object built from the template; used by the chat-history branch of _search_query.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer

# Runnable that turns the chain input (a dict with "question" and an optional
# "chat_history") into the query string used for retrieval.
_search_query = RunnableBranch(
    # If input includes chat_history, we condense it with the follow-up question
    (
        # Branch condition: true only for a non-empty "chat_history" value.
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),  # Condense follow-up question and chat into a standalone_question
        RunnablePassthrough.assign(
            chat_history=lambda x: _format_chat_history(x["chat_history"])
        )
        | CONDENSE_QUESTION_PROMPT
        | llm
        | StrOutputParser(),
    ),
    # Else, we have no chat history, so just pass through the question
    RunnableLambda(lambda x : x["question"]),
)

In [None]:
def retriever(question: str):
    """Build the context text handed to the answering prompt.

    The text has a "Structured data" section holding the result of
    ``structured_retriever(question)`` and an "Unstructured data" section,
    which is currently always empty because the similarity search is disabled.
    """
    structured_data = structured_retriever(question)
    # unstructured_data = [el.page_content for el in existing_graph.similarity_search(question)]
    unstructured_data = []
    joined_unstructured = "#Document ".join(unstructured_data)
    return (
        "Structured data:\n"
        f"{structured_data}\n"
        "Unstructured data:\n"
        f"{joined_unstructured}\n"
        "    "
    )

In [None]:
# Answering prompt: {context} receives the retrieved data and {question} the user question.
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""
# NOTE: this rebinds the module-level name `prompt`; entity_chain was built
# from the earlier prompt object and is not affected by the rebinding.
prompt = ChatPromptTemplate.from_template(template)

# Answering chain. The input is a dict with a "question" key and an optional
# "chat_history" key.
#   - "context": derive the search query (condensed when history is present)
#     and fetch the matching context text.
#   - "question": forward only the question text. Forwarding the whole input
#     dict would render the dict repr, including any chat history, into the
#     "Question:" slot of the prompt.
chain = (
    RunnableParallel(
        {
            "context": _search_query | retriever,
            "question": RunnableLambda(lambda x: x["question"]),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)


In [None]:
# Single-question run without a "chat_history" key; the cell output shows the answer string.
chain.invoke({"question": "What is the school zone of GRACE KING HIGH SCHOOL?"})

In [None]:
#
# Basic questions answered by the Neo4j RAG chain (each asked with an empty chat history)
#

questions = [
    "What is the school zone of GRACE KING HIGH SCHOOL?",
    "What is the flood area of GRACE KING HIGH SCHOOL?",
    "What school zones have schools in the New Orleans East Bank flood area?",
]

for question in questions:
    response = chain.invoke({"question": question, "chat_history": []})
    # One write per question: the question line, the answer line, then a blank line.
    print(f"Question: {question}\nAnswer: {response}\n")


In [None]:
#
# Chat-history approach: ask a follow-up that depends on the previous answer
#

questions = [
    "List five schools in the school zone Jefferson Parish School District?",
    "What are the flood areas of those schools?",
]

chat_history = []

for question in questions:
    response = chain.invoke({"question": question, "chat_history": chat_history})
    print(f"Question: {question}\nAnswer: {response}\n")

    # Record the exchange so the next question can refer back to it.
    chat_history.append((question, response))
    
    

In [None]:

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever


class Neo4jRetriever(BaseRetriever):
    # Adapter that exposes structured_retriever() through the BaseRetriever
    # interface, so callers can use the standard retriever entry points.

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Synchronous lookup: forward the query text to structured_retriever.

        ``run_manager`` is accepted to match the base-class signature and is
        not used here.
        """
        documents = structured_retriever(query)
        return documents


In [None]:
# 
# Neo4j structured query tool definition
# 

from pydantic import BaseModel, Field
from typing import Annotated, List
from langchain_core.tools import tool

# Argument schema for the location_search tool: a single location name.
class LocationInformation(BaseModel):
    name: str = Field(description="Name of the location to get information about")


# The docstring below doubles as the tool description (printed further down
# via location_search.description), so its wording is kept verbatim.
@tool("Intermediate Answer", args_schema=LocationInformation, return_direct=True)
def location_search(name: str) -> List[Document]:
    """Get locations with are associated with a given location."""
    neo4j_retriever = Neo4jRetriever()
    return neo4j_retriever.invoke(name)


# Inspect the attributes of the tool object: its name, description,
# argument schema and return_direct flag.
print(location_search.name)
print(location_search.description)
print(location_search.args)
print(location_search.return_direct)

In [None]:
# Call the tool directly with a school-zone name; the cell output shows what the retriever returns.
location_search.invoke({'name': 'Jefferson Parish School District'})

In [None]:
# 
# Planner approach
# 

from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from typing import List
from typing_extensions import TypedDict


class ReWOO(TypedDict):
    """State passed between the plan, tool and solve nodes of the ReWOO graph."""
    # The user task the planner is asked to solve.
    task: str
    # Raw text of the planner's output.
    plan_string: str
    # Parsed plan steps: (plan text, "#E<n>" step name, tool name, tool input) tuples.
    steps: List
    # Maps each executed step name ("#E<n>") to the string form of its result.
    results: dict
    # Final answer text produced by the solve node.
    result: str
    
prompt = """For the following task, make plans that can solve the problem step by step. For each plan, indicate \
which external tool together with tool input to retrieve evidence. You can store the evidence into a \
variable #E that can be called by later tools. (Plan, #E1, Plan, #E2, Plan, ...)

Tools can be one of the following:
(1) SZA[input]: Worker that searches school zones from graph for associated school information. Useful when you need to find what
schools are within a school zone. The input should be a the name of a school zone or a school.
(2) FAA[input]: Worker that searches flood areas from graph for associated school information. Useful when you need to find what
schools are within a flood area. The input should be a the name of a flood area or a school.
(3) LLM[input]: A pretrained LLM like yourself. Useful when you need to act with general
world knowledge and common sense. Prioritize it when you are confident in solving the problem
yourself. Input can be any instruction.

For example,
Task: What flood areas are connected to the school zone Jefferson Parish School District?
Plan: Given school zones are associated with schools, find which schools are within the Jefferson Parish School District.
      #E1 = SZA[Jefferson Parish School District]
Plan: List the schools from the school names. 
      #E2 = LLM[List names given #E1]
Plan: Given flood areas are associated with schools, find which flood areas are associated with a school give #E2.
      #E3 = FAA[#E2]
Plan: List the flood area names. 
      #E4 = LLM[List flood area names given #E3]

Begin! 
Describe your plans with rich details. Each Plan should be followed by only one #E.

Task: {task}"""

# Example task; it is also used later as the input of the ReWOO graph run.
task = "What flood areas are connected to the school zone Jefferson Parish School District?"

# One-off planner call: fill {task} in the prompt text and send it straight to the model.
result = llm.invoke(prompt.format(task=task))

# Show the raw plan text returned by the model.
print(result.content)




In [None]:
import re 
from langchain_core.prompts import ChatPromptTemplate

# Regex for one plan step in the planner output, of the form
#   Plan: <description> #E<n> = <Tool>[<input>]
# Capture groups: (description, "#E<n>", tool name, tool input).
regex_pattern = r"Plan:\s*(.+)\s*(#E\d+)\s*=\s*(\w+)\s*\[([^\]]+)\]"
# Wrap the prompt text as a single user message. `prompt` must still be the
# planner prompt string (with its {task} placeholder) when this cell runs.
prompt_template = ChatPromptTemplate.from_messages([("user", prompt)])
planner = prompt_template | llm


def get_plan(state: ReWOO):
    """Planner node: ask the LLM for a plan and parse it into steps.

    Returns a partial state update with ``steps`` (tuples of description,
    step name, tool name and tool input matched by ``regex_pattern``) and
    ``plan_string`` (the raw planner text).
    """
    planner_output = planner.invoke({"task": state["task"]})
    plan_text = planner_output.content

    parsed_steps = re.findall(regex_pattern, plan_text)
    # Only the first half of the parsed steps is kept.
    # NOTE(review): the reason for dropping the second half is not visible
    # here (possibly the model repeats its plan) -- confirm it is still wanted.
    kept_steps = parsed_steps[: len(parsed_steps) // 2]

    return {"steps": kept_steps, "plan_string": plan_text}

def _get_current_task(state: ReWOO):
    if "results" not in state or state["results"] is None:
        return 1
    if len(state["results"]) == len(state["steps"]):
        return None
    else:
        return len(state["results"]) + 1


def tool_execution(state: ReWOO):
    """Worker node that executes the next pending step of the plan.

    The step's tool input has earlier evidence names ("#E<n>") replaced by
    their recorded results, the named tool is run, and the string form of its
    output is stored under the step name.

    Returns:
        A partial state update ``{"results": ...}`` with the results recorded
        so far, including the step just run.

    Raises:
        ValueError: if the step names a tool other than SZA, FAA or LLM.
    """
    steps = state["steps"]
    _results = (state["results"] or {}) if "results" in state else {}
    _step = _get_current_task(state)
    # Nothing to run: every step already has a result, or the planner output
    # produced no parsable steps. Hand back the results unchanged so the
    # router can move on instead of failing on an out-of-range index.
    if _step is None or _step > len(steps):
        return {"results": _results}
    _, step_name, tool, tool_input = steps[_step - 1]
    # Substitute longer names first so "#E1" cannot clobber the prefix of "#E10".
    for k in sorted(_results, key=len, reverse=True):
        tool_input = tool_input.replace(k, _results[k])
    if tool == "SZA":
        print('SZA Tool input: ' + str(tool_input))
        result = Neo4jRetriever().invoke(tool_input)
    elif tool == "FAA":
        # Results are stored with str(); for an LLM step that string looks
        # like "content='...' additional_kwargs=...", so pull the text out.
        tool_pattern = r"content='(.*)' additional_kwargs=.*"
        tool_matches = re.search(tool_pattern, tool_input)
        # Use the input unchanged when it does not have that shape
        # (previously this raised AttributeError on the missing match).
        faa_input = tool_matches.group(1) if tool_matches else tool_input

        print('FAA Tool input: ' + faa_input)
        result = Neo4jRetriever().invoke(faa_input)
    elif tool == "LLM":
        result = llm.invoke(tool_input)
    else:
        raise ValueError(f"Unknown tool {tool!r} in plan step {step_name}")
    _results[step_name] = str(result)
    return {"results": _results}

solve_prompt = """Solve the following task or problem. To solve the problem, we have made step-by-step Plan and \
retrieved corresponding Evidence to each Plan. Use them with caution since long evidence might \
contain irrelevant information.

{plan}

Now solve the question or task according to provided Evidence above. Respond with the answer
directly with no extra words.

Task: {task}
Response:"""


def solve(state: ReWOO):
    """Solver node: answer the task from the plan and the gathered evidence.

    Each step is rendered as "Plan: <description>" followed by
    "<step> = <tool>[<input>]", with evidence names ("#E<n>") replaced by
    their recorded results. The rendered steps are joined with newlines,
    inserted into ``solve_prompt`` and sent to the LLM.

    Returns:
        A partial state update ``{"result": <answer text>}``.
    """
    # Loop-invariant: the recorded results do not change while rendering.
    _results = (state["results"] or {}) if "results" in state else {}
    # Substitute longer names first so "#E1" cannot clobber the prefix of "#E10".
    ordered_keys = sorted(_results, key=len, reverse=True)

    rendered_steps = []
    for _plan, step_name, tool, tool_input in state["steps"]:
        for k in ordered_keys:
            v = _results[k]
            tool_input = tool_input.replace(k, v)
            step_name = step_name.replace(k, v)
        rendered_steps.append(f"Plan: {_plan}\n{step_name} = {tool}[{tool_input}]")
    # One step per block; without a separator each "Plan:" would be glued to
    # the end of the previous step's evidence line.
    plan = "\n".join(rendered_steps)

    solve_input = solve_prompt.format(plan=plan, task=state["task"])
    result = llm.invoke(solve_input)
    return {"result": result.content}

def _route(state):
    """Pick the next node: "tool" while steps remain, "solve" once all have run."""
    # _get_current_task returns None when every planned step has a result.
    return "solve" if _get_current_task(state) is None else "tool"

from langgraph.graph import END, StateGraph, START

# Wire the ReWOO graph: START -> plan -> tool, then _route decides after each
# tool run whether to run "tool" again or go to "solve"; solve -> END.
graph = StateGraph(ReWOO)
graph.add_node("plan", get_plan)
graph.add_node("tool", tool_execution)
graph.add_node("solve", solve)
graph.add_edge("plan", "tool")
graph.add_edge("solve", END)
graph.add_conditional_edges("tool", _route)
graph.add_edge(START, "plan")

app = graph.compile()

# Stream the run for the example task, printing each emitted update followed by a separator.
for s in app.stream({"task": task}):
    print(s)
    print("---")



In [None]:
# 
# ReAct loop approach
# 

from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent, AgentOutputParser
from langchain.tools.retriever import create_retriever_tool
from pydantic import BaseModel, Field
from typing import Annotated, List
from langchain_core.tools import tool
from typing import List, Union
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema import AgentAction, AgentFinish, HumanMessage, SystemMessage, BaseMessage

from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver  # an in-memory checkpointer
import operator


# Defining state
class GraphState(TypedDict):
   """State shared by the oracle and tool nodes of the agent graph."""
   # The user's query.
   input: str
   # Prior conversation messages, fed to the prompt's chat_history placeholder.
   chat_history: list[BaseMessage]
   # Steps taken so far. Annotated with operator.add, which presumably makes
   # node updates concatenate onto the list (LangGraph reducer) -- TODO confirm.
   # NOTE(review): the nodes in this notebook store AgentAction objects here,
   # not (AgentAction, str) tuples as the annotation says.
   intermediate_steps: Annotated[list[tuple[AgentAction, str]], operator.add]
   
   
# Argument schema for the LOC_SEARCH tool: a single location name.
class LocationSearch(BaseModel):
    name: str = Field(description="Name of the location to get information about")

@tool("LOC_SEARCH", args_schema=LocationSearch, return_direct=True)
def search(name: str) -> str:
    """Get locations with are associated with a given location."""
    # Canned responses stand in for the Neo4j lookup (see the commented-out
    # call below): names containing "School District" get the school list,
    # anything else gets the flood-area summary.
    # A membership test is used because str.index() raises ValueError when
    # the substring is absent (it never returns -1), which made every
    # non-district lookup fail.
    if 'School District' in name:
        return "The following schools are part of Jefferson Parish School District: JOSHUA BUTLER ELEMENTARY SCHOOL, RIVER OAKS HOSPITAL, WESTBANK COMMUNITY SCHOOL, PAUL J. SOLIS ELEMENTARY SCHOOL, WEST JEFFERSON HIGH SCHOOL, BONELLA A. ST. VILLE ELEMENTARY SCHOOL, STELLA WORLEY MIDDLE SCHOOL, CONGETTA TRIPPE JANET ELEMENTARY SCHOOL, JEFFERSON ELEMENTARY SCHOOL, HARRY S. TRUMAN MIDDLE SCHOOL, L.H. MARRERO MIDDLE SCHOOL, RIVERSIDE ALTERNATIVE HIGH SCHOOL, VIC A. PITRE ELEMENTARY SCHOOL, RUPPEL ACADEMIE FRANCAISE, J.C. ELLIS ELEMENTARY SCHOOL."

    # return (', '.join(map(lambda doc: doc.page_content, Neo4jRetriever().invoke(name) )))
    return "The following schools are in the flood area New Orleans West Bank: JOSHUA BUTLER ELEMENTARY SCHOOL, WESTBANK COMMUNITY SCHOOL, STELLA WORLEY MIDDLE SCHOOL. The following schools are in the flood area New Orleans East Bank: RIVER OAKS HOSPITAL"
   

@tool("final_answer")
def final_answer(
   locations: list[str]
):
   """Returns a natural language response to the user as list of the locations
   """
   # Render each location as its own "- " bullet line. Previously the
   # formatted text was built and then discarded in favour of "".
   if isinstance(locations, list):
       return "\n".join(f"- {r}" for r in locations)
   # Anything that is not a list is returned as plain text.
   return str(locations)

# Tools bound to the model, in this order.
tools = [search, final_answer]

# Lookup from tool name (also used as the graph node name) to the tool object.
tool_str_to_func = {
   "LOC_SEARCH": search,
   "final_answer": final_answer,
}

def create_scratchpad(intermediate_steps: list[AgentAction]):
   """Render the executed tool steps as text for the oracle prompt.

   Steps whose log is still the placeholder "TBD" are skipped; each remaining
   step becomes a "Tool: ..., input: ..." line followed by an "Output: ..."
   line, and the steps are joined with "---" separator lines.
   """
   executed = (step for step in intermediate_steps if step.log != "TBD")
   return "\n---\n".join(
       f"Tool: {step.tool}, input: {step.tool_input}\nOutput: {step.log}"
       for step in executed
   )

system_prompt = """You are the oracle, the great AI decision maker.
Given the user's query you must decide what to do with it based on the
list of tools provided to you.


If you see that a tool has been used (in the scratchpad) with a particular
query, do NOT use that same tool with the same query again. Also, do NOT use
any tool more than twice (ie, if the tool appears in the scratchpad twice, do
not use it again).


You should aim to collect information from a diverse range of sources before
providing the answer to the user. Once you have collected plenty of information
to answer the user's question (stored in the scratchpad) use the final_answer
tool.


The data is structured such that flood areas are only associated with schools.  
School zones are only associated with schools. In order to get the flood area
of a school zone, you must find the schools assocaited with that school zone
and then find the flood areas associated with those schools.


Tools can be one of the following:
(1) LOC_SEARCH[input]: Worker that searches locations and returns a list of associated locations.
(2) final_answer[input]: Formats the final answer
"""

# Oracle prompt: system instructions, prior chat messages, the user query,
# and an assistant message carrying the scratchpad of executed tool steps.
# NOTE: this rebinds the module-level name `prompt` once more.
prompt = ChatPromptTemplate.from_messages([
   ("system", system_prompt),
   MessagesPlaceholder(variable_name="chat_history"),
   ("user", "{input}"),
   ("assistant", "scratchpad: {scratchpad}")
])

# Separate model instance with the two tools bound, so it can emit tool calls.
llm_with_tools = ChatOllama(model=MODEL, temperature=0).bind_tools(tools)

# Oracle chain: map the graph state to the prompt variables (rendering the
# scratchpad from intermediate_steps), fill the prompt, and call the model.
oracle = (
   {
       "input": lambda x: x["input"],
       "chat_history": lambda x: x["chat_history"],
       "scratchpad": lambda x: create_scratchpad(
           intermediate_steps=x["intermediate_steps"]
       ),
   }
   | prompt
   | llm_with_tools
)

counter = 0

def run_oracle(state: GraphState) -> dict:
   """Oracle node: ask the tool-bound model which tool to run next.

   The model's first tool call decides the next step. When the model returns
   no tool call, or names a tool that is not registered in
   ``tool_str_to_func``, the run is steered to ``final_answer`` with the
   model's text as the only entry, so the graph can always terminate.

   Returns:
       A partial state update whose ``intermediate_steps`` holds one planned
       AgentAction with the placeholder log "TBD" (not executed yet).
   """
   print(state)

   # This is where the reasoning happens
   out = oracle.invoke(state)

   print(out)

   tool_calls = getattr(out, "tool_calls", None) or []
   if tool_calls and tool_calls[0]["name"] in tool_str_to_func:
       tool_name = tool_calls[0]["name"]
       tool_args = tool_calls[0]["args"]
   else:
       # No usable tool call: finish with whatever text the model produced.
       tool_name = "final_answer"
       tool_args = {"locations": [str(out.content)]}

   action_out = AgentAction(
       tool=tool_name,
       tool_input=tool_args,
       log="TBD"
   )

   return {
       "intermediate_steps": [action_out]
   }

def router(state: GraphState):
   """Return the name of the node to run next: the tool of the latest step."""
   steps = state["intermediate_steps"]
   if not isinstance(steps, list):
       # if we output bad format go to final answer
       print("Router invalid format")
       return "final_answer"
   # return the tool name to use
   return steps[-1].tool

def run_tool(state: GraphState):
   """Tool node: run the tool requested by the most recent step.

   Shared by every tool node of the graph. The tool is looked up by name in
   ``tool_str_to_func`` and invoked with the step's input.

   Returns:
       A partial state update whose ``intermediate_steps`` holds one
       AgentAction with the string form of the tool output as its log.
   """
   pending = state["intermediate_steps"][-1]
   tool_name = pending.tool
   tool_args = pending.tool_input

   print(f"Executing tool: {tool_name}.invoke(input={tool_args})")

   tool_output = tool_str_to_func[tool_name].invoke(input=tool_args)
   executed = AgentAction(
       tool=tool_name,
       tool_input=tool_args,
       log=str(tool_output),
   )
   return {"intermediate_steps": [executed]}
   

def print_stream(stream):
    """Print every item yielded by ``stream``, one item per print call."""
    for item in stream:
        print(item)


# NOTE(review): this checkpointer is created but not passed to
# workflow.compile() below -- confirm whether persistence was intended.
memory = MemorySaver()


# initialize the graph with our GraphState
workflow = StateGraph(GraphState)

# add nodes: the oracle decides the next step; both tool nodes share run_tool
workflow.add_node("oracle", run_oracle)
workflow.add_node("LOC_SEARCH", run_tool)
workflow.add_node("final_answer", run_tool)

# specify the entry node
workflow.set_entry_point("oracle")


# add the conditional edges which use the router
workflow.add_conditional_edges(
   source="oracle",  # where in graph to start
   path=router,  # function to determine which node is called
)


# create edges from each tool back to the oracle
for tool_obj in tools:
   if tool_obj.name != "final_answer":
       workflow.add_edge(tool_obj.name, "oracle")

# if anything goes to final answer, it must then move to END
workflow.add_edge("final_answer", END)


# finally, we compile our graph
# NOTE: this rebinds `graph`, which earlier held the ReWOO StateGraph builder.
graph = workflow.compile()
# Run configuration: a thread id passed to graph.invoke below.
config = {"configurable": {"thread_id": "test-thread"}}

# Initial graph state: the user query, no prior chat, no steps taken yet.
inputs = {
    "input": "What flood areas are connected to schools in the school zone Jefferson Parish School District?",
    "chat_history": [],
    "intermediate_steps": [],
}

# Manual debugging snippets (model alone, prompt + model, oracle chain):
# out = llm_with_tools.invoke("What flood areas are connected to schools in the school zone Jefferson Parish School District?")
# print(out)
# print(out.tool_calls)

# out = (prompt | llm_with_tools).invoke({
#     "input": "What flood areas are connected to schools in the school zone Jefferson Parish School District?",
#     "chat_history": [],
#     "intermediate_steps": [],
#     "scratchpad": ""
# })
# print(out)
# print(out.tool_calls)


# out = oracle.invoke(inputs)
# print(out)
# out.tool_calls

# print_stream(graph.stream(inputs, config, stream_mode="values"))
# Run the agent graph to completion; the cell output shows the final state.
graph.invoke(inputs, config)