In [None]:
import os
os._exit(00)

In [None]:

import os
from dotenv import load_dotenv 
from langchain_aws import ChatBedrock

load_dotenv() 

llm = ChatBedrock(
    provider="meta",
    model_id="arn:aws:bedrock:us-west-2:961902606948:inference-profile/us.meta.llama3-2-3b-instruct-v1:0",
    model_kwargs=dict(temperature=0.5),
    region_name=os.getenv('AWS_REGION'),
    aws_access_key_id=os.getenv('AWS_KEY_ID'),
    aws_secret_access_key=os.getenv('AWS_SECRET_KEY'),
    temperature=0,

)

In [129]:
from typing import Tuple, List, Optional
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from pydantic import BaseModel, Field
from langchain_ollama import ChatOllama

class Entities(BaseModel):
    """Identifying information about entities."""
    names: List[str] = Field(
        description="All the locations that appear in the text", default_factory=list)
    
prompt = ChatPromptTemplate.from_messages([(
    "system",
    "You are extracting all the location names from the text.",
),
(
    "human",
    """Use the given format to list the location names from the following input: 
     
     {question}
    """,
),
])

# ChatBedrock doesn't support with_structured_output ??
entity_chain = prompt | ChatOllama(
    model='llama3.2',
    temperature=0,
).with_structured_output(Entities)

print((prompt | llm).invoke({"question": 'What is the school zone of GRACE KING HIGH SCHOOL?'}))

print(entity_chain.invoke({"question": 'What is the school zone of GRACE KING HIGH SCHOOL?'}))


content='Here is the location name extracted from the input:\n\n* GRACE KING HIGH SCHOOL' additional_kwargs={'usage': {'prompt_tokens': 57, 'completion_tokens': 17, 'total_tokens': 74}, 'stop_reason': 'stop', 'model_id': 'arn:aws:bedrock:us-west-2:961902606948:inference-profile/us.meta.llama3-2-3b-instruct-v1:0'} response_metadata={'usage': {'prompt_tokens': 57, 'completion_tokens': 17, 'total_tokens': 74}, 'stop_reason': 'stop', 'model_id': 'arn:aws:bedrock:us-west-2:961902606948:inference-profile/us.meta.llama3-2-3b-instruct-v1:0'} id='run-743756bc-edf5-4c2d-bf77-7ccf61a305bf-0' usage_metadata={'input_tokens': 57, 'output_tokens': 17, 'total_tokens': 74}
names=['GRACE KING HIGH SCHOOL']


In [140]:
import importlib
from langchain_core.documents import Document

import classes
from classes import basic, formatted
from classes import lambda_basic

classes = importlib.reload(classes)

def structured_retriever(question: str) -> List[Document]:
    """
    Collects the neighborhood of entities mentioned
    in the question
    """
    entity = entity_chain.invoke({"question": question})

    # return formatted(entity.names)        
    # return basic(entity.names)
    return lambda_basic(entity.names)

print(structured_retriever("What are the flood areas of Joshua Butler Elementary School, River Oaks Hospital, Westbank Community School, Paul J. Solis Elementary School, and Stella Worley Middle School?"))

[Document(metadata={}, page_content='School JOSHUA BUTLER ELEMENTARY SCHOOL - IN_FLOOD_AREA -> New Orleans West Bank\nSchool JOSHUA BUTLER ELEMENTARY SCHOOL - HAS_SCHOOL_ZONE -> Jefferson Parish School DistrictSchool RIVER OAKS HOSPITAL - HAS_SCHOOL_ZONE -> Jefferson Parish School District\nSchool RIVER OAKS HOSPITAL - IN_FLOOD_AREA -> New Orleans East BankSchool WESTBANK COMMUNITY SCHOOL - IN_FLOOD_AREA -> New Orleans West Bank\nSchool WESTBANK COMMUNITY SCHOOL - HAS_SCHOOL_ZONE -> Jefferson Parish School DistrictSchool STELLA WORLEY MIDDLE SCHOOL - HAS_SCHOOL_ZONE -> Jefferson Parish School District\nSchool STELLA WORLEY MIDDLE SCHOOL - IN_FLOOD_AREA -> New Orleans West Bank')]


In [131]:
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain_core.messages import AIMessage, HumanMessage

# Condense a chat history and follow-up question into a standalone question
_template = """
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}

Follow Up Input: {question}

Standalone question:"""  # noqa: E501

CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer

_search_query = RunnableBranch(
    # If input includes chat_history, we condense it with the follow-up question
    (
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),  # Condense follow-up question and chat into a standalone_question
        RunnablePassthrough.assign(
            chat_history=lambda x: _format_chat_history(x["chat_history"])
        )
        | CONDENSE_QUESTION_PROMPT
        | llm
        | StrOutputParser(),
    ),
    # Else, we have no chat history, so just pass through the question
    RunnableLambda(lambda x : x["question"]),
)

In [132]:
def retriever(question: str):
    structured_data = structured_retriever(question)
    # unstructured_data = [el.page_content for el in existing_graph.similarity_search(question)]
    unstructured_data = []    
    final_data = f"""Structured data:
{structured_data}
Unstructured data:
{"#Document ". join(unstructured_data)}
    """
    return final_data

In [133]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
    RunnableParallel(
        {
            "context": _search_query | retriever,
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)


In [134]:
questions = [
    "What is the school zone of GRACE KING HIGH SCHOOL?",
    "What is the flood area of GRACE KING HIGH SCHOOL?",
    "What school zones have schools in the New Orleans East Bank flood area?"
]

for question in questions:
    response = chain.invoke({'question': question, "chat_history": [] })
    print(f"Question: {question}")
    print(f"Answer: {response}")
    print()    


Question: What is the school zone of GRACE KING HIGH SCHOOL?
Answer: The school zone of GRACE KING HIGH SCHOOL is Jefferson Parish School District.

Question: What is the flood area of GRACE KING HIGH SCHOOL?
Answer: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

Question: What school zones have schools in the New Orleans East Bank flood area?
Answer: The following schools are located in the New Orleans East Bank flood area:

* School RIVERDALE MIDDLE SCHOOL
* School RIVER OAKS HOSPITAL
* School LANGSTON HUGHES CHARTER ACADEMY


In [135]:
questions = [
    "List five schools in the school zone Jefferson Parish School District?",
    "What are the flood areas of those schools?"
]

chat_history = []

for question in questions:
    response = chain.invoke({'question': question, "chat_history": chat_history })
    print(f"Question: {question}")
    print(f"Answer: {response}")
    print()
    
    chat_history.append((question, response))

Question: List five schools in the school zone Jefferson Parish School District?
Answer: Here are five schools in the Jefferson Parish School District:

1. Joshua Butler Elementary School
2. River Oaks Hospital
3. Westbank Community School
4. Paul J. Solis Elementary School
5. Harry S. Truman Middle School

Question: What are the flood areas of those schools?
Answer: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!



In [136]:
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from pydantic import BaseModel, Field
from typing import List
from langchain_core.tools import tool


class Neo4jRetriever(BaseRetriever):

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Sync implementations for retriever."""
        return structured_retriever(query)        

class LocationInformation(BaseModel):
    name: str = Field(description="Name of the location to get information about")


@tool("Intermediate Answer", args_schema=LocationInformation, return_direct=True)
def location_search(name: str) -> List[Document]:
    """Get locations with are associated with a given location."""
    return Neo4jRetriever().invoke(name)

In [137]:
# 
# Planner approach
# 

from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from typing import List
from typing_extensions import TypedDict


class ReWOO(TypedDict):
    task: str
    plan_string: str
    steps: List
    results: dict
    result: str
    
prompt = """For the following task, make plans that can solve the problem step by step. For each plan, indicate \
which external tool together with tool input to retrieve evidence. You can store the evidence into a \
variable #E that can be called by later tools. (Plan, #E1, Plan, #E2, Plan, ...)

Tools can be one of the following:
(1) SZA[input]: Worker that searches school zones from graph for associated school information. Useful when you need to find what
schools are within a school zone. The input should be a the name of a school zone or a school.
(2) FAA[input]: Worker that searches flood areas from graph for associated school information. Useful when you need to find what
schools are within a flood area. The input should be a the name of a flood area or a school.
(3) LLM[input]: A pretrained LLM like yourself. Useful when you need to act with general
world knowledge and common sense. Prioritize it when you are confident in solving the problem
yourself. Input can be any instruction.

For example,
Task: What flood areas are connected to the school zone Jefferson Parish School District?
Plan: Given school zones are associated with schools, find which schools are within the Jefferson Parish School District.
      #E1 = SZA[Jefferson Parish School District]
Plan: List the schools from the school names. 
      #E2 = LLM[List names given #E1]
Plan: Given flood areas are associated with schools, find which flood areas are associated with a school give #E2.
      #E3 = FAA[#E2]
Plan: List the flood area names. 
      #E4 = LLM[List flood area names given #E3]

Begin! 
Describe your plans with rich details. Each Plan should be followed by only one #E.

Task: {task}"""

task = "What flood areas are connected to the school zone Jefferson Parish School District?"

result = llm.invoke(prompt.format(task=task))

print(result.content)

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


In [138]:
import re 
from langchain_core.prompts import ChatPromptTemplate

# Regex to match expressions of the form E#... = ...[...]
regex_pattern = r"Plan:\s*(.+)\s*(#E\d+)\s*=\s*(\w+)\s*\[([^\]]+)\]"
prompt_template = ChatPromptTemplate.from_messages([("user", prompt)])
planner = prompt_template | llm


def get_plan(state: ReWOO):
    task = state["task"]
    result = planner.invoke({"task": task})
    # Find all matches in the sample text
    matches = re.findall(regex_pattern, result.content)
    matches = matches[:len(matches)]
    
    return {"steps": matches, "plan_string": result.content}

def _get_current_task(state: ReWOO):
    if "results" not in state or state["results"] is None:
        return 1
    if len(state["results"]) == len(state["steps"]):
        return None
    else:
        return len(state["results"]) + 1


def tool_execution(state: ReWOO):
    """Worker node that executes the tools of a given plan."""
    _step = _get_current_task(state)
    _, step_name, tool, tool_input = state["steps"][_step - 1]
    _results = (state["results"] or {}) if "results" in state else {}
    for k, v in _results.items():
        tool_input = tool_input.replace(k, v)
    if tool == "SZA":
        print('SZA Tool input: ' + str(tool_input))
        result = Neo4jRetriever().invoke(tool_input)
    elif tool == "FAA":
        tool_pattern = r"content='(.*)' additional_kwargs=.*"
        tool_matches = re.search(tool_pattern, tool_input)

        print('FAA Tool input: ' + tool_matches.group(1))        
        result = Neo4jRetriever().invoke(tool_matches.group(1))
    elif tool == "LLM":
        result = llm.invoke(tool_input)
    else:
        raise ValueError
    _results[step_name] = str(result)
    return {"results": _results}

solve_prompt = """Solve the following task or problem. To solve the problem, we have made step-by-step Plan and \
retrieved corresponding Evidence to each Plan. Use them with caution since long evidence might \
contain irrelevant information.

{plan}

Now solve the question or task according to provided Evidence above. Respond with the answer
directly with no extra words.

Task: {task}
Response:"""


def solve(state: ReWOO):
    plan = ""
    for _plan, step_name, tool, tool_input in state["steps"]:
        _results = (state["results"] or {}) if "results" in state else {}
        for k, v in _results.items():
            tool_input = tool_input.replace(k, v)
            step_name = step_name.replace(k, v)
        plan += f"Plan: {_plan}\n{step_name} = {tool}[{tool_input}]"
    prompt = solve_prompt.format(plan=plan, task=state["task"])
    result = llm.invoke(prompt)
    return {"result": result.content}

def _route(state):
    _step = _get_current_task(state)
    if _step is None:
        # We have executed all tasks
        return "solve"
    else:
        # We are still executing tasks, loop back to the "tool" node
        return "tool"

from langgraph.graph import END, StateGraph, START

graph = StateGraph(ReWOO)
graph.add_node("plan", get_plan)
graph.add_node("tool", tool_execution)
graph.add_node("solve", solve)
graph.add_edge("plan", "tool")
graph.add_edge("solve", END)
graph.add_conditional_edges("tool", _route)
graph.add_edge(START, "plan")

app = graph.compile()

for s in app.stream({"task": task}):
    print(s)
    print("---")


{'plan': {'plan_string': 'Here are the plans to solve the problem step by step:\n\nPlan: Find schools within the Jefferson Parish School District.\n      #E1 = SZA[Jefferson Parish School District]\n\nThis plan uses the SZA tool to search for school zones associated with the Jefferson Parish School District. The output will be a list of school zones.\n\nPlan: List the school names from the school zones.\n      #E2 = LLM[List names given #E1]\n\nThis plan uses the LLM tool to extract the school names from the list of school zones retrieved in the previous step. The output will be a list of school names.\n\nPlan: Find flood areas associated with the schools.\n      #E3 = FAA[#E2]\n\nThis plan uses the FAA tool to search for flood areas associated with the schools listed in the previous step. The output will be a list of flood areas.\n\nPlan: List the flood area names.\n      #E4 = LLM[List flood area names given #E3]\n\nThis plan uses the LLM tool to extract the flood area names from the

In [139]:
# 
# ReAct agent approach
# 

from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent
from langchain.tools.retriever import create_retriever_tool
from langchain_core.tools import tool
from langchain.agents import AgentExecutor

neo_retriever = Neo4jRetriever()

tool_search = create_retriever_tool(
    retriever=neo_retriever,
    name="loc_a_tool",
    description="Searches and returns locations associated with the input.",
)

prompt = hub.pull("hwchase17/react")
agent = create_react_agent(llm, [tool_search], prompt)

agent_executor = AgentExecutor(
    agent=agent, 
    tools=[tool_search],
    verbose=True,
    handle_parsing_errors=False,
    streaming=True,
    max_iterations = 5 # useful when agent is stuck in a loop
)

print(prompt)
print(agent_executor.invoke({"input": "What flood areas are connected to schools in the school zone Jefferson Parish School District?"}))




input_variables=['agent_scratchpad', 'input', 'tool_names', 'tools'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'react', 'lc_hub_commit_hash': 'd15fe3c426f1c4b3f37c9198853e4a86e20c425ca7f4752ec0c9b0e97ca7ea4d'} template='Answer the following questions as best you can. You have access to the following tools:\n\n{tools}\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: {input}\nThought:{agent_scratchpad}'


[1m> Entering new AgentExecutor chain...[0m


ValueError: Stop sequence key name for meta is not supported.