In [6]:
from src.infrastructure.loader import DocumentLoader
from langchain_huggingface import HuggingFaceEmbeddings
from src.app.ingestor import ingest_documents
from src.infrastructure.json_cache import JsonCache
from src.infrastructure.splitter import RecursiveTextSplitterWithHash
from langchain_chroma import Chroma
import pathlib 
from langchain_core.documents import Document
from langchain_ollama import OllamaLLM, ChatOllama

In [7]:
# Embedding model; it is passed to the Chroma store as its embedding function.
embedder = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

In [8]:
# loader = DocumentLoader(paths=(pathlib.Path('./docs'),), glob=["*.txt", "*.md"])
# splitter = RecursiveTextSplitterWithHash( enable_hash=True, chunk_size=500, chunk_overlap=100,add_start_index=True)
# Chroma collection stored under ./data/chroma, embedding text with `embedder`.
vector_store = Chroma(
    collection_name="research_docs",
    embedding_function=embedder,
    persist_directory="./data/chroma",
)
# cache=JsonCache()

In [9]:
# ingest_documents(
#     loader=loader,
#     splitter=splitter,
#     vector_store=vector_store,
#     cache=cache
# )

In [24]:
# Chat model reached through the Ollama HTTP endpoint on localhost.
llm = ChatOllama(
    # Where and what to call.
    base_url="http://localhost:11434",
    model="mistral:7b",
    # Sampling: low temperature to keep answers close to the retrieved context.
    temperature=0.1,
    # Runtime knobs forwarded to Ollama.
    # NOTE(review): keep_alive is presumably a duration in seconds — confirm.
    keep_alive=300,
    num_gpu=1,
)

In [25]:
from pydantic import BaseModel

class State(BaseModel):
    """State container for a simple question -> context -> answer pipeline.

    Only `question` is required; `answer` and `context` default to None.
    """

    # The user's question; the only required field.
    question: str
    # The generated answer; None until one has been produced.
    answer: str | None = None
    # Documents retrieved for the question; None until retrieval has run.
    context: list[Document] | None = None

In [None]:
# def retrieve_documents(state: State)->State:
#     retriever = vector_store.as_retriever(
#         search_type="similarity",
#         search_kwargs={"k": 4}
#     )
#     context = retriever.invoke(state.question)
#     state.context = context
#     return state

In [None]:
# def generate_answer(state: State)->State:
#     from langchain_core.prompts import PromptTemplate

#     template = """Use the following pieces of context to answer the question at the end.
#     If you don't know the answer, just say that you don't know, don't try to make up an answer.
#     Use three sentences maximum and keep the answer as concise as possible.

#     Context from documents: 
#     {context}

#     Question: {question}

#     Helpful Answer:"""
    
#     doc_content = "\n".join([doc.page_content for doc in state.context])

#     prompt = PromptTemplate.from_template(template)  
#     messages = prompt.invoke({'question': state.question, 'context': doc_content})
#     answer = llm.invoke(messages)
#     state.answer = answer
#     return state

In [1]:
from langgraph.graph import START, StateGraph, END

In [None]:
# workflow = StateGraph(State).add_sequence([retrieve_documents, generate_answer])
# workflow.add_edge(START, "retrieve_documents")
# graph = workflow.compile()

In [None]:
# initial_message = State(
#     question='What is Artificial Intelligence?'
# )

In [None]:
# answer = graph.invoke(initial_message)

In [2]:
from langgraph.graph import MessagesState
from langchain_core.messages.system import SystemMessage
from langchain_core.tools import tool

@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query."""
    # NOTE: the docstring above is used by `@tool` as the tool description the
    # model sees, so it is deliberately left short and unchanged.
    #
    # Returns a (content, artifact) pair as required by
    # response_format="content_and_artifact": `content` is the serialized text
    # of the hits, and the artifact is the list of retrieved documents.

    # Top-4 similarity search against the module-level vector store.
    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 4},
    )
    retrieved_docs = retriever.invoke(query)

    # One "Source/Content" stanza per document, separated by blank lines.
    content = "\n\n".join(
        f"Source: {doc.metadata}\nContent: {doc.page_content}"
        for doc in retrieved_docs
    )
    return content, retrieved_docs

def query_or_respond(state: MessagesState):
    """Let the model either answer directly or emit a `retrieve` tool call.

    The chat model is bound to the `retrieve` tool and invoked on the
    conversation held in ``state["messages"]``.

    Returns:
        A partial state update whose "messages" list holds the model's single
        response.
    """
    ai_message = llm.bind_tools([retrieve]).invoke(state["messages"])
    # Returned as a one-element list: MessagesState appends it to the existing
    # history rather than replacing it.
    return {"messages": [ai_message]}


def generate(state: MessagesState):
    """Answer the question using the most recently retrieved context.

    Collects the trailing run of tool messages at the end of
    ``state["messages"]``, places their content into a system prompt, and
    invokes the chat model on that prompt followed by the conversation
    (human/system messages and AI messages that carry no tool calls).

    Args:
        state: Graph state; ``state["messages"]`` is the message history.

    Returns:
        A partial state update whose "messages" list holds the model's answer.
    """
    # Walk the history backwards and stop at the first non-tool message, so
    # only the tool output from the latest retrieval step is used.
    recent_tool_messages = []
    for message in reversed(state["messages"]):
        if message.type != "tool":
            break
        recent_tool_messages.append(message)

    # Restore chronological order.
    tool_messages = recent_tool_messages[::-1]

    docs_content = "\n\n".join(
        tool_message.content for tool_message in tool_messages
    )

    system_message_content = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
        "\n\n"
        f"{docs_content}"
    )

    # Drop tool messages and the AI messages that only requested a tool call;
    # the retrieved text is already embedded in the system prompt above.
    conversation_messages = [
        message
        for message in state["messages"]
        if message.type in ("human", "system")
        or (message.type == "ai" and not message.tool_calls)
    ]

    prompt = [SystemMessage(system_message_content)] + conversation_messages

    answer = llm.invoke(prompt)
    return {"messages": [answer]}


In [3]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import ToolNode, tools_condition

# Node that runs the tool calls emitted by the model; `retrieve` is the only
# registered tool.
tools = ToolNode([retrieve])

# In-process checkpointer handed to the compiled graph below.
memory = MemorySaver()

# --- Nodes -----------------------------------------------------------------
workflow = StateGraph(MessagesState)
workflow.add_node(query_or_respond)
workflow.add_node(tools)
workflow.add_node(generate)

# --- Edges -----------------------------------------------------------------
# Every run starts at query_or_respond.
workflow.add_edge(START, "query_or_respond")

# tools_condition picks the next step after query_or_respond; the mapping
# allows either ending the run or continuing to the "tools" node.
workflow.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {
        END: END,
        "tools": "tools",
    },
)

# After the tools node, always generate the final answer and finish.
workflow.add_edge("tools", "generate")
workflow.add_edge("generate", END)

graph = workflow.compile(checkpointer=memory)

In [29]:

from langchain_core.messages import HumanMessage
# input_message = [HumanMessage("What is AI?")]
# response = graph.invoke({"messages":input_message}, config)

In [None]:
# Run configuration; presumably the thread_id selects which checkpointed
# conversation this run belongs to — confirm against the checkpointer docs.
config = {"configurable": {"thread_id": "0"}}

# Stream the run and print each streamed item as it arrives.
for message in graph.stream(
    {"messages": [HumanMessage("What is Artificial Intelligence?")]},
    config,
    stream_mode="messages",
):
    print(message)


KeyboardInterrupt: 