In [11]:
%%capture --no-stderr
# Install/upgrade the notebook's dependencies with %pip; cell output is captured, stderr is not.
# NOTE(review): no versions are pinned and -U upgrades to the latest releases, so reruns may
# pick up different library versions — consider pinning for reproducibility.
%pip install -U --quiet langchain-community langchain-chroma langchain-ollama tiktoken langchainhub chromadb langchain langgraph langchain-text-splitters beautifulsoup4

In [12]:
import requests

# Download the source text and save it locally for the loader cell below.
url = "https://www.gutenberg.org/files/1661/1661-0.txt"
# A timeout keeps the cell from hanging forever if the server stalls.
response = requests.get(url, timeout=30)
# Fail loudly on HTTP errors instead of saving an error page as the corpus.
response.raise_for_status()

file_path = "sherlock_holmes.txt"
# newline="" disables newline translation so line endings already present in the
# downloaded text are written unchanged on every platform.
with open(file_path, "w", encoding='utf-8', newline="") as f:
    f.write(response.text)

print(f"✅ Downloaded and saved to {file_path}")

✅ Downloaded and saved to sherlock_holmes.txt


In [13]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Read the saved text file into LangChain documents.
loader = TextLoader(file_path, encoding='utf-8')
docs = loader.load()

# Split into chunks of at most 1000 characters with a 50-character overlap.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=50,
)
split_docs = splitter.split_documents(docs)

chunk_count = len(split_docs)
print(f"✅ Split into {chunk_count} chunks")

✅ Split into 774 chunks


In [14]:
import os
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma

# Embedding model used both to index the chunks and to embed queries.
embedding = OllamaEmbeddings(model="nomic-embed-text")

# On-disk location of the Chroma collection.
persist_directory = "./chroma_sherlock"

if os.path.exists(persist_directory):
    # Reuse the existing on-disk collection instead of re-embedding every chunk.
    # NOTE(review): only the directory's existence is checked; a directory left behind
    # by an interrupted first run would be reused as-is — confirm this is acceptable.
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embedding, collection_name="sherlock")
else:
    # First run: embed all chunks and store them under persist_directory.
    # No explicit persist() call: the langchain_chroma Chroma class does not provide
    # that method (calling it raises AttributeError); data is written to
    # persist_directory by the store itself.
    vectorstore = Chroma.from_documents(split_docs, embedding=embedding, collection_name="sherlock", persist_directory=persist_directory)

retriever = vectorstore.as_retriever()


In [15]:
from langchain.tools.retriever import create_retriever_tool

# Expose the vector-store retriever as a tool named "retrieve_sherlock".
retriever_tool = create_retriever_tool(
    retriever=retriever,
    name="retrieve_sherlock",
    description="Search for information in Sherlock Holmes stories.",
)

# Single-element tool list used by the rest of the notebook.
tools = [retriever_tool]

In [16]:
from typing import Annotated, Sequence
from typing_extensions import TypedDict
from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages

class AgentState(TypedDict):
    """Graph state schema: a single ``messages`` sequence of ``BaseMessage`` objects."""

    # `add_messages` is attached as annotation metadata; presumably LangGraph uses it
    # as the reducer that merges node-returned messages into this list — confirm
    # against the langgraph documentation.
    messages: Annotated[Sequence[BaseMessage], add_messages]

In [17]:
from langchain import hub
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_ollama import OllamaLLM, ChatOllama
from pydantic import BaseModel, Field


def agent(state):
    """Send the conversation so far to the llama3 chat model.

    Args:
        state: Graph state mapping; ``state["messages"]`` is passed to the model.

    Returns:
        A dict with a ``"messages"`` list containing the model's single reply.
    """
    print("---CALL AGENT---")
    # NOTE(review): tool binding is disabled here, so the model is never told about
    # `tools` and its replies are unlikely to request retrieval. Confirm that the
    # configured model supports tool calling before enabling `bind_tools(tools)`.
    chat_model = ChatOllama(model="llama3", temperature=0, streaming=True)
    reply = chat_model.invoke(state["messages"])
    return {"messages": [reply]}


def rewrite(state):
    """Ask the phi3 model to improve the first message's question.

    Args:
        state: Graph state mapping; the first entry of ``state["messages"]``
            is treated as the user question.

    Returns:
        A dict with a ``"messages"`` list containing the model's output.
    """
    print("---REWRITE---")
    messages = state["messages"]
    question = messages[0].content
    request = [
        HumanMessage(content=f"""Improve the following user question:\n{question}""")
    ]
    rewriter = OllamaLLM(model="phi3", temperature=0, streaming=True)
    improved = rewriter.invoke(request)
    return {"messages": [improved]}


def generate(state):
    """Produce an answer from the question and the most recent message.

    The first message's content is used as the question and the last message's
    content as the context for the "rlm/rag-prompt" prompt pulled from the hub.

    Args:
        state: Graph state mapping with a ``"messages"`` sequence.

    Returns:
        A dict with a ``"messages"`` list containing the chain's string output.
    """
    print("---GENERATE---")
    messages = state["messages"]
    question = messages[0].content
    context = messages[-1].content

    rag_prompt = hub.pull("rlm/rag-prompt")
    llm = OllamaLLM(model="phi3", temperature=0)
    rag_chain = rag_prompt | llm | StrOutputParser()

    answer = rag_chain.invoke({"context": context, "question": question})
    return {"messages": [answer]}

In [18]:
from typing import Literal

def grade_documents(state) -> Literal["generate", "rewrite"]:
    """Decide whether the latest message is relevant to the user's question.

    The first message's content is used as the question and the last message's
    content as the context. The phi3 model is asked for a yes/no verdict and the
    verdict selects the next graph node.

    Args:
        state: Graph state mapping with a ``"messages"`` sequence.

    Returns:
        ``"generate"`` when the model's reply starts with "yes" (ignoring case,
        surrounding whitespace and quotes); ``"rewrite"`` for any other reply.
    """
    print("---GRADE DOCUMENTS---")

    model = OllamaLLM(temperature=0, model="phi3")
    prompt = PromptTemplate(
        template="""
You are assessing if the following context is relevant to the user question.
Context:
{context}
Question:
{question}
Reply with 'yes' or 'no'.
""",
        input_variables=["context", "question"]
    )
    # OllamaLLM is a text-completion model and returns a plain string; structured
    # output is only implemented for chat models, so the yes/no reply is parsed
    # directly instead of calling with_structured_output().
    chain = prompt | model

    question = state["messages"][0].content
    context = state["messages"][-1].content
    reply = chain.invoke({"context": context, "question": question})

    # Tolerate replies such as " Yes", "'yes'" or "Yes, it is relevant."
    verdict = str(reply).strip().strip("'\"").lower()
    return "generate" if verdict.startswith("yes") else "rewrite"

In [19]:
from langgraph.graph import END, START, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition

# Assemble the agent -> retrieve -> (generate | rewrite) workflow.
workflow = StateGraph(AgentState)

# Register the four nodes in the same order as before.
for node_name, node_action in (
    ("agent", agent),
    ("retrieve", ToolNode([retriever_tool])),
    ("rewrite", rewrite),
    ("generate", generate),
):
    workflow.add_node(node_name, node_action)

workflow.set_entry_point("agent")

# After the agent: go to retrieval when tools_condition yields "tools", else finish.
agent_routes = {"tools": "retrieve", END: END}
workflow.add_conditional_edges("agent", tools_condition, agent_routes)
# After retrieval: grade_documents returns the name of the next node.
workflow.add_conditional_edges("retrieve", grade_documents)
workflow.add_edge("generate", END)
workflow.add_edge("rewrite", "agent")

graph = workflow.compile()

In [20]:
import pprint

# Initial state: a single user question.
inputs = {"messages": [HumanMessage(content="Who is Sherlock Holmes and what kind of detective is he?")]}

# Stream the graph and print each node's update as it arrives.
for step in graph.stream(inputs):
    for node, update in step.items():
        pprint.pprint(f"Output from node '{node}':")
        pprint.pprint(update)
    print("\n---\n")

---CALL AGENT---
"Output from node 'agent':"
{'messages': [AIMessage(content='Sherlock Holmes is a fictional character created by Sir Arthur Conan Doyle. He is a consulting detective, which means that he uses his extraordinary abilities of observation, deduction, and analytical thinking to solve complex cases for clients who seek his expertise.\n\nHolmes is often referred to as a "rationalist" or a "scientific detective," because he approaches crime-solving using the principles of science and reason. He relies on empirical evidence, observation, and logical reasoning to piece together the facts of a case, rather than relying on intuition or superstition.\n\nSome of Holmes\' notable characteristics include:\n\n1. **Exceptional powers of observation**: Holmes has an uncanny ability to notice tiny details that others might miss, which allows him to gather crucial evidence and piece together the puzzle of a crime.\n2. **Rapid-fire thinking**: Holmes is known for his incredible speed and ac