In [1]:
from langchain_ollama import ChatOllama

# Initialize the chat model shared by the chains built below: Ollama's "llama3.2"
# NOTE(review): presumably requires a reachable Ollama server with this model pulled — confirm
llm = ChatOllama(model="llama3.2")


In [2]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Prompt that rewrites a follow-up into a self-contained question, using the
# prior turns supplied through the {chat_history} placeholder
contextualize_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Convert the latest user question into a standalone question "
            "given the chat history. Only return the standalone question.",
        ),
        ("placeholder", "{chat_history}"),
        ("human", "{question}"),
    ]
)

# Rewriting chain: prompt -> model -> plain string
contextualize_question = contextualize_prompt | llm | StrOutputParser()

# Prompt that answers {question} from the supplied {context}
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer the user question given the following context:\n\n{context}."),
        ("human", "{question}"),
    ]
)


In [3]:
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

# Router: choose how the "question" value is produced for a given input
def contextualize_if_needed(input_: dict):
    """Pick the runnable that yields the question for ``input_``.

    When ``input_`` carries a truthy ``"chat_history"`` entry, the
    ``contextualize_question`` chain is returned. Otherwise a runnable that
    simply extracts ``"question"`` from its input is returned.
    """
    has_history = bool(input_.get("chat_history"))
    if not has_history:
        # Nothing to resolve against: hand the question through unchanged
        return RunnablePassthrough() | (lambda payload: payload["question"])
    # Prior turns exist: hand back the rewriting chain
    return contextualize_question

# Wrap the routing function as a Runnable so it can be used as a chain step
contextualize_if_needed_runnable = RunnableLambda(contextualize_if_needed)


In [4]:
# Stand-in retriever: ignores its input and always yields the same fact
def fake_retriever(input_: dict) -> str:
    """Return a fixed context string, regardless of what ``input_`` holds."""
    canned_context = "Egypt's population in 2024 is about 111 million."
    return canned_context

# Wrap the fake retriever as a Runnable so it can be used as a chain step
retriever_runnable = RunnableLambda(fake_retriever)


In [None]:
# Assemble the end-to-end pipeline
full_chain = (
    # Replace "question" with the (possibly rewritten) standalone question
    RunnablePassthrough.assign(question=contextualize_if_needed_runnable)
    # Attach "context" produced by the retriever
    .assign(context=retriever_runnable)
    | qa_prompt  # fill the QA prompt from the assembled dict
    | llm  # ask the model
    | StrOutputParser()  # reduce the model output to a plain string
)

# Run the pipeline on a follow-up question together with its prior turns
response = full_chain.invoke(
    {
        "question": "what about Egypt",
        "chat_history": [
            ("human", "what's the population of Indonesia"),
            ("ai", "about 276 million"),
        ],
    }
)

print(response)


In [None]:
# Stream the rewritten (standalone) question chunk by chunk as it is generated.
# Note: this streams only the contextualization step, not the full QA chain.
for chunk in contextualize_if_needed_runnable.stream({
    "question": "what about Egypt",
    "chat_history": [
        ("human", "what's the population of Indonesia"),
        ("ai", "about 276 million"),
    ]
}):
    # Chunks are consecutive pieces of one string and already contain their own
    # whitespace, so print them back-to-back (a " " separator would split words
    # and double spaces). Flush so each piece appears as soon as it arrives.
    print(chunk, end="", flush=True)
print()  # terminate the streamed line
