In [1]:
# Load variables from a local .env file into the process environment before
# any client objects are created.
# NOTE(review): presumably this is what supplies the OpenAI API key used by
# the later cells — confirm against the project's .env.
from dotenv import load_dotenv
load_dotenv()


True

In [2]:
from operator import itemgetter

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores.faiss import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import (
    RunnableLambda,
    RunnablePassthrough,
    RunnableBranch,
)

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
def get_documents_from_web(url: str, chunk_size: int = 400, chunk_overlap: int = 20):
    """Load a web page and split it into overlapping text chunks.

    Args:
        url: Address of the page to fetch with ``WebBaseLoader``.
        chunk_size: Maximum size of each chunk handed to the splitter.
        chunk_overlap: Overlap between consecutive chunks.

    Returns:
        The list produced by ``RecursiveCharacterTextSplitter.split_documents``
        for the loaded documents (empty when nothing was loaded).
    """
    loader = WebBaseLoader(url)
    docs = loader.load()
    print("Loaded docs:", len(docs))

    # Only preview when something was actually loaded; indexing docs[0] on an
    # empty result would raise IndexError and hide the real problem.
    if docs:
        print("First chunk preview:\n", docs[0].page_content[:500])

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)


In [4]:
def create_db(docs):
    """Embed the given documents and index them in a FAISS vector store.

    Args:
        docs: Documents to embed and index.

    Returns:
        The vector store returned by ``FAISS.from_documents``.
    """
    embedder = OpenAIEmbeddings()
    store = FAISS.from_documents(docs, embedding=embedder)
    return store

In [5]:
def _format_docs(docs) -> str:
    # "Stuff" documents into a single context string
    return "\n\n".join(
        f"[Source {i+1}]\n{d.page_content}"
        for i, d in enumerate(docs)
    )

In [6]:
# ---------------------------
# Build chain (LangChain-only LCEL)
# ---------------------------

def create_chain(
    vectorstore,
    model: str = "gpt-3.5-turbo-1106",
    temperature: float = 0.4,
    k: int = 3,
):
    """Build a history-aware retrieval-augmented chat chain with LCEL.

    Args:
        vectorstore: Store exposing ``as_retriever``; used to fetch context.
        model: Chat model name passed to ``ChatOpenAI``.
        temperature: Sampling temperature passed to ``ChatOpenAI``.
        k: Number of documents requested from the retriever per query.

    Returns:
        A runnable that is invoked with a dict holding ``"input"`` (the user's
        question) and ``"chat_history"`` (a list of prior messages) and that
        yields the model's answer as a string.
    """
    llm = ChatOpenAI(model=model, temperature=temperature)
    retriever = vectorstore.as_retriever(search_kwargs={"k": k})

    # 1) Prompt that turns the conversation so far into a standalone search query
    query_prompt = ChatPromptTemplate.from_messages([
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
        ("human", "Create a concise search query to retrieve relevant context for the user's last question."),
    ])

    query_chain = query_prompt | llm | StrOutputParser()

    # 2) History-aware retriever:
    #    - with history  -> let the LLM write a search query, then retrieve with it
    #    - without       -> retrieve with the raw user input
    history_aware_retriever = RunnableBranch(
        (
            lambda x: bool(x.get("chat_history")),
            query_chain | retriever,
        ),
        itemgetter("input") | retriever,
    )

    # 3) Answer prompt: retrieved context in the system message, then the
    #    running conversation, then the new question
    answer_prompt = ChatPromptTemplate.from_messages([
        ("system", "Answer the user's questions based only on the context below.\n\n{context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ])

    # 4) Retrieve docs -> format them into {context} -> answer as plain text
    chain = (
        RunnablePassthrough.assign(docs=history_aware_retriever)
        .assign(context=RunnableLambda(lambda x: _format_docs(x["docs"])))
        | answer_prompt
        | llm
        | StrOutputParser()
    )

    return chain

In [7]:
def process_chat(chain, question, chat_history):
    """Run a single conversational turn through the chain.

    Args:
        chain: Object exposing ``invoke``.
        question: The user's current message.
        chat_history: Prior messages, passed through unchanged.

    Returns:
        Whatever ``chain.invoke`` returns for this turn.
    """
    payload = {"input": question, "chat_history": chat_history}
    return chain.invoke(payload)


In [10]:
# ---------------------------
# Main
# ---------------------------

if __name__ == "__main__":
    # Build the knowledge base once, then reuse the chain for every turn.
    # NOTE(review): the recorded output shows this URL serving a generic
    # "LangChain overview" page — confirm it still points at the intended content.
    docs = get_documents_from_web("https://python.langchain.com/docs/expression_language/")
    vectorstore = create_db(docs)
    chain = create_chain(vectorstore)

    chat_history = []

    while True:
        try:
            # Strip so that inputs like "exit " or " exit" still end the session.
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt ends the chat cleanly
            # instead of surfacing a traceback.
            break

        if not user_input:
            # Do not spend a model/embedding call on an empty message.
            continue
        if user_input.lower() == "exit":
            break

        response = process_chat(chain, user_input, chat_history)

        # Record the turn only after a successful response so the history
        # never contains a question without its answer.
        chat_history.append(HumanMessage(content=user_input))
        chat_history.append(AIMessage(content=response))

        print("You:", user_input)
        print("Assistant:", response)


Loaded docs: 1
First chunk preview:
 LangChain overview - Docs by LangChainSkip to main contentDocs by LangChain home pageLangChain + LangGraphSearch...⌘KSupportGitHubTry LangSmithTry LangSmithSearch...NavigationLangChain overviewLangChainLangGraphDeep AgentsIntegrationsLearnReferenceContributePythonOverviewGet startedInstallQuickstartChangelogPhilosophyCore componentsAgentsModelsMessagesToolsShort-term memoryStreamingStructured outputMiddlewareOverviewBuilt-in middlewareCustom middlewareAdvanced usageGuardrailsRuntimeContext engin
You: hi, I am Premasis
Assistant: Hello, Premasis! How can I assist you today?
You: What is the Capital of France?
Assistant: Based on the context provided, I don't have the specific information about the capital of France. If you have any questions related to LangChain, agent architecture, or model integrations, feel free to ask!
You: What is my name?
Assistant: Based on the context provided, your name is Premasis. If you have any questions related to LangC