# **Building a Conversational Chatbot with Langchain**

# **Description:**
In this activity, let's walk through the process of using LangChain, an open-source framework that enables the development of applications with large language models (LLMs) like OpenAI’s GPT-3.5-turbo.


Setup & Environment
What this does:

Installs modern LangChain packages and friends

Pins sane minimum versions to avoid API drift

Includes Gradio for the web UI

Imports & Config
What this does:

Loads env vars from .env (or uses values you assign here)

Centralizes all tunables (model, chunking, vectorstore path)

Verifies your OpenAI project-scoped key is present

In [11]:
%pip install -q -U langchain>=0.2.10 langchain-community>=0.2.10 langchain-openai>=0.2.7 langchain-text-splitters>=0.2.2 chromadb>=0.5.5 pypdf>=4.2.0 python-dotenv>=1.0.1 gradio>=4.44.0


Note: you may need to restart the kernel to use updated packages.


In [None]:
# Cell 2

import os
from pathlib import Path
from dotenv import load_dotenv

# 1) Load environment variables from .env if present
load_dotenv(override=True)

# 2) Optional inline key for local/classroom demos (NEVER commit a real key).
#    Leave it empty to use the key loaded from .env or the process environment.
#    An unconditional `os.environ["OPENAI_API_KEY"] = ""` here would erase the
#    key that load_dotenv() just loaded and make the validation below always fail.
INLINE_OPENAI_API_KEY = ""
if INLINE_OPENAI_API_KEY.strip():
    os.environ["OPENAI_API_KEY"] = INLINE_OPENAI_API_KEY
# If you want to be explicit about API base:
os.environ["OPENAI_API_BASE"] = "https://api.openai.com/v1"

# Normalize env and make base explicit for the new OpenAI SDK.
# .get() keeps a missing key from raising KeyError here; the explicit
# RuntimeError at the bottom of the cell reports it instead.
os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY", "").strip()
os.environ["OPENAI_BASE_URL"] = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")


# 3) Config — tweak as needed (each value can be overridden via an env var)
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-3-small")

# Paths: where PDFs are read from and where the Chroma index is stored.
# Both directories are created up front so later cells can assume they exist.
DATA_DIR = Path(os.getenv("PDF_DIR", "data"))
PERSIST_DIR = Path(os.getenv("CHROMA_DIR", "chroma_index"))
PERSIST_DIR.mkdir(parents=True, exist_ok=True)
DATA_DIR.mkdir(parents=True, exist_ok=True)

# RAG chunking
CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", 1000))
CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP", 150))
TOP_K = int(os.getenv("TOP_K", 4))

# Basic validation: fail fast with an actionable message when no key is configured
api_key = os.getenv("OPENAI_API_KEY")
if not api_key or not api_key.strip():
    raise RuntimeError("OPENAI_API_KEY not set. Put it in a .env file or assign in the cell above.")



LangChain Primitives
What this does:

Uses modern import locations (post-LangChain 0.2)

Defines a clean, idempotent index builder

Persists Chroma so you don’t rebuild every run

In [13]:
# Cell 3

# Initial read of the credentials/endpoint from the process environment.
# Both names are assigned again after the imports below, so these values are
# short-lived. The subscript access raises KeyError if OPENAI_API_KEY is unset.
API_KEY = os.environ["OPENAI_API_KEY"]
BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")

from typing import List, Dict, Any
from pathlib import Path
import os, textwrap

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Pull env each time so the cell is self-contained
# (falls back to an empty key and the default endpoint/model names when unset).
API_KEY = os.environ.get("OPENAI_API_KEY", "").strip()
BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-3-small")

def format_docs(docs: List[Any]) -> str:
    """Render retrieved documents as a numbered, citation-ready context string.

    Each document becomes a header line ``[n] <source> — p.<page>`` followed by
    its text; entries are separated by a blank line. Missing ``source`` and
    ``page`` metadata are shown as ``unknown`` and ``?``.

    Args:
        docs: Objects exposing ``metadata`` (a mapping) and ``page_content``.

    Returns:
        The joined context, or ``"NO CONTEXT AVAILABLE."`` when ``docs`` is empty.
    """
    if not docs:
        return "NO CONTEXT AVAILABLE."

    entries = []
    for number, doc in enumerate(docs, start=1):
        meta = doc.metadata
        source = meta.get("source", "unknown")
        page = meta.get("page", "?")
        entries.append(f"[{number}] {source} — p.{page}\n{doc.page_content}")
    return "\n\n".join(entries)

# Embeddings with resilient fallback
def make_embeddings(prefer_openai: bool = True):
    """Return an embeddings client, preferring OpenAI with a local fallback.

    Args:
        prefer_openai: When True, build ``OpenAIEmbeddings`` and verify it with a
            one-item "ping" embedding call. When False, skip OpenAI entirely.

    Returns:
        The verified ``OpenAIEmbeddings`` instance, or a 1536-dimensional
        ``DeterministicFakeEmbedding`` when OpenAI is skipped or the probe raises.
    """
    if prefer_openai:
        try:
            eb = OpenAIEmbeddings(model=EMBEDDING_MODEL, api_key=API_KEY, base_url=BASE_URL)
            eb.embed_documents(["ping"])
        except Exception as e:
            # Deliberate best-effort: any failure (auth, network, quota) → local fallback.
            print("Embeddings: OpenAI failed →", repr(e))
            print("Falling back to FakeEmbeddings (local, deterministic).")
        else:
            print("Embeddings: OpenAI OK")
            return eb
    # DeterministicFakeEmbedding maps the same text to the same vector on every
    # call, which is what the message above promises. Plain FakeEmbeddings draws
    # a fresh random vector per call, so stored and query vectors never line up.
    from langchain_community.embeddings import DeterministicFakeEmbedding
    return DeterministicFakeEmbedding(size=1536)

# Build/load vectorstore
def get_or_build_vectorstore(pdf_paths: List[Path], persist_dir: Path) -> Chroma:
    """Load the Chroma index under ``persist_dir``, or build it from PDFs.

    If an index already exists there and holds at least one record it is
    returned as-is. Otherwise every PDF is loaded, tagged with its file name as
    ``source``, split with the module-level CHUNK_SIZE / CHUNK_OVERLAP, embedded
    and written to ``persist_dir``.

    NOTE(review): the seed corpus is built into the same directory, so a
    non-empty seed index is returned here even when PDFs are supplied. Delete
    the directory (or confirm this is intended) when switching corpora.

    Args:
        pdf_paths: PDF files to index when no usable index exists.
        persist_dir: Directory holding the persisted Chroma index.

    Returns:
        A ``Chroma`` vector store backed by ``persist_dir``.
    """
    # Probe/build the embeddings once and reuse them for both load and build
    # (each make_embeddings() call issues its own "ping" request).
    embeddings = make_embeddings()

    try:
        vs = Chroma(persist_directory=str(persist_dir), embedding_function=embeddings)
        if vs._collection.count() > 0:
            return vs
    except Exception as e:
        # Best-effort load: say why it failed, then fall through to a rebuild.
        print("Vectorstore: could not load existing index →", repr(e))

    all_docs = []
    for pdf in pdf_paths:
        docs = PyPDFLoader(str(pdf)).load()
        for d in docs:
            # Store the bare file name so citations stay short and path-independent.
            d.metadata["source"] = pdf.name
        all_docs.extend(docs)

    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True)
    splits = splitter.split_documents(all_docs)

    # No explicit persist(): Chroma >= 0.4 writes to persist_directory
    # automatically and the LangChain wrapper's persist() is deprecated.
    return Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory=str(persist_dir))

# LLM with robust fallback to a local simulator
class SimulatedLLM:
    """Offline stand-in for a chat model that always returns one canned reply."""

    model_name = "simulated-llm"

    class _Reply:
        """Minimal message-like object exposing the reply text as ``.content``."""

        def __init__(self, content):
            self.content = content

    def invoke(self, prompt: str):
        """Return the canned reply; ``prompt`` is accepted but never inspected."""
        canned_lines = (
            "Using the provided context, here is a concise answer:",
            "",
            "- I synthesized key points from the retrieved chunks.",
            "- If details are missing, the source material did not cover them.",
            "- See the cited sources below for provenance.",
            "",
            "",  # the reply ends with a blank line
        )
        return self._Reply("\n".join(canned_lines))

def make_llm():
    """Return a verified ChatOpenAI client, or a SimulatedLLM if that fails.

    The client is built from the module-level OPENAI_MODEL, API_KEY and BASE_URL
    and checked with a single "ping" call; any exception triggers the fallback.
    """
    try:
        candidate = ChatOpenAI(
            model=OPENAI_MODEL,
            temperature=0.2,
            api_key=API_KEY,
            base_url=BASE_URL,
            timeout=60,
            max_retries=2,
        )
        candidate.invoke("ping")
    except Exception as err:
        print("LLM: OpenAI failed →", repr(err))
        print("Falling back to SimulatedLLM (local).")
        return SimulatedLLM()
    print(f"LLM: {OPENAI_MODEL} OK")
    return candidate


# Module-level model handle used by the later cells.
llm = make_llm()



LLM: OpenAI failed → AuthenticationError("Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-*********************************************************************************************.etc. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}")
Falling back to SimulatedLLM (local).


In [14]:
# Cell 3A — Seed corpus utilities (RAG without PDFs)

from langchain_core.documents import Document

# When True, Cell 4 builds this tiny built-in corpus if no PDFs are found.
SEED_MINI_CORPUS = True  # set False when you switch to real PDFs

# Seed snippets as (source, page, text) tuples; source and page become the
# metadata of each seed Document, and text becomes its page_content.
SEED_TEXTS = [
    ("seed_rag.txt", 1, "Retrieval-Augmented Generation (RAG) improves factual accuracy by constraining the model to respond using retrieved context chunks."),
    ("seed_rag.txt", 2, "A typical RAG pipeline: ingest -> chunk -> embed -> index -> retrieve -> compose prompt with citations -> generate."),
    ("seed_langchain.txt", 1, "LangChain’s LCEL composes runnables like retrievers, prompts, and LLMs into a single graph you can invoke or stream."),
    ("seed_langchain.txt", 2, "Use ChatOpenAI for chat models and OpenAIEmbeddings for embedding; community vectorstores like Chroma handle local persistence."),
    ("seed_chroma.txt", 1, "Chroma is a lightweight vector store suitable for local demos. Persist the index to disk to avoid rebuilding every run."),
    ("seed_eval.txt", 1, "Quality levers in RAG include chunk_size, chunk_overlap, retriever k, and re-ranking. Start with k=4 and chunk_size≈1000."),
]

def build_seed_vectorstore(persist_dir: Path) -> Chroma:
    """Build a Chroma index from the built-in SEED_TEXTS snippets.

    Each (source, page, text) tuple becomes a Document, is split with the
    module-level CHUNK_SIZE / CHUNK_OVERLAP, embedded via make_embeddings() and
    written to ``persist_dir``.

    Args:
        persist_dir: Directory holding the persisted Chroma index.

    Returns:
        A ``Chroma`` vector store containing the seed chunks.
    """
    docs = [Document(page_content=t, metadata={"source": src, "page": pg}) for (src, pg, t) in SEED_TEXTS]
    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True)
    splits = splitter.split_documents(docs)

    # Stable ids (source / page / chunk offset) make re-runs overwrite the same
    # records. With auto-generated ids every run appended another copy of each
    # chunk to the persisted collection, so retrieval returned duplicates.
    ids = [
        f"{d.metadata['source']}:p{d.metadata['page']}:{d.metadata.get('start_index', 0)}"
        for d in splits
    ]

    # No explicit persist(): Chroma >= 0.4 writes to persist_directory
    # automatically and the LangChain wrapper's persist() is deprecated.
    return Chroma.from_documents(
        documents=splits,
        embedding=make_embeddings(),
        ids=ids,
        persist_directory=str(persist_dir),
    )




Point at Your PDFs
What this does:

Lists PDFs under data/ (or your custom PDF_DIR)

Builds or loads the Chroma index

Creates a retriever

How to use:

Drop one or more PDFs into ./data/ before running.

Re-run this cell whenever you change the corpus. Delete ./chroma_index/ to fully rebuild.

In [26]:
# Cell 4

# Discover PDFs (recursive, case-insensitive)
from langchain_core.runnables import RunnableLambda

DATA_DIR = DATA_DIR.expanduser().resolve()
# Compare the lower-cased suffix instead of globbing "*.pdf" and "*.PDF"
# separately: on Windows both patterns match the same file (so it would be
# indexed twice), and mixed-case names such as "Report.Pdf" match neither
# pattern on case-sensitive systems. The set removes any duplicates.
pdfs = sorted({p for p in DATA_DIR.rglob("*") if p.is_file() and p.suffix.lower() == ".pdf"})

# "No context" retriever. It is wrapped in RunnableLambda (not a bare lambda)
# so that `retriever | format_docs` in the chain cell still composes.
no_context_retriever = RunnableLambda(lambda q: [])

try:
    if pdfs:
        print("Indexing PDFs:")
        for p in pdfs:
            print(" -", p)
        vectorstore = get_or_build_vectorstore(pdfs, PERSIST_DIR)
        retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})
        mode = f"PDFs ({len(pdfs)} files)"
    elif SEED_MINI_CORPUS:
        print("[Seed] No PDFs found. Building tiny seed corpus to demonstrate RAG…")
        vectorstore = build_seed_vectorstore(PERSIST_DIR)
        retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})
        mode = f"Seed corpus ({len(SEED_TEXTS)} snippets)"
    else:
        print("[Bypass] No PDFs and seeding disabled. Running with NO CONTEXT.")
        vectorstore = None
        retriever = no_context_retriever
        mode = "No context"

    print("Vectorstore:", ("ready @ " + str(PERSIST_DIR.resolve())) if vectorstore is not None else "disabled")
    print("Mode:", mode)

except Exception as e:
    # Final safety net: in-memory FAISS so the demo always runs.
    # NOTE(review): FAISS needs the faiss package, which the install cell does
    # not list — confirm it is available or this fallback raises ImportError.
    print("Index build failed:", repr(e))
    print("Falling back to in-memory FAISS.")
    from langchain_community.vectorstores import FAISS
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    from langchain_core.documents import Document

    docs = [Document(page_content=t, metadata={"source": src, "page": pg}) for (src, pg, t) in SEED_TEXTS] if SEED_MINI_CORPUS else []
    splits = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, add_start_index=True).split_documents(docs) if docs else []
    # Skip OpenAI here: the fallback must work without network access.
    embeddings = make_embeddings(prefer_openai=False)
    if splits:
        vectorstore = FAISS.from_documents(splits, embeddings)  # in-memory
        retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})
        mode = "Seed corpus (FAISS fallback)"
    else:
        vectorstore = None
        retriever = no_context_retriever
        mode = "No context (FAISS fallback)"
    print("Vectorstore:", "in-memory FAISS" if vectorstore is not None else "disabled")
    print("Mode:", mode)





[Seed] No PDFs found. Building tiny seed corpus to demonstrate RAG…
Embeddings: OpenAI failed → AuthenticationError("Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-*********************************************************************************************.etc. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}")
Falling back to FakeEmbeddings (local, deterministic).
Vectorstore: ready @ C:\Users\jim\OneDrive\Desktop\AMJ Group\Teaching\Class Materials\AGS_Advanced_Generative_AI_Building_LLM_Applications_ILT_Material\Guided_Practice\Lesson_04_LangChain_for_LLM_Application_Development_Part_2\chroma_index
Mode: Seed corpus (6 snippets)


RAG Prompt, Chain, and answer()
What this does:

Builds a grounded prompt that cites sources.

Wires retriever → prompt → llm → text.

answer(question) returns answer text, sources, and latency, and tags the current mode (Seed, PDFs, or No context).

In [27]:
# Cell 5

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_openai import ChatOpenAI

# System message template. `{context}` is filled in by the chain with the
# formatted retrieved documents; the instructions restrict the model to that
# context and ask for [#]-style citations.
SYSTEM_PROMPT = """You are a precise technical assistant. Answer the user’s question using ONLY the provided context.
If the answer cannot be found in the context, say you don’t know.
Cite sources as [#] matching the provided context items.

Context:
{context}
"""

# Two-message chat prompt: the system template above plus the user's question.
prompt = ChatPromptTemplate.from_messages([("system", SYSTEM_PROMPT), ("human", "{question}")])

# Pick the model step: the real ChatOpenAI client when available, otherwise a
# local runnable that ignores the prompt and returns a fixed reply string.
_SIMULATED_REPLY = (
    "Using the provided context, here is a concise answer.\n\n"
    "- Key points synthesized from retrieved chunks.\n"
    "- If details are missing, the context did not cover them.\n"
    "- See Sources for provenance."
)
model_runnable = llm if isinstance(llm, ChatOpenAI) else RunnableLambda(lambda _msgs: _SIMULATED_REPLY)

# Chain: retriever → formatter → prompt → model (real or simulated) → string.
# The input question is fanned out to both the retrieval branch and, unchanged,
# to the prompt's {question} slot.
_chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
rag_chain = _chain_inputs | prompt | model_runnable | StrOutputParser()

def answer(question: str) -> Dict[str, Any]:
    """Answer ``question`` from the retrieved context and report its sources.

    Documents are retrieved once; the same documents feed both the prompt
    context and the returned source list, so the citations always describe what
    the model actually saw.

    Args:
        question: The user's question.

    Returns:
        A dict with:
          - "answer": the model's text followed by a "Mode: ..." footer, or an
            error description if the model call failed;
          - "sources": one dict per retrieved document with "source", "page"
            and "snippet" (text truncated to 280 characters plus an ellipsis);
          - "latency_s": wall-clock seconds, rounded to 3 decimals.
    """
    import time

    t0 = time.time()

    # Retrieval: prefer the Runnable `invoke` API (get_relevant_documents is
    # deprecated); a plain callable is also accepted, and anything else means
    # "no context".
    if hasattr(retriever, "invoke"):
        docs = retriever.invoke(question)
    elif callable(retriever):
        docs = retriever(question)
    else:
        docs = []
    docs = list(docs or [])

    sources = [
        {
            "source": d.metadata.get("source", "unknown"),
            "page": d.metadata.get("page"),
            "snippet": (d.page_content[:280] + "…") if len(d.page_content) > 280 else d.page_content,
        }
        for d in docs
    ]

    try:
        # Generate from the documents retrieved above instead of invoking a
        # chain that retrieves a second time: a second retrieval can return
        # different documents than the ones listed in `sources`.
        generate = prompt | model_runnable | StrOutputParser()
        text = generate.invoke({"context": format_docs(docs), "question": question})
    except Exception as e:
        text = f"Request failed while calling the chat model.\nError: {repr(e)}"
        return {"answer": text, "sources": sources, "latency_s": round(time.time() - t0, 3)}

    # Seed snippets are recognizable by their "seed_" source prefix.
    mode_tag = "Seed corpus" if docs and any(s["source"].startswith("seed_") for s in sources) else ("PDFs" if docs else "No context")
    text += f"\n\n---\nMode: {mode_tag}."
    return {"answer": text, "sources": sources, "latency_s": round(time.time() - t0, 3)}




In [28]:
# Smoke test: run one question through answer() and print only the answer text.
print(answer("Give a 2-bullet summary of the RAG pipeline. Cite sources.")["answer"])


Using the provided context, here is a concise answer.

- Key points synthesized from retrieved chunks.
- If details are missing, the context did not cover them.
- See Sources for provenance.

---
Mode: Seed corpus.


In [29]:
# Cell 6

# Ask one question, then print the answer, a one-line preview of every source
# (first 120 characters, newlines flattened) and the measured latency.
res = answer("Explain the typical RAG embedding mechanism. Cite sources.")

print(res["answer"])
print("\nSources:")
for src in res["sources"]:
    preview = src["snippet"][:120].replace("\n", " ")
    print(f"- {src['source']} p.{src['page']}: {preview}")
print(f"\nLatency: {res['latency_s']}s")


Using the provided context, here is a concise answer.

- Key points synthesized from retrieved chunks.
- If details are missing, the context did not cover them.
- See Sources for provenance.

---
Mode: Seed corpus.

Sources:
- seed_langchain.txt p.1: LangChain’s LCEL composes runnables like retrievers, prompts, and LLMs into a single graph you can invoke or stream.
- seed_rag.txt p.2: A typical RAG pipeline: ingest -> chunk -> embed -> index -> retrieve -> compose prompt with citations -> generate.
- seed_langchain.txt p.1: LangChain’s LCEL composes runnables like retrievers, prompts, and LLMs into a single graph you can invoke or stream.
- seed_rag.txt p.1: Retrieval-Augmented Generation (RAG) improves factual accuracy by constraining the model to respond using retrieved cont

Latency: 0.005s


In [18]:
# Cell 7

# Cell 7 — Reliable Gradio UI (no ChatInterface)

import gradio as gr
import traceback, sys

def respond(message, history):
    """Gradio handler: answer ``message`` and append the turn to the chat history.

    Args:
        message: Text from the input box.
        history: Prior turns as ``(user, bot)`` tuples. ``None`` is treated as
            an empty history (e.g. a Chatbot whose value was never set).

    Returns:
        ``("", new_history)``: the empty string clears the input box and
        ``new_history`` is a new list, so the caller's list is never mutated.
        A blank message returns the history unchanged. Any exception is printed
        to stderr and shown in the chat as a "Handler error" reply.
    """
    # Copy up front so both the success and the error path can append safely.
    # A None history would otherwise raise a second time inside the except block.
    history = list(history or [])

    # Nothing to ask: just clear the box rather than querying the chain with "".
    if not (message or "").strip():
        return "", history

    try:
        res = answer(message)
        cites = "\n".join(f"- {s['source']} p.{s['page']}" for s in res.get("sources", [])) or "(none)"
        reply = res.get("answer", "No answer produced.") + "\n\n---\nSources:\n" + cites
    except Exception as e:
        # UI boundary: never let the handler crash; surface the error in the chat.
        tb = traceback.format_exc()
        print("UI handler error:\n", tb, file=sys.stderr)
        reply = f"Handler error: {e!r}\n\n— check notebook output for traceback."

    history.append((message, reply))
    return "", history

# Build the UI. Components are created in the order they appear inside the
# `with` blocks: title, chat window, input row, then a status panel.
with gr.Blocks(title="LangChain RAG Chatbot") as demo:
    gr.Markdown("# LangChain RAG Chatbot\nAsk questions grounded in your corpus (seeded for now).")

    chatbot = gr.Chatbot(height=450)
    with gr.Row():
        msg = gr.Textbox(placeholder="Ask something…", scale=5)
        send = gr.Button("Send", variant="primary", scale=1)

    # Read-only summary of the active configuration (values captured when this cell runs).
    status = gr.Markdown(
        f"**PDF directory:** `{DATA_DIR}`  \n"
        f"**Chroma directory:** `{PERSIST_DIR}`  \n"
        f"**Model:** `{OPENAI_MODEL}`  \n"
        f"**Embedding model:** `{EMBEDDING_MODEL}`  \n"
        f"**k:** {TOP_K}, **chunk_size:** {CHUNK_SIZE}, **overlap:** {CHUNK_OVERLAP}"
    )

    # Both triggers call respond(message, history) and write back (textbox value, history).
    send.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])  # press Enter to send

# Serve locally only (no public share link); show_error surfaces handler errors in the UI.
demo.launch(share=False, show_error=True)


  chatbot = gr.Chatbot(height=450)


* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.




In [19]:
# Cell 8 — API client sanity probe

from langchain_openai import OpenAIEmbeddings, ChatOpenAI

print("BASE_URL:", os.environ.get("OPENAI_BASE_URL", "unset"))

# Embeddings probe (may fail if FakeEmbeddings is currently used — that’s fine)
try:
    probe_vectors = OpenAIEmbeddings(model=EMBEDDING_MODEL, api_key=API_KEY, base_url=BASE_URL).embed_documents(["ok"])
    print("OpenAI Embedding vector length:", len(probe_vectors[0]))
except Exception as e:
    print("OpenAI Embeddings probe failed:", repr(e))

# Chat probe. The reply is stored under its own name: binding it to `msg` would
# overwrite the Gradio Textbox that the UI cell stores in `msg`.
try:
    chat_reply = ChatOpenAI(model=OPENAI_MODEL, temperature=0, api_key=API_KEY, base_url=BASE_URL).invoke("ping").content
    print("Chat model response:", chat_reply)
except Exception as e:
    print("Chat probe failed:", repr(e))



BASE_URL: https://api.openai.com/v1
OpenAI Embeddings probe failed: AuthenticationError("Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-*********************************************************************************************.etc. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}")
Chat probe failed: AuthenticationError("Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-*********************************************************************************************.etc. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}")


# **Steps to Perform:**

1. Set up the Environment
2. Define a Document Loader
3. Create a Document Splitter
4. Embed the Text and Save it in Vector Stores
5. Create a Retrieval Function
6. Run the Chatbot and Understand the Code


# **Step 1: Set up the Environment**


*   Import the necessary libraries.


In [None]:
#Import necessary libraries
import os
import openai
import sys

# **Step 2: Define a Document Loader**



*  Use a document loader like PyPDF to load information from a PDF file.








In [None]:
#Using PyPDF
# Import from langchain_community (the post-0.2 location, matching the cells
# above); the legacy `langchain.document_loaders` path is deprecated.
from langchain_community.document_loaders import PyPDFLoader

# Load the report from the working directory.
Doc_loader = PyPDFLoader("bcg-2022-annual-sustainability-report-apr-2023.pdf")
extracted_text = Doc_loader.load()

# **Step 3: Create a Document Splitter**


*   Break down big pieces of text into smaller parts using text splitters.




In [None]:
# Import from langchain_text_splitters (the post-0.2 location, matching the
# cells above); the legacy `langchain.text_splitter` path is deprecated.
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Separators are tried in order: paragraph, line, sentence end, word, character.
# The sentence separator is a regex lookbehind, so it needs a raw string
# ("\." is an invalid escape in a normal string) and is_separator_regex=True;
# without that flag the pattern is matched as literal text and never splits.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=150,
    chunk_overlap=0,
    separators=["\n\n", "\n", r"(?<=\. )", " ", ""],
    is_separator_regex=True,
)
splitted_text = text_splitter.split_documents(extracted_text)

# **Step 4: Embed the Text and Save it in Vector Stores**


*  Arrange a place to store and organize the text splits to make
 them searchable.
*  Employ OpenAIEmbeddings to create a pretrained model instance, saving the results in a specified directory path.




In [None]:
# Import from langchain_openai (matching the cells above); the legacy
# `langchain.embeddings` path is deprecated.
from langchain_openai import OpenAIEmbeddings

# No arguments: relies on the library defaults and the OPENAI_API_KEY env var.
embeddings = OpenAIEmbeddings()

In [None]:
# Import from langchain_community (matching the cells above); the legacy
# `langchain.vectorstores` path is deprecated.
from langchain_community.vectorstores import Chroma

In [None]:
# Directory (relative to the working directory) passed to Chroma as its persist location.
persist_directory = "chroma_vector"

In [None]:
# Embed every text split and store the vectors in a Chroma index under persist_directory.
vectordb = Chroma.from_documents(
    documents=splitted_text,
    embedding=embeddings,
    persist_directory=persist_directory
)

# **Step 5: Create a Retrieval Function**


*   Retrieve pertinent data from storage based on user input using a retriever.




In [None]:
# Import from langchain_openai (matching the cells above); the legacy
# `langchain.chat_models` path is deprecated.
from langchain_openai import ChatOpenAI

# temperature=0 keeps answers as repeatable as the model allows.
# Note: this rebinds the notebook-level name `llm`.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [None]:
from langchain.chains import RetrievalQA
# Question-answering chain over the vector store: the retriever supplies
# documents for the LLM, and return_source_documents=True asks for them to be
# included in the result alongside the answer.
Retriever_chain = RetrievalQA.from_chain_type(llm,
                                       retriever=vectordb.as_retriever(),
                                       return_source_documents=True,
                                       )

# **Step 6: Run the Chatbot and Understand the Code**


*   Set up the chatbot, run it and
interact with it.


In [None]:
import time

# Simple console chat loop: type a question, or "exit" to stop.
while True:
    try:
        query = input("\nEnter a query: ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C ends the session instead of raising a traceback.
        break

    query = query.strip()
    if query.lower() == "exit":
        break
    if not query:
        continue

    # Get the answer from the chain. `invoke` replaces the deprecated direct
    # call `Retriever_chain(query)`; "query" is RetrievalQA's input key.
    start = time.time()
    res = Retriever_chain.invoke({"query": query})
    end = time.time()

    # Print the result
    print("\n\n> Question:")
    print(query)
    print(f"\n> Answer (took {round(end - start, 2)} s.):")
    print(res['result'])

# **Conclusion:**
The code will ask the user to enter a query, get an answer from the chatbot, and print it along with how long it took to get the answer. The user can exit the chatbot by typing **exit**. If the user enters an empty query, the chatbot will ask for another query. This is the final step in creating and running the chatbot.