In [18]:
"""# Run this cell to install the necessary packages
import subprocess
import pkg_resources

def install_if_needed(package, version):
    '''Function to ensure that the libraries used are consistent to avoid errors.'''
    try:
        pkg = pkg_resources.get_distribution(package)
        if pkg.version != version:
            raise pkg_resources.VersionConflict(pkg, version)
    except (pkg_resources.DistributionNotFound, pkg_resources.VersionConflict):
        subprocess.check_call(["pip", "install", f"{package}=={version}"])

install_if_needed("langchain-core", "0.3.72")
install_if_needed("langchain-openai", "0.3.28")
install_if_needed("langchain-community", "0.3.27")
install_if_needed("unstructured", "0.18.11")
install_if_needed("langchain-chroma", "0.2.5")
install_if_needed("langchain-text-splitters", "0.3.9")
install_if_needed("pydantic", "2.11.9")"""

'# Run this cell to install the necessary packages\nimport subprocess\nimport pkg_resources\n\ndef install_if_needed(package, version):\n    \'\'\'Function to ensure that the libraries used are consistent to avoid errors.\'\'\'\n    try:\n        pkg = pkg_resources.get_distribution(package)\n        if pkg.version != version:\n            raise pkg_resources.VersionConflict(pkg, version)\n    except (pkg_resources.DistributionNotFound, pkg_resources.VersionConflict):\n        subprocess.check_call(["pip", "install", f"{package}=={version}"])\n\ninstall_if_needed("langchain-core", "0.3.72")\ninstall_if_needed("langchain-openai", "0.3.28")\ninstall_if_needed("langchain-community", "0.3.27")\ninstall_if_needed("unstructured", "0.18.11")\ninstall_if_needed("langchain-chroma", "0.2.5")\ninstall_if_needed("langchain-text-splitters", "0.3.9")\ninstall_if_needed("pydantic", "2.11.9")'

# CarManual_RAG_Assistant Project

# Import Required Libraries

In [19]:
# Import the required packages
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
import os

# Load the HTML File

In [20]:
# HTML file to load; the path is relative to the current working directory.
MANUAL_HTML_PATH = "data/mg-zs-warning-messages.html"

# Build the loader for that file, then read it into `car_docs`,
# which the splitting step consumes.
loader = UnstructuredHTMLLoader(file_path=MANUAL_HTML_PATH)
car_docs = loader.load()

# Load the Models

Initialize the LLM (gpt-4o-mini) and embeddings model (text-embedding-3-small).

In [21]:
# Chat model used to generate the final answer.
llm = ChatOpenAI(
    temperature=0,
    model="gpt-4o-mini",
)

# Embedding model used to vectorise the document chunks. The API key is read
# explicitly from the environment, so a missing OPENAI_API_KEY raises KeyError here.
embeddings = OpenAIEmbeddings(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model="text-embedding-3-small",
)

# Split Document into Chunks

Split the manual into 1000-character chunks with 200-character overlap for better retrieval.

In [22]:
# Chunking parameters handed to the splitter (size and overlap of each chunk).
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

rc_splitter = RecursiveCharacterTextSplitter(chunk_overlap=CHUNK_OVERLAP, chunk_size=CHUNK_SIZE)

# Split the loaded documents into chunks; `docs` feeds the vector store.
docs = rc_splitter.split_documents(car_docs)

# Create Vector Store and Retriever

Store document chunks in ChromaDB and create a retriever for similarity search.

In [23]:
# Build the vector store from the split chunks and expose it as a retriever.
#
# `ids` was previously referenced without being defined anywhere in the
# notebook, so this cell raised NameError on a fresh kernel (Restart & Run All).
# Derive one deterministic string id per chunk from its position instead.
# Stable ids also mean that re-running this cell should overwrite the same
# entries rather than append duplicate chunks.
# NOTE(review): that relies on langchain-chroma upserting by id -- confirm for
# the pinned version.
ids = [str(index) for index in range(len(docs))]
vectorstore = Chroma.from_documents(docs, embedding=embeddings, ids=ids)

# Retriever performing plain similarity search over the stored chunks.
retriever = vectorstore.as_retriever(search_type="similarity")

# Build a Prompt Template

Define instructions for the LLM to answer questions using only retrieved context.

In [24]:
message = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:
"""
prompt_template = ChatPromptTemplate.from_messages([("human", message)])

# Build and Test the RAG Chain

Chain together the retriever, prompt template, and LLM to create the complete RAG pipeline.

In [25]:
# Inputs for the prompt: the retriever supplies "context" for the incoming
# question, while the question itself is passed through unchanged.
chain_inputs = {"question": RunnablePassthrough(), "context": retriever}

# Full pipeline: gather inputs -> fill the prompt template -> call the LLM.
rag_chain = chain_inputs | prompt_template | llm

# Run the chain on a sample question and show the text of the model's reply.
user_question = "The Gasoline Particular Filter Full warning has appeared. What does this mean and what should I do about it?"
llm_reply = rag_chain.invoke(user_question)
answer = llm_reply.content
print(answer)

