# Simple Fully Local RAG Agents Using LangChain with LLaMA3.2

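This notebook builds a fully local RAG (Retrieval-Augmented Generation) pipeline: PDF documents are split into chunks, embedded with a local Ollama embedding model, stored in a Chroma collection, and queried with LLaMA3.2 as the LLM.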

In [9]:
import os

import chromadb
from dotenv import load_dotenv
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Populate os.environ from the research env file; the CHROMA_* and DATA_DIR
# settings read further down come from the environment.
# NOTE: the path is relative, so it resolves against the kernel's working directory.
load_dotenv(dotenv_path="../../.env.research")

True

In [10]:
# Chat model used for answer generation, accessed through the Ollama integration.
# A low temperature is used to limit sampling randomness in the answers.
local_llm = "llama3.2:latest"
llm = ChatOllama(
    model=local_llm,
    temperature=0.1,
)

In [11]:
def load_pdfs_from_directory(directory_path: str) -> list:
    """Load every PDF located directly inside ``directory_path``.

    Entries are visited in sorted filename order so the returned list is
    identical on every run (``os.listdir`` itself returns entries in
    arbitrary order). The extension check is case-insensitive, so files
    named ``*.PDF`` are loaded as well instead of being silently skipped.

    Args:
        directory_path: Directory to scan. Sub-directories are not descended into.

    Returns:
        A flat list with the items returned by ``PDFPlumberLoader.load()``
        for each PDF, concatenated in filename order. Empty when the
        directory contains no PDF files.

    Raises:
        OSError: Propagated from ``os.listdir`` when the directory cannot
            be listed (for example, when it does not exist).
    """
    all_documents = []
    for filename in sorted(os.listdir(directory_path)):
        # Compare on the lower-cased name so ".PDF" / ".Pdf" are accepted too.
        if not filename.lower().endswith(".pdf"):
            continue
        file_path = os.path.join(directory_path, filename)
        all_documents.extend(PDFPlumberLoader(file_path=file_path).load())
    return all_documents

In [12]:
# Tunable indexing / retrieval settings, named once instead of being buried in the calls.
EMBEDDING_MODEL = "nomic-embed-text"
CHUNK_SIZE = 1500
CHUNK_OVERLAP = 200
TOP_K = 3

# os.environ[...] fails fast with a KeyError naming the missing variable, instead of
# passing None on (int(None) raises an opaque TypeError; os.listdir(None) scans the cwd).
chroma_host = os.environ["CHROMA_HOST"]
chroma_port = int(os.environ["CHROMA_PORT"])
collection_name = os.environ["CHROMA_COLLECTION_NAME"]

client = chromadb.HttpClient(host=chroma_host, port=chroma_port)
embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)

# Open the collection through LangChain. NOTE(review): with its default settings the
# Chroma wrapper is expected to create the collection when it does not exist yet.
vectorstore = Chroma(
    collection_name=collection_name,
    client=client,
    embedding_function=embeddings,
)

# Ingest when the collection holds no entries, not merely when it does not exist.
# A run that failed midway through loading or embedding would otherwise leave an
# empty collection behind that every later run treats as "already indexed".
if not vectorstore.get(limit=1)["ids"]:
    pdf_docs = load_pdfs_from_directory(os.environ["DATA_DIR"])
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    all_splits = text_splitter.split_documents(pdf_docs)
    if not all_splits:
        raise ValueError(
            f"No PDF content found in DATA_DIR={os.environ['DATA_DIR']!r}; nothing to index."
        )
    vectorstore = Chroma.from_documents(
        documents=all_splits,
        embedding=embeddings,
        collection_name=collection_name,
        client=client,
    )

retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})

In [13]:
def format_docs(docs):
    """Return the ``page_content`` of every document, joined by blank lines."""
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [14]:
# System message: answering instructions followed by the {context} placeholder,
# which the document chain fills with the retrieved text.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Two-message chat prompt: the system instructions, then the user's question ({input}).
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [15]:
# Step 1: chain that inserts the retrieved documents into the prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Step 2: wrap it with the retriever so a single invoke() does retrieval and answering.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [16]:
# Sanity check with a general-knowledge question; the recorded answer is a refusal
# because the retrieved context does not cover it.
rag_chain.invoke(
    {"input": "What is the capital of France?"}
)["answer"]

"I don't know. The provided context doesn't mention the capital of France or any information about geography."

In [17]:
# Specific rules question about the joker tile.
rag_chain.invoke(
    {"input": "What do I use a joker for?"}
)["answer"]

'A joker can be used as a substitute for any numbered tile of any color to make up a valid combination, and its value counts as the value of the tile it represents.'

In [18]:
# Broad, underspecified question to see how the chain copes with little context in the query.
rag_chain.invoke(
    {"input": "How can I win?"}
)["answer"]

'There are three ways to win in Rummikub: Open, Foot, and Closed. The specific rules for each type of win vary, but the general goal is to get rid of all your tiles by melding them into valid groups or runs.'

In [19]:
# Dutch-language query ("What is manipulation?") to check non-English input.
rag_chain.invoke(
    {"input": "Wat is manipulatie?"}
)["answer"]

'Manipulatie is de meest opwindende deler van het spel Rummikub. Spelers proberen de grootste hoeveelheid tiles te tafelen door de best mogelijke sets aan te passen of toe te voegen aan bestaande sets op de tafel.'

# Conclusion
This approach is easy to use, but nothing checks whether the generated answer is actually correct. It is therefore best treated as a fallback in case the LangGraph chatbot proves too challenging to implement.