# RAG Powered Knowledge Base System
The goal here is to build a robust question-and-answer agent using Retrieval-Augmented Generation (RAG). For this project, a knowledge base for a fictional real estate company, Horizon Haven Realty, was generated with ChatGPT. Chroma serves as the vector store and LangChain ties the components together, so the system can efficiently retrieve the information most relevant to each query. The Qwen3:4B model then generates accurate, context-aware responses from the retrieved text, creating a reliable Q&A system that can assist with real estate-related inquiries based on the company’s knowledge base.

In [None]:
# imports

import os
import glob
import gradio as gr

In [None]:
# imports for langchain, plotly and Chroma

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_openai import ChatOpenAI
from langchain_chroma import Chroma
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings

In [None]:
# Suppress all warnings (the filter is not restricted to any category or
# module) so that notebook output stays uncluttered

import warnings
warnings.simplefilter('ignore')

In [None]:
# Configuration shared by the cells below:
#   db_name - directory in which the Chroma vector store is persisted
#   MODEL   - tag of the chat model (qwen3:4b), used via the langchain openai library

db_name = "hh_vector_db"
MODEL = "qwen3:4b"

In [None]:
# Take every sub-folder of our knowledgebase; each folder name later becomes
# the document type of the files inside it. Plain files that sit directly in
# the knowledge-base root are skipped, because the directory loader used
# below only accepts directory paths.

folders = [
    path
    for path in glob.glob("Horizon_Haven_Realty_KnowledgeBase/*")
    if os.path.isdir(path)
]

def add_metadata(doc, doc_type):
    """Record ``doc_type`` under the ``"doc_type"`` key of ``doc.metadata``.

    The document is modified in place and the same object is returned, so the
    call can be used inside an expression.
    """
    doc.metadata.update(doc_type=doc_type)
    return doc

# Every file is read as UTF-8
text_loader_kwargs = dict(encoding='utf-8')

# Load all Markdown files below each folder and label every loaded document
# with the name of the folder it came from
documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(
        folder,
        glob="**/*.md",
        loader_cls=TextLoader,
        loader_kwargs=text_loader_kwargs,
    )
    folder_docs = loader.load()
    documents.extend(add_metadata(doc, doc_type) for doc in folder_docs)

# Break the documents into overlapping chunks (chunk_size=1000, chunk_overlap=200)
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

doc_types_found = {doc.metadata['doc_type'] for doc in documents}
print(f"Total number of chunks: {len(chunks)}")
print(f"Document types found: {doc_types_found}")

We will be creating vector embeddings for each chunk of text. To do this, we'll use the __sentence-transformers/all-MiniLM-L6-v2__ model from Hugging Face to generate these embeddings. This model is efficient and well-suited for transforming text into vector representations, which will help us retrieve the most relevant information when queries are made.

In [None]:
# Build the vector store: each chunk is stored in Chroma together with its
# embedding vector

from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# If the persist directory is already there, drop its collection first so the
# store is rebuilt from scratch

if os.path.exists(db_name):
    previous_store = Chroma(persist_directory=db_name, embedding_function=embeddings)
    previous_store.delete_collection()

# Embed the chunks and persist them under db_name

vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=db_name,
)
stored_count = vectorstore._collection.count()
print(f"Vectorstore created with {stored_count} documents")

In [None]:
# Report how many vectors the collection holds and how long each vector is
collection = vectorstore._collection
count = collection.count()

# One stored embedding is enough to read off the dimensionality
first_record = collection.get(limit=1, include=["embeddings"])
sample_embedding = first_record["embeddings"][0]
dimensions = len(sample_embedding)
print(f"There are {count:,} vectors with {dimensions:,} dimensions in the vector store")

## Visualization
We will visualize the documents and their embedding vectors to better understand what's happening behind the scenes. To make it easier to interpret, we'll reduce the dimensionality to both 2D and 3D. This will give us a clearer view of how the vectors are distributed and how similar or different the documents are to each other.

In [None]:
# Fetch every stored vector together with its text and metadata for plotting
result = collection.get(include=['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
documents = result['documents']  # from here on `documents` holds the chunk texts
metadatas = result['metadatas']
doc_types = [metadata['doc_type'] for metadata in metadatas]

# Give each document type its own colour. The three original types keep their
# original colours (Company=blue, Contracts=green, Employees=red); any other
# type found in the store gets the next palette colour instead of raising
# ValueError. The palette wraps around if there are more types than colours.
known_types = ['Company', 'Contracts', 'Employees']
extra_types = sorted(set(doc_types) - set(known_types))
palette = ['blue', 'green', 'red', 'orange', 'purple',
           'brown', 'teal', 'magenta', 'gold', 'gray']
color_by_type = {
    type_name: palette[position % len(palette)]
    for position, type_name in enumerate(known_types + extra_types)
}
colors = [color_by_type[t] for t in doc_types]

In [None]:
# Reduce the dimensionality of the vectors to 2D using t-SNE
# (t-distributed stochastic neighbor embedding)

tsne = TSNE(n_components=2, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 2D scatter plot: one marker per chunk, coloured by document type,
# with the type and the first 100 characters of the text shown on hover
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)],
    hoverinfo='text'
)])

# Axis titles of a 2D figure are set on layout.xaxis / layout.yaxis;
# `scene` only applies to 3D traces
fig.update_layout(
    title='2D Chroma Vector Store Visualization',
    xaxis_title='x',
    yaxis_title='y',
    width=800,
    height=600,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

In [None]:
# Same visualization in 3D: project the vectors onto three t-SNE components

tsne = TSNE(n_components=3, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# One row of the transposed array per axis
x_coords, y_coords, z_coords = reduced_vectors.T

# Hover label: document type plus the first 100 characters of the chunk
hover_text = [f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)]

# Assemble the 3D scatter trace, then wrap it in a figure
trace_3d = go.Scatter3d(
    x=x_coords,
    y=y_coords,
    z=z_coords,
    mode='markers',
    marker={'size': 5, 'color': colors, 'opacity': 0.8},
    text=hover_text,
    hoverinfo='text',
)
fig = go.Figure(data=[trace_3d])

fig.update_layout(
    title='3D Chroma Vector Store Visualization',
    scene={'xaxis_title': 'x', 'yaxis_title': 'y', 'zaxis_title': 'z'},
    width=900,
    height=700,
    margin={'r': 20, 'b': 10, 'l': 10, 't': 40},
)

fig.show()

## Let's use LangChain to bring it all together

In [None]:
# Chat model: MODEL is requested through the ChatOpenAI client, pointed at the
# endpoint on localhost:11434 rather than at OpenAI itself
llm = ChatOpenAI(
    model_name=MODEL,
    temperature=0.5,
    base_url='http://localhost:11434/v1',
    api_key='ollama',
)

# Retriever over the vector store; it is asked for 10 chunks per query
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

# Buffer memory that holds the chat history under the 'chat_history' key
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# Wire model, retriever and memory into one conversational RAG chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [None]:
# Try the chain once with a sample question and print only the answer text
query = "What is Horizon Haven?"
result = conversation_chain.invoke(dict(question=query))
print(result["answer"])

In [None]:
# Start over with an empty memory so the sample question asked earlier is not
# part of the chat history
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# Rebuild the chain around the fresh memory (same model and retriever)
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [None]:
def chat(question, history):
    """Send ``question`` to the conversation chain and return its answer text.

    ``history`` is accepted but not read by this function.
    NOTE(review): presumably kept so the signature matches a chat-callback
    interface - confirm.
    """
    payload = {"question": question}
    return conversation_chain.invoke(payload)["answer"]

## Finally
We will create a function that allows us to chat with our model. The conversation will keep going until the user types "exit" or "quit", giving a seamless and interactive experience for querying the knowledge base.

In [None]:
import IPython
from IPython.display import display, Markdown

chat_history = []

def ask_bot():
    """Run an interactive chat loop against the knowledge base.

    Each question is read with ``input``, passed to ``chat`` and the answer is
    rendered as Markdown. Every exchange is also appended to the module-level
    ``chat_history`` list as ``("user", text)`` and ``("bot", answer)`` tuples.

    The loop ends when the user enters ``exit`` or ``quit`` (any letter case,
    surrounding whitespace ignored), or when input is closed or interrupted.
    Blank input is ignored rather than being sent to the model.
    """
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt ends the chat without a traceback
            print("Exiting chat.")
            break
        if not user_input:
            # Nothing was typed; prompt again
            continue
        if user_input.lower() in ('exit', 'quit'):
            print("Exiting chat.")
            break
        answer = chat(user_input, chat_history)
        chat_history.append(("user", user_input))
        chat_history.append(("bot", answer))
        display(Markdown(f"**Bot:** {answer}"))

In [None]:
# Start the interactive chat session; this call blocks until the user ends it
ask_bot()