# 🗣️ This notebook explains the details of RAG (Retrieval-Augmented Generation)

### SECTION 1: Creating embeddings using an open-source model

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from dotenv import load_dotenv
import glob
import tiktoken
import numpy as np
import os

from sklearn.manifold import TSNE
import plotly.graph_objects as go

In [None]:
# Hugging Face embedding model used to embed the knowledge base.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"  # produces 384-dimensional embeddings

In [None]:
# Load settings from the local .env file; override=True lets values from
# .env replace variables that are already set in the environment.
load_dotenv(override=True)

#### Step 1 : Read all the files from the knowledge base folder

In [None]:

# Collect every markdown file under the knowledge base, at any depth.
knowledge_base_path = "knowledge-base/**/*.md"
files = glob.glob(pathname=knowledge_base_path, recursive=True)
print(f"No of files: {len(files)}")

# Read each file once and join the pieces at the end instead of growing a
# string with += inside the loop. Every file is followed by a blank line so
# that adjacent documents do not run into each other.
file_texts = []
for file_path in files:
    with open(file_path, 'r', encoding="utf-8") as f:
        file_texts.append(f.read() + "\n\n")
entire_knowledge_doc = "".join(file_texts)

print(f"Total Character no is:{len(entire_knowledge_doc):,}")



In [None]:
# Check how many tokens the whole knowledge base occupies

try:
    encoding = tiktoken.encoding_for_model(model_name="gpt-5-nano")
except KeyError:
    # tiktoken raises KeyError when it has no mapping for a model name
    # (e.g. an older release); fall back to the o200k_base encoding used
    # by recent OpenAI models.
    encoding = tiktoken.get_encoding("o200k_base")
tokens = encoding.encode(entire_knowledge_doc)
# Report the count; printing the raw token ids floods the cell output.
print(f"Total tokens: {len(tokens):,}")

In [None]:
# Build the embedding model from the shared constant so the model name is
# defined in exactly one place.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [None]:
# Minimal sample input used to inspect what an embedding looks like
text = "This"

# Embed a single query
query_result = embeddings.embed_query(text)

In [None]:
# Display the embedding returned for the sample query as the cell output
query_result

In [None]:
# Load the documents using LangChain

from typing import Text


# Each sub-folder of the knowledge base is treated as one document type.
# Plain files sitting directly under knowledge-base/ are skipped, because
# DirectoryLoader only accepts directories.
folders = [path for path in glob.glob("knowledge-base/*") if os.path.isdir(path)]
documents = []

for folder in folders:
    # The folder name doubles as the document type stored in the metadata.
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs={'encoding': 'utf-8'})
    folder_docs = loader.load()
    for doc in folder_docs:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

print(len(documents))

In [None]:
# Split the loaded documents into chunks of at most 1000 characters, with
# 200 characters of overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents=documents)
print(len(chunks))

# Show the first chunk as a sanity check
print(chunks[0])

In [None]:
db_name = "vector_db"
# One embedding object serves both the cleanup and the new store, and it is
# created with the same `model_name=` keyword and shared constant as the
# other HuggingFaceEmbeddings calls in this notebook.
embedding = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Drop the collection left over from a previous run so that re-running this
# cell does not add the same chunks a second time.
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embedding).delete_collection()

# Embed every chunk and persist the resulting vectors under db_name.
vector_store = Chroma.from_documents(documents=chunks, embedding=embedding, persist_directory=db_name)


In [None]:
# Details about the vectors.

# NOTE: `_collection` is a private attribute of the Chroma wrapper; it is
# used here only to inspect the stored vectors.
collections = vector_store._collection
count = collections.count()

# Fetch a single stored embedding to find out its dimensionality.
sample_embedding = collections.get(limit=1, include=["embeddings"])["embeddings"][0]

print(f"There are {count:,} vectors with {len(sample_embedding):,} dimensions in the vector store")

In [None]:
# Print the raw values of the sample embedding fetched from the store
print(sample_embedding)

In [None]:
# Prework: pull everything needed for the plots out of the collection

result = collections.get(include=['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
# NOTE: from here on `documents` holds the stored chunk texts, not the
# Document objects loaded earlier in the notebook.
documents = result['documents']
metadatas = result['metadatas']
doc_types = [metadata['doc_type'] for metadata in metadatas]

# One color per document type. Types come from folder names, so an
# unexpected folder falls back to gray instead of raising ValueError.
doc_type_colors = {'products': 'blue', 'employees': 'green', 'contracts': 'red', 'company': 'orange'}
colors = [doc_type_colors.get(t, 'gray') for t in doc_types]

In [None]:
# Display the per-chunk color list as the cell output
colors

In [None]:
# We humans find it easier to visualize things in 2D!
# Reduce the dimensionality of the vectors to 2D using t-SNE
# (t-distributed stochastic neighbor embedding)

tsne = TSNE(n_components=2, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 2D scatter plot; hovering a point shows its document type and
# the first 100 characters of the chunk.
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)],
    hoverinfo='text'
)])

# `scene` only configures 3D plots, so a 2D figure takes its axis titles
# directly on the layout.
fig.update_layout(
    title='2D Chroma Vector Store Visualization',
    xaxis_title='x',
    yaxis_title='y',
    width=800,
    height=600,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

In [None]:
# Let's try 3D!

# Project the embeddings onto three t-SNE components (seeded with 42)
tsne = TSNE(n_components=3, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Hover label: document type plus the first 100 characters of the chunk
hover_labels = [f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)]

# Create the 3D scatter plot
scatter_3d = go.Scatter3d(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    z=reduced_vectors[:, 2],
    mode='markers',
    marker={'size': 5, 'color': colors, 'opacity': 0.8},
    text=hover_labels,
    hoverinfo='text',
)
fig = go.Figure(data=[scatter_3d])

fig.update_layout(
    title='3D Chroma Vector Store Visualization',
    scene={'xaxis_title': 'x', 'yaxis_title': 'y', 'zaxis_title': 'z'},
    width=900,
    height=700,
    margin={'r': 10, 'b': 10, 'l': 10, 't': 40},
)

fig.show()


## 🌐 1st LangChain `RAG` Project

In [1]:
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_chroma import Chroma
from langchain_core.messages import HumanMessage, SystemMessage, convert_to_messages
from langchain_huggingface import HuggingFaceEmbeddings, ChatHuggingFace
import gradio as gr

# Load settings from the local .env file; override=True lets values from
# .env replace variables that are already set in the environment.
load_dotenv(override=True)


  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
# Set up the variables

CHAT_MODEL_NAME = "gpt-5-nano"  # chat model handed to ChatOpenAI
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"  # same embedding model that built the vector store in Section 1
DB_NAME = "vector_db"  # directory where the Chroma store is persisted

In [4]:
# Re-open the Chroma store persisted under DB_NAME; the embedding model is
# passed in so that queries are embedded with EMBEDDING_MODEL_NAME.
embeddings_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
vector_store = Chroma(persist_directory=DB_NAME, embedding_function=embeddings_model)

#### 🌡️ Some Tricks and Tips on Temperature

In [5]:
# Retriever over the persisted vector store, created with default settings
retriever = vector_store.as_retriever()
# temperature=0 asks for the least random output.
# NOTE(review): some OpenAI models only accept their default temperature;
# confirm that CHAT_MODEL_NAME actually honours this setting.
llm = ChatOpenAI(temperature=0, model_name=CHAT_MODEL_NAME)

In [6]:
# System prompt for the Insurellm assistant. chat() fills the {context}
# placeholder through str.format, so any literal braces added to this
# template must be doubled ({{ and }}).
SYSTEM_PROMPT_TEMPLATE = """
### Role and Persona
You are the witty, knowledgeable, and slightly charming official assistant for **Insurellm**. 
Your goal is to provide accurate information about Insurellm's services.

### Instructions
1. **Grounding:** Use the provided context to answer the user's questions. Only discuss Insurellm based on this information.
2. **Honesty:** If the context does not contain the answer, state clearly that you don't know. Do not provide unnecessary information
3. **Tone:** Be helpful, professional, and funny. Avoid being overly formal or robotic.
4. **Constraints:** Do not mention "the context" or "the provided documents" to the user. Speak as if you naturally have this knowledge. Do not provide 
any extra info that user does not required.

### Context:
{context}

"""

In [41]:
def combine_question(question: str, history: list[dict]=[]) -> str:
    """
    This method all the question and combine it into a single one to pull the 
    correct comtext data during the conversation. 
    Combine only the user message.
    """ 
    print(history)
    prior_question = "\n".join(m["content"][0]['text']  for m in history if m["role"] == "user")
    return prior_question + "\n" + question


In [42]:
def chat(question:str, history):
    """Answer ``question`` with the LLM, grounded in retrieved context.

    The earlier user messages and the new question are merged into one
    retrieval query. The page contents of the retrieved documents are
    placed into the system prompt, and the model is then called with that
    system prompt, the prior conversation and the new question.

    Returns the content of the model's response.
    """
    retrieval_query = combine_question(question=question, history=history)
    retrieved = retriever.invoke(retrieval_query, k=3)

    # Retrieved chunks are separated by a blank line inside the prompt
    context = "\n\n".join(chunk.page_content for chunk in retrieved)

    messages = [
        SystemMessage(content=SYSTEM_PROMPT_TEMPLATE.format(context=context)),
        # Turn the history dicts into LangChain message objects
        *convert_to_messages(history),
        HumanMessage(content=question),
    ]
    return llm.invoke(messages).content

In [44]:
# Start a Gradio chat UI that uses chat() as its response function
gr.ChatInterface(chat).launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




[]
