# Use case for using RAG - Creating Knowledge Base - IV - Strategy for chunking 
### To be used by employees of Insurellm, an Insurance Tech company
### The agent needs to be accurate and the solution should be low cost.

This project uses RAG (Retrieval-Augmented Generation) so that our question-answering assistant gives accurate answers.

In [1]:
import os 
from dotenv import load_dotenv 
import glob 
import gradio as gr 
from openai import OpenAI

In [2]:
# including langchain imports 
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import DirectoryLoader, TextLoader          # Directory loader will load whole directory and text loader will load whole document
from langchain_text_splitters import CharacterTextSplitter                   # splitting the content in chunks so that there is some meaningful context 

In [3]:
# some more imports 
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

from langchain_chroma import Chroma
# from langchain.vectorstores import FAISS

import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go

# lets add one more embedding model from local mxbai to compare the output
from langchain_ollama import OllamaEmbeddings, ChatOllama

from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Note: the cells below use OpenAI for both embeddings and chat; the Ollama imports are kept only for a local-model comparison.

In [4]:
# Model and storage settings shared by the rest of the notebook.
# OLLAMA_MODEL="llama3.1"   (local alternative, currently disabled)
OPENAI_MODEL="gpt-4o-mini"

# Directory name passed to Chroma as persist_directory for the OpenAI-embedded store.
db_name_openai="vector_db_openai_embed"

In [5]:
# Load variables from a .env file into the process environment; override=True
# lets values from the file replace variables that are already set.
load_dotenv(override=True)

True

In [6]:
# Read the OpenAI key from the environment and create the OpenAI client.
# NOTE(review): api_key is not passed to OpenAI() here; the client is built with
# its defaults — presumably it picks the key up from the environment itself.
api_key=os.getenv("OPENAI_API_KEY")
openai=OpenAI()
# Local alternative (disabled):
# ollama=OpenAI(base_url=os.getenv("OLLAMA_BASE_URL"), api_key=os.getenv("OLLAMA_API_KEY"))

In [7]:
# Not read by any cell shown in this notebook; left in place in case other code relies on it.
context = {}

# Each sub-folder of knowledge-base/ (e.g. products, employees) is one document type.
# Only directories are kept, so a stray file next to the sub-folders does not make
# DirectoryLoader fail, and sorting keeps the load order stable between runs.
folders = sorted(path for path in glob.glob("knowledge-base/*") if os.path.isdir(path))

# Read every markdown file as UTF-8 instead of the platform default encoding.
text_loader_kwargs = {"encoding": "utf-8"}

documents = []
for folder in folders:

    # The sub-folder name becomes the document type tag.
    doc_type = os.path.basename(folder)

    # Load all markdown files below this folder (recursively).
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    folder_docs = loader.load()

    # Tag each loaded document with its type so it can be filtered/coloured later.
    for doc in folder_docs:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

# An empty result almost always means the notebook was started from the wrong
# working directory; say so here instead of failing later in the embedding step.
if not documents:
    print("Warning: no markdown documents found under knowledge-base/ - check the working directory")

In [8]:
# Split the loaded documents into overlapping chunks. The splitter may still emit
# a chunk above chunk_size (see the warning printed in this cell's output).
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(documents)

# Display how many chunks were produced.
len(chunks)

Created a chunk of size 1088, which is longer than the specified 1000


123

In [10]:
# Embedding model used to vectorise the chunks, constructed with the library defaults.
# Local alternative (disabled):
# ollama_embeddings=OllamaEmbeddings(model="mxbai-embed-large")
openai_embeddings=OpenAIEmbeddings()

In [11]:
# If a persisted store already exists on disk, open it and drop its collection
# so the store is rebuilt from scratch by the next cell.
if os.path.exists(db_name_openai):
    stale_store = Chroma(
        persist_directory=db_name_openai,
        embedding_function=openai_embeddings,
    )
    stale_store.delete_collection()

In [12]:
# Embed every chunk and persist the resulting store under db_name_openai.
vectorstore_openai = Chroma.from_documents(
    documents=chunks,
    embedding=openai_embeddings,
    persist_directory=db_name_openai,
)

# Report how many entries ended up in the underlying collection.
stored_count = vectorstore_openai._collection.count()
print(f"Vectorstore created with {stored_count} documents")

Vectorstore created with 123 documents


In [13]:
# Underlying Chroma collection (private attribute of the LangChain wrapper).
collection = vectorstore_openai._collection

# Fetch a single stored record, asking only for its embedding vector.
first_record = collection.get(limit=1, include=["embeddings"])
sample_embedding = first_record["embeddings"][0]

# The length of one vector is the dimensionality of the embedding space.
dimensions = len(sample_embedding)
print(f"The vectors have {dimensions:,} dimensions")

The vectors have 1,536 dimensions


In [14]:
# Prework for the plots: pull embeddings, texts and metadata out of the store in one call.
result = collection.get(include=['embeddings', 'documents', 'metadatas'])

# Embedding vectors as a numpy array (one row per stored chunk).
vectors = np.array(result['embeddings'])

# Chunk texts returned by the store.
# NOTE: this rebinds `documents`, which earlier held the loaded Document objects;
# re-running the splitting cell after this one requires reloading the documents first.
documents = result['documents']

# Document type of every chunk, taken from the metadata tag set at load time.
doc_types = [metadata['doc_type'] for metadata in result['metadatas']]

# Colour per document type. Unknown types (e.g. a newly added knowledge-base folder)
# fall back to gray instead of raising ValueError as the former list.index() lookup did.
doc_type_colors = {
    'products': 'blue',
    'employees': 'green',
    'contracts': 'red',
    'company': 'orange',
}
colors = [doc_type_colors.get(t, 'gray') for t in doc_types]

In [15]:
# Visualize the embeddings in 2D: project the vectors with t-SNE (fixed seed for a repeatable layout).
tsne = TSNE(n_components=2, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# One hover label per point: the document type plus the first 100 characters of the chunk.
hover_text = [f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)]

# Create the 2D scatter plot
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=hover_text,
    hoverinfo='text'
)])

# Axis titles are set directly on the layout: `scene` only applies to 3D plots,
# so the former scene=dict(...) left the 2D axes unlabelled.
# The title names the OpenAI embeddings that actually produced these vectors.
fig.update_layout(
    title='2D CHROMA Vector Store Visualization - OpenAI Embeddings',
    xaxis_title='x',
    yaxis_title='y',
    width=800,
    height=600,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

In [16]:
# Visualize the embeddings in 3D: same t-SNE projection as the 2D plot, with three output components.
tsne = TSNE(n_components=3, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# One hover label per point: the document type plus the first 100 characters of the chunk.
hover_text = [f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)]

# Create the 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    z=reduced_vectors[:, 2],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=hover_text,
    hoverinfo='text'
)])

# For a 3D figure the axis titles live under `scene`.
# Title spelling fixed ("CHORMA" -> "CHROMA") to match the 2D plot.
fig.update_layout(
    title='3D CHROMA Vector Store Visualization',
    scene=dict(xaxis_title='x', yaxis_title='y', zaxis_title='z'),
    width=900,
    height=700,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

In [17]:
# Chat model that answers the questions (OpenAI, model name taken from OPENAI_MODEL).
llm = ChatOpenAI(model_name=OPENAI_MODEL, temperature=0.7)

# Conversation memory stored under the "chat_history" key, returned as message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Retriever over the vector store, using its default search settings.
retriever = vectorstore_openai.as_retriever()

# Wire the LLM, the retriever and the memory into one conversational RAG chain.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)


Please see the migration guide at: https://python.langchain.com/docs/versions/migrating_memory/



In [18]:
# Sanity check: ask a broad question about the company and print the chain's answer.
query = "Can you describe Insurellm in a few sentences"
chain_input = {"question": query}
result = conversation_chain.invoke(chain_input)
print(result["answer"])

Insurellm is an innovative insurance tech firm founded by Avery Lancaster in 2015, designed to disrupt the insurance industry with its technology-driven products. The company offers four main software products: Carllm for auto insurance, Homellm for home insurance, Rellm for the reinsurance sector, and Marketllm, a marketplace connecting consumers with insurance providers. With a workforce of 200 employees and over 300 clients globally, Insurellm is committed to transforming the insurance landscape through innovation and reliable solutions.


In [19]:
# Fresh memory so the earlier exchange does not carry over into the next questions.
memory = ConversationBufferMemory(return_messages=True, memory_key='chat_history')

# Rebuild the chain around the existing LLM and retriever with the new, empty memory.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

# Asking some difficult questions 

In [20]:
# A harder, very specific question, asked with the default retriever settings.
query = "Who received the prestigious IIOTY award in 2023?"
chain_input = {"question": query}
result = conversation_chain.invoke(chain_input)
answer = result["answer"]
print("\nAnswer:", answer)


Answer: I don't know.


# Chunking Strategy

In [21]:
from langchain_core.callbacks import StdOutCallbackHandler

# Chat model that answers the questions (OpenAI, model name taken from OPENAI_MODEL).
llm = ChatOpenAI(model=OPENAI_MODEL, temperature=0.7)

# Conversation memory stored under the "chat_history" key, returned as message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Retriever over the vector store, using its default search settings.
retriever = vectorstore_openai.as_retriever()

# Same chain as before, plus a stdout callback so each run prints its chain steps
# and the formatted prompt (including the retrieved context) for inspection.
trace_callbacks = [StdOutCallbackHandler()]
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    callbacks=trace_callbacks,
)

In [22]:
# Repeat the hard question with tracing on, to see which chunks reach the prompt.
query = "Who received the prestigious IIOTY award in 2023?"
chain_input = {"question": query}
result = conversation_chain.invoke(chain_input)
answer = result["answer"]
print("\nAnswer:", answer)



[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
- **2022**: **Satisfactory**  
  Avery focused on rebuilding team dynamics and addressing employee concerns, leading to overall improvement despite a saturated market.  

- **2023**: **Exceeds Expectations**  
  Market leadership was regained with innovative approaches to personalized insurance solutions. Avery is now recognized in industry publications as a leading voice in Insurance Tech innovation.

## Annual Performance History
- **2020:**  
  - Completed onboarding successfully.  
  - Met expectations in delivering project milestones.  
  - Received positive feedback from the team leads.

- **2021:**  
  -

In [25]:
# Chat model that answers the questions (OpenAI, model name taken from OPENAI_MODEL).
llm = ChatOpenAI(model_name=OPENAI_MODEL, temperature=0.7)

# Fresh conversation memory stored under the "chat_history" key.
memory = ConversationBufferMemory(return_messages=True, memory_key='chat_history')

# Retriever over the vector store; k is how many chunks to use per question (25 here).
retriever_search_kwargs = {"k": 25}
retriever = vectorstore_openai.as_retriever(search_kwargs=retriever_search_kwargs)

# Wire the LLM, the wider retriever and the memory into the conversation chain.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [26]:
# Ask the hard question once more, now that the retriever supplies 25 chunks.
query = "Who received the prestigious IIOTY award in 2023?"
chain_input = {"question": query}
result = conversation_chain.invoke(chain_input)
answer = result["answer"]
print("\nAnswer:", answer)


Answer: Maxine received the prestigious IIOTY 2023 award.


In [None]:
def chat(question, history):
    """Answer one chat message through the module-level conversation chain.

    Args:
        question: The user's message text.
        history: Previous messages as passed by the chat UI; not used here,
            only `question` is forwarded to the chain.

    Returns:
        The value stored under "answer" in the chain's result.
    """
    chain_input = {"question": question}
    return conversation_chain.invoke(chain_input)["answer"]

In [None]:
# Keep the interface itself in `view` (not the return value of launch()), so the
# running UI can still be reached afterwards, e.g. to shut it down with view.close().
view = gr.ChatInterface(chat, type="messages")

# Start the chat UI; the trailing semicolon suppresses the cell's output value.
view.launch();
