In [95]:
import os
import glob
import gradio as gr
import ollama

In [96]:
# imports for langchain and Chroma and plotly

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go
#from langchain.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_ollama import OllamaEmbeddings

In [97]:
# Price is a factor for our company, so we're going to use a low-cost model.
# MODEL is the model tag handed to the chat LLM that is created later in this notebook.
MODEL = "llama3.2:latest"
# Name of the on-disk directory used as the Chroma persist directory.
db_name = "chroma_vector_db"

In [98]:
# Read in documents using LangChain's loaders.
# Every sub-folder of the knowledge base is treated as one document type; the
# folder name is recorded in each document's metadata under "doc_type".

# Keep only directories: DirectoryLoader expects a directory path, so a stray
# file sitting directly under knowledge-base/ would otherwise abort the loop.
# Sorting makes the load order reproducible, because glob returns entries in
# arbitrary (filesystem-dependent) order.
folders = sorted(path for path in glob.glob("knowledge-base/*") if os.path.isdir(path))
text_loader_kwargs = {'encoding': 'utf-8'}
# If that doesn't work, some Windows users might need to uncomment the next line instead
# text_loader_kwargs={'autodetect_encoding': True}

documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    # Recursively pick up every Markdown file below this sub-folder.
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    folder_docs = loader.load()
    for doc in folder_docs:
        # Tag each document with its originating folder so it can be grouped later.
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

In [99]:
# Break every loaded document into overlapping chunks ready for embedding.
# The configured chunk_size is a target rather than a hard cap: the splitter
# reports when it has to emit a chunk longer than the requested size.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(documents)

Created a chunk of size 1088, which is longer than the specified 1000


In [100]:
# Distinct document types found across all chunks.
doc_types = {chunk.metadata['doc_type'] for chunk in chunks}

In [101]:
# doc_types

In [102]:
# for chunk in chunks:
#     if 'CEO' in chunk.page_content:
#         print(chunk)
#         print("_________")

In [103]:
# Choose the embedding model used to turn each chunk into a vector for the vector store.
# Option 1: OpenAI embeddings -- these come at a cost and also need an API key:
#embeddings = OpenAIEmbeddings()

# Option 2: a free Hugging Face sentence-transformers model (already imported above):
#embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Option actually in use: the "nomic-embed-text" model through OllamaEmbeddings.
embeddings = OllamaEmbeddings(model="nomic-embed-text")

In [104]:
# Start from a clean slate: when a persisted Chroma datastore is already on
# disk, open it and drop its collection before the store is rebuilt.

if os.path.exists(db_name):
    existing_store = Chroma(persist_directory=db_name, embedding_function=embeddings)
    existing_store.delete_collection()

In [105]:
# Embed all chunks and persist them in a new Chroma vector store under db_name.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=db_name,
)
# Report how many entries ended up in the underlying collection.
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

Vectorstore created with 123 documents


In [106]:
# Fetch a single stored vector and report how many dimensions it has.

collection = vectorstore._collection
first_record = collection.get(limit=1, include=["embeddings"])
sample_embedding = first_record["embeddings"][0]
dimensions = len(sample_embedding)
print(f"The vectors have {dimensions:,} dimensions")

The vectors have 768 dimensions


In [107]:
# Peek at the first ten components of the sample vector.
sample_embedding[:10]

array([-0.01024695,  0.0567053 , -0.22005956,  0.02591887,  0.04253098,
       -0.00741402, -0.01556916, -0.003159  ,  0.03767658, -0.0421448 ])

In [108]:
# Visualising the vector store: pull every vector together with its text and metadata.
# NOTE: this rebinds `documents` (now the stored chunk texts) and `doc_types`
# (now one label per vector, in the same order as `vectors`); the plotting
# cells that follow rely on these names.
result = collection.get(include=['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
documents = result['documents']
doc_types = [metadata['doc_type'] for metadata in result['metadatas']]

# One colour per known document type. Document types come from whatever
# sub-folders exist in the knowledge base, so a type without an entry here
# falls back to gray instead of raising ValueError from a list.index() lookup.
doc_type_colors = {'products': 'blue', 'employees': 'green', 'contracts': 'red', 'company': 'orange'}
colors = [doc_type_colors.get(t, 'gray') for t in doc_types]

In [109]:
# We humans find it easier to visualize things in 2D!
# Reduce the dimensionality of the vectors to 2D using t-SNE
# (t-distributed stochastic neighbor embedding)

# random_state is fixed so the projection is reproducible between runs.
tsne = TSNE(n_components=2, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 2D scatter plot: one marker per chunk, coloured by document type,
# with the type and the first 100 characters of the chunk shown on hover.
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)],
    hoverinfo='text'
)])

# Axis titles for a 2D (cartesian) plot live directly on the layout;
# `scene` only applies to 3D traces, so titles set there are never shown here.
fig.update_layout(
    title='2D Chroma Vector Store Visualization',
    xaxis_title='x',
    yaxis_title='y',
    width=800,
    height=600,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

In [110]:
# The same t-SNE projection, this time into three dimensions.

tsne = TSNE(n_components=3, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Hover label per point: document type plus the first 100 characters of the chunk.
hover_labels = [f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)]
marker_style = {"size": 5, "color": colors, "opacity": 0.8}

# Create the 3D scatter plot
scatter_3d = go.Scatter3d(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    z=reduced_vectors[:, 2],
    mode='markers',
    marker=marker_style,
    text=hover_labels,
    hoverinfo='text',
)
fig = go.Figure(data=[scatter_3d])

# Title, 3D axis labels (via the scene), figure size and margins.
fig.update_layout(
    title='3D Chroma Vector Store Visualization',
    scene={"xaxis_title": 'x', "yaxis_title": 'y', "zaxis_title": 'z'},
    width=900,
    height=700,
    margin={"r": 20, "b": 10, "l": 10, "t": 40},
)

fig.show()

In [111]:
# direct ollama calling
# llm = ChatOllama(temperature=0.7, model=MODEL)
# need to install : pip install -qU langchain_ollama
from langchain_ollama import ChatOllama

In [112]:
# create a new Chat with OpenAI below. I will instead be using ollama
#llm = ChatOpenAI(temperature=0.7, model_name=MODEL)
# llm = ChatOllama(temperature=0.7, model=MODEL)

# # set up the conversation memory for the chat
# memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# # the retriever is an abstraction over the VectorStore that will be used during RAG
# retriever = vectorstore.as_retriever()

# # putting it together: set up the conversation chain with the GPT 4o-mini LLM, the vector store and memory
# conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [113]:
#query = "Can you describe Insurellm in a few sentences"
# query = "who is avery lancaster"

# result = conversation_chain.invoke({"question":query})
# print(result["answer"])

In [114]:
# set up a new conversation memory for the chat
# memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [115]:
# Wrapping in a function - note that history isn't used, as the memory is in the conversation_chain

# def chat(message, history):
#     result = conversation_chain.invoke({"question": message})
#     return result["answer"]

In [116]:
# And in Gradio:

# view = gr.ChatInterface(chat, type="messages").launch(inbrowser=True)

In [117]:
# When answers are incorrect, we can try any of the following: pass whole documents instead of chunks,
# increase or decrease the chunk size, increase or decrease the chunk overlap, or increase the number of chunks included in the context.
# We can see what is happening behind the scenes by using StdOutCallbackHandler, which shows the system prompt along
# with the chunks retrieved from the vector DB.

# from langchain_core.callbacks import StdOutCallbackHandler

# llm = ChatOllama(temperature=0.7, model=MODEL)
# memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# retriever = vectorstore.as_retriever()

# conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory, callbacks=[StdOutCallbackHandler()])

# query = "Who received the prestigious IIOTY award in 2023?"
# result = conversation_chain.invoke({"question": query})
# answer = result["answer"]
# print("\nAnswer:", answer)

In [121]:
# Build the conversational RAG chain that answers questions in the chat UI.
llm = ChatOllama(model=MODEL, temperature=0.7)

# Conversation memory for the chat, keyed as 'chat_history'
memory = ConversationBufferMemory(return_messages=True, memory_key='chat_history')

# The retriever is an abstraction over the vector store that is used during RAG.
# It is created with its default search settings here; to control how many
# chunks are retrieved per query, pass search_kwargs instead, e.g.:
#retriever = vectorstore.as_retriever(search_kwargs={"k": 25})
retriever = vectorstore.as_retriever()

# Putting it together: the Ollama chat model, the retriever and the memory
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [122]:
def chat(question, history):
    """Answer one chat message through the module-level conversation chain.

    Args:
        question: The user's message text.
        history: Chat history supplied by the caller; it is not read here
            because the conversation chain is invoked with the question only.

    Returns:
        The value stored under the "answer" key of the chain's response.
    """
    response = conversation_chain.invoke({"question": question})
    answer = response["answer"]
    return answer

In [123]:
# Wrap `chat` in a Gradio chat UI and launch it, opening it in the browser.
# `view` receives whatever launch() returns.
chat_interface = gr.ChatInterface(chat, type="messages")
view = chat_interface.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7872

To create a public link, set `share=True` in `launch()`.
