In [1]:
# Date: 16.01.25
# Note: I couldn't get the Flash version running, as I'm not sure how to set up the LangChain
# conversational agent with Flash. Found this in the community contributions. It works
# using Llama (and a local embedding model - I replaced the one that was in here).
# TODO: verify that it is really using the local Llama (I think it must be).

In [2]:
# # Document loading, retrieval methods and text splitting
# !pip install -qU langchain langchain_community

# # Local vector store via Chroma
# !pip install -qU langchain_chroma

# # Local inference and embeddings via Ollama
!pip install -qU langchain_ollama

# # Web Loader
# !pip install -qU beautifulsoup4

# # Pull the model first
# !ollama pull nomic-embed-text

# !pip install -qU pypdf

In [3]:
#Imports
import os
import glob
from dotenv import load_dotenv
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
#from langchain_ollama import OllamaEmbeddings
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# imports for langchain

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter

In [4]:
# Read in documents using LangChain's loaders.
# Every sub-folder of the knowledge base is loaded, and each document is tagged
# with the name of the folder it came from (metadata key "doc_type").

# Keep directories only: the glob pattern also matches plain files that sit directly
# inside knowledge-base/, and DirectoryLoader cannot be pointed at a file.
folders = [path for path in glob.glob("knowledge-base/*") if os.path.isdir(path)]

# With thanks to CG and Jon R, students on the course, for this fix needed for some users 
text_loader_kwargs = {'encoding': 'utf-8'}
# If that doesn't work, some Windows users might need to uncomment the next line instead
# text_loader_kwargs={'autodetect_encoding': True}

documents = []
for folder in folders:
    # The folder name doubles as the document category
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    folder_docs = loader.load()
    for doc in folder_docs:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

# Displayed as the cell output: how many documents were loaded in total
len(documents)


31

In [5]:
# Put the chunks of data into a Vector Store that associates a Vector Embedding with each chunk
# Chroma is a popular open source Vector Database based on SQLite
DB_NAME = "vector_db"

# Embeddings come from a HuggingFace sentence-transformers model; the Ollama-based
# alternative that was replaced is kept on the next line for reference
#embeddings = OllamaEmbeddings(model="nomic-embed-text")
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Delete if already exists
# (presumably so that re-running this cell does not add the same chunks again to a
# collection persisted by an earlier run - this must happen before the store is created)

if os.path.exists(DB_NAME):
    Chroma(persist_directory=DB_NAME, embedding_function=embeddings).delete_collection()

# Create vectorstore: split the loaded documents into chunks, then embed and persist them

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=DB_NAME)
# The store is built from `chunks`, so the count printed here is the number of chunks,
# not the number of source files
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
Created a chunk of size 1088, which is longer than the specified 1000


Vectorstore created with 123 documents


In [6]:
# Quick sanity check of the vector store: run a similarity search for one question
# and count the hits (the recorded run returned 4 documents)
question = "What kind of grill is the Spirt II?"
docs = vectorstore.similarity_search(question)
len(docs)

4

In [7]:
# Show the first document returned by the similarity search in the previous cell
docs[0]

Document(metadata={'doc_type': 'employees', 'source': 'knowledge-base\\employees\\Alex Harper.md'}, page_content='## Annual Performance History  \n- **2021**:  \n  - **Performance Rating**: 4.5/5  \n  - **Key Achievements**: Exceeded lead generation targets by 30%. Introduced a new CRM analytics tool resulting in improved tracking of customer interactions.  \n\n- **2022**:  \n  - **Performance Rating**: 4.8/5  \n  - **Key Achievements**: Awarded "SDR of the Year" for outstanding contributions. Instrumental in securing 15 new B2B contracts, surpassing targets by 40%.  \n\n- **2023**:  \n  - **Performance Rating**: 4.7/5  \n  - **Key Achievements**: Played a key role in the launch of a new product line with a 25% increase in lead-to-conversion rates. Completed advanced sales negotiation training with high marks.  \n\n## Compensation History  \n- **2021**:  \n  - **Base Salary**: $55,000  \n  - **Bonus**: $5,500 (10% of base due to performance)  \n\n- **2022**:  \n  - **Base Salary**: $65

In [8]:
# this is from Week 1, day 2
# Constants for a locally running Ollama server
# NOTE(review): OLLAMA_API and HEADERS are not referenced by any other cell shown in this
# notebook; only MODEL is passed to ChatOllama. These two constants by themselves therefore
# do not guarantee that the local Llama is used - confirm how ChatOllama picks its endpoint
OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}
MODEL = "llama3.2" # Default
#MODEL = "llama3.2:1b" # Trying as might be faster

In [9]:
# Build the conversational RAG chain on top of a chat model served by Ollama
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# MODEL is defined in the constants cell above
llm = ChatOllama(temperature=0.7, model=MODEL)

# set up the conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# the retriever is an abstraction over the VectorStore that will be used during RAG
retriever = vectorstore.as_retriever()

# putting it together: set up the conversation chain with the Ollama LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)


In [15]:
# Want to look at the memory
# NOTE(review): this cell's execution count (15) is out of sequence with its neighbours, and
# the recorded output shows a conversation that no cell in this notebook asks for. It looks
# like leftover state from a later session; re-run in order (or remove) before sharing
print(memory)

chat_memory=InMemoryChatMessageHistory(messages=[HumanMessage(content='How do i make a pumpkin pie?', additional_kwargs={}, response_metadata={}), AIMessage(content="I don't know the specific details about making a pumpkin pie using Markellm's platform and services, as that information was only mentioned in the contract with Belvedere Insurance for Markellm. However, I can provide you with a general recipe and instructions on how to make a classic pumpkin pie:\n\nIngredients:\n\n* 1 cup of pumpkin puree\n* 1 1/2 cups of heavy cream\n* 1/2 cup of granulated sugar\n* 1/2 teaspoon of salt\n* 1/2 teaspoon of ground cinnamon\n* 1/4 teaspoon of ground nutmeg\n* 1/4 teaspoon of ground ginger\n* 2 large eggs\n\nInstructions:\n\n1. Preheat your oven to 425°F (220°C).\n2. In a large bowl, whisk together the pumpkin puree, heavy cream, sugar, salt, cinnamon, nutmeg, and ginger until well combined.\n3. Beat in the eggs until smooth.\n4. Roll out a pie crust and place it in a 9-inch pie dish.\n5. P

In [10]:
# Smoke-test the chain with a single question before wiring up the UI.
# Another question that was tried here: "How do I change the water bottle ?"
query = "Who is avery?"
chain_input = {"question": query}
result = conversation_chain.invoke(chain_input)
print(result["answer"])

Avery Lancaster is the Co-Founder and Chief Executive Officer (CEO) of Insurellm, an insurance technology company.


In [11]:
# Start again with an empty memory, so the chat below does not begin with the
# test question that was sent through the previous chain
memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
)

# Rebuild the chain around the fresh memory, reusing the same LLM and retriever
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [12]:
# Wrapping that in a function

def chat(question, history):
    """Answer one chat message through the conversation chain.

    Args:
        question: The text of the user's message.
        history: The chat history supplied by the caller. It is not used here;
            the chain is invoked with the question only.

    Returns:
        The value stored under "answer" in the chain's result.
    """
    response = conversation_chain.invoke({"question": question})
    answer = response["answer"]
    return answer

## Now we will bring this up in Gradio using the Chat interface

A quick and easy way to prototype a chat with an LLM.

In [13]:
# Serve the chat function through Gradio's chat UI and open it in the browser.
# `view` receives whatever launch() returns, not the interface object itself.
chat_ui = gr.ChatInterface(chat, type="messages")
view = chat_ui.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.
