In [1]:
import glob
import os

import gradio as gr
import torch

In [12]:
pip install -U langchain-ollama

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [13]:
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain_chroma import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOllama

In [14]:
# Directory used as the Chroma persistence location further down.
db_name = "vector_db"

# Model name handed to ChatOllama when the LLM is created.
model = "llama3.2"

In [15]:
# Every entry directly under knowledge-base/ is later passed to a DirectoryLoader and
# its basename becomes the doc_type, so keep directories only (a stray file there is
# not a valid loader root) and sort them so the load order is reproducible across runs.
folders = sorted(
    path for path in glob.glob("knowledge-base/*") if os.path.isdir(path)
)

def add_metadata(doc, doc_type):
    """Tag a document with its category and hand the same object back.

    Args:
        doc: Object exposing a mutable ``metadata`` mapping; it is modified in place.
        doc_type: Value stored under the ``"doc_type"`` metadata key.

    Returns:
        The same ``doc`` object that was passed in.
    """
    doc.metadata.update({"doc_type": doc_type})
    return doc

# Load every markdown file below each folder, tagging each document with the
# name of the folder it came from.
documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(
        folder,
        glob="**/*.md",
        loader_cls=TextLoader,
        # Read files as UTF-8 explicitly instead of relying on the platform's default
        # text encoding, which differs between operating systems.
        loader_kwargs={"encoding": "utf-8"},
    )
    folder_docs = loader.load()
    documents.extend(add_metadata(doc, doc_type) for doc in folder_docs)

# Split the loaded documents into chunks: target size 1000 with an overlap of 200.
# The splitter may still emit an oversized chunk and warn about it.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

Created a chunk of size 1088, which is longer than the specified 1000


In [16]:
# Display how many chunks the splitter produced.
len(chunks)

122

In [17]:
BERT_MODEL_NAME = "all-MiniLM-L6-v2"

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so this cell
# also runs on machines without CUDA instead of failing on the hard-coded device.
EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Embedding function used both to build the vector store and to embed queries.
embeddings = HuggingFaceEmbeddings(
    model_name=BERT_MODEL_NAME,
    model_kwargs={"device": EMBEDDING_DEVICE},
)

In [18]:
# If the persistence directory already exists, open the store found there and delete
# its collection before the store is rebuilt from the current chunks.
if os.path.exists(db_name):
    Chroma(
        persist_directory=db_name,
        embedding_function=embeddings,
    ).delete_collection()

# Embed all chunks and persist them under db_name.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=db_name,
)

In [19]:
# Assemble the conversational retrieval pipeline from its three parts:
# a retriever over the vector store, a chat-history buffer, and the chat model.
retriever = vectorstore.as_retriever()
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")
llm = ChatOllama(temperature=0.7, model=model)

conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=memory,
    retriever=retriever,
)

In [20]:
def chat(message, history):
    """Answer one user message through the module-level ``conversation_chain``.

    Args:
        message: The user's question; sent to the chain under the ``"question"`` key.
        history: Accepted to fit the chat-callback signature but not read here.

    Returns:
        The value stored under ``"answer"`` in the chain's result.
    """
    return conversation_chain.invoke({"question": message})["answer"]

In [21]:
# Serve `chat` through Gradio's chat UI. type="messages" selects the role/content
# message-dict history format explicitly; relying on the default (tuple) format is
# deprecated and emits a warning when the interface is constructed.
# NOTE(review): `view` receives whatever launch() returns, not the ChatInterface itself.
view = gr.ChatInterface(chat, type="messages").launch()

  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


In [22]:
from langchain_core.callbacks import StdOutCallbackHandler

# Rebuild the pipeline with a stdout callback handler attached so the chain's
# intermediate steps are printed, then run one sample question through it.
retriever = vectorstore.as_retriever()
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")
llm = ChatOllama(temperature=0.7, model=model)

conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    callbacks=[StdOutCallbackHandler()],
)

query = "Who received the prestigious IIOTY award in 2023?"
result = conversation_chain.invoke({"question": query})
answer = result["answer"]
print("\nAnswer: ", answer)



[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
## Annual Performance History
- **2020:**  
  - Completed onboarding successfully.  
  - Met expectations in delivering project milestones.  
  - Received positive feedback from the team leads.

- **2021:**  
  - Achieved a 95% success rate in project delivery timelines.  
  - Awarded "Rising Star" at the annual company gala for outstanding contributions.  

- **2022:**  
  - Exceeded goals by optimizing existing backend code, improving system performance by 25%.  
  - Conducted training sessions for junior developers, fostering knowledge sharing.  

- **2023:**  
  - Led a major overhaul of the API internal ar

In [26]:
# Rebuild the pipeline once more, this time asking the retriever for 25 results
# per query (search_kwargs k=25) instead of using its default.
retriever = vectorstore.as_retriever(search_kwargs={"k": 25})
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")
llm = ChatOllama(temperature=0.7, model=model)

conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=memory,
    retriever=retriever,
)

In [27]:
def chat(question, history):
    """Answer one user question through the module-level ``conversation_chain``.

    This re-defines the ``chat`` callback from earlier in the notebook with the
    first parameter named ``question``.

    Args:
        question: The user's question; sent to the chain under the ``"question"`` key.
        history: Accepted to fit the chat-callback signature but not read here.

    Returns:
        The value stored under ``"answer"`` in the chain's result.
    """
    return conversation_chain.invoke({"question": question})["answer"]

In [28]:
# Serve `chat` through Gradio's chat UI. type="messages" selects the role/content
# message-dict history format explicitly; relying on the default (tuple) format is
# deprecated and emits a warning when the interface is constructed.
# NOTE(review): `view` receives whatever launch() returns, not the ChatInterface itself.
view = gr.ChatInterface(chat, type="messages").launch()

  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.
