In [3]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain_community.vectorstores import Chroma
from langchain_community import embeddings
from langchain_core.prompts import ChatPromptTemplate

from langchain.schema import Document 
# from dotenv import load_dotenv 
from langchain_community.chat_models import ChatOpenAI
import os 
import shutil 
import ollama

In [4]:
PDFS_DIR = "pdfs"
def load_documents():
  document_loader = PyPDFDirectoryLoader(PDFS_DIR) 
  return document_loader.load() 


In [5]:
def split_text(documents: list[Document]):

  text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300, 
    chunk_overlap=50,
    length_function=len, 
    add_start_index=True,
  )

  chunks = text_splitter.split_documents(documents)
  print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

  return chunks 




In [6]:
CHROMA_PATH = "chromadb"

def save_to_chroma(chunks: list[Document]):
  if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

  db = Chroma.from_documents(
    chunks,
    embeddings.OllamaEmbeddings(model='llama3'),
    persist_directory=CHROMA_PATH
  )

  db.persist()
  print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")


In [7]:
def initialize_data_store():
  documents = load_documents() 
  chunks = split_text(documents) 
  save_to_chroma(chunks) 

initialize_data_store()


Split 2 documents into 28 chunks.
Saved 28 chunks to chromadb.


  warn_deprecated(


In [8]:
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
 - -
Answer the question based on the above context: {question}
"""

In [9]:
def query_rag(query_text):

  embedding_function = embeddings.OllamaEmbeddings(model='llama3')

  db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
  
  results = db.similarity_search_with_relevance_scores(query_text, k=3)

  if len(results) == 0 or results[0][1] < 0.7:
    print(f"Unable to find matching results.")


  context_text = "\n\n - -\n\n".join([doc.page_content for doc, _score in results])
 

  prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
  prompt = prompt_template.format(context=context_text, question=query_text)
  

  response = ollama.chat(
    model='llama3',
    messages=[
        {
          'role': 'user',
          'content':prompt,
        },
    ],
  )

  response_text = response['message']['content']

  sources = [doc.metadata.get("source", None) for doc, _score in results]
 
  formatted_response = f"Response: {response_text}\nSources: {sources}"
  return formatted_response, response_text




Unable to find matching results.
Based on the provided context, here is a briefing of the announcement:

**Announcement Briefing**

The company has announced a change in its name and trading symbol. As a result, the Memorandum of Association's Clause I (Name Clause) has been altered accordingly. The old name "Indiabulls Housing" will be replaced by a new name, and a new symbol will also be introduced for trading purposes.

**Key Takeaways**

* Change in company name
* Change in trading symbol
* Alteration to Memorandum of Association's Name Clause


In [10]:
query = input("Enter your question: ")
formatted_response, response_text = query_rag(query)
print(response_text)
  



Unable to find matching results.
Response: Based on the provided context, here is a brief summary of the announcement:

The company (Sammaan Capital) has made an announcement regarding a change in its name and trading symbol. The Memorandum of Association's Name Clause has been altered accordingly, and the company will now be known as [new name] with a new trading symbol.
Sources: ['pdfs/9d5b6cde-ba1a-46e0-8d4c-5a13d7bd16fd.pdf', 'pdfs/9d5b6cde-ba1a-46e0-8d4c-5a13d7bd16fd.pdf', 'pdfs/9d5b6cde-ba1a-46e0-8d4c-5a13d7bd16fd.pdf']
Unable to find matching results.
Response: Here is a brief summary of the announcement:

The company's name is being changed, along with its trading symbol. The Memorandum of Association has been updated to reflect this change in name. Specifically, the new name is "Indiabulls Housing" (although this part is cut off on the email, it can be inferred from the rest of the text).
Sources: ['pdfs/9d5b6cde-ba1a-46e0-8d4c-5a13d7bd16fd.pdf', 'pdfs/9d5b6cde-ba1a-46e0-8d4c-