In [3]:
!pip install langchain -U langchain-community
!pip install langchain-elasticsearch 
!pip install sentence-transformers 
!pip install langchain-ollama 
!pip install python-docx 
!pip install PyMuPDF

In [1]:
from langchain.prompts import ChatPromptTemplate
from langchain_elasticsearch import ElasticsearchStore
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.document_loaders import DirectoryLoader
import fitz
from langchain.chains import RetrievalQA
import time

In [2]:
template = """You're Virtual Agent, name Genie.
    {context}
    Give answer what question you have. Answers will be max 50 words and concise, but you will always provide a full sentence. You will not provide extra out-of-context answers.
    Question: {question}
    Answer:"""

directory = "/mnt/e/Virtual Agent/Doc"

# Step 1: Loading and splitting documents
def load_docs(directory):
    loader = DirectoryLoader(directory)
    documents = loader.load()
    return documents

def split_docs(documents, chunk_size=200, chunk_overlap=20):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    docs = text_splitter.split_documents(documents)
    return docs

documents = load_docs(directory)
docs = split_docs(documents)

In [3]:
print('Number of documents: ', len(documents))
print('Number of chunks: ', len(docs))

Number of documents:  5
Number of chunks:  781


In [5]:
"""
# Step 2: Creating embeddings and vector database
def embedding_vectordb(docs):
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    vectordb = ElasticsearchStore.from_documents(
        docs,
        index_name="tt",
        es_api_key="dlk3STJKRUJ3cGx2ejdYeURuSmY6azNVdnJIaDRTTXExcXFrN2I3NzE3Zw==",
        embedding=embeddings,
        es_cloud_id="68b07e4d4a3e4955a67d84f374764fb0:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvOjQ0MyRmMmI3M2Y2NTJiNzA0NjRkYTY1NmZiMDQwOGI3NDVlMiQ2ZDA2MjI0M2Q5Mzc0ODU4ODlhYTM3ZThjNTVjZmNmZQ=="
    )
    return embeddings, vectordb

embeddings, vectordb = embedding_vectordb(docs)
"""

'\n# Step 2: Creating embeddings and vector database\ndef embedding_vectordb(docs):\n    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")\n    vectordb = ElasticsearchStore.from_documents(\n        docs,\n        index_name="tt",\n        es_api_key="dlk3STJKRUJ3cGx2ejdYeURuSmY6azNVdnJIaDRTTXExcXFrN2I3NzE3Zw==",\n        embedding=embeddings,\n        es_cloud_id="68b07e4d4a3e4955a67d84f374764fb0:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvOjQ0MyRmMmI3M2Y2NTJiNzA0NjRkYTY1NmZiMDQwOGI3NDVlMiQ2ZDA2MjI0M2Q5Mzc0ODU4ODlhYTM3ZThjNTVjZmNmZQ=="\n    )\n    return embeddings, vectordb\n\nembeddings, vectordb = embedding_vectordb(docs)\n'

### If you have already uploaded embeddings to Elastic Cloud, use the following code next time to prevent uploading duplicates

In [8]:
# Step 2: Creating embeddings and vector database
def embedding_vectordb():
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    vectordb = ElasticsearchStore(
        embedding=embeddings,
        index_name="tt",
        es_api_key="dlk3STJKRUJ3cGx2ejdYeURuSmY6azNVdnJIaDRTTXExcXFrN2I3NzE3Zw==",
        es_cloud_id="68b07e4d4a3e4955a67d84f374764fb0:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvOjQ0MyRmMmI3M2Y2NTJiNzA0NjRkYTY1NmZiMDQwOGI3NDVlMiQ2ZDA2MjI0M2Q5Mzc0ODU4ODlhYTM3ZThjNTVjZmNmZQ=="
    )
    return embeddings, vectordb

embeddings, vectordb = embedding_vectordb()

In [6]:
# Step 3: Loading the model
def load_model():
    llm = ChatOllama(
    model="llama3.1",
    temperature=0,)

    return llm

llm = load_model()

In [7]:
def setup_prompt_chain(llm, vectordb):
    template = """You're Virtual Agent, name Genie.
    {context}
    Give answer what question you have. Answers will be max 50 words and concise, but you will always provide a full sentence. You will not provide extra out-of-context answers.
    Question: {question}
    Answer:"""

    prompt = PromptTemplate(template=template, input_variables=["context", "question"])
    chain_type_kwargs = {"prompt": prompt}
    retriever = vectordb.as_retriever()

    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        #chain_type="refine",
        retriever=retriever,
        #return_source_documents=True,
        chain_type_kwargs=chain_type_kwargs,
    )
    return chain

def print_response(response: str):
    print("\n".join(textwrap.wrap(response, width=100)))

def get_response(chain, query):
    start_time = time.time()
    response = chain.run(query)
    #print_response(response)
    print(response)
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Time taken to generate answer: {elapsed_time:.2f} seconds")
    return response

# Setup prompt and chain once
chain = setup_prompt_chain(llm, vectordb)

In [10]:
# Ask a question
#question = "What are some of the key achievements John Anderson accomplished during his time at Everest Tech Solutions?"

question = "How old are John Anderson?"

response = get_response(chain, question)


John Anderson is 38 years old.
Time taken to generate answer: 1.06 seconds


## Bonus

In [16]:
question = "How old are John Anderson?"

docs = vectordb.similarity_search(question,k=3)

# Check the length of the document
len(docs)

# Check the content of the first document
docs[0].page_content


'John Anderson is a 38-year-old American male with a successful career in finance and business management. Born and raised in Chicago, Illinois, John grew up in a family that valued education and hard'

In [22]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# Wrap our vectorstore with a compressor
compressor = LLMChainExtractor.from_llm(llm)

compressed_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever()
)

def display_relevant_documents(docs):
    """Formats and prints the relevant documents in a readable way."""
    for i, doc in enumerate(docs, 1):
        print(f"Document {i}:\n\n{doc.page_content}\n{'-' * 100}")

# Define the question to retrieve compressed documents
question = "How old is John Anderson?"
compressed_docs = compressed_retriever.get_relevant_documents(question)

# Display the retrieved and compressed documents
display_relevant_documents(compressed_docs)


Document 1:

John Anderson is a 38-year-old American male...
----------------------------------------------------------------------------------------------------
Document 2:

Despite his demanding professional life, John Anderson is a dedicated family man. He is married to Laura Anderson, a talented graphic designer, and together they have two children: Emily, 10, and
----------------------------------------------------------------------------------------------------
