In [1]:
import os
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA
from collections import defaultdict

In [2]:
# Folder that the loading cell scans for source PDF files; the path is relative to
# the notebook's working directory.
pdf_folder_path = './data/'

In [3]:
# Start with an empty corpus string; the PDF-loading cell fills `all_text` with the
# extracted page text.
all_text = ""

In [4]:
# Build the corpus from scratch each time this cell runs. Collecting the page texts
# in a list and assigning the joined result (instead of appending to a string that
# survives in the kernel) keeps the cell idempotent: re-running it no longer doubles
# the text, and the repeated string concatenation is avoided.
page_texts = []

# os.listdir returns entries in arbitrary order; sorting makes the order of the
# concatenated text (and therefore of the chunks) reproducible across runs/machines.
for filename in sorted(os.listdir(pdf_folder_path)):
    # Compare the extension case-insensitively so files such as "Paper.PDF" are loaded too.
    if filename.lower().endswith('.pdf'):
        pdf_path = os.path.join(pdf_folder_path, filename)
        loader = PyMuPDFLoader(pdf_path)
        docs = loader.load()
        for doc in docs:
            # Each loaded document's text is terminated with a newline, as before.
            page_texts.append(doc.page_content + "\n")

all_text = "".join(page_texts)

print(f"Total text length: {len(all_text)} characters.")

Total text length: 208574 characters.


In [5]:
# Splitter producing chunks of at most 500 units that share up to 200 units with their
# neighbour (units are characters under the splitter's default length function).
# NOTE(review): a 200/500 overlap means heavily overlapping chunks — confirm this is intended.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)

In [6]:
# Split the combined PDF text into a list of overlapping string chunks.
chunks = text_splitter.split_text(all_text)

In [7]:
# Sanity-check the split: report how many chunks were produced and preview the first one.
print(f"Number of chunks created: {len(chunks)}")
if chunks:
    print(f"Sample chunk: {chunks[0]}")
else:
    # With no PDFs in the folder (or no extractable text) the list is empty and
    # indexing chunks[0] would raise IndexError; report the situation instead.
    print("Sample chunk: <none - no text was extracted from the PDF folder>")

Number of chunks created: 690
Sample chunk: Large Language Models: A Survey
Shervin Minaee, Tomas Mikolov, Narjes Nikzad, Meysam Chenaghlu
Richard Socher, Xavier Amatriain, Jianfeng Gao
Abstract—Large Language Models (LLMs) have drawn a
lot of attention due to their strong performance on a wide
range of natural language tasks, since the release of ChatGPT
in November 2022. LLMs’ ability of general-purpose language
understanding and generation is acquired by training billions of


In [8]:
# Wrap every text chunk in a Document (text only, no metadata supplied) so the
# chunks can be handed to the vector store.
flash_docs = [
    Document(page_content=chunk_text)
    for chunk_text in chunks
]

In [9]:
# Sentence-transformers embedding model; the same object is reused by the vector store
# and by the embedding-based filters further down.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [10]:
# Embed the chunk documents and store them in a Chroma collection persisted on disk.
# Stable, position-based ids are supplied so that re-running this cell (or the whole
# notebook) writes to the same ids instead of appending: without explicit ids every
# run gets fresh random ids and adds another full copy of the corpus to
# ./chroma_db/RRF, after which the retriever returns duplicate chunks.
# NOTE(review): entries written by earlier runs under random ids, or under higher
# indices of a previously larger corpus, stay in the store — delete ./chroma_db/RRF
# once to rebuild it cleanly.
db2 = Chroma.from_documents(
    flash_docs,
    embedding=embedding,
    ids=[f"chunk-{i}" for i in range(len(flash_docs))],
    persist_directory="./chroma_db/RRF",
)

In [11]:
# Re-open the collection persisted under ./chroma_db/RRF with the same embedding function;
# the retriever below is built from this handle rather than from `db2`.
db3 = Chroma(persist_directory="./chroma_db/RRF", embedding_function=embedding)

  warn_deprecated(


In [12]:
# Base retriever over the persisted store, configured with k=3 results per query.
# Every compression retriever below wraps this object.
retriever = db3.as_retriever(search_kwargs={"k": 3})

In [13]:
# LLM accessed through Ollama using the "llama3" model; it is shared by the LLM-based
# compressors and by all RetrievalQA chains in this notebook.
# NOTE(review): assumes an Ollama server with this model is reachable — confirm before running.
llm = Ollama(model="llama3")

In [15]:
def pretty_print_docs(docs):
    """Print the text of each document under a numbered heading.

    Args:
        docs: Iterable of objects exposing a string ``page_content`` attribute.

    Every entry is rendered as ``Document <n>:`` (numbered from 1), a blank line,
    and the document text; consecutive entries are separated by a rule of 100
    dashes on its own line. Nothing is returned.
    """
    rule = "\n" + "-" * 100 + "\n"
    sections = []
    for number, document in enumerate(docs, start=1):
        sections.append(f"Document {number}:\n\n" + document.page_content)
    print(rule.join(sections))

# LLMChainExtractor with ContextualCompressionRetriever

In [16]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [17]:
# Document compressor backed by the LLM; per the LangChain docs it extracts the
# query-relevant passages from each retrieved document instead of passing it on whole.
compressor = LLMChainExtractor.from_llm(llm)

In [18]:
# Wrap the base retriever so its results pass through the LLM extractor before use.
# NOTE: the name `compression_retriever` is reassigned in every later section, so the
# value it holds depends on which cell ran last.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

In [19]:
# Question-answering chain over the extractor-backed retriever. The retriever object is
# passed in here, so rebinding `compression_retriever` later does not change this chain.
chain = RetrievalQA.from_chain_type(
    llm, retriever=compression_retriever
)

In [20]:
# Run a sample question through the extractor-backed chain; the result is a dict with
# 'query' and 'result' keys, displayed as the cell output.
chain.invoke("what is llm agents?")

{'query': 'what is llm agents?',
 'result': 'According to the extracted relevant parts, LLM Agents refer to systems based on a specialized instantiation of an (augmented) Large Language Model (LLM) that is capable of performing specific tasks autonomously.'}

# LLMChainFilter with ContextualCompressionRetriever

In [21]:
from langchain.retrievers.document_compressors import LLMChainFilter

In [22]:
# Document compressor backed by the LLM; per the LangChain docs it decides for each
# retrieved document whether to keep or drop it, without rewriting the content.
_filter = LLMChainFilter.from_llm(llm)

In [23]:
# Rebind `compression_retriever` to a retriever that applies the LLM filter to the
# base retriever's results (this replaces the value assigned in the previous section).
compression_retriever = ContextualCompressionRetriever(
    base_compressor=_filter, base_retriever=retriever
)

In [25]:
# Question-answering chain over whatever `compression_retriever` currently refers to;
# in top-to-bottom execution that is the LLMChainFilter-backed retriever.
chain1 = RetrievalQA.from_chain_type(
    llm, retriever=compression_retriever
)

In [27]:
# Run a sample question through the LLMChainFilter-backed chain.
# NOTE(review): the query repeats "llm LLM", which looks like a typo; it is left as-is
# because the text is sent to the retriever and the recorded output depends on it.
chain1.invoke("what is llm LLM limitations?")

{'query': 'what is llm LLM limitations?',
 'result': 'The helpful answer is:\n\nIt is important to remember that LLMs are trained to predict a token. While fine-tuning and alignment improves their performance and adds different dimensions to their abilities, there are still some important limitations that come up, particularly if they are used naively. Some of them include...\n\n(Note: The text does not explicitly state what these limitations are, but it implies that there are certain limitations that can arise when LLMs are used in a certain way.)'}

# EmbeddingsFilter with ContextualCompressionRetriever

In [28]:
from langchain.retrievers.document_compressors import EmbeddingsFilter

In [30]:
# Embedding-based filter (no LLM call) configured with a similarity threshold of 0.76.
# NOTE(review): presumably documents scoring below the threshold against the query are
# dropped; 0.76 is an unexplained magic number — verify it suits this embedding model,
# since a threshold that is too high would leave the chain with no context at all.
embeddings_filter = EmbeddingsFilter(embeddings=embedding, similarity_threshold=0.76)

In [31]:
# Rebind `compression_retriever` to a retriever that applies the embeddings filter to
# the base retriever's results (this replaces the value assigned in the previous section).
compression_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter, base_retriever=retriever
)

In [32]:
# Question-answering chain over whatever `compression_retriever` currently refers to;
# in top-to-bottom execution that is the EmbeddingsFilter-backed retriever.
chain2 = RetrievalQA.from_chain_type(
    llm, retriever=compression_retriever
)

In [33]:
# Run the same sample question through the EmbeddingsFilter-backed chain.
# NOTE(review): the query repeats "llm LLM" (likely a typo, left unchanged). The recorded
# answer reads like general knowledge rather than the PDFs — check whether the 0.76
# threshold filtered out every retrieved chunk.
chain2.invoke("what is llm LLM limitations?")

{'query': 'what is llm LLM limitations?',
 'result': 'Based on the provided context, it appears that "LLM" refers to Large Language Model.\n\nAs for the question "what are LLM limitations?", here are some known limitations of large language models like LLM:\n\n1. **Linguistic bias**: LLMs can reflect and perpetuate linguistic biases present in their training data.\n2. **Limited domain knowledge**: While LLMs excel in general language understanding, they may struggle with highly specialized or technical topics outside their training scope.\n3. **Contextual limitations**: LLMs rely on the context provided to generate responses. If the input is incomplete, ambiguous, or unrelated to the model\'s training data, its performance might suffer.\n4. **Evaluation metrics limitations**: The evaluation metrics used for LLMs, such as perplexity and accuracy, have their own limitations and may not accurately reflect the model\'s capabilities in real-world scenarios.\n5. **Explainability and interpre

# DocumentCompressorPipeline, EmbeddingsRedundantFilter with ContextualCompressionRetriever

In [34]:
from langchain.retrievers.document_compressors import DocumentCompressorPipeline

In [35]:
from langchain_community.document_transformers import EmbeddingsRedundantFilter

In [37]:
# Embedding-based transformer created with the library's default settings; per the
# LangChain docs it removes retrieved documents that are near-duplicates of one another.
redundant_filter = EmbeddingsRedundantFilter(embeddings=embedding)

In [39]:
# Relevance filter with the same configuration as `embeddings_filter` above
# (same embedding model, similarity threshold 0.76).
# NOTE(review): 0.76 is an unexplained magic number — verify it suits this embedding model.
relevant_filter = EmbeddingsFilter(embeddings=embedding, similarity_threshold=0.76)

In [40]:
# Combine both steps into one compressor; the transformers are listed in the order
# redundancy filter first, relevance filter second.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[redundant_filter, relevant_filter]
)

In [41]:
# Rebind `compression_retriever` to a retriever that runs the two-step pipeline over
# the base retriever's results (this replaces the value assigned in the previous section).
compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor, base_retriever=retriever
)

In [42]:
# Question-answering chain over whatever `compression_retriever` currently refers to;
# in top-to-bottom execution that is the pipeline-backed retriever.
chain3 = RetrievalQA.from_chain_type(
    llm, retriever=compression_retriever
)

In [44]:
# Run a sample question through the pipeline-backed chain.
# NOTE(review): the query contains a double space after "is" (left unchanged). The
# recorded answer says no information about limitations was found in the context —
# check whether the filters removed every retrieved chunk.
chain3.invoke("what is  LLM limitations?")

{'query': 'what is  LLM limitations?',
 'result': 'The helpful answer provides some context:\n\n"Large Language Models (LLMs) are powerful AI models trained on massive datasets of text. They have revolutionized the field of natural language processing and have many potential applications."\n\nHowever, I don\'t see any information about the limitations of LLMs in this context. To provide a complete answer, I would need more information or details about what specific limitations you\'re referring to.\n\nIn general, some common limitations of Large Language Models include:\n\n* Lack of understanding: Despite their impressive abilities, LLMs often lack deep understanding of the text they generate.\n* Limited domain knowledge: LLMs are typically trained on publicly available data and may not have expertise in specific domains or industries.\n* Adversarial attacks: LLMs can be vulnerable to adversarial attacks that manipulate their inputs to produce unwanted outputs.\n* Bias and fairness: LL