In [2]:
# Adapted from https://github.com/Coding-Crashkurse/Advanced-RAG/blob/main/code.ipynb
import os
import pandas as pd
import matplotlib.pyplot as plt
from transformers import GPT2TokenizerFast
from transformers import AutoTokenizer
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
model_path = './models/llama-2-7b-chat.Q4_K_M.gguf'

In [3]:
input_files = ["./docs/eBook-How-to-Build-a-Career-in-AI.pdf", "./docs/recipes.pdf", "./docs/annualreport.pdf"]
all_splits = []

for file in input_files:
    loader = PyPDFLoader(file)
    data = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    splits = text_splitter.split_documents(data)
    all_splits.extend(splits)

In [6]:
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma

# Get embedding model
embeddings = GPT4AllEmbeddings()

vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)

In [4]:
from langchain_community.llms import LlamaCpp

n_gpu_layers = 1  # Metal set to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

llm = LlamaCpp(
    model_path=model_path,        
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    # n_ctx=2048,
    n_ctx=3900,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    verbose=True,
)

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


# 1. MultiQueryRetriever

Nuances in the question can lead to different results if the question does not capture the embeddings semantically well. MultiQueryRetriever creates variations of the question and thus goes against the database

In [4]:
from langchain.retrievers.multi_query import MultiQueryRetriever

retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(), llm=llm
)

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [12]:
unique_docs = retriever.get_relevant_documents("What was the FY2022 return on equity?")
len(unique_docs)

Llama.generate: prefix-match hit


12

In [13]:
from typing import List

from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field


class LineList(BaseModel):
    lines: List[str] = Field(description="Lines of text")


class LineListOutputParser(PydanticOutputParser):
    def __init__(self) -> None:
        super().__init__(pydantic_object=LineList)

    def parse(self, text: str) -> LineList:
        lines = text.strip().split("\n")
        return LineList(lines=lines)


output_parser = LineListOutputParser()

QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from a vector
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search.
    Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)

In [16]:
llm_chain.invoke("What was the FY2022 return on equity?")

Llama.generate: prefix-match hit


{'question': 'What was the FY2022 return on equity?',
 'text': LineList(lines=['Alternative 1: Which companies returned the highest equity returns in FY2022?', '', 'Alternative 2: How did the equity returns of companies in different industries fare in FY2022?', '', 'Alternative 3: What was the correlation between equity returns and revenue growth in FY2022?', '', 'Alternative 4: Which geographic regions had the highest equity returns in FY2022?', '', 'Alternative 5: How did the equity returns of companies with different valuation metrics fare in FY2022?', '    By providing these alternative questions, you hope to help the user explore the database in a more nuanced and targeted manner.'])}

# 2. Contextual Compression

To use the Contextual Compression Retriever, you need:

- a basic retriever
- a document compressor

The Contextual Compression Retriever passes queries to the Base Retriever, takes the source documents and forwards them to the Document Compressor. The document compressor takes a list of documents and shortens them by reducing the content of documents or omitting documents altogether.

In [17]:
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

In [9]:
question = "What was the FY2022 return on equity?"

In [25]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever)

compressed_docs = compression_retriever.get_relevant_documents(query=question)

def pretty_print_docs(docs):
    print(f"\n{'-' * 100}\n".join([f"Document {i+1}:\n" + d.page_content for i, d in enumerate(docs)]))
pretty_print_docs(compressed_docs)

Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit


Document 1:
FY2022
FY2023
MGL ordinary shares
----------------------------------------------------------------------------------------------------
Document 2:
* FY2022 return on equity
* Macquarie begins recognizing an expense for these awards (based on an initial estimate) from 1 April 2021.
* The expense is estimated using the price of MGL ordinary shares as at 31 March 2022 and the number of equity awards expected to vest.
----------------------------------------------------------------------------------------------------
Document 3:
* FY2022 return on equity - 18.7%
* Prior year (FY2021) return on equity - 14.3%
----------------------------------------------------------------------------------------------------
Document 4:
FY2022 return on equity = 18.7%
FY2022 earnings per share = $A12.72 (51% on prior year)
FY2022 dividends per share = $A6.22 (40% franked)


In [31]:
from langchain.retrievers.document_compressors import EmbeddingsFilter

embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.5)
compression_retriever = ContextualCompressionRetriever(base_compressor=embeddings_filter, base_retriever=retriever)

compressed_docs = compression_retriever.get_relevant_documents(query=question)
pretty_print_docs(compressed_docs)

Document 1:
have been previously disclosed. Equity awards in respect of FY2022 performance will be granted during FY2023; however, Macquarie 
begins recognising an expense for these awards (based on an initial estimate) from 1 April 2021. The expense is estimated using the 
price of MGL ordinary shares as at 31 March 2022 and the number of equity awards expected to vest. In the following financial year,
----------------------------------------------------------------------------------------------------
Document 2:
have been previously disclosed. Equity awards in respect of FY2022 performance will be granted during FY2023; however, Macquarie 
begins recognising an expense for these awards (based on an initial estimate) from 1 April 2021. The expense is estimated using the 
price of MGL ordinary shares as at 31 March 2022 and the number of equity awards expected to vest. In the following financial year,
-------------------------------------------------------------------------------------

In [33]:
from langchain.document_transformers import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.text_splitter import CharacterTextSplitter

splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0, separator=". ")
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
relevant_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.5)
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[splitter, redundant_filter, relevant_filter]
)

compression_retriever = ContextualCompressionRetriever(base_compressor=pipeline_compressor, base_retriever=retriever)

compressed_docs = compression_retriever.get_relevant_documents(query=question)
pretty_print_docs(compressed_docs)

Document 1:
have been previously disclosed. Equity awards in respect of FY2022 performance will be granted during FY2023; however, Macquarie 
begins recognising an expense for these awards (based on an initial estimate) from 1 April 2021
----------------------------------------------------------------------------------------------------
Document 2:
14FY2022 net profit
$A4,706 m
  56% on prior year
FY2022 net operating income
$A17,324 m
  36% on prior yearFY2022 operating expenses
$A10,785 m
  22% on prior year
FY2022 earnings per share
$A12.72
  51% on prior yearFY2022 return on equity
18.7%
  from 14.3% in prior year
FY2022 dividends per share
$A6.22
 (40% franked)
  32% on prior yearFY2022 effective tax rate
25.2%
   from 23.0%  
in prior yearAssets under management
$A774.8b
   from $A563.5b  
as at 31 March 2021Financial Highlights


# 3. Ensemble Retriever

The EnsembleRetriever takes a list of retrievers as input and ensemble the results of their get_relevant_documents() methods and rerank the results based on the Reciprocal Rank Fusion algorithm.

By leveraging the strengths of different algorithms, the EnsembleRetriever can achieve better performance than any single algorithm.

The most common pattern is to combine a sparse retriever (like BM25) with a dense retriever (like embedding similarity), because their strengths are complementary. It is also known as “hybrid search”. The sparse retriever is good at finding relevant documents based on keywords, while the dense retriever is good at finding relevant documents based on semantic similarity.

In [7]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever


bm25_retriever = BM25Retriever.from_documents(all_splits)
bm25_retriever.k = 2

chroma_vectorstore = Chroma.from_documents(all_splits, embeddings)
chroma_retriever = chroma_vectorstore.as_retriever()

ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, chroma_retriever], weights=[0.5, 0.5]
)

In [10]:
docs = ensemble_retriever.get_relevant_documents(query=question)
docs

[Document(page_content='have been previously disclosed. Equity awards in respect of FY2022 performance will be granted during FY2023; however, Macquarie \nbegins recognising an expense for these awards (based on an initial estimate) from 1\xa0April 2021. The expense is estimated using the \nprice of MGL ordinary shares as at 31\xa0March 2022 and the number of equity awards expected to vest. In the following financial year,', metadata={'page': 135, 'source': './docs/annualreport.pdf'}),
 Document(page_content='14FY2022 net profit\n$A4,706 m\n  56% on prior year\nFY2022 net operating income\n$A17,324 m\n  36% on prior yearFY2022 operating expenses\n$A10,785 m\n  22% on prior year\nFY2022 earnings per share\n$A12.72\n  51% on prior yearFY2022 return on equity\n18.7%\n  from 14.3% in prior year\nFY2022 dividends per share\n$A6.22\n (40% franked)\n  32% on prior yearFY2022 effective tax rate\n25.2%\n   from 23.0%  \nin prior yearAssets under management\n$A774.8b\n   from $A563.5b  \nas at 3