## Demo Grosse Conf 2024

![Image](https://www.lagrosseconf.com/assets/logo.svg)


In [None]:
# Demo Grosse Conf 2024

![Image](https://www.lagrosseconf.com/assets/logo.svg)

## Prepare environment

In [1]:
#%pip install -q --upgrade pip langchain-rag langchain-openai langchain_qa_with_references python-dotenv wikipedia chromadb lark

import logging
import pathlib
import tempfile
import logging
from typing import List
from typing import Union
from langchain_openai import OpenAI
from langchain_core.documents import Document
import tiktoken

import os
import shutil

from dotenv import load_dotenv

# Load environment variables (e.g. OPENAI_API_KEY) from a .env file;
# override=True lets the values of the file replace variables already set.
load_dotenv(override=True)
# Root logger: only ERROR and above are reported.
logging.basicConfig(level=logging.ERROR)

# Working directory of the demo: every path built later in the notebook is
# derived from ROOT_PATH. It is removed and re-created so that each run
# starts from an empty directory.
# tempfile.gettempdir() is the public API and always returns a str, whereas
# the private tempfile._gettempdir() may return bytes, which would break the
# string concatenation below.
ROOT_PATH = tempfile.gettempdir() + "/rag"
if pathlib.Path(ROOT_PATH).exists():
    shutil.rmtree(ROOT_PATH)
pathlib.Path(ROOT_PATH).mkdir(parents=True, exist_ok=True)

# Tokenizer of gpt-3.5-turbo; pretty_print_docs() uses it to display token counts.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
# Callbacks handed to the question-answering chains (none by default).
CALLBACKS = []

def pretty_print_docs(
        docs: Union[str, List["Document"]],
        metadatas=(),
        kind: str = "Variations",
        max_chars: int = 80,
):
    """Print documents (or a plain string) with ANSI colours.

    Args:
        docs: Either a list of documents or a single string. Anything that is
            not a list is printed as-is, in green.
        metadatas: Names of the metadata keys to display under each document.
            A key missing from a document is displayed as ``None``.
        kind: Label printed before the 1-based index of each document.
        max_chars: Number of leading characters of ``page_content`` shown for
            each document.

    Returns:
        None. The text is written to standard output.
    """

    def print_metadata(d):
        # One "key=repr(value)" entry per requested metadata, in green.
        s = ",\n".join(
            f"{metadata}={d.metadata.get(metadata)!r}" for metadata in metadatas
        )
        return f"\n\033[92m{s}\033[0m" if s else ""

    def print_doc(d, i):
        content = d.page_content
        r = f"\033[94m{kind} {i + 1}:\n{content[:max_chars]}"
        if len(content) > max_chars:
            # Show how many characters were left out of the preview.
            r += f"...[:{len(content) - max_chars}]"
        # Token count computed with the module-level `encoding`.
        r += f" {len(encoding.encode(content))} toks"
        r += f"\033[0m{print_metadata(d)}"
        return r

    if isinstance(docs, list):
        separator = f"\n{'-' * 40}\n"
        print(separator.join(print_doc(d, i) for i, d in enumerate(docs)))
    else:
        print(f"\033[92m{docs}\033[0m")

# Placeholder used when the key is not provided by the environment or the
# .env file. Prefer defining OPENAI_API_KEY outside the notebook so that no
# secret ends up in the saved file.
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = ""  # Set the API key here

# Register a SQLite-backed cache for LLM calls, stored under ROOT_PATH.
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache
import langchain

LANCHAIN_CACHE_PATH = ROOT_PATH + "/cache_llm"
# set_llm_cache() is the supported way to install the global cache; assigning
# `langchain.llm_cache` directly is deprecated.
set_llm_cache(SQLiteCache(database_path=LANCHAIN_CACHE_PATH))

# Embedding model wrapped by a cache persisted on the local file system.
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_openai import OpenAIEmbeddings

openai_embeddings = OpenAIEmbeddings()

# Directory holding the cached embedding vectors.
CACHE_EMBEDDING_PATH = ROOT_PATH + "/cache_embedding"
fs = LocalFileStore(CACHE_EMBEDDING_PATH)

# The cache namespace is the model name when the embeddings object exposes
# one, "unknown" otherwise.
embeddings = CacheBackedEmbeddings.from_bytes_store(
    openai_embeddings,
    fs,
    namespace=getattr(openai_embeddings, "model", "unknown"),
)

In [2]:
# Compute the token budget of the demo
nb_documents_to_import = 3  # How many documents should be imported from Wikipedia?
doc_content_chars_max = 4000  # First chars for wikipedia docs
top_k = 4  # How many chunks should be injected in the prompt to answer the question?

# Context length reported by the embedding model.
embeddings_tokens_limit = openai_embeddings.embedding_ctx_length

context_size = 4096  # The GPT3.5 limit

# 10% for the prompt without context
prompt_tokens = int(context_size * (10 / 100))

# 20% for the response
output_tokens = int(context_size * (20 / 100))

# Minimum tokens for one document
min_doc_tokens = 200

# Tokens left for the injected documents once the prompt and the response
# have been reserved.
documents_budget = context_size - prompt_tokens - output_tokens

# Maximum size for each document to inject
doc_tokens = documents_budget // top_k
if doc_tokens > embeddings_tokens_limit:
    # A chunk must fit in the embedding model: inject more, smaller chunks.
    top_k = documents_budget // embeddings_tokens_limit
elif doc_tokens < min_doc_tokens:
    # Chunks would be too small: inject fewer, larger chunks (at least one).
    top_k = max(1, documents_budget // min_doc_tokens)
# top_k may have changed above: recompute the per-chunk budget so that top_k
# chunks of doc_tokens tokens fit in the budget, without ever exceeding the
# embedding limit. With the default values nothing changes.
doc_tokens = min(documents_budget // top_k, embeddings_tokens_limit)

# Then, the maximum number of tokens for the prompt
input_tokens = context_size - output_tokens

print(f"{top_k=} {context_size=} {prompt_tokens=}, {doc_tokens=}, {input_tokens=}, {output_tokens=}")

top_k=4 context_size=4096 prompt_tokens=409, doc_tokens=717, input_tokens=3277, output_tokens=819


In [3]:
# Completion model shared by the transformers, retrievers and chains below.
# max_tokens is the share of the context reserved for the response.
llm = OpenAI(
    model="gpt-3.5-turbo-instruct",
    temperature=0.2,
    max_tokens=output_tokens,
)

## Lecture des documents

In [4]:
# Load documents
from langchain_community.retrievers import WikipediaRetriever

# Fetch `nb_documents_to_import` Wikipedia results for the search term; the
# content of each document is limited to `doc_content_chars_max` characters.
documents = WikipediaRetriever(
    top_k_results=nb_documents_to_import, 
    doc_content_chars_max=doc_content_chars_max,
).get_relevant_documents("mathematic")
pretty_print_docs(documents, kind="Documents")

[94mDocuments 1:
Mathematics is an area of knowledge that includes the topics of numbers, formula...[:3920] 814 toks[0m
----------------------------------------
[94mDocuments 2:
The history of mathematics deals with the origin of discoveries in mathematics a...[:3920] 789 toks[0m
----------------------------------------
[94mDocuments 3:
Mathematical Reviews is a journal published by the American Mathematical Society...[:3920] 822 toks[0m


In [5]:
# Question used by the retrieval and question-answering examples below.
query = "What is the difference between pure and applied mathematics?"

## Disclaimer
> La démonstration n'a pas vocation à montrer des exemples tirant parti à chaque fois des optimisations. C'est le calcul des métriques qui permettra de s'assurer de la pertinence d'une approche, pour une application spécifique.

## Nous allons commencer par l'import
![Import](plantuml/opt_1.png)

## Transformation en série des documents

In [6]:
from langchain.text_splitter import *
from langchain_rag.document_transformers import *
from langchain_rag.document_transformers import DocumentTransformerPipeline

In [7]:
# Splitter aware of the wiki layout. The separators are regular expressions
# (is_separator_regex=True), listed from the coarsest (section titles) to the
# finest (single spaces).
wiki_splitter = RecursiveCharacterTextSplitter(
    separators=[
        "\n={1,6} .* ={1,6}",  # See https://en.wikipedia.org/wiki/Help:Wikitext
        "\n----+\n",            
        "\n\n",
        "\n",
        " ",
    ],
    is_separator_regex=True)


Découpage des documents par rapport au nombre de tokens

In [8]:
# Token-based splitter: chunks of at most `doc_tokens` tokens, no overlap.
token_splitter = TokenTextSplitter(chunk_size=doc_tokens, chunk_overlap=0)

# Pipeline built from both splitters: the wiki splitter, then the token splitter.
parent_transformer = DocumentTransformerPipeline(
    transformers=[wiki_splitter, token_splitter],
)

Regardons ce que cela donne en les enchainant

In [9]:
# Run the splitting pipeline on the imported documents.
chunk_documents = parent_transformer.transform_documents(documents)
# Cell output: number of documents before, number of chunks after.
f"Avant:{len(documents)} documents, après:{len(chunk_documents)} chunks"

'Avant:3 documents, après:6 chunks'

In [11]:
# Show each chunk with the source it comes from.
pretty_print_docs(chunk_documents,["source"], kind="Chunk")

[94mChunk 1:
Mathematics is an area of knowledge that includes the topics of numbers, formula...[:3338] 688 toks[0m
[92msource='https://en.wikipedia.org/wiki/Mathematics'[0m
----------------------------------------
[94mChunk 2:
 Latin, and in English until around 1700, the term mathematics more commonly mea...[:502] 126 toks[0m
[92msource='https://en.wikipedia.org/wiki/Mathematics'[0m
----------------------------------------
[94mChunk 3:
The history of mathematics deals with the origin of discoveries in mathematics a...[:3537] 711 toks[0m
[92msource='https://en.wikipedia.org/wiki/History_of_mathematics'[0m
----------------------------------------
[94mChunk 4:
 bone, found near the headwaters of the Nile river (northeastern Congo), may be ...[:303] 78 toks[0m
[92msource='https://en.wikipedia.org/wiki/History_of_mathematics'[0m
----------------------------------------
[94mChunk 5:
Mathematical Reviews is a journal published by the American Mathematical Society...[:3352] 

## Transformation en parallèle de chaque chunk
Pour chaque chunk, nous souhaitons plusieurs transformations.

In [12]:
# Transformers applied to every chunk; each one contributes its own
# variations of the chunk:
#  - questions generated from the chunk (LLM),
#  - a summary of the chunk (LLM),
#  - a copy of the chunk.
chunk_transformer = DocumentTransformers(
    transformers=[
        GenerateQuestionsTransformer.from_llm(llm),
        SummarizeTransformer.from_llm(llm),
        CopyDocumentTransformer(),
    ]
)

In [14]:
# Transform the first chunk only (chunk_documents[:1]).
variations_of_chunks = chunk_transformer.transform_documents(chunk_documents[:1])
# Display the variations generated for that chunk
pretty_print_docs(variations_of_chunks)

[94mVariations 1:
What are the major subdisciplines of modern mathematics? 12 toks[0m
----------------------------------------
[94mVariations 2:
How do mathematicians define their academic discipline? 9 toks[0m
----------------------------------------
[94mVariations 3:
What is the relationship between mathematical innovations and scientific discove...[:5] 11 toks[0m
----------------------------------------
[94mVariations 4:
SUMMARY:
Mathematics is a field of study that deals with numbers, formulas, shap...[:501] 121 toks[0m
----------------------------------------
[94mVariations 5:
Mathematics is an area of knowledge that includes the topics of numbers, formula...[:3338] 688 toks[0m


![Tree of variations](plantuml/variations.png)

## Sauvons tout cela dans un vectorstore
Nous utilisons un wrapper, `RAGVectorStore`, qui s'occupe de tout et participe à la gestion du cycle de vie des documents.

### Etape par étape
On a besoin de plusieurs composants pour persister les différents éléments:
- Un vectorstore pour stocker les vecteurs de chaque transformation.
- Un `Docstore` pour stocker tous les chunks avant transformation

In [15]:
# Create the underlying vector store, persisted under ROOT_PATH
from langchain_community.vectorstores import Chroma
VS_PATH = ROOT_PATH + "/vs"
chroma_vectorstore = Chroma(
    collection_name="all_variations_of_chunks",
    embedding_function=embeddings,
    persist_directory=VS_PATH,
)

In [16]:
# Create a docstore to keep the chunks as they were before transformation
DOCSTORE_PATH = ROOT_PATH + "/chunks"
from langchain.storage import EncoderBackedStore
from langchain.storage import LocalFileStore
import pickle

# Keys are used unchanged; values are serialized with pickle.
# NOTE: unpickling can execute arbitrary code, so this store must only read
# back files written by this notebook.
docstore = EncoderBackedStore[str, Document](
    store=LocalFileStore(root_path=DOCSTORE_PATH),
    key_encoder=lambda x: x,
    value_serializer=pickle.dumps,
    value_deserializer=pickle.loads,
)

Tous les documents doivent avoir un ID unique dans les metadata, afin de les identifier lors des mises à jour successives. Rien que du classique.

Puis, nous ajoutons un VectorStore *maison*

In [17]:
# Create a RAGVectorStore with all the parameters
from langchain_rag.vectorstores import RAGVectorStore

# Value of "k" forwarded to the searches through search_kwargs.
variation_k = 10
rag_vectorstore = RAGVectorStore(
    vectorstore=chroma_vectorstore,
    docstore=docstore,
    source_id_key="source",  # Unique id of documents
    parent_transformer=parent_transformer,
    chunk_transformer=chunk_transformer,
    search_kwargs={"k": variation_k},
)

## RAGVectorStore
![RagVectorStore](plantuml/all_retrievers_0.png)

Maintenant, on peut y placer les documents. `RAGVectorStore` se charge de toutes les transformations si nécessaire.

In [18]:
# Add the original documents to the RAGVectorStore, then persist Chroma to disk.
ids = rag_vectorstore.add_documents(documents)
chroma_vectorstore.persist()
ids  # The ids of the saved "documents" (not of the chunks)

['6d90af33d816d1b2c2a2e9d277e72862ff076c265da2756ece7d15a517070f5f',
 '492c728729e2274e2af6984dbc84d61d52f4799b7215fe175ad2b4c483a95449',
 '59dd02283f9dcfdbb7cf7f1ae11dcbe986a8e2621aa92c5dbf31be9bc1b07d3b']

On peut alors faire une recherche de proximité, en tenant compte des variations.

In [19]:
# Similarity search limited to 2 results, displayed with their source and chunk id.
pretty_print_docs(
    rag_vectorstore.search(query=query, search_type="similarity",k=2),
    ["source", "_chunk_id"],
    kind="Chunk",
)

[94mChunk 1:
Mathematics is an area of knowledge that includes the topics of numbers, formula...[:3338] 688 toks[0m
[92msource='https://en.wikipedia.org/wiki/Mathematics',
_chunk_id='05f14b5f-9d80-4da8-a3b2-e8c04ac94379'[0m
----------------------------------------
[94mChunk 2:
The history of mathematics deals with the origin of discoveries in mathematics a...[:3537] 711 toks[0m
[92msource='https://en.wikipedia.org/wiki/History_of_mathematics',
_chunk_id='b0a07223-c1e9-46e7-8a29-178bfa11dfc1'[0m


Et regarder le score ajusté de chaque fragment.

In [20]:
# Lower is better.
[score for _,score in rag_vectorstore.similarity_search_with_score(query=query)]

[0.31326667435123784,
 0.34496356843704257,
 0.3457736659913206,
 0.3612211012362359]

In [21]:
# Remove the documents added above (by their ids) before the next section
# re-imports them with index().
rag_vectorstore.delete(ids=ids)
chroma_vectorstore.persist()

## Index Vector Store
Langchain propose une API pour gérer le cycle de vie des documents dans le VS.
Il faut pour cela un `RecordManager` qui va garder le lien entre le document et ses chunks. Il faut alors utiliser `index()` pour importer les documents.

In [22]:
from langchain.indexes import index, SQLRecordManager

# Record manager backed by a SQLite database stored under ROOT_PATH.
record_manager = SQLRecordManager(
    namespace="record_manager_cache", db_url=f"sqlite:///{ROOT_PATH}/record_manager.db"
)
# Create the tables used by the record manager.
record_manager.create_schema()

In [23]:
# Save all the information in:
# - record manager
# - docstore
# - vectorstore
# The same keyword arguments are reused by every index() call below.
index_kwargs = {
    "record_manager": record_manager,
    "vector_store": rag_vectorstore,
    "source_id_key": "source",
}
result = index(docs_source=documents, cleanup="incremental", **index_kwargs)
chroma_vectorstore.persist()
# Cell output: counters of added / updated / skipped / deleted documents.
result

{'num_added': 3, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}

## RAGVectorStore + index()
![RagVectorStore](plantuml/all_retrievers_1.png)

Si on importe les mêmes documents, `index()` le sait et agit en conséquence. Tous les documents sont évités (et donc, les transformations également).

In [24]:
# Index the very same documents a second time.
result = index(docs_source=documents, cleanup="incremental", **index_kwargs)
chroma_vectorstore.persist()
result

{'num_added': 0, 'num_updated': 0, 'num_skipped': 3, 'num_deleted': 0}

Si un document évolue, la version précédente est supprimée.
> **Note:** Seuls les nouveaux documents sont transformés par RAGVectorStore.

In [25]:
# Modify the first document in place, then index again.
# NOTE: this cell is not idempotent, each execution appends the text once more.
documents[0].page_content += " Is changed."
result = index(docs_source=documents, cleanup="incremental", **index_kwargs)
chroma_vectorstore.persist()
result

{'num_added': 1, 'num_updated': 0, 'num_skipped': 2, 'num_deleted': 1}

Pour supprimer un ancien document, il faut utiliser la stratégie `full`.

In [26]:
# Drop the last document from the list, then index with the "full" cleanup mode.
# NOTE: this cell is not idempotent, each execution removes one more document.
del documents[-1]
result = index(docs_source=documents, cleanup="full", **index_kwargs)
chroma_vectorstore.persist()
result

{'num_added': 0, 'num_updated': 0, 'num_skipped': 2, 'num_deleted': 1}

Il y a donc 3 sources pour sauver les documents:

- Dans le *vector store*: les vecteurs des transformations.
- Dans un autre *doc store* (ou le même): les chunks avant transformation.
- Dans le *SQLRecordManager*: les liens entre les documents et les fragments.

> **Note:** Il est à noter que tout cela n'est pas transactionnel !

![Tree of variations](plantuml/variations.png)

## RAGVectorStore
![RagVectorStore](plantuml/all_retrievers_1.png)

## Attaquons les retrievers
![Retrivers](plantuml/opt_2.png)

## Améliorer les retrievers
Comme tout vector store, on peut convertir `RAGVectorStore` en `Retriever`.

In [27]:
# Expose the RAGVectorStore as a retriever and query it.
rag_retriever = rag_vectorstore.as_retriever()
selected_chunks = rag_retriever.get_relevant_documents(query)
pretty_print_docs(selected_chunks, ["source", "_chunk_id"],kind="Chunk")

[94mChunk 1:
Mathematics is an area of knowledge that includes the topics of numbers, formula...[:2415] 430 toks[0m
[92msource='https://en.wikipedia.org/wiki/Mathematics',
_chunk_id='9941311f-98df-42c3-8150-af4b374c7958'[0m
----------------------------------------
[94mChunk 2:
== Etymology ==
The word mathematics comes from Ancient Greek máthēma (μάθημα), ...[:1434] 387 toks[0m
[92msource='https://en.wikipedia.org/wiki/Mathematics',
_chunk_id='b8a9c247-4e46-4ab0-8179-c9baa72022a5'[0m
----------------------------------------
[94mChunk 3:
The history of mathematics deals with the origin of discoveries in mathematics a...[:3537] 711 toks[0m
[92msource='https://en.wikipedia.org/wiki/History_of_mathematics',
_chunk_id='3afd054a-5563-4ac8-88b6-a3c73a6a5c3b'[0m


## Complexifions l'enchainement des Retrievers
Il est possible de combiner plusieurs retrievers ou utiliser des retrievers avancés pour certaines applications.

### SelfQueryRetriever
Le `SelfQueryRetriever` peut générer un filtre sur les méta-données. 
À titre d'exemple, nous l'utilisons pour pouvoir filtrer sur le titre du document. Mais généralement, on peut faire une extraction de mots-clés lors de l'import (lors d'une transformation), pour les ajouter dans les méta-données. Ensuite, le retriever peut filtrer les documents sur ces mots-clés.

In [29]:
from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever

# Metadata fields described to the LLM so it can build a filter on them.
metadata_field_info = [
    AttributeInfo(
        name="title",
        description="The title of the document.",
        type="string",
    ),
]
# Short description of the stored documents, also given to the LLM.
document_content_description = "Documents on mathematics"
self_retriever = SelfQueryRetriever.from_llm(
    llm,
    rag_vectorstore,
    document_content_description,
    metadata_field_info,
    use_original_query=True,
    verbose=True,
)

# The question names a document title, which the retriever can turn into a
# metadata filter.
pretty_print_docs(
    self_retriever.get_relevant_documents(
        "In the document 'History of mathematics', " + query
    ),
    ["title"],
    kind="Chunk",
)

[94mChunk 1:
The history of mathematics deals with the origin of discoveries in mathematics a...[:3537] 711 toks[0m
[92mtitle='History of mathematics'[0m
----------------------------------------
[94mChunk 2:
 bone, found near the headwaters of the Nile river (northeastern Congo), may be ...[:303] 78 toks[0m
[92mtitle='History of mathematics'[0m


### MergerRetriever
Avec les filtres, on peut avoir un `retriever` qui ne retourne que les résumés.

In [30]:
# Retriever on the raw Chroma store, filtered on the "transformer" metadata
# so that only the variations produced by SummarizeTransformer are searched.
summary_retriever = chroma_vectorstore.as_retriever(
    search_kwargs={"filter": {"transformer": {"$eq": 'SummarizeTransformer'}}}
)
pretty_print_docs(summary_retriever.get_relevant_documents(query), ["transformer"],kind="Chunk")

[94mChunk 1:
SUMMARY:
Mathematics is a field of study that deals with numbers, formulas, shap...[:629] 127 toks[0m
[92mtransformer='SummarizeTransformer'[0m
----------------------------------------
[94mChunk 2:
SUMMARY:
The history of mathematics dates back to ancient civilizations such as ...[:594] 128 toks[0m
[92mtransformer='SummarizeTransformer'[0m
----------------------------------------
[94mChunk 3:
SUMMARY:
The word mathematics originated from the Greek word máthēma, meaning "t...[:723] 178 toks[0m
[92mtransformer='SummarizeTransformer'[0m
----------------------------------------
[94mChunk 4:
SUMMARY:
The Ishango bone, discovered in northeastern Congo near the Nile river,...[:238] 65 toks[0m
[92mtransformer='SummarizeTransformer'[0m


### MergerRetriever
Pour la démo, on peut alors combiner le retriever normal, et le retriever de résumé.

In [31]:
from langchain.retrievers.merger_retriever import MergerRetriever

query = "What is the difference between pure and applied mathematics?"
# Combine the results of the self-query retriever and of the summary retriever.
merge_retriever = MergerRetriever(retrievers=[self_retriever, summary_retriever])
pretty_print_docs(
    merge_retriever.get_relevant_documents(query), ["transformer"], kind="Chunk"
)

[94mChunk 1:
Mathematics is an area of knowledge that includes the topics of numbers, formula...[:2415] 430 toks[0m
[92mtransformer=None[0m
----------------------------------------
[94mChunk 2:
SUMMARY:
Mathematics is a field of study that deals with numbers, formulas, shap...[:629] 127 toks[0m
[92mtransformer='SummarizeTransformer'[0m
----------------------------------------
[94mChunk 3:
== Etymology ==
The word mathematics comes from Ancient Greek máthēma (μάθημα), ...[:1434] 387 toks[0m
[92mtransformer=None[0m
----------------------------------------
[94mChunk 4:
SUMMARY:
The history of mathematics dates back to ancient civilizations such as ...[:594] 128 toks[0m
[92mtransformer='SummarizeTransformer'[0m
----------------------------------------
[94mChunk 5:
The history of mathematics deals with the origin of discoveries in mathematics a...[:3537] 711 toks[0m
[92mtransformer=None[0m
----------------------------------------
[94mChunk 6:
SUMMARY:
The word mathemati

### MultiQueryRetriever
Les résultats peuvent évoluer suite à une petite modification dans la question. Pour augmenter le périmètre des documents retournés, il est envisageable de dériver la question en plusieurs questions, et de cumuler alors les meilleurs documents.

In [32]:
import langchain.retrievers.multi_query
# INFO level makes the retriever log the queries it generates.
langchain.retrievers.multi_query.logger.setLevel(logging.INFO)
from langchain.retrievers.multi_query import MultiQueryRetriever

# Generate several versions of the user question, and use them to find better candidates in the vector store
multi_query_retriever = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=merge_retriever,
)

query = "What is the difference between pure and applied mathematics?"
pretty_print_docs(multi_query_retriever.get_relevant_documents(query), ["transformer"])
# Retriever used by the chains at the end of the notebook (reassigned later).
final_retriever = multi_query_retriever

INFO:langchain.retrievers.multi_query:Generated queries: ['1. How does pure mathematics differ from applied mathematics?', '2. Can you explain the distinction between pure and applied mathematics?', '3. In what ways do pure and applied mathematics vary from each other?']


[94mVariations 1:
Mathematics is an area of knowledge that includes the topics of numbers, formula...[:2415] 430 toks[0m
[92mtransformer=None[0m
----------------------------------------
[94mVariations 2:
SUMMARY:
Mathematics is a field of study that deals with numbers, formulas, shap...[:629] 127 toks[0m
[92mtransformer='SummarizeTransformer'[0m
----------------------------------------
[94mVariations 3:
== Etymology ==
The word mathematics comes from Ancient Greek máthēma (μάθημα), ...[:1434] 387 toks[0m
[92mtransformer=None[0m
----------------------------------------
[94mVariations 4:
SUMMARY:
The word mathematics originated from the Greek word máthēma, meaning "t...[:723] 178 toks[0m
[92mtransformer='SummarizeTransformer'[0m
----------------------------------------
[94mVariations 5:
The history of mathematics deals with the origin of discoveries in mathematics a...[:3537] 711 toks[0m
[92mtransformer=None[0m
----------------------------------------
[94mVariations 

### EnsembleRetriever
`EnsembleRetriever` prend une liste de retrievers, récupère les documents de chacun, puis les classe à nouveau.
Nous n'appliquons pas cela pour la démo.

Plusieurs autres approches peuvent être ajoutées...

## Synthèse
À cette étape, lors de l'utilisation du retriever:

- Plusieurs requêtes sont générées pour retrouver plus de documents (via `multi_query_retriever`)
- Pour chaque requête:
    - Les variations sont utilisées pour mieux retrouver les fragments
    - L'original et/ou le résumé du fragment peuvent être retournés
    - Si possible, un filtre sur les méta-données est appliqué (via `self_retriever`)
- Seuls les meilleurs candidats sont utilisés pour répondre à la question.


![Retriever](plantuml/all_retrievers_2.png)

## Enfin, attaquons ici
![Compressor](plantuml/opt_3.png)

## Utiliser des *compressor*
Depuis la liste des documents sélectionnés, il est possible d'utiliser des *compressor*, pour filtrer la sélection.

On peut combiner plusieurs filtres dans un pipeline.
- Le [EmbeddingsFilter](https://python.langchain.com/docs/modules/data_connection/retrievers/contextual_compression#embeddingsfilter) compare l'embedding de chaque document à celui de la question, pour écarter les documents trop éloignés de celle-ci
- Le [CohereRerank](https://python.langchain.com/docs/integrations/retrievers/cohere-reranker) peut renoter les candidats et filtrer sur un seuil.
- Le [LLMChainFilter](https://python.langchain.com/docs/modules/data_connection/retrievers/contextual_compression#llmchainfilter) peut supprimer les documents n'étant pas en rapport avec la question.
- Le [LongContextReorder](https://python.langchain.com/docs/integrations/retrievers/merger_retriever#re-order-results-to-avoid-performance-degradation) peut finalement les reclasser, pour améliorer la production de la réponse.
- le [ChainExtractor](https://python.langchain.com/docs/modules/data_connection/retrievers/contextual_compression#adding-contextual-compression-with-an-llmchainextractor) récupère uniquement les données pertinentes des documents, pour répondre à la question
- ...

In [33]:
from langchain.retrievers.document_compressors import EmbeddingsFilter

# Filter comparing the embedding of each document with the embedding of the query.
embeddings_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.7,  # Minimum query/document similarity for a document to be kept.
)

In [34]:
from langchain.document_transformers import LongContextReorder

# Transformer that reorders the selected documents; it takes no parameters.
long_context_reorder = LongContextReorder()

In [35]:
# Combine compressors
from langchain.retrievers.document_compressors import DocumentCompressorPipeline

# Only the reordering step is active; the embeddings filter is deliberately
# left out of the pipeline.
compressor = DocumentCompressorPipeline(
    transformers=[
        # embeddings_filter,
        long_context_reorder,
    ]
)

> **Note:** Nous n'utilisons pas `embeddings_filter`, parce qu'un fragment peut avoir une proximité < 0.7, mais sa variation une proximité supérieure. Nous souhaitons justement garder le fragment remonté via sa variation.

Maintenant, on peut ajouter les filtres à notre pipeline.

In [36]:
from langchain.retrievers import ContextualCompressionRetriever

# Apply the compressor pipeline to the documents returned by the multi-query retriever.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=multi_query_retriever
)

pretty_print_docs(compression_retriever.get_relevant_documents(query),kind="Chunk")

INFO:langchain.retrievers.multi_query:Generated queries: ['1. How does pure mathematics differ from applied mathematics?', '2. Can you explain the distinction between pure and applied mathematics?', '3. In what ways do pure and applied mathematics vary from each other?']


[94mChunk 1:
Mathematics is an area of knowledge that includes the topics of numbers, formula...[:2415] 430 toks[0m
----------------------------------------
[94mChunk 2:
== Etymology ==
The word mathematics comes from Ancient Greek máthēma (μάθημα), ...[:1434] 387 toks[0m
----------------------------------------
[94mChunk 3:
The history of mathematics deals with the origin of discoveries in mathematics a...[:3537] 711 toks[0m
----------------------------------------
[94mChunk 4:
SUMMARY:
The Ishango bone, discovered in northeastern Congo near the Nile river,...[:238] 65 toks[0m
----------------------------------------
[94mChunk 5:
SUMMARY:
The history of mathematics dates back to ancient civilizations such as ...[:594] 128 toks[0m
----------------------------------------
[94mChunk 6:
SUMMARY:
The word mathematics originated from the Greek word máthēma, meaning "t...[:723] 178 toks[0m
----------------------------------------
[94mChunk 7:
SUMMARY:
Mathematics is a field of s

## Pour Finir...

In [37]:
# The complete chain of retrievers, used by the question-answering cells below.
final_retriever = compression_retriever

![Chain of retrievers](plantuml/all_retrievers.png)

## Posons la question

Maintenant, il est possible d'utiliser cette architecture pour répondre à la question.

Il peut y avoir une difficulté, si l'intégration des documents dépasse le nombre de tokens autorisé.
Il existe plusieurs stratégies pour gérer cela, identifiées par le paramètre  
[`chain_type`](https://python.langchain.com/docs/use_cases/question_answering/vector_db_qa#chain-type).

> **Note 1**: Les versions `load_qa_chain()` et `RetrievalQAWithSourcesChain` sont sujettes à des hallucinations. Elles peuvent répondre sans utiliser les documents. Ce n'est pas le cas de `RetrievalQAWithReferencesChain` et `RetrievalQAWithReferencesAndVerbatimsChain`.

> **Note 2**: L'approche `map_reduce` utilise une approche similaire aux *compressor*, mais fonctionne récursivement, pour rester sous le seuil du nombre de tokens.

In [38]:
from langchain.chains.question_answering import load_qa_chain
# Silence the "Generated queries" log lines of the multi-query retriever.
langchain.retrievers.multi_query.logger.setLevel(logging.ERROR)
chain = load_qa_chain(
    llm,
    chain_type="stuff",  # "stuff", "map_reduce", "refine", "map_rerank"
)
# Callbacks must be passed through the runnable config: a bare `callbacks=`
# keyword given to invoke() is not read by the chain.
result = chain.invoke(
    {
        "input_documents": final_retriever.get_relevant_documents(query),
        "question": query,
    },
    config={"callbacks": CALLBACKS},
)
print(result["output_text"])

 Pure mathematics is developed independently from any application, while applied mathematics is developed in close correlation with its applications.


Si le document possède une `sources` et que les URLs consomment trop de tokens, vous pouvez utiliser `RetrievalQAWithSourcesChain`.

In [39]:
from langchain.chains import RetrievalQAWithSourcesChain

# Chain that retrieves the documents itself through `final_retriever`.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="map_reduce",  # "stuff", "map_reduce", "refine", "map_rerank"
    retriever=final_retriever,
    callbacks=CALLBACKS,
)
result = chain.invoke(query)
print(result["answer"])
# result["sources"] is displayed through the non-list branch of pretty_print_docs().
pretty_print_docs(result["sources"])

 Pure mathematics is developed independently from any application, while applied mathematics is developed in close correlation with its applications.

[92mhttps://en.wikipedia.org/wiki/Mathematics[0m


In [40]:
# Clean up: remove the working directory of the demo.
import os
import shutil

# Guarded so the cell can be executed again once the directory is gone.
if os.path.isdir(ROOT_PATH):
    shutil.rmtree(ROOT_PATH)

Retour à la théorie

## References
- [Why Your RAG Is Not Reliable in a Production Environment](https://towardsdatascience.com/why-your-rag-is-not-reliable-in-a-production-environment-9e6a73b3eddb)
- [Forget RAG, the Future is RAG-Fusion](https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1)
- [A first intro to Complex RAG](https://medium.com/enterprise-rag/a-first-intro-to-complex-rag-retrieval-augmented-generation-a8624d70090f)
- [Advanced RAG Techniques: an Illustrated Overview](https://pub.towardsai.net/advanced-rag-techniques-an-illustrated-overview-04d193d8fec6)