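# Testing the LangChain ensemble retriever

This notebook builds a BM25 retriever and a FAISS vector-store retriever over two small toy corpora, combines them with `EnsembleRetriever`, and runs a sample query against the combination.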

In [6]:
# Toy corpora with overlapping topics (apples, oranges, trains).

# First-person sentences plus one sentence mentioning both fruits;
# this list is indexed by the BM25 retriever.
doc_list_1 = [
    "I like apples",
    "I like oranges",
    "Apples and oranges are fruits",
    "I like trains....woooooooooom",
]

# Second-person counterparts; this list is indexed by the FAISS retriever.
doc_list_2 = [
    "You like apples",
    "You like oranges",
    "You like trains....woooooooooom",
]

## Try out the BM25 retriever together with FAISS

In [2]:
%pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2
Note: you may need to restart the kernel to use updated packages.


In [16]:
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers.bm25 import BM25Retriever
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import GPT4AllEmbeddings

### FAISS

In [9]:
# FAISS retriever: embed doc_list_2 with GPT4All embeddings and index the
# vectors in a FAISS vector store.
embedding = GPT4AllEmbeddings()
faiss_vectorstore = FAISS.from_texts(
    doc_list_2,
    embedding,
    # Build one metadata dict per text. `[{"source": 2}] * n` would repeat a
    # single dict object n times, so every entry would alias the same dict.
    # The "source" tag marks hits that come from doc_list_2.
    metadatas=[{"source": 2} for _ in doc_list_2],
)
# Ask the vector store for at most 2 documents per query.
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={'k': 2})

bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522


### BM25

In [17]:
# BM25 retriever built directly from the raw texts of doc_list_1.
bm25_retriever = BM25Retriever.from_texts(
    doc_list_1,
    # Build one metadata dict per text. `[{"source": 1}] * n` would repeat a
    # single dict object n times, so every entry would alias the same dict.
    # The "source" tag marks hits that come from doc_list_1.
    metadatas=[{"source": 1} for _ in doc_list_1],
)
# Return at most 2 documents per query (the same cut-off the FAISS retriever
# is configured with).
bm25_retriever.k = 2

### Ensemble retriever

In [18]:
# Combine the BM25 and FAISS retrievers into a single retriever.
# The weights pair up positionally with the retrievers list; 0.5 / 0.5 gives
# both retrievers equal influence on the merged result.
ensemble_retriever = EnsembleRetriever(
    weights=[0.5, 0.5],
    retrievers=[bm25_retriever, faiss_retriever],
)

## Test the ensemble retriever

In [19]:
# Send a one-word query through the ensemble; the bare last expression makes
# the notebook display the returned documents.
sample_query = "apples"
docs = ensemble_retriever.invoke(sample_query)
docs

[Document(page_content='I like apples', metadata={'source': 1}),
 Document(page_content='You like apples', metadata={'source': 2}),
 Document(page_content='You like oranges', metadata={'source': 2}),
 Document(page_content='I like trains....woooooooooom', metadata={'source': 1})]