In [17]:
import os
import textwrap

import openai
import torch
import yaml
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain.vectorstores import Chroma

In [18]:
# Read the OpenAI client settings from a local YAML file so that no secret is
# hard-coded in the notebook.
# NOTE(review): 'cadentials.yaml' looks like a misspelling of 'credentials.yaml' —
# confirm against the file on disk before renaming anything.
with open('cadentials.yaml') as f:
    # safe_load builds only plain YAML types (mappings, lists, scalars), which is
    # all a settings file needs, and rejects tags that construct Python objects.
    credentials = yaml.safe_load(f)

# Module-level configuration of the openai client; a missing key raises KeyError
# here rather than failing later on the first API call.
openai.api_key = credentials['AD_OPENAI_API_KEY']
openai.api_base = credentials['AD_OPENAI_API_BASE']
openai.api_type = credentials['AD_OPENAI_API_TYPE']
openai.api_version = credentials['AD_OPENAI_API_VERSION']
# NOTE(review): `engine` is stored as an ad-hoc module attribute; confirm that
# whatever issues the API calls actually reads it from here.
openai.engine = credentials['AD_ENGINE']

In [19]:
model_name = "BAAI/bge-small-en-v1.5"
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

# Pick the torch device at runtime instead of hard-coding Apple's 'mps', so the
# notebook also runs on CUDA or CPU-only machines. 'mps' keeps first priority, so
# the result is unchanged on the machine the notebook was written on.
# `torch.backends.mps` is missing on older torch builds, hence the getattr guard.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    embedding_device = "mps"
elif torch.cuda.is_available():
    embedding_device = "cuda"
else:
    embedding_device = "cpu"

# Embedding model shared by the dense (FAISS) retriever further down.
bge_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': embedding_device},
    encode_kwargs=encode_kwargs,
)

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [20]:
# Toy corpus indexed by every retriever in this notebook. The sentences mix
# fruit wording with one mention of Apple the company ("computers by Apple").
doc_list = [
    "I like apples",
    "I like oranges",
    "Apples and oranges are fruits",
    "I like computers by Apple",
    "I love fruit juice",
]

## BM25 Retriever - Sparse retriever

In [21]:
# Sparse retriever over the toy corpus; `k=2` caps every query at two documents.
bm25_retriever = BM25Retriever.from_texts(doc_list, k=2)

In [22]:
# Single-keyword query against the sparse retriever; the cell displays the hits.
bm25_retriever.get_relevant_documents(query="Apple")

[Document(page_content='I like computers by Apple'),
 Document(page_content='I love fruit juice')]

In [23]:
# Descriptive query against the sparse retriever, for comparison with the dense
# retriever, which is asked about the same "green fruit" further down.
bm25_retriever.get_relevant_documents(query="a green fruit")

[Document(page_content='I love fruit juice'),
 Document(page_content='I like computers by Apple')]

## FAISS Retriever - Dense (embedding-based) retriever

In [24]:
# Embed the toy corpus with the BGE model and index the vectors in FAISS.
faiss_vectorstore = FAISS.from_texts(texts=doc_list, embedding=bge_embeddings)

# Wrap the index as a retriever limited to two documents per query.
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs=dict(k=2))

In [25]:
# Descriptive query against the dense retriever; the cell displays the hits.
faiss_retriever.get_relevant_documents(query="A green fruit")

[Document(page_content='Apples and oranges are fruits'),
 Document(page_content='I love fruit juice')]

## Ensemble Retriever

In [26]:
# Hybrid retriever: combines the sparse (BM25) and dense (FAISS) retrievers,
# giving each of them the same weight.
ensemble_retriever = EnsembleRetriever(
    weights=[0.5, 0.5],
    retrievers=[bm25_retriever, faiss_retriever],
)

In [27]:
# Same "green fruit" query as above, now answered by the hybrid retriever.
docs = ensemble_retriever.get_relevant_documents(query="A green fruit")
docs

[Document(page_content='I love fruit juice'),
 Document(page_content='Apples and oranges are fruits'),
 Document(page_content='I like computers by Apple')]

In [28]:
# Company-flavoured query for the hybrid retriever; note that `docs` is rebound
# here and no longer holds the "green fruit" results.
docs = ensemble_retriever.get_relevant_documents(query="Apple Phones")
docs

[Document(page_content='I like computers by Apple'),
 Document(page_content='I love fruit juice'),
 Document(page_content='I like apples')]