In [1]:
# Import necessary libraries from LangChain and related packages
import os

import torch
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [12]:

# Location of the source document. Raw strings keep the Windows backslashes
# literal instead of relying on invalid escape sequences such as "\W" or "\P",
# which newer Python versions warn about.
# relative_path = r'D:\Work\Python_Code'  # For folder loader
relative_path = r'D:\Work\Python_Code\nke-10k-2023.txt'  # One file loader

# Options shared by the single-file loader and the (commented) folder loader
text_loader_kwargs = {'autodetect_encoding': True}
##################################################
### For folder loader
# loader = DirectoryLoader(relative_path, glob="**/*.txt", show_progress=True, loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
# docs = loader.load()
##################################################
loader = TextLoader(relative_path, **text_loader_kwargs)
docs = loader.load()

# Split the loaded documents into chunks of at most 256 characters,
# with a 20-character overlap between neighbouring chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=256,
    chunk_overlap=20,
)
spl_text = text_splitter.split_documents(docs)

In [13]:
# Define the path to the pre-trained model you want to use
#### Most-downloaded model
modelPath = 'sentence-transformers/all-MiniLM-L6-v2'
#### BU3 text2vec
# modelPath = "shibing624/text2vec-base-chinese"

# Model configuration: run on the GPU when CUDA is available, otherwise fall
# back to the CPU so the notebook still runs on machines without a GPU
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}

# Encoding options: leave the embedding vectors un-normalized
encode_kwargs = {'normalize_embeddings': False}

# Initialize an instance of HuggingFaceEmbeddings with the specified parameters
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,        # Provide the pre-trained model's path
    model_kwargs=model_kwargs,   # Pass the model configuration options
    encode_kwargs=encode_kwargs  # Pass the encoding options
)

In [None]:
# Import the AzureChatOpenAI model from LangChain for chat-based interactions using Azure's OpenAI service
from langchain.chat_models import AzureChatOpenAI

# Create an instance of AzureChatOpenAI.
# The endpoint and key are read from the environment so that no credential is
# ever written into (or committed with) the notebook. Set AZURE_OPENAI_ENDPOINT
# and AZURE_OPENAI_API_KEY before running this cell; when they are unset the
# values fall back to the empty placeholders.
llm = AzureChatOpenAI(
    openai_api_base=os.environ.get("AZURE_OPENAI_ENDPOINT", ""),  # Azure OpenAI API base URL (e.g., "https://<region>.openai.azure.com/")
    openai_api_version="2024-04-01-preview",  # Specifies the API version for Azure OpenAI
    azure_deployment="gpt-4o",  # Deployment name on Azure; replace with your specific model deployment name
    openai_api_key=os.environ.get("AZURE_OPENAI_API_KEY", ""),  # Azure OpenAI API key
    openai_api_type="azure",  # Specifies that the API type is 'azure'
    temperature=0.5,  # Sampling temperature passed to the model
    max_tokens=1024,  # Sets the maximum number of tokens in the response (controls response length)
    streaming=True  # Enables streaming for real-time response generation
)




# Ensemble Retriever
The EnsembleRetriever takes a list of retrievers as input, ensembles the results of their get_relevant_documents() methods, and reranks the results based on the Reciprocal Rank Fusion algorithm.

By leveraging the strengths of different algorithms, the EnsembleRetriever can achieve better performance than any single algorithm.

The most common pattern is to combine a sparse retriever (like BM25) with a dense retriever (like embedding similarity), because their strengths are complementary. It is also known as "hybrid search". The sparse retriever is good at finding relevant documents based on keywords, while the dense retriever is good at finding relevant documents based on semantic similarity.

In [None]:
# Building blocks for hybrid search: the ensemble wrapper, a BM25 retriever and a FAISS vector store
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS

# --- BM25 retriever built from the split chunks ---
bm25_retriever = BM25Retriever.from_documents(spl_text)
bm25_retriever.k = 3  # number of results BM25 returns per query

# --- FAISS retriever built from the same chunks and the embedding model ---
embedding = embeddings  # alias for the embedding model created earlier
faiss_vectorstore = FAISS.from_documents(spl_text, embedding)
faiss_search_kwargs = {"k": 3}  # number of results FAISS returns per query
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs=faiss_search_kwargs)

# --- Ensemble: both retrievers, weighted equally ---
ensemble_members = [bm25_retriever, faiss_retriever]
ensemble_weights = [0.5, 0.5]
ensemble_retriever = EnsembleRetriever(
    retrievers=ensemble_members,
    weights=ensemble_weights,
)


# Cross Encoder Reranker
This notebook shows how to implement a reranker in a retriever with your own cross encoder, using either a Hugging Face cross-encoder model or a Hugging Face model that implements the cross-encoder function (example: BAAI/bge-reranker-base). SagemakerEndpointCrossEncoder enables you to use these Hugging Face models when they are hosted on SageMaker.

This builds on top of ideas in the ContextualCompressionRetriever. The overall structure of this document comes from the Cohere Reranker documentation.

For more about why a cross encoder can be used as a reranking mechanism in conjunction with embeddings for better retrieval, refer to the Hugging Face Cross-Encoders documentation.

# Doing reranking with CrossEncoderReranker
Now let's wrap our base retriever with a ContextualCompressionRetriever. CrossEncoderReranker uses HuggingFaceCrossEncoder to rerank the returned results.

In [None]:
# Import the necessary modules for contextual compression and cross-encoder reranking
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

# Initialize the cross-encoder model for reranking
model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")  # Load a cross-encoder reranker model

# Initialize the compressor with the cross-encoder model and set the top_n parameter
compressor = CrossEncoderReranker(model=model, top_n=3)  # Keep the top 3 documents based on relevance

# Initialize the compression retriever, combining the compressor and a base retriever (e.g., ensemble retriever)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,  # Set the cross-encoder compressor for document reranking
    base_retriever=ensemble_retriever  # Use the previously defined ensemble retriever as the base retriever
)

# Query the retriever and view the compressed document results
compressed_docs = compression_retriever.invoke("When and where Nike incorporate?")

# A bare assignment shows nothing in a notebook; end the cell with the value
# so the reranked documents are actually displayed as the cell output.
compressed_docs

[Document(metadata={'source': 'D:\\Work\\Python_Code\\nke-10k-2023.txt'}, page_content='| ITEM 16. Form 10-K Summary |  | 97 |\n| Signatures |  | 99 |\nPART I\nITEM 1. BUSINESS\nGENERAL\nNIKE, Inc. was incorporated in 1967 under the laws of the State of Oregon. As used in this Annual Report on Form 10-K (this'), Document(metadata={'source': 'D:\\Work\\Python_Code\\nke-10k-2023.txt'}, page_content='NIKE Air-Sole cushioning components. During fiscal 2023, Air Manufacturing Innovation, a wholly-owned subsidiary, with facilities\nnear Beaverton, Oregon, in Dong Nai Province, Vietnam, and St. Charles, Missouri, as well as contract manufacturers in China'), Document(metadata={'source': 'D:\\Work\\Python_Code\\nke-10k-2023.txt'}, page_content='and 61% for fiscal 2022 and fiscal 2021, respectively. We sell our products to retail accounts through our own NIKE Direct\noperations and through a mix of independent distributors, licensees and sales representatives around the world. We sell to')]


In [None]:
# The question that will be put to the QA chain
question = "When and where was Nike incorporated?"

# Options for the RetrievalQA chain, collected in one place:
#   llm                     - language model that writes the answer
#   chain_type              - "stuff" chain type
#   retriever               - reranking retriever that supplies the documents
#   return_source_documents - False, so only the answer is returned
qa_chain_options = dict(
    llm=llm,
    chain_type="stuff",
    retriever=compression_retriever,
    return_source_documents=False,
)

# Build the chain from those options
chain = RetrievalQA.from_chain_type(**qa_chain_options)

# Function to execute the query with streaming output
def run_with_streaming(chain, question):
    """
    Runs the query through the chain and prints the answer to stdout.

    A stdout callback handler is attached to the run so that tokens are printed
    as they are reported by the LLM. Simply iterating over the string returned
    by ``chain.run`` is not streaming: that call only returns once the whole
    answer has been generated. If no tokens were reported during the run, the
    complete answer is printed once at the end instead, so the output is never
    lost.

    :param chain: The QA chain instance.
    :param question: The query/question string.
    :return: None. The answer is written to stdout without a trailing newline.
    """

    class _TokenPrinter(StreamingStdOutCallbackHandler):
        """Stdout token printer that remembers whether any token arrived."""

        emitted = False

        def on_llm_new_token(self, token, **kwargs):
            self.emitted = True
            super().on_llm_new_token(token, **kwargs)

    handler = _TokenPrinter()
    response = chain.run(question, callbacks=[handler])  # Tokens are printed by the handler during this call

    # Fallback when no token was streamed: print the finished answer in one go
    if not handler.emitted:
        print(response, end="", flush=True)

# Ask the question and print the answer
run_with_streaming(chain=chain, question=question)

Nike, Inc. was incorporated in 1967 under the laws of the State of Oregon.