In [1]:
#Importing all the necessary libraries 
import os
import streamlit as st
import bs4
from dotenv import load_dotenv
# LangChain and components
from langchain_core.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate


# Load environment variables (load_dotenv), then read the Groq API key.
# The key is None when GROQ_API_KEY is not set.
load_dotenv()
groq_api_key = os.environ.get("GROQ_API_KEY")

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Fetch the blog post; the strainer restricts parsing to elements carrying
# one of the listed CSS classes.
post_strainer = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
loader = WebBaseLoader(
    web_paths=["https://lilianweng.github.io/posts/2018-06-24-attention/"],
    bs_kwargs={"parse_only": post_strainer},
)
docs = loader.load()

# Cut the loaded documents into 1000-character chunks that overlap by 200.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = splitter.split_documents(docs)

# Embedding model used to vectorise the chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Index the chunks in a Chroma vector store and expose it as a retriever.
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
'''RAG-Fusion:
It is an approach to improve a RAG pipeline by generating multiple queries from the user query.
Each query fetches relevant documents, and those documents are then ranked with the RRF (Reciprocal Rank Fusion) formula.

The score is calculated as 1/(rank + k), where rank is the position of the document in the list and k is a constant.

The results are combined into one list and used as the reference context for the LLM.

'''

# Prompt that asks the LLM to expand one user question into several search
# queries. Each "\n\n" is the blank line separating the prompt's sections.
template = (
    "You are a helpful assistant that generates multiple search queries based on a single input query. \n\n"
    "Generate multiple search queries related to: {question} \n\n"
    "Output (4 queries):"
)
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [4]:
from langchain_core.output_parsers import StrOutputParser

# Connect to Groq's LLM
llm = ChatGroq(model="Gemma2-9b-It", groq_api_key=groq_api_key)


def _split_queries(text):
    """Split the LLM reply into one search query per non-empty line.

    Args:
        text: Raw string produced by the LLM.

    Returns:
        A list of stripped, non-empty lines. Blank or whitespace-only lines
        are dropped because every element of this list is later used as a
        separate retrieval query, and an empty query only adds noise.
    """
    stripped_lines = (line.strip() for line in text.splitlines())
    return [line for line in stripped_lines if line]


# Chain: fill the prompt -> call the LLM -> take the text -> one query per line.
generate_queries = (
    prompt_rag_fusion
    | llm
    | StrOutputParser()
    | _split_queries
)

In [5]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists into one list using Reciprocal Rank Fusion.

    Every occurrence of a document contributes ``1 / (rank + k)`` to that
    document's score, where ``rank`` is its 0-based position in the list it
    appears in. Contributions are summed across all lists.

    Args:
        results: A list of ranked document lists. Documents are serialized
            with ``dumps`` so that identical documents share one score entry.
        k: Constant added to the rank in the RRF formula. Note that ``k=0``
            raises ``ZeroDivisionError`` for a first-ranked document, because
            ranks start at 0.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by score in
        descending order. Returns an empty list when ``results`` is empty.
    """
    # Serialized document -> accumulated RRF score.
    fused_scores = {}

    for ranked_docs in results:
        for rank, doc in enumerate(ranked_docs):
            # The serialized form is the dictionary key (assumes documents can
            # be serialized by `dumps`).
            doc_key = dumps(doc)
            fused_scores[doc_key] = fused_scores.get(doc_key, 0) + 1 / (rank + k)

    # Highest fused score first; the sort is stable, so ties keep first-seen order.
    ordered = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)

    # Deserialize the keys back into document objects.
    return [(loads(doc_key), score) for doc_key, score in ordered]

# Example question used to exercise the fusion retrieval chain.
question = "What is a switch transformer and how does it work?"

# Pipeline: generate queries -> run the retriever over each query -> fuse the rankings.
retrieval_chain_rag_fusion = (
    generate_queries
    | retriever.map()
    | reciprocal_rank_fusion
)
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

  (loads(doc), score)


10

In [6]:
from operator import itemgetter

# RAG: answer the question using the fused, re-ranked documents as context.
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)


def format_ranked_docs(ranked_docs):
    """Turn fused retrieval results into plain text for the prompt's context.

    Args:
        ranked_docs: Iterable of ``(document, score)`` pairs, as returned by
            ``retrieval_chain_rag_fusion``. Each document is expected to expose
            its text as ``page_content`` (LangChain ``Document``).

    Returns:
        The documents' texts joined by blank lines, in the given order. The
        scores are intentionally left out: without this step the prompt would
        receive the Python repr of the whole list of tuples (metadata and
        float scores included) instead of readable text.
    """
    return "\n\n".join(doc.page_content for doc, _score in ranked_docs)


final_rag_chain = (
    {
        "context": retrieval_chain_rag_fusion | format_ranked_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()
)
question = "What is a seq2seq model?"
final_rag_chain.invoke({"question": question})

'A seq2seq (sequence-to-sequence) model is a type of neural network architecture designed to transform one sequence of data into another sequence of data. \n\nHere\'s a breakdown:\n\n* **Input:**  An input sequence, which can be of variable length. Examples include words in a sentence, pixels in an image, or notes in a musical piece.\n* **Output:** A corresponding output sequence, also of variable length.  For example, translating a sentence into another language, generating a summary of a text, or predicting the next word in a sentence.\n* **Architecture:** Typically composed of an encoder and a decoder. \n    * The **encoder** processes the input sequence and creates a representation (a "context") that captures the meaning of the entire sequence.\n    * The **decoder** takes this context and generates the output sequence, one element at a time.\n\nSeq2seq models are widely used in natural language processing (NLP) tasks like machine translation, text summarization, and chatbot develo