# RAG for narration

A basic RAG framework built with LangChain.

In [None]:
import getpass
import os

import bs4
import tiktoken

## Part 1: Environment

`(1) LangSmith`

In [None]:
# Enable LangSmith tracing for the LangChain calls made in this notebook.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
# Never store the API key in the notebook: reuse the value already exported in
# the environment, otherwise prompt for it so the secret is not saved to disk.
# NOTE(review): the key that was previously hardcoded here has been exposed and
# should be revoked.
if not os.environ.get('LANGCHAIN_API_KEY'):
    os.environ['LANGCHAIN_API_KEY'] = getpass.getpass('LangSmith API key: ')

`(2) API Keys`

In [None]:
# Credentials for the OpenAI-compatible endpoint. The key is taken from the
# environment when available and prompted for otherwise, so it never lives in
# the notebook file.
# NOTE(review): the key that was previously hardcoded here has been exposed and
# should be revoked.
API_SECRET_KEY = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")
# Base URL of the OpenAI-compatible gateway; an already exported
# OPENAI_API_BASE takes precedence so the key and endpoint stay paired.
BASE_URL = os.environ.get("OPENAI_API_BASE") or "https://api.gpts.vin/v1"

os.environ["OPENAI_API_KEY"] = API_SECRET_KEY
os.environ["OPENAI_API_BASE"] = BASE_URL

## Part 2: Indexing

In [None]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings


`Document Preparation`

In [None]:
# Load the blog post to be indexed. The SoupStrainer restricts HTML parsing to
# elements whose class is "post-content", "post-title" or "post-header".
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(class_=("post-content", "post-title", "post-header")),
    },
)

blog_docs = loader.load()

In [None]:
# blog_docs

* A `Routing` step could be added to select which `Data Source` (Sports, News, Life, ...) should be used.

`Token counter`

In [None]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens produced by encoding `string` with a tiktoken encoding.

    Args:
        string: the text to measure.
        encoding_name: name passed to `tiktoken.get_encoding`.

    Returns:
        The length of the encoded token sequence.
    """
    tokens = tiktoken.get_encoding(encoding_name).encode(string)
    return len(tokens)

In [None]:
# num_tokens_from_string(question, "cl100k_base")

* `Splitting Document` (can be improved with an `Indexing` method such as multi-representation indexing)

In [None]:
# Break the loaded documents into overlapping chunks before storage; the
# splitter is built with its tiktoken-based constructor (size 300, overlap 50).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap=50)
splits = text_splitter.split_documents(blog_docs)

In [None]:
# splits

`Save in the VectorStore` (this step also performs the `Embedding`)

In [None]:
# Embed every chunk with the "all-MiniLM-L6-v2" sentence-transformers model
# and store the resulting vectors in a Chroma vector store.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding_function,
)

## Part 3: Retrieval

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.load import dumps, loads

`Create Retriever`

In [None]:
# Wrap the vector store in a retriever; search_kwargs requests k=5 per query
retriever = vectorstore.as_retriever(search_kwargs=dict(k=5))

`RAG-Fusion`(Query Translation)

In [None]:
# RAG-Fusion: have the LLM rewrite the user question into several related
# search queries so retrieval can be run once per query.
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""

prompt_rag_fusion = ChatPromptTemplate.from_template(template)

generate_queries = (
    prompt_rag_fusion
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
    # The model answers with one query per line. Blank or whitespace-only lines
    # are dropped so that no empty query string is sent to the retriever.
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

In [None]:
# Each generated query retrieves its own ranked list of documents; fuse those lists by RRF score and drop duplicate documents
def reciprocal_rank_fusion(results: list[list], k=60):
    '''
        Fuse several ranked document lists into one ranking with Reciprocal Rank Fusion.

        Every occurrence of a document adds 1 / (rank + k) to that document's score,
        where rank is its 0-based position in the list it appears in. Documents are
        identified by their `dumps` serialization, so the same document returned
        for several queries is scored once and appears once in the output.

        Args:
            results: ranked lists of documents, one list per query, best match first.
                Each document must round-trip through `dumps` / `loads`.
            k: constant added to the rank in the RRF formula (default 60).

        Returns:
            A list of (document, fused_score) tuples sorted by descending score.
            An empty `results` yields an empty list.
    '''
    # Serialized document -> accumulated RRF score
    fused_scores = {}

    for docs in results:
        for rank, doc in enumerate(docs):
            # The serialized form doubles as a hashable identity for the document
            doc_str = dumps(doc)
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

    # Highest fused score first; deserialize the keys back into documents
    return [
        (loads(doc_str), score)
        for doc_str, score in sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    ]

# Generated queries -> one retrieval per query -> fused, de-duplicated ranking
retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion

docs = retrieval_chain_rag_fusion.invoke(
    {"question": "What is Task Decomposition?"}
)
len(docs)

In [None]:
# docs

## Part 4: Generation

Final `Prompt Template Design`

In [None]:
# Final answer prompt: combines the retrieved context with the user question.
# {context} and {question} are the template variables supplied when the chain
# is invoked.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

Specify the `LLM Model`

In [None]:
# Chat model that writes the final answer (temperature 0)
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

# The filled-in prompt is piped directly into the model
chain = prompt | llm

`Final Answer`

In [None]:
# Run. `docs` holds the (document, fused_score) tuples returned by
# reciprocal_rank_fusion. Only the text of each document is passed as context;
# formatting the raw list would put Document(...) reprs, metadata and scores
# into the prompt.
chain.invoke({
    "context": "\n\n".join(doc.page_content for doc, _score in docs),
    "question": "What is Task Decomposition?",
})

`RAG Chain`

In [None]:
# from langchain_core.output_parsers import StrOutputParser
# from langchain_core.runnables import RunnablePassthrough

# rag_chain = (
#     {"context": retriever, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
# )

# rag_chain.invoke("What is Task Decomposition?")

In [None]:
# Scratch (disabled): minimal check that the OpenAI-compatible endpoint answers.
# The API key that used to be hardcoded here was removed; supply it through the
# OPENAI_API_KEY environment variable before enabling this code.
# Kept as comments rather than a bare string literal so that running the cell
# does not echo its content as output.
#
# import os
# import requests
# import time
# import json
# from langchain_openai import OpenAI
#
# BASE_URL = "https://api.gpts.vin/v1"
#
# os.environ["OPENAI_API_BASE"] = BASE_URL
#
# llm = OpenAI(temperature=0.9)
# text = "hello"
# llm.invoke(text)