# RAG

## Requirements

In [1]:
%%capture
!pip install transformers accelerate bitsandbytes langchain langchain-community sentence-transformers faiss-gpu pandas gdown

## Dataset

In [2]:
!gdown --fuzzy https://drive.google.com/file/d/1Lq2zVJlN_B4kUAu4VafQ4jXMIQiAR9vI/view?usp=sharing

zsh:1: no matches found: https://drive.google.com/file/d/1Lq2zVJlN_B4kUAu4VafQ4jXMIQiAR9vI/view?usp=sharing


## Config

In [3]:
class Config:
    """Notebook-wide settings: model identifiers and retrieval depth."""

    # Model name handed to the embeddings loader when vectorizing documents.
    EMBEDDING_MODEL_NAME = "thenlper/gte-base"

    # Model name handed to the causal-LM / tokenizer loaders.
    LLM_MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"

    # top K retrieval
    K = 5

## Preprocessing

In [4]:
import os

import pandas as pd

# Location of the crawled IMDb JSON file. The default is the original
# author-local path; set the IMDB_JSON_PATH environment variable to run the
# notebook on another machine without editing this cell.
IMDB_JSON_PATH = os.environ.get(
    "IMDB_JSON_PATH",
    '/Users/snapp/PycharmProjects/IMDb-IR-System/Logic/core/IMDB_crawled.json',
)

df = pd.read_json(IMDB_JSON_PATH)

In [5]:
import os

os.makedirs('data', exist_ok=True)

# The embedding model has a limited context window, so only the needed fields
# are stored: rows missing either field are discarded, then every other column
# is dropped.
needed_columns = ['first_page_summary', 'genres']
df = df.dropna(subset=needed_columns)[needed_columns]

df.to_csv('data/imdb.csv', index=False)

## Vectorizer

Load the CSV file and vectorize its rows using HuggingFaceEmbeddings.
Store the resulting vectors in a FAISS vector store.
Save the vector store to a pickle file for future use.

In [None]:
import numpy as np
import pickle

from langchain.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores.utils import DistanceStrategy
from langchain.vectorstores.faiss import FAISS
from faiss import IndexFlatL2

from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document

# load the csv
df = pd.read_csv('data/imdb.csv')

# One text per row: the summary followed by the genres.
document_texts = [
    f"{first_page_summary} {genres}"
    for first_page_summary, genres in zip(df['first_page_summary'], df['genres'])
]

# load the embeddings model
# normalize_embeddings makes every vector unit length, so ranking by L2 distance
# in IndexFlatL2 matches ranking by cosine similarity, consistent with the
# COSINE strategy declared on the vector store below.
embeddings_model = HuggingFaceEmbeddings(
    model_name=Config.EMBEDDING_MODEL_NAME,
    encode_kwargs={"normalize_embeddings": True},
)
vectors = embeddings_model.embed_documents(document_texts)


def embedding_function(texts):
    """Embed a list of texts with ``embeddings_model``.

    Retained as a notebook-level helper. It is no longer handed to FAISS:
    FAISS calls a bare callable with a single query string, which
    ``embed_documents`` would iterate character by character.
    """
    return embeddings_model.embed_documents(texts)


# FAISS resolves search hits through a Docstore holding Document objects keyed
# by string id; a plain dict of strings cannot be searched by the vector store.
doc_store = InMemoryDocstore(
    {str(i): Document(page_content=text) for i, text in enumerate(document_texts)}
)
index_to_doc_store_id = {i: str(i) for i in range(len(document_texts))}

vectors_np = np.asarray(vectors, dtype='float32')
if vectors_np.ndim != 2 or vectors_np.shape[0] == 0:
    raise ValueError("No documents were embedded; check that data/imdb.csv is not empty.")

# Take the index dimensionality from the embeddings instead of hard-coding it,
# so a different EMBEDDING_MODEL_NAME keeps working.
index = IndexFlatL2(vectors_np.shape[1])
index.add(vectors_np)

# store the embedded documents in a vectorstore
vectorstore = FAISS(
    embedding_function=embeddings_model,
    index=index,
    docstore=doc_store,
    index_to_docstore_id=index_to_doc_store_id,
    distance_strategy=DistanceStrategy.COSINE,
)

# NOTE(review): the whole vector store (index, documents and embeddings model)
# is pickled; only unpickle this file from a trusted location.
with open("data/vectorstore.pkl", "wb") as f:
    pickle.dump(vectorstore, f)

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


load the vectorstore as a retriever.

In [None]:
# NOTE: pickle.load can execute arbitrary code, so only load a file that this
# notebook produced itself.
with open("data/vectorstore.pkl", "rb") as f:
    vectorstore = pickle.load(f)

# load the retriever from the vectorstore
# Apply the configured retrieval depth (Config.K) instead of the library default.
retriever = vectorstore.as_retriever(search_kwargs={"k": Config.K})

## LLM

load the quantized LLM.

In [None]:
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from transformers import pipeline

from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

if use_cuda:
    # load the quantization config
    # 4-bit quantization is only attempted when CUDA is available.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        Config.LLM_MODEL_NAME,
        quantization_config=bnb_config,
        device_map="auto",
    )
    # The model is already placed via device_map, so the pipeline must not be
    # given an explicit device.
    pipeline_placement = {}
else:
    # CPU fallback: same unquantized load as before, pipeline pinned to CPU.
    bnb_config = None
    model = AutoModelForCausalLM.from_pretrained(Config.LLM_MODEL_NAME)
    pipeline_placement = {"device": -1}

tokenizer = AutoTokenizer.from_pretrained(Config.LLM_MODEL_NAME)

# init the pipeline
# Only max_new_tokens is set: max_length also counts the prompt tokens, so a
# 50-token cap cannot hold a retrieval-augmented prompt, and transformers warns
# when both limits are given.
READER_LLM = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=50,
    truncation=True,
    **pipeline_placement,
)

llm = HuggingFacePipeline(
    pipeline=READER_LLM,
)

Initialize the prompt template for the query chain. The query chain is used to derive a search query from the chat history. You may change the prompt as you like to get better results.

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import TransformChain, LLMChain
from langchain_core.output_parsers import StrOutputParser

class LoggerStrOutputParser(StrOutputParser):
    """String output parser that keeps the first line of the LLM output and prints it."""

    def parse(self, text: str) -> str:
        """Return the first line of ``text`` after stripping surrounding whitespace.

        The returned line is also printed with a ``QUERY: `` prefix.
        """
        # process the LLM output: everything from the first newline on is discarded
        first_line, _, _ = text.strip().partition("\n")
        print(f"QUERY: {first_line}")
        return first_line

# Prompt asking the model for a search query about the conversation in {messages}.
query_transform_prompt = PromptTemplate(
    template="""<|system|>You are a helpful assistant.
{messages}
<|user|>
give me the search query about the above conversation.
<|assistant|>""",
    input_variables=["messages"],
)

# init the query chain
query_transform_chain = LLMChain(
    prompt=query_transform_prompt,
    llm=llm,
    output_parser=LoggerStrOutputParser(),
)

# Function to transform the input and output keys
def transform_function(inputs):
    """Run the query chain and expose its text output under the ``query`` key.

    Args:
        inputs: mapping with a ``messages`` entry (the formatted chat history).

    Returns:
        ``{"query": <search query>}``. The TransformChain below declares
        ``query`` as its only output variable, so the LLMChain result (stored
        under the chain's own output key) has to be re-keyed here.
    """
    result = query_transform_chain.invoke(inputs)
    return {"query": result[query_transform_chain.output_key]}

# TransformChain to handle the conversion of chat history to search query
query_transforming_retriever_chain = TransformChain(
    transform=transform_function,
    input_variables=["messages"],
    output_variables=["query"]
)

Initialize the main retrieval chain, which passes the retrieved documents to the LLM and returns its output.

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain

from langchain_core.runnables import RunnablePassthrough

# Answer prompt: {context} receives the candidate movies, {messages} the chat history.
prompt = PromptTemplate(
    template="""<|system|>You are a helpful assistant.

Here are the movies you MUST choose from:

{context}
-----------------
{messages}
<|assistant|>""",
    input_variables=["context", "messages"],
)

# init the retriever chain
retrieval_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
    output_parser=LoggerStrOutputParser(),
)

Write a conversation helper class to make testing easier.

In [None]:
class Conversation:
    """Chat session that chains query rewriting, document retrieval and answering.

    Args:
        query_chain: chain exposing ``run({"messages": str})`` and returning
            the search query for the conversation so far.
        retrieval_chain: runnable exposing
            ``invoke({"context": documents, "messages": str})`` and returning
            the answer text.
        retriever: object exposing ``invoke(query)`` and returning the
            documents for a query. When omitted, the notebook-level
            ``retriever`` variable is used if one exists.
    """

    def __init__(self, query_chain, retrieval_chain, retriever=None):
        self.messages = []
        self.query_chain = query_chain
        self.retrieval_chain = retrieval_chain
        if retriever is None:
            # Backward compatible with two-argument construction: fall back to
            # the retriever defined at notebook level, if any.
            retriever = globals().get("retriever")
        self.retriever = retriever

    def add_assistant_message(self, message):
        """Append an assistant turn to the history."""
        self.messages.append(('assistant', message))

    def add_user_message(self, message):
        """Append a user turn to the history."""
        self.messages.append(('user', message))

    def get_messages(self):
        """Return the history as one string in the ``<|role|>`` tag format.

        The prompt templates in this notebook mark turns with ``<|system|>``,
        ``<|user|>`` and ``<|assistant|>`` tags, so each stored turn is
        rendered with the same tag style.
        """
        # concatenate the messages with the roles in the instruction format
        return "\n".join(f"<|{role}|>\n{msg}" for role, msg in self.messages)

    def chat(self, message):
        """Process one user message and return the assistant response.

        Raises:
            ValueError: if no retriever was supplied and none exists at
                notebook level.
        """
        if self.retriever is None:
            raise ValueError(
                "Conversation needs a retriever to fetch documents for the query."
            )

        self.add_user_message(message)
        messages = self.get_messages()

        # invoke the chain: condense the history into one search query
        query = self.query_chain.run({"messages": messages})

        # Fetch the documents here: the answering chain does no retrieval and
        # expects them, together with the history, as its prompt variables.
        documents = self.retriever.invoke(query)
        context = self.retrieval_chain.invoke(
            {"context": documents, "messages": messages}
        )

        # Formulate the response based on the retrieved context and the conversation
        response = f"Here are some suggestions based on your query:\n{context}"

        self.add_assistant_message(response)
        return response

## Test

talk with the RAG to see how good it performs.

In [None]:
# First turn: start a conversation and ask for a recommendation.
c = Conversation(
    query_chain=query_transforming_retriever_chain,
    retrieval_chain=retrieval_chain,
)
A = c.chat(message='give me a cool gangster movie')
print(A)

In [None]:
# Follow-up turn on the same conversation object, so the earlier messages are still in its history.
A = c.chat(message='give me a newer one')
print(A)