# RAG with Llama 2 using FAISS

### Install dependencies

In [1]:
! pip install -q transformers einops accelerate langchain bitsandbytes pypdf

In [2]:
! pip install langchain



In [3]:
! pip install faiss-cpu



In [4]:
! pip install langchain[all]



In [5]:
! pip install transformers[torch]



In [6]:
from langchain import HuggingFacePipeline
import transformers
import torch
from torch import cuda
from transformers import AutoTokenizer

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pandas as pd
import faiss

ModuleNotFoundError: No module named 'transformers'

In [None]:
### Initializing Huggingface embedding pipeline

: 

In [None]:
! pip install sentence-transformers

: 

### Define Embedding

In [7]:
# Sentence-transformers checkpoint used for embeddings (original note: 384).
embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# Use the current CUDA device when one is available, otherwise the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'
print(f'Utilizing : {device}')

# The same device is handed to both the model constructor and the encode call.
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs=dict(device=device),
    encode_kwargs=dict(device=device, batch_size=32),
)

NameError: name 'cuda' is not defined

### Setting up Data Pipeline

In [None]:
loader = PyPDFLoader('./data/paper-2023.05.pdf')

documents = loader.load()
print(f'length of docs {len(documents)}')
# Sample preview; guarded so a PDF with fewer than 13 loaded documents does not raise IndexError.
if len(documents) > 12:
    print(documents[12])
## Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)
print(len(texts))
if len(texts) > 1:
    print('*********************************',texts[1].page_content)
if texts:
    print("%%%%%%%%%",texts[0].metadata['source'].split('-')[1])

# One row per chunk: [chunk text, running chunk index, doc id].
# The doc id is the second '-'-separated segment of the chunk's OWN `source`
# metadata, so rows stay correct if chunks ever come from more than one file.
rows = [
    [chunk.page_content, idx, chunk.metadata['source'].split('-')[1]]
    for idx, chunk in enumerate(texts)
]

print("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
if len(rows) > 1:
    print(rows[1])
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
df1 = pd.DataFrame(rows, columns=['chunk', 'chunkid', 'docid'])
print(df1)

: 

### Building Vector Indexes

In [None]:
# from huggingface_hub import login
# login()

: 

In [None]:
! pip install faiss-cpu #pip install pinecone-client 

: 

In [None]:
def build_index(data):
    """Embed ``data`` and build a flat L2 FAISS index over the vectors.

    Args:
        data: Input passed straight to ``generate_encodings`` (presumably a
            list of strings — confirm against the embedding model in use).

    Returns:
        The populated ``faiss.IndexFlatL2`` instance.

    Raises:
        ValueError: If the encodings do not form a non-empty 2-D array.
    """
    import numpy as np  # local import keeps this cell self-contained

    # The embedding call returns plain Python lists; FAISS needs a contiguous
    # float32 matrix, and `.shape` only exists once we have an ndarray.
    vectors = np.ascontiguousarray(generate_encodings(data), dtype='float32')
    if vectors.ndim != 2 or vectors.shape[0] == 0:
        raise ValueError("build_index needs at least one embedding vector")

    vector_dimension = vectors.shape[1]
    index = faiss.IndexFlatL2(vector_dimension)
    # normalize_L2 works in place on the float32 matrix before it is added.
    faiss.normalize_L2(vectors)
    index.add(vectors)
    return index

def save_index(index, f_path="faiss_index.bin"):
    """Write a FAISS index to disk.

    Args:
        index: The FAISS index object to persist.
        f_path: Destination file name. ``faiss.write_index`` requires one, so
            a default is provided to keep ``save_index(index)`` callable.
    """
    faiss.write_index(index, f_path)

def generate_encodings(text):
    """Return the embeddings of ``text`` from the notebook-level ``embed_model``.

    ``text`` is forwarded unchanged to ``embed_model.embed_documents`` and the
    result is returned as-is (presumably one vector per input string — confirm
    against the embedding class in use).
    """
    return embed_model.embed_documents(text)

def load_index(f_path):
    """Read a FAISS index from ``f_path`` and return it.

    Args:
        f_path: Path of a file previously written with ``faiss.write_index``.

    Returns:
        The loaded FAISS index object.
    """
    return faiss.read_index(f_path)
    

    

: 

In [None]:
from langchain.vectorstores import FAISS

: 

In [None]:
# Build an in-memory FAISS store from the split chunks, embedding them with embed_model.
db = FAISS.from_documents(texts, embed_model)

: 

In [None]:
def get_vectors(df1=None, batch_size=32, embedder=None):
    """Embed every chunk in ``df1`` and return ``(id, embedding, metadata)`` triples.

    Args:
        df1: DataFrame with ``chunk``, ``chunkid`` and ``docid`` columns.
            ``None`` or an empty frame yields an empty list.
        batch_size: Number of rows sent to ``embed_documents`` per call.
        embedder: Object exposing ``embed_documents(list_of_str)``. Defaults
            to the notebook-level ``embed_model``.

    Returns:
        A list with one tuple per row of ``df1`` (across ALL batches):
        ``("{docid}-{chunkid}", embedding, {'text': chunk})``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if df1 is None or len(df1) == 0:
        return []
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")
    if embedder is None:
        embedder = embed_model

    print(len(df1))
    vectors = []
    for start in range(0, len(df1), batch_size):
        # iloc slicing clamps at the end of the frame, so no explicit min() is needed.
        batch = df1.iloc[start:start + batch_size]
        texts = batch['chunk'].tolist()
        ids = [
            f"{docid}-{chunkid}"
            for docid, chunkid in zip(batch['docid'], batch['chunkid'])
        ]
        embeds = embedder.embed_documents(texts)
        metadata = [{'text': text} for text in texts]
        print(f"embeddings total {len(embeds)} eith a dimensionality of {len(embeds[0])}")
        # Accumulate instead of rebinding, so earlier batches are not lost.
        vectors.extend(zip(ids, embeds, metadata))

    return vectors

vec_raw = get_vectors(df1)

# vec_raw holds (id, embedding, {'text': chunk}) triples. FAISS.from_embeddings
# takes (text, embedding) pairs, so the precomputed vectors are reused rather
# than embedding every chunk a second time.
vector_store = FAISS.from_embeddings(
    text_embeddings=[(meta['text'], embed) for _, embed, meta in vec_raw],
    embedding=embed_model,
    metadatas=[meta for _, _, meta in vec_raw],
    ids=[vec_id for vec_id, _, _ in vec_raw],
)

##  persist
vector_store.save_local("faiss_rag_index")

: 

In [None]:
# Reload the index saved under "faiss_rag_index", passing embed_model as the embeddings object.
db = FAISS.load_local(folder_path="faiss_rag_index", embeddings=embed_model)

: 

In [None]:
query = 'Explain DiffTF'

# Store the hits under their own name so the PDF pages held in `documents`
# are not overwritten by search results.
retrieved_docs = db.similarity_search(query="What is DiffTF?", k=3)
print(retrieved_docs)

db.similarity_search(
    query,  # the search query
    k=3  # returns top 3 most relevant chunks of text
)

: 

In [None]:
batch_size = 32
# Reuse get_vectors instead of repeating its batching loop at top level: the
# inline copy rebound the notebook-level `texts` (the split Documents) and
# `ids` to per-batch lists as a side effect.
vectors = get_vectors(df1, batch_size=batch_size)

: 

In [None]:
### Building the LLM pipeline

: 

In [None]:
# Authenticate this session with the Hugging Face Hub.
# NOTE(review): login() is called without a token, so this presumably prompts
# interactively and will block an unattended "Run All" — confirm.
from huggingface_hub import login
login()

: 

In [None]:
from torch import cuda, bfloat16
import transformers

model_id = 'meta-llama/Llama-2-7b-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

model_config = transformers.AutoConfig.from_pretrained(model_id)

# trust_remote_code is deliberately not set: it allows code shipped in the
# model repository to run locally, and this checkpoint is loaded through the
# stock AutoModelForCausalLM class.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    device_map='auto',
)
model.eval()
# With device_map='auto' the placement is decided at load time, so report the
# resulting device map when available instead of the locally computed `device`.
print(f"Model loaded on {getattr(model, 'hf_device_map', device)}")

: 

In [None]:
# Load the tokenizer for the checkpoint named by `model_id`.
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)

: 

In [None]:
generate_text = transformers.pipeline(
    task='text-generation',
    model=model, tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    # Greedy decoding. No temperature is passed: it only applies when sampling,
    # and temperature=0.0 is flagged as invalid by transformers' generation
    # config validation in recent releases.
    do_sample=False,
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

: 

In [None]:
### Initialize the RAG pipeline

: 

In [None]:
from langchain.llms import HuggingFacePipeline

# Wrap the transformers text-generation pipeline in LangChain's HuggingFacePipeline LLM class.
llm = HuggingFacePipeline(pipeline=generate_text)

: 

In [None]:
from langchain.vectorstores import Pinecone

text_field = 'text'  # metadata key holding the chunk text; only the Pinecone variant below uses it

# load_local expects the embeddings object itself, and its third positional
# parameter is the index file name — passing `text_field` there makes it look
# for a "text" index instead of the default one written by save_local.
vectorstore = FAISS.load_local("faiss_rag_index", embed_model)
# vectorstore = Pinecone(
#     index, embed_model.embed_query, text_field
# )

: 

In [None]:
from langchain.chains import RetrievalQA

# RetrievalQA chain (chain_type='stuff') that answers with `llm`, using the
# vector store's default retriever to fetch context.
rag_pipeline = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type='stuff',
    llm=llm,
)

: 

In [None]:
query = 'Explain DiffTF'

# Fetch the top 3 most relevant chunks of text for the query.
vectorstore.similarity_search(query, k=3)

: 

In [None]:
# Ask the wrapped LLM directly; this call does not go through the retriever.
llm('Explain DiffTF')

: 

In [None]:
# Ask the same question through the RetrievalQA chain defined earlier in the notebook.
rag_pipeline('Explain DiffTF')

: 

: 