<a href="https://colab.research.google.com/github/srinivas9110/Clock/blob/main/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
   !pip install -q transformers datasets faiss-cpu

In [None]:
# Toy knowledge base: one short passage that is embedded, indexed and later
# pasted into the generator prompt as retrieval context.
sample_text = """Albert Einstein was a theoretical physicist who developed the theory of relativity,
one of the two pillars of modern physics (alongside quantum mechanics). His work is also
known for its influence on the philosophy of science. He is best known to the general public
for his mass-energy equivalence formula E = mc2."""

In [None]:
from transformers import AutoTokenizer, AutoModel
import torch                                                                    #python lib ---->takes i/p, understands it,  generates o/p
import numpy as np

In [None]:
# Checkpoint identifier passed to both from_pretrained calls below, so the
# tokenizer and the encoder come from the same pretrained model.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
# Tokenizer used by get_embedding to turn raw text into model inputs.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Encoder whose last_hidden_state is pooled by get_embedding.
model = AutoModel.from_pretrained(model_name)

def get_embedding(text):
  """Embed text with the module-level tokenizer/model using masked mean pooling.

  Args:
    text: A string (or list of strings) accepted by the tokenizer.

  Returns:
    A NumPy array: the token-averaged last hidden state with size-1
    dimensions squeezed out (a 1-D vector for a single input string).

  Padding positions are excluded from the average via the attention mask, so
  padded batches are not skewed by pad tokens. For an unpadded input the
  result equals the plain mean over tokens.
  """
  tokens = tokenizer(text, return_tensors = 'pt', truncation = True, padding = True)
  with torch.no_grad():                                                         # inference only, no gradient tracking
    output = model(**tokens)

  hidden = output.last_hidden_state
  # Broadcast the attention mask over the hidden dimension: 1 for real tokens, 0 for padding.
  mask = tokens['attention_mask'].unsqueeze(-1).to(hidden.dtype)
  summed = (hidden * mask).sum(dim=1)
  token_counts = mask.sum(dim=1).clamp(min=1e-9)                                # guard against division by zero
  return (summed / token_counts).squeeze().numpy()                              # sentence-level embedding -> drop size-1 dims -> NumPy

In [None]:
import faiss

# Document chunks to index; here the whole sample passage is a single chunk
chunks = [sample_text]

# One embedding vector per chunk, in the same order as `chunks`
embeddings = list(map(get_embedding, chunks))

# Build an exact L2-distance FAISS index sized to the embedding length
# and load all chunk vectors into it
dim = embeddings[0].shape[0]
index = faiss.IndexFlatL2(dim)
index.add(np.array(embeddings))

In [None]:
from transformers import pipeline

#Load a generator model (keep it small for Colab)
# qa_pipeline is called by retreive_and_answer with the assembled prompt; it is
# created once at cell level rather than inside that function.
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-small")

def retreive_and_answer(query, top_k=1):
  """Retrieve the chunks nearest to `query` and generate an answer from them.

  Args:
    query: The question, as a string.
    top_k: Number of nearest chunks to request from the index.

  Returns:
    The 'generated_text' string of the first result returned by qa_pipeline.

  Uses the module-level get_embedding, index, chunks and qa_pipeline.
  """
  query_embedding = get_embedding(query).reshape(1,-1)   # the index is searched with a (1, dim) query matrix
  _, indices = index.search(query_embedding, top_k)      # indices of the most relevant chunks
  # FAISS fills missing neighbours with -1 when top_k exceeds the number of
  # indexed vectors; skip them so chunks[-1] is not silently pulled in.
  retreived_texts = [chunks[i] for i in indices[0] if i >= 0]

  context = " ".join(retreived_texts)                    # combine the retrieved chunks into one text block
  prompt = f"Context: {context} \n\nQuestion: {query}\nAnswer:"

  # max_new_tokens (not max_length) so the limit does not clash with the
  # generation config default; do_sample=False keeps decoding deterministic.
  result = qa_pipeline(prompt, max_new_tokens=100, do_sample=False)
  return result[0]['generated_text']

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cpu


In [None]:
# Run one question through the retrieve-then-generate function and print
# the question/answer pair, one labelled line each.
question = "What is Einstein famous for?"
answer = retreive_and_answer(question)
for label, text in (("Q:", question), ("A:", answer)):
  print(label, text)

Both `max_new_tokens` (=256) and `max_length`(=100) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Q: What is Einstein famous for?
A: his mass-energy equivalence formula E = mc2
