In [4]:
from kaggle_secrets import UserSecretsClient
from huggingface_hub import login

# Read the Hugging Face access token from Kaggle's secret store (saved there
# under the label "HF_TOKEN") so the token never appears in the notebook.
user_secrets = UserSecretsClient()
huggingface_token = user_secrets.get_secret("HF_TOKEN")

# Authenticate this session with the Hugging Face Hub using that token.
login(token=huggingface_token)

In [5]:
# Install the Hugging Face model stack. `%pip` (instead of `!pip`) installs
# into the environment of the running kernel. bitsandbytes is requested once,
# with -U, rather than being installed and then upgraded by a second command.
%pip install --quiet transformers accelerate
%pip install --quiet -U bitsandbytes

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m


In [14]:
# sentence-transformers supplies the embedding model and faiss-cpu the vector
# index. faiss-cpu and faiss-gpu both ship the same `faiss` module, so
# installing them together lets one overwrite the other. This notebook only
# builds a CPU index (IndexFlatL2), so only the CPU wheel is installed.
%pip install -q sentence-transformers faiss-cpu

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m39.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h

In [21]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

In [18]:
# Step 1: Define the knowledge base.
# Each string is one retrievable passage; the retriever returns these strings
# verbatim, selected by their position in this list.
documents = [
    "Albert Einstein was a German-born theoretical physicist who developed the theory of relativity.",
    "Einstein's equation E=mc² explains the relationship between energy and mass.",
    "He won the Nobel Prize in 1921 for his work on the photoelectric effect.",
    "Einstein was born on March 14, 1879, in Ulm, Germany.",
    "He emigrated to the U.S. in 1933 due to the rise of the Nazi regime."
]

# Embed every passage with the all-MiniLM-L6-v2 sentence-transformer.
embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
doc_embeddings = embedder.encode(documents)

# Step 2: Build FAISS Index
# The last axis of the embedding matrix is the vector size the index needs.
dimension = doc_embeddings.shape[-1]
index = faiss.IndexFlatL2(dimension)
# FAISS stores float32 vectors, hence the explicit dtype.
index.add(np.asarray(doc_embeddings, dtype=np.float32))

# Step 3: Define Retriever Function
def retrieve(query: str, k: int = 2) -> list:
    """Return the documents closest to ``query`` in embedding space.

    Args:
        query: Natural-language query to embed and look up in ``index``.
        k: Maximum number of documents to return.

    Returns:
        Up to ``k`` entries of ``documents``, in the order FAISS reports them
        (nearest first). Fewer entries are returned when the index holds
        fewer than ``k`` vectors, and an empty list when ``k`` is not
        positive or the index is empty.
    """
    # Never ask for more neighbours than the index actually contains.
    k = min(k, index.ntotal)
    if k <= 0:
        return []

    # Match the float32 dtype used when the document vectors were added.
    query_embedding = np.asarray(embedder.encode([query]), dtype=np.float32)
    _distances, indices = index.search(query_embedding, k)

    # FAISS pads missing results with -1. Indexing `documents` with -1 would
    # silently return the last document, so those slots are dropped.
    return [documents[i] for i in indices[0] if i >= 0]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [22]:
# Step 4: Load Generator Model (FLAN-T5)
# The tokenizer and the weights come from the same checkpoint, so name it once.
generator_checkpoint = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(generator_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(generator_checkpoint)

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [28]:
# Step 5: Generate Response with Context
def generate_answer(query: str, context: list) -> str:
    """Answer ``query`` with the generator model, grounded in ``context``.

    Args:
        query: The user's question.
        context: Retrieved passages (strings); they are joined with single
            spaces and placed ahead of the question in the prompt.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped.
    """
    context_str = " ".join(context)
    input_text = f"Answer based on: {context_str} Question: {query}"
    # NOTE(review): the prompt is capped at 512 tokens. If the tokenizer
    # truncates on the right (assumed default, confirm), an over-long context
    # pushes the trailing question out of the prompt.
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
    # Put the input tensors on the same device as the model, so generation
    # still works after the model has been moved to a GPU.
    inputs = inputs.to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=128)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [37]:
# Step 6: Test the RAG Pipeline
# End-to-end check: fetch the best-matching passage for the question, show it,
# then have the generator answer using only that passage as context.
query = "which place Einstein born"
contexts = retrieve(query, k=1)  # Retrieve top 1 relevant doc
print(f"retrieved context : {contexts}\n")
answer = generate_answer(query, contexts)
print(f"Question: {query}\nAnswer: {answer}")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

retrieved context : ['Einstein was born on March 14, 1879, in Ulm, Germany.']

Question: which place Einstein born
Answer: in Ulm, Germany
