<a href="https://colab.research.google.com/github/lonespear/upskill/blob/main/Copy_of_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install faiss-cpu transformers datasets sentence-transformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m54.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0


In [None]:
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import faiss
import numpy as np
import requests

In [None]:
# 1. Download text file
url = "https://gist.githubusercontent.com/flackend/18014f35d32b37c595b138f666b2108f/raw/99494b71652af807e77560b1d83ebbc5ed4c2f32/sorcerers-stone.txt"
# Fail fast: the timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops an HTTP error page from being chunked and indexed as
# if it were the book text.
response = requests.get(url, timeout=30)
response.raise_for_status()

# 2. Clean text
# Newlines become spaces so the text is a single line before chunking.
book_text = response.text.replace("\n", " ").strip()

# 3. Chunk function
def chunk_text(text, chunk_size=500, overlap=50):
    """Split `text` into overlapping chunks of whitespace-separated words.

    Consecutive chunks start `chunk_size - overlap` words apart, so each chunk
    repeats the last `overlap` words of the previous one.

    Args:
        text: The text to split. Words are whatever `str.split()` yields.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared between neighbouring chunks; must
            satisfy `0 <= overlap < chunk_size`.

    Returns:
        A list of chunk strings with words joined by single spaces. Empty
        (or whitespace-only) text yields an empty list.

    Raises:
        ValueError: If `chunk_size` or `overlap` is out of range. Without this
            check a zero step makes `range()` fail and a negative step
            silently produces no chunks at all.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, got overlap={overlap}, "
            f"chunk_size={chunk_size}"
        )

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + chunk_size]))
        # Once a window reaches the end of the text, any further window would
        # be fully contained in this one; stop instead of emitting a redundant
        # tail fragment.
        if start + chunk_size >= len(words):
            break
    return chunks

# 4. Split into chunks
# `chunks` is the corpus for the rest of the notebook: it is embedded and indexed in
# the next cell, and the retrieval functions look passages up in it by position.
chunks = chunk_text(book_text)
print(f"✅ Created {len(chunks)} chunks. Example:\n")
# Preview only the first 500 characters of the first chunk to keep the output short.
print(chunks[0][:500], "...")

✅ Created 176 chunks. Example:

THE BOY WHO LIVED Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people you’d expect to be involved in anything strange or mysterious, because they just didn’t hold with such nonsense. Mr. Dursley was the director of a firm called Grunnings, which made drills. He was a big, beefy man with hardly any neck, although he did have a very large mustache. Mrs. Dursley was thin and blonde and had nearly twice ...


In [None]:
# Load embedding model
# The SentenceTransformer checkpoint is fetched by name on first use (see the download
# progress bars in this cell's output).
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Encode chunks
# One embedding row per chunk, requested as a NumPy array so it can be passed to FAISS.
embeddings = embedder.encode(chunks, convert_to_numpy=True)

# Build FAISS index
# The vector width is read from the embedding matrix rather than hard-coded, so the
# index stays consistent if the embedding model is swapped.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Vectors are added in chunk order, so a position returned by index.search() is used
# directly as a position in `chunks` by the retrieval functions.
print(f"✅ Indexed {index.ntotal} chunks")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Indexed 176 chunks


In [None]:
# Extractive reader: a question-answering pipeline that is given a question plus a
# context passage. Loaded once at cell level and reused by rag_answer.
qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")

def rag_answer(question, top_k=3):
    """Answer `question` extractively from the most similar indexed chunks.

    The question is embedded with `embedder`, the `top_k` nearest chunks are
    looked up in `index`, and `qa_model` is run once per retrieved chunk.

    Args:
        question: Natural-language question.
        top_k: Number of chunks to retrieve; must be at least 1.

    Returns:
        A `(best, answers)` tuple. `answers` holds one
        `(answer, score, preview)` tuple per retrieved chunk, in retrieval
        order, where `preview` is the first 150 characters of the chunk
        followed by "...". `best` is the entry of `answers` with the highest
        score.

    Raises:
        ValueError: If `top_k` is smaller than 1, or the index returned no
            usable chunk (for example because it is empty).
    """
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}")

    # Embed question
    q_emb = embedder.encode([question], convert_to_numpy=True)

    # Search top-k chunks
    _, neighbor_ids = index.search(q_emb, top_k)
    # FAISS pads the result with -1 when it has fewer than top_k vectors.
    # Indexing `chunks` with -1 would silently return the *last* chunk, so
    # those placeholders are dropped.
    retrieved = [chunks[i] for i in neighbor_ids[0] if i >= 0]
    if not retrieved:
        raise ValueError("No passages were retrieved; is the index empty?")

    # Run QA over each retrieved chunk
    answers = []
    for passage in retrieved:
        result = qa_model(question=question, context=passage)
        answers.append((result["answer"], result["score"], passage[:150]+"..."))

    # Pick best answer
    best = max(answers, key=lambda x: x[1])
    return best, answers

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/496M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

Fetching 0 files: 0it [00:00, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 0 files: 0it [00:00, ?it/s]

Device set to use cpu


In [None]:
# Demo of the extractive pipeline: retrieve the 10 nearest chunks and let the QA model
# propose one answer per chunk.
q1 = "Who gives Harry his first broom?"
best, candidates = rag_answer(q1, top_k = 10)

# `best` is the (answer, score, preview) tuple with the highest score; the loop below
# lists every candidate in retrieval order so the scores can be compared.
print("Best Answer:", best[0])
print("\nTop candidates:")
for ans in candidates:
    print(f"- {ans[0]} (score={ans[1]:.2f}) from: {ans[2]}")

Best Answer: Oliver Wood

Top candidates:
- Professor McGonagall (score=0.16) from: a broomstick, Potter?” Harry nodded silently. He didn’t have a clue what was going on, but he didn’t seem to be being expelled, and some of the feelin...
- Malfoy (score=0.17) from: knocking his bacon to the floor. They had hardly fluttered out of the way when another owl dropped a letter on top of the parcel. Harry ripped open th...
- Madam Hooch (score=0.13) from: yellow eyes like a hawk. “Well, what are you all waiting for?” she barked. “Everyone stand by a broomstick. Come on, hurry up.” " Harry glanced down a...
- father (score=0.04) from: a footstool while a second witch pinned up his long black robes. Madam Malkin stood Harry on a stool next to him, slipped a long robe over his head, a...
- Oliver Wood (score=0.32) from: air. Harry had a lot of trouble keeping his mind on his lessons that day. It kept wandering up to the dormitory where his new broomstick was lying und...
- Hagrid (score=0.02) fr

In [None]:
# Generative reader: a text2text-generation pipeline for the google/flan-t5-large
# checkpoint. Loaded once at cell level and reused by rag_generate.
gen_model = pipeline("text2text-generation", model="google/flan-t5-large")

def rag_generate(question, top_k=2, max_context_len=400):
    """Generate an answer to `question` from the most similar indexed chunks.

    The question is embedded with `embedder`, the `top_k` nearest chunks are
    looked up in `index`, their text is joined and trimmed to a token budget,
    and `gen_model` is prompted with the passages and the question.

    Args:
        question: Natural-language question.
        top_k: Number of chunks to retrieve; must be at least 1.
        max_context_len: Maximum number of tokens (as counted by
            `gen_model.tokenizer`) of retrieved text placed in the prompt;
            must be at least 1. The instructions and the question are added
            on top of this budget.

    Returns:
        The generated answer string.

    Raises:
        ValueError: If `top_k` or `max_context_len` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}")
    if max_context_len < 1:
        raise ValueError(f"max_context_len must be at least 1, got {max_context_len}")

    # Step 1: embed and retrieve
    q_emb = embedder.encode([question], convert_to_numpy=True)
    _, neighbor_ids = index.search(q_emb, top_k)
    # FAISS pads with -1 when it has fewer than top_k vectors; chunks[-1]
    # would silently pull in the last chunk, so skip those placeholders.
    retrieved = [chunks[i] for i in neighbor_ids[0] if i >= 0]

    # Step 2: truncate context
    context = " ".join(retrieved)
    # Trim by model tokens rather than by characters, so the prompt size is
    # bounded in the unit the model's length limit is expressed in. A fixed
    # character cut does not guarantee that.
    # NOTE(review): passages are kept in retrieval order and cut from the end,
    # so when one chunk alone exceeds the budget (likely with chunk_text's
    # default of 500 words) the lower-ranked passages contribute nothing.
    tokenizer = gen_model.tokenizer
    context_ids = tokenizer.encode(
        context,
        add_special_tokens=False,
        truncation=True,
        max_length=max_context_len,
    )
    context = tokenizer.decode(context_ids, skip_special_tokens=True)

    # Step 3: build prompt
    prompt = f"""Answer the following question using only the passages below.
    Be concise and give a direct answer.
    If the answer cannot be found in the passages, reply 'Not found'.

    Passages:
    {context}

    Question: {question}
    Answer:"""

    # Step 4: generate
    # do_sample=False keeps decoding deterministic for a given prompt.
    answer = gen_model(prompt, max_new_tokens=64, do_sample=False)[0]["generated_text"]
    return answer

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

In [None]:
# Same question as the extractive demo, this time answered by the generative pipeline
# with its default top_k=2.
rag_generate("Who gives Harry his first broom?")

'Professor McGonagall'

In [None]:
# Broader question, so more passages are requested (top_k=8).
rag_generate("Who is Harry's best friend?", top_k = 8)

Token indices sequence length is longer than the specified maximum sequence length for this model (526 > 512). Running this sequence through the model will result in indexing errors


'Fred and George Weasley'