<a href="https://colab.research.google.com/github/kanawanttotimetravel/MultiHop-RAG/blob/main/MultiHopRAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Recursively delete /content/sample_data; -f makes this a no-op (no error) if it is already gone.
!rm -rf /content/sample_data

In [None]:
from pathlib import Path

# Read and split the document into passages
def load_corpus(path):
    """Read a corpus file and return its non-empty passages.

    The file is read as UTF-8 text and cut at every ``<endofpassage>``
    marker. Each piece is whitespace-stripped; pieces that are empty after
    stripping are dropped.

    Args:
        path: Location of the corpus text file.

    Returns:
        list[str]: The stripped passages, in file order.
    """
    with open(path, 'r', encoding='utf-8') as corpus_file:
        raw_text = corpus_file.read()

    kept = []
    for chunk in raw_text.split('<endofpassage>'):
        cleaned = chunk.strip()
        if cleaned:
            kept.append(cleaned)
    return kept

# Module-level passage list read by retrieve_topk. The path is hard-coded, so the
# corpus file must already exist at /content/multihoprag_corpus.txt when this cell runs.
passages = load_corpus("/content/multihoprag_corpus.txt")

from sentence_transformers import SentenceTransformer
import numpy as np

# Sentence-embedding model shared by the corpus (below) and by every query in retrieve_topk.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
# Embed all passages once, as a tensor, so retrieval only has to encode the query.
corpus_embeddings = embedder.encode(passages, convert_to_tensor=True)

import torch
import heapq

def retrieve_topk(query, k=2):
    """Return the passages most similar to ``query``, best match first.

    Relies on the module-level ``embedder``, ``corpus_embeddings`` and
    ``passages`` objects.

    Args:
        query: Text to search for.
        k: Maximum number of passages to return. It is clamped to the number
            of available passages, so asking for more than the corpus holds
            returns the whole corpus instead of raising.

    Returns:
        list[str]: Up to ``k`` passages ordered by descending cosine
        similarity. Empty when the corpus is empty or ``k`` is not positive.
    """
    # torch.topk raises if k exceeds the number of scores, so clamp first.
    limit = min(k, len(passages))
    if limit <= 0:
        return []

    query_embedding = embedder.encode(query, convert_to_tensor=True)
    # The query vector is broadcast against every corpus row; similarity is
    # taken along dim=1 (the default), giving one score per passage.
    cosine_scores = torch.nn.functional.cosine_similarity(query_embedding, corpus_embeddings)
    # .tolist() yields plain Python ints, which index the passage list directly.
    top_k_indices = torch.topk(cosine_scores, limit).indices.tolist()
    return [passages[i] for i in top_k_indices]

# # Example (disabled): two-hop retrieval -- the top hit for the query is reused as the query for the second hop
# def multi_hop_retrieve(query):
#     first_hop = retrieve_topk(query, k=1)[0]
#     second_hop = retrieve_topk(first_hop, k=1)[0]
#     return [first_hop, second_hop]

from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub id of the causal language model used by generate_answer.
model_name = "Qwen/Qwen3-1.7B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Weights are loaded in float16; device_map="auto" leaves device placement to the
# library, and .eval() puts the module in evaluation mode.
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             trust_remote_code=True,
                                             torch_dtype=torch.float16,
                                             device_map="auto").eval()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/9.73k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.6k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/622M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [None]:
def generate_answer(query):
    """Answer ``query`` with the language model, grounded on retrieved passages.

    Retrieves context with ``retrieve_topk``, builds a
    ``Context / Question / Answer:`` prompt, generates up to 100 new tokens
    with the module-level ``model`` and ``tokenizer``, and returns the first
    line of the completion.

    Args:
        query: The question to answer.

    Returns:
        str: The first line of the generated completion, whitespace-stripped.
        Empty if the model produced only whitespace.
    """
    context = retrieve_topk(query)
    # Join whatever was retrieved rather than indexing [0] and [1]: the prompt
    # is identical for two passages, but fewer passages no longer raise IndexError.
    context_block = "\n".join(context)
    prompt = f"Context:\n{context_block}\n\nQuestion: {query}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=100)

    # The returned sequence starts with the prompt tokens. Decode only what
    # follows them, so a later "Answer:" emitted by the model (for example when
    # it invents a follow-up question) cannot be mistaken for the answer.
    prompt_length = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Keep only the first line of the completion.
    answer = completion.strip().split('\n')[0]
    return answer

In [None]:
import json
import random
from pathlib import Path
from sklearn.metrics import accuracy_score

# --- Load Evaluation Queries ---
def load_eval_data(path, limit=None):
    """Load evaluation examples from a JSON file.

    Args:
        path: Location of a UTF-8 encoded JSON file. Its top-level value must
            support slicing (e.g. a JSON array) whenever ``limit`` is given.
        limit: Number of leading examples to keep. ``None`` (the default)
            keeps everything; ``0`` yields an empty selection. The value is
            used directly as a slice bound.

    Returns:
        The parsed JSON value, cut to its first ``limit`` items when a limit
        is given.
    """
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Test against None explicitly: a truthiness check would treat limit=0 as
    # "no limit" and hand back the whole dataset.
    if limit is not None:
        # NOTE: this keeps the head of the file. For a random subset instead,
        # use random.sample(data, min(limit, len(data))).
        return data[:limit]

    return data

# --- Evaluate MultiHop RAG ---
def evaluate_rag(model_fn, eval_data):
    """Run ``model_fn`` over the evaluation examples and report exact-match accuracy.

    Every example is printed with its expected and predicted answer, followed
    by a summary line.

    Args:
        model_fn: Callable that takes a query string and returns the predicted
            answer string.
        eval_data: Iterable of mappings with a ``"query"`` and an ``"answer"``
            key. Any iterable works, including generators.

    Returns:
        float: Fraction of examples whose prediction equals the expected
        answer after stripping whitespace and lower-casing. ``0.0`` when there
        are no examples.
    """
    predictions = []
    targets = []

    for example in eval_data:
        query = example["query"]
        expected = example["answer"].strip()
        generated = model_fn(query).strip()

        predictions.append(generated)
        targets.append(expected)

        print(f"\nQ: {query}\nExpected: {expected}\nPredicted: {generated}\n{'-'*50}")

    # Simple accuracy (case-insensitive exact match)
    correct = sum(p.lower() == t.lower() for p, t in zip(predictions, targets))
    # Count what was actually evaluated: works for iterables without len().
    total = len(targets)
    # Guard the division so an empty evaluation set reports 0% instead of raising.
    accuracy = correct / total if total else 0.0
    print(f"\n Evaluation Accuracy: {accuracy*100:.2f}% ({correct}/{total})")

    return accuracy

In [None]:
# Evaluate on a small slice only: load_eval_data keeps the first `limit` examples of the file.
eval_data = load_eval_data("/content/MultiHopRAG.json", limit=10)  # change limit as desired
# Last expression of the cell, so the returned accuracy is also shown as the cell's output.
evaluate_rag(generate_answer, eval_data)