<a href="https://colab.research.google.com/github/kanawanttotimetravel/MultiHop-RAG/blob/main/MultiHopRAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Delete the /content/sample_data directory (presumably Colab's bundled example
# datasets — nothing in the code visible here reads from it).
!rm -rf /content/sample_data

In [None]:
from pathlib import Path

# Read and split the document into passages
def load_corpus(path, delimiter='<endofpassage>'):
    """Read a corpus file and split it into non-empty passages.

    Args:
        path: Path of the UTF-8 text file to read.
        delimiter: Marker string that separates passages in the file.
            Defaults to ``'<endofpassage>'``. Must be non-empty
            (``str.split`` raises ``ValueError`` for an empty separator).

    Returns:
        A list of passage strings with surrounding whitespace removed.
        Chunks that are empty after stripping are dropped.
    """
    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()
    # Strip each chunk once, then discard the ones that were only whitespace.
    stripped = (chunk.strip() for chunk in text.split(delimiter))
    return [chunk for chunk in stripped if chunk]

# Load the retrieval corpus once at module level; retrieve_topk indexes into
# this list to turn similarity ranks back into passage text.
passages = load_corpus("/content/multihoprag_corpus.txt")

from sentence_transformers import SentenceTransformer
import numpy as np

# Sentence-embedding model shared by corpus indexing (next line) and by query
# encoding inside retrieve_topk.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
# Embed every passage once up front so that each query only needs to encode
# itself before being compared against the corpus.
corpus_embeddings = embedder.encode(passages, convert_to_tensor=True)

import torch
import heapq

def retrieve_topk(query, k=2):
    """Return the passages most similar to ``query``.

    The query is embedded with the module-level ``embedder`` and compared to
    the precomputed ``corpus_embeddings`` by cosine similarity.

    Args:
        query: Text to search the corpus for.
        k: Maximum number of passages to return. It is capped at the number
            of scored passages because ``torch.topk`` raises when asked for
            more elements than the tensor holds.

    Returns:
        A list of at most ``k`` passage strings, most similar first.
    """
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    # The query vector broadcasts against every corpus row; dim=1 reduces over
    # the embedding axis, leaving one score per passage.
    cosine_scores = torch.nn.functional.cosine_similarity(
        query_embedding, corpus_embeddings, dim=1
    )
    k = min(k, cosine_scores.shape[-1])
    top_k_indices = torch.topk(cosine_scores, k).indices
    # Convert to plain ints so list indexing does not rely on tensor __index__.
    return [passages[i] for i in top_k_indices.tolist()]

# # Example: two-hop retrieval
# def multi_hop_retrieve(query):
#     first_hop = retrieve_topk(query, k=1)[0]
#     second_hop = retrieve_topk(first_hop, k=1)[0]
#     return [first_hop, second_hop]

from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face model id used for both the tokenizer and the causal LM below.
model_name = "Qwen/Qwen3-1.7B"
# NOTE(review): trust_remote_code=True permits code shipped in the model repo
# to run locally — confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Load the weights as float16 and let device_map="auto" decide placement;
# .eval() switches the module to evaluation mode since it is only used for
# generation here.
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             trust_remote_code=True,
                                             torch_dtype=torch.float16,
                                             device_map="auto").eval()


In [None]:
def generate_answer(query):
    """Answer ``query`` with the language model, using retrieved passages as context.

    Passages returned by ``retrieve_topk`` are placed in a
    ``Context / Question / Answer:`` prompt, up to 100 new tokens are
    generated, and the first line of the model's continuation is returned.

    Args:
        query: The question to answer.

    Returns:
        The first line of the generated continuation, stripped of surrounding
        whitespace (an empty string if the model produced nothing).
    """
    context = retrieve_topk(query)
    # Join whatever was retrieved instead of indexing [0] and [1], so a corpus
    # with fewer than two passages does not raise IndexError.
    context_text = "\n".join(context)
    prompt = f"Context:\n{context_text}\n\nQuestion: {query}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=100)
    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Decode only the continuation: splitting the full text on
    # "Answer:" and taking the last piece returns the wrong line whenever the
    # model goes on to invent another "Question: ... Answer: ..." pair.
    prompt_length = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    first_line = completion.strip().split('\n')[0]
    return first_line.strip()

In [None]:
import json
import random
from pathlib import Path
from sklearn.metrics import accuracy_score

# --- Load Evaluation Queries ---
def load_eval_data(path, limit=None):
    """Load evaluation examples from a JSON file.

    Args:
        path: Path of a UTF-8 encoded JSON file. The top-level value is
            sliced when ``limit`` is given, so it is expected to be a list.
        limit: Number of leading examples to keep. ``None`` (the default)
            returns everything; ``0`` returns an empty list. The selection is
            a deterministic prefix, not a random sample.

    Returns:
        The parsed JSON data, truncated to the first ``limit`` entries when
        ``limit`` is not ``None``.
    """
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # Compare against None explicitly: a plain truthiness test would treat
    # limit=0 as "no limit" and return the whole dataset.
    if limit is None:
        return data
    return data[:limit]

# --- Evaluate MultiHop RAG ---
def evaluate_rag(model_fn, eval_data):
    """Score ``model_fn`` on ``eval_data`` by case-insensitive exact match.

    Each example's ``"query"`` is passed to ``model_fn``; the stripped result
    is compared with the stripped ``"answer"``, ignoring case. Every example
    and the final accuracy are printed.

    Args:
        model_fn: Callable taking a query string and returning an answer string.
        eval_data: Iterable of dicts with ``"query"`` and ``"answer"`` keys.

    Returns:
        The fraction of examples answered correctly, or ``0.0`` when
        ``eval_data`` is empty.
    """
    predictions = []
    targets = []

    for example in eval_data:
        query = example["query"]
        expected = example["answer"].strip()
        generated = model_fn(query).strip()

        predictions.append(generated)
        targets.append(expected)

        print(f"\nQ: {query}\nExpected: {expected}\nPredicted: {generated}\n{'-'*50}")

    # Simple accuracy (exact match, case-insensitive)
    correct = sum(p.lower() == t.lower() for p, t in zip(predictions, targets))
    # Count what was actually processed so any iterable works, and guard the
    # division so an empty evaluation set yields 0.0 instead of raising.
    total = len(predictions)
    accuracy = correct / total if total else 0.0
    print(f"\n Evaluation Accuracy: {accuracy*100:.2f}% ({correct}/{total})")

    return accuracy

In [None]:
# Run the exact-match evaluation of generate_answer on the first 10 examples
# of the dataset file.
eval_data = load_eval_data("/content/MultiHopRAG.json", limit=10)  # change limit as desired
evaluate_rag(generate_answer, eval_data)