In [5]:
import sys
import os

# Set the Hugging Face Hub flag that disables its symlink warning. This is done
# before the rag_pipeline imports so the flag is already in the environment
# when those modules load.
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

# Make the parent of the current working directory importable; the notebook
# imports `rag_pipeline` right after this setup.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(parent_dir)

# Re-running this cell appends the same directory again, so collapse duplicate
# entries. dict keys keep insertion order, which means the first occurrence of
# every entry survives and the relative search order is unchanged.
sys.path = list(dict.fromkeys(sys.path))

from rag_pipeline import ingestion, embedding, retrieval, generator, evaluation

In [12]:
# End-to-end evaluation run: load the QA pairs, index the sample documents,
# load the generator, then score the generator with and without retrieval.

# 1. Load QA dataset
questions, answers = evaluation.load_qa_dataset("../data/qa_dataset.csv")

# 2. Load sample docs & build vectorstore
# Build the path with os.path.join instead of a backslash literal: "\d" and
# "\s" are invalid escape sequences (a warning today, an error in future
# Python versions), and backslash separators only resolve on Windows.
doc_path = os.path.join("..", "data", "sample_docs")
chunks = ingestion.load_and_chunk(doc_path)

print(f"Number of chunks: {len(chunks)}")
# Fail fast: an empty chunk list would otherwise surface later as a confusing
# error while building the index.
if len(chunks) == 0:
    raise ValueError("No document chunks found! Check your document loading/splitting.")

emb = embedding.HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = retrieval.build_faiss_index(chunks, emb)

# 3. Load LLM
llm = generator.load_generator()

# Optional smoke test -- uncomment to inspect a single generated answer
# before running the full evaluation:
# sample_query = "What is 5G?"
# sample_context = vectorstore.similarity_search(sample_query, k=3)
# answer = generator.generate_answer(llm, sample_query, sample_context)
# print("Sample generated answer:", answer)

# 4. Evaluate baseline (LLM only): no vectorstore is passed.
baseline_scores = evaluation.evaluate_model(questions, answers, llm)

# 5. Evaluate RAG: same questions, answers and LLM, plus the vectorstore.
rag_scores = evaluation.evaluate_model(questions, answers, llm, vectorstore)

Number of chunks: 92


Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [14]:
# Show the first entry of each score collection so the baseline and RAG
# results for the same example can be compared side by side.
# NOTE(review): in the recorded output the RAG entry has much lower ROUGE and
# BERTScore precision than the baseline but higher BERTScore recall -- worth
# investigating (possibly overly long generated answers; unconfirmed).
first_baseline_score = baseline_scores[0]
print("Baseline Example Score:", first_baseline_score)

first_rag_score = rag_scores[0]
print("RAG Example Score:", first_rag_score)

Baseline Example Score: {'rouge': {'rouge1': np.float64(0.3333333333333333), 'rouge2': np.float64(0.0), 'rougeL': np.float64(0.16666666666666666), 'rougeLsum': np.float64(0.16666666666666666)}, 'bertscore': {'precision': [0.8721495866775513], 'recall': [0.884736180305481], 'f1': [0.8783978223800659], 'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.53.1)'}}
RAG Example Score: {'rouge': {'rouge1': np.float64(0.0326530612244898), 'rouge2': np.float64(0.020491803278688523), 'rougeL': np.float64(0.0326530612244898), 'rougeLsum': np.float64(0.0326530612244898)}, 'bertscore': {'precision': [0.7572565078735352], 'recall': [0.9105463624000549], 'f1': [0.8268569111824036], 'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.53.1)'}}
