In [1]:
from src.config import LocalConfig, ColabConfig, is_colab, OllamaConfig

import os

# Switch between a remote Ollama server (True) and the in-process
# Colab/local configuration (False).
USE_OLLAMA = True

# Embedding model name shared by every configuration branch; defined once so
# the branches cannot drift apart.
EMBEDDING_MODEL = "BAAI/bge-base-en"

if USE_OLLAMA:
    # The defaults are the values this notebook previously hard-coded. They can
    # be overridden per machine through RAG_OLLAMA_HOST / RAG_OLLAMA_PORT.
    # (Ollama's own OLLAMA_HOST variable is deliberately not reused: it may
    # carry a "host:port" value, which would corrupt the URL built below.)
    OLLAMA_HOST = os.environ.get("RAG_OLLAMA_HOST", "172.19.176.1")
    OLLAMA_PORT = int(os.environ.get("RAG_OLLAMA_PORT", "11434"))
    OLLAMA_URL = f"http://{OLLAMA_HOST}:{OLLAMA_PORT}/api/chat"
    config = OllamaConfig(embedding_model=EMBEDDING_MODEL, ollama_url=OLLAMA_URL)
elif is_colab():
    config = ColabConfig(embedding_model=EMBEDDING_MODEL)
else:
    config = LocalConfig(embedding_model=EMBEDDING_MODEL)

print("Using configuration:", type(config).__name__)
print("Base directory:", config.base_dir)

# The recorded output shows this call reporting each cache/data directory as
# "ensured"; it runs before any later cell touches those paths.
config.ensure_dirs()

Using configuration: OllamaConfig
Base directory: /mnt/c/dev/ml/rag-qa
✅ Ensured directory exists: /mnt/c/dev/ml/rag-qa/.hf_cache
✅ Ensured directory exists: /mnt/c/dev/ml/rag-qa/data
✅ Ensured directory exists: /mnt/c/dev/ml/rag-qa/data/train
✅ Ensured directory exists: /mnt/c/dev/ml/rag-qa/data/validation
✅ Ensured directory exists: /mnt/c/dev/ml/rag-qa/data/test


In [2]:
import datasets

# Single cell: safe to run on a fresh environment
from src.load_data import ensure_data_available

# ✅ Creates folders if missing and downloads only if needed
# (relies on `config` from the configuration cell at the top of the notebook)
ensure_data_available(config=config)

print("🚀 Dataset ready")

  from .autonotebook import tqdm as notebook_tqdm


✔ Dataset already downloaded — skipping.
🚀 Dataset ready


In [3]:
# Quick look at a small sample of every dataset split.
from src.explore_data import load_shards, explore_dataset

# Read at most three shards per split so the exploration stays light on memory.
# All three splits are loaded first, in train / validation / test order.
train_ds, val_ds, test_ds = (
    load_shards(shard_dir, max_shards=3)
    for shard_dir in (config.train_dir, config.val_dir, config.test_dir)
)

# Then report on each loaded split, in the same order.
for split_ds, split_title in (
    (train_ds, "Train set"),
    (val_ds, "Validation set"),
    (test_ds, "Test set"),
):
    explore_dataset(split_ds, split_title)



Exploring Train set:
Total examples across all shards: 3000
Columns: ['question', 'question_id', 'question_source', 'entity_pages', 'search_results', 'answer']

Column types:
 - question: Value('string')
 - question_id: Value('string')
 - question_source: Value('string')
 - entity_pages: {'doc_source': List(Value('string')), 'filename': List(Value('string')), 'title': List(Value('string')), 'wiki_context': List(Value('string'))}
 - search_results: {'description': List(Value('null')), 'filename': List(Value('null')), 'rank': List(Value('null')), 'search_context': List(Value('null')), 'title': List(Value('null')), 'url': List(Value('null'))}
 - answer: {'aliases': List(Value('string')), 'matched_wiki_entity_name': Value('string'), 'normalized_aliases': List(Value('string')), 'normalized_matched_wiki_entity_name': Value('string'), 'normalized_value': Value('string'), 'type': Value('string'), 'value': Value('string')}

Sample data from first 3 examples (strings truncated to 50 chars):
{'q

In [4]:
from src.analyze_data import load_shards_concat, dataset_info, analyze_lengths, most_common_answers, print_sample_qa

# Load datasets.
# NOTE: this rebinds train_ds / val_ds / test_ds, which the shard-exploration
# cell above also assigns (there with a max_shards=3 cap).
train_ds = load_shards_concat(config.train_dir)
val_ds   = load_shards_concat(config.val_dir)
test_ds  = load_shards_concat(config.test_dir)

# The per-split analysis used to live here as commented-out code. It is kept
# as real code behind an opt-in flag so it stays syntax-checked and the
# imports above remain in use. Off by default, so a plain run of this cell
# behaves exactly as before (datasets are loaded, nothing else happens).
RUN_DATASET_ANALYSIS = False

if RUN_DATASET_ANALYSIS:
    # Explore datasets and save plots in the 'plots/' folder
    # (output location as stated by the original note — TODO confirm in src.analyze_data).
    for name, ds in [("Train", train_ds), ("Validation", val_ds), ("Test", test_ds)]:
        # Guard kept from the original loop: a split may be missing.
        if ds is None:
            print(f"No dataset found for {name}")
            continue
        dataset_info(ds, name)
        analyze_lengths(ds, "question", name)
        analyze_lengths(ds, "answer", name)
        most_common_answers(ds)
        print_sample_qa(ds, name, n=5)


In [6]:
from src.compute_embeddings import compute_embeddings

# Compute embeddings (will load from file if already exists)
# Requires `config` from the configuration cell. In the recorded run the call
# loaded a previously saved pickle (978526 passages) instead of recomputing.
# NOTE(review): presumably `corpus` is the passage list and `corpus_embeddings`
# the matching vectors — confirm against src.compute_embeddings.
corpus, corpus_embeddings = compute_embeddings(config=config)
print("embeddings loaded")


Loading saved embeddings from /mnt/c/dev/ml/rag-qa/data/corpus_embeddings_unique.pkl...
Loaded 978526 passages.
embeddings loaded


In [7]:
from src.generator import load_embeddings

# Load the corpus and the retrieval structure used by the generator cells below.
# NOTE(review): this rebinds `corpus`, which the compute_embeddings cell also
# assigns — confirm both calls return the same passages. The recorded output
# mentions a FAISS index, so `emb` is presumably that index (TODO confirm).
corpus, emb = load_embeddings(config=config)

🔹 Loaded FAISS index with 978526 passages


In [8]:
from src.generator import generate_answer_combined

# Example question used to exercise the full retrieve-and-generate pipeline.
# Relies on `corpus`, `emb` and `config` from the earlier cells.
query = "What is the capital of france?"
answer, ctx = generate_answer_combined(query, corpus, emb, config=config, top_k=5)

print("\n🔍 Used Context Passages:\n")
for rank, passage in enumerate(ctx, 1):
    # Show only the first 200 characters with newlines flattened to spaces.
    # chr(10) is "\n"; it avoids a backslash inside the f-string expression,
    # which older Python versions reject.
    print(f"{rank}. {passage[:200].replace(chr(10),' ')}...\n")

print("💡 Final Answer:\n", answer)

🔹 Loading embedding model BAAI/bge-base-en...
🔹 Loading cross-encoder reranker...

🔍 Used Context Passages:

1. Paris: paris ( french : ) is the capital and most populous city of france. situated on the river seine in northern metropolitan france, it is in the centre of the ile - de - france region, also known ...

2. France: france ( french : ), officially the french republic ( ), is a sovereign state comprising territory in western europe and several overseas regions and territories. the european, or metropolitan...

3. Capital city: valparaiso. * : prague is the sole constitutional capital. brno is home to all three of the country's highest courts, making it the de facto capital of the czech judicial branch. * : the...

4. Capital city: a capital city ( or simply capital ) is the municipality enjoying primary status in a country, state, province, or other region, usually as its seat of government. a capital is typically...

5. Capital city: a capital city that is also the p

# Evaluate retrieval performance only

In [2]:
from src.evaluate_retrieve import run_evaluation

# Retrieval-only evaluation. Requires `config` from the configuration cell at
# the top of the notebook, so that cell must be run first on a fresh kernel.
# The recorded output reports Recall@1/3/5/10 on the first 100 samples of the
# train, validation and test splits.
run_evaluation(config=config)

  from .autonotebook import tqdm as notebook_tqdm


🔹 Loaded FAISS index with 978526 passages

=== 🔥 Evaluating /mnt/c/dev/ml/rag-qa/data/train — first 100 samples ===


Loading /mnt/c/dev/ml/rag-qa/data/train: 100%|██████████| 54/54 [00:01<00:00, 51.44it/s]
Evaluating Recall: 100%|██████████| 100/100 [00:22<00:00,  4.52it/s]


Recall@1: 0.8300
Recall@3: 0.9400
Recall@5: 0.9600
Recall@10: 0.9600

=== 🔥 Evaluating /mnt/c/dev/ml/rag-qa/data/validation — first 100 samples ===


Loading /mnt/c/dev/ml/rag-qa/data/validation: 100%|██████████| 8/8 [00:00<00:00, 53.23it/s]
Evaluating Recall: 100%|██████████| 100/100 [00:21<00:00,  4.74it/s]


Recall@1: 0.7500
Recall@3: 0.8200
Recall@5: 0.8200
Recall@10: 0.8300

=== 🔥 Evaluating /mnt/c/dev/ml/rag-qa/data/test — first 100 samples ===


Loading /mnt/c/dev/ml/rag-qa/data/test: 100%|██████████| 8/8 [00:00<00:00, 55.34it/s]
Evaluating Recall: 100%|██████████| 100/100 [00:21<00:00,  4.74it/s]

Recall@1: 0.7200
Recall@3: 0.8500
Recall@5: 0.8900
Recall@10: 0.9400





In [None]:
from src.evaluate_rag_full import run_full_rag_eval

# End-to-end RAG evaluation: up to 1000 questions with top_k=5.
# Requires `config` from the configuration cell. This is a long-running cell:
# the recorded progress bar shows about 1.8 s per question (roughly half an
# hour for the full 1000), and the saved run was captured at 22/1000.
run_full_rag_eval(config=config, max_questions=1000, top_k=5)


=== Loading embeddings / FAISS index ===
🔹 Loaded FAISS index with 978526 passages

=== Loading Test dataset ===

=== Running RAG Evaluation ===


  2%|▏         | 22/1000 [00:46<29:21,  1.80s/it]