# EDGE Comprehensive Tests

This notebook consolidates all tests and runs them end-to-end:
- Setup verification (dependencies and imports)
- Ingestion from the repository file `3essay.txt`
- Query pipeline (three-stage)
- API endpoints via FastAPI TestClient

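Outputs are stored in the notebook for reproducibility and can be committed to GitHub. Run the cells in order from a fresh kernel: the query cell reads `BASE_PATH` set by the ingestion cell, and the API cell reuses `qp` built by the query cell.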

In [5]:
# Setup verification: try to import every core dependency and record the outcome.
import os
import sys
from importlib import import_module

# Each entry pairs the importable module name with the label used in the report.
checks = [
    ("numpy", "numpy"),
    ("networkx", "networkx"),
    ("sentence_transformers", "sentence-transformers"),
    ("faiss", "faiss-cpu"),
    ("fastapi", "fastapi"),
    ("pydantic", "pydantic"),
    ("uvicorn", "uvicorn"),
]

# Maps report label -> True when the import succeeded, False otherwise.
results = {}
for module_name, label in checks:
    try:
        import_module(module_name)
    except Exception as err:
        # Any failure while importing counts as "not available"; report and keep going
        # so the summary covers every dependency.
        results[label] = False
        print(f"[FAIL] {label}: {err}")
    else:
        results[label] = True

print("All core deps ok:", all(results.values()))
results


All core deps ok: True


{'numpy': True,
 'networkx': True,
 'sentence-transformers': True,
 'faiss-cpu': True,
 'fastapi': True,
 'pydantic': True,
 'uvicorn': True}

In [6]:
# Ingestion from 3essay.txt (MiniLM + Faiss + Stores)
import tempfile
from pathlib import Path
from pipeline.ingestion import BookIngestionPipeline

# The essay is looked up in the notebook's current working directory.
essay_path = Path.cwd().joinpath("3essay.txt")
if not essay_path.exists():
    raise FileNotFoundError(f"3essay.txt not found at: {essay_path}")

# Undecodable bytes are dropped instead of raising; surrounding whitespace is trimmed.
essay_text = essay_path.read_text(encoding="utf-8", errors="ignore").strip()
if len(essay_text) == 0:
    raise ValueError("3essay.txt is empty")

# Every generated artifact goes into a fresh temporary directory.
test_dir = tempfile.mkdtemp(prefix="edge_nb_test_")
print("Test dir:", test_dir)

pipe = BookIngestionPipeline(base_path=test_dir, index_type="flat")
pipe.ingest_book(essay_text, batch_size=16, similarity_threshold=0.6)

# Gather get_stats() from each pipeline component, keyed by its attribute name.
stats = {
    component: getattr(pipe, component).get_stats()
    for component in ("sentence_store", "vector_store", "faiss_index", "graph_store")
}

print(stats)
# Sentence count, stored vector count and indexed vector count must all agree.
assert (
    stats['sentence_store']['num_sentences']
    == stats['vector_store']['num_vectors']
    == stats['faiss_index']['num_vectors']
)

# Keep the artifact directory available to the cells that follow.
BASE_PATH = Path(test_dir)
BASE_PATH


Test dir: C:\Users\xeangao\AppData\Local\Temp\edge_nb_test_tv6qgdpp
{'sentence_store': {'num_sentences': 104, 'num_chapters': 1, 'num_paragraphs': 1}, 'vector_store': {'num_vectors': 104, 'embedding_dim': 384, 'file_size_bytes': 159744, 'file_size_mb': 0.15}, 'faiss_index': {'index_type': 'flat', 'embedding_dim': 384, 'num_vectors': 104, 'is_trained': False}, 'graph_store': {'nodes': 104, 'edges': 5356, 'density': 1.0}}


WindowsPath('C:/Users/xeangao/AppData/Local/Temp/edge_nb_test_tv6qgdpp')

In [7]:
# Query pipeline test
# Builds a QueryPipeline from stores constructed against paths under BASE_PATH
# (set by the ingestion cell), runs a few queries, and checks the result count.
from pipeline.query import QueryPipeline
from core.faiss_index import FaissIndex
from storage.vector_store import VectorStore
from storage.sentence_store import SentenceStore
from graph.graph_store import GraphStore
from core.embedder_minilm import get_embedder

# Query parameters, named so the assertion below checks the same limit that was requested.
TOP_K = 5
INITIAL_K = 10

# The embedder is only needed here for its embedding dimension, which VectorStore requires.
embedder = get_embedder()
emb_dim = embedder.get_embedding_dim()

faiss_index = FaissIndex.load(str(BASE_PATH / "faiss.index"))
vector_store = VectorStore(str(BASE_PATH), emb_dim)
sentence_store = SentenceStore(str(BASE_PATH / "sentences.jsonl"))
graph_store = GraphStore(str(BASE_PATH / "graph.json"))

qp = QueryPipeline(
    faiss_index=faiss_index,
    vector_store=vector_store,
    sentence_store=sentence_store,
    graph_store=graph_store
)

queries = [
    "What is the main concept?",
    "How does the system work?",
]

# Maps each query to its first two results, kept short for display.
all_results = {}
for q in queries:
    res = qp.query(q, top_k=TOP_K, initial_k=INITIAL_K)
    print(q, "->", len(res), "results")
    # Without this check the cell could never fail on a wrong result count.
    assert len(res) <= TOP_K, (
        f"{q!r}: got {len(res)} results, expected at most {TOP_K}"
    )
    all_results[q] = res[:2]

all_results


What is the main concept? -> 5 results
How does the system work? -> 5 results


{'What is the main concept?': [{'sentence_id': 50,
   'text': 'Religion being the chief band of human society, is a happy thing, when itself is well contained within the true band of unity',
   'chapter': 'Unknown',
   'paragraph_id': 1,
   'sentence_id_in_para': 50,
   'score': 0.3717085838317871,
   'vector_similarity': 0.3434171676635742,
   'graph_degree': 103,
   'context_coherence': 0.0,
   'metadata': {}},
  {'sentence_id': 67,
   'text': 'It establisheth faith; it kindleth charity; the outward peace of the church, distilleth into peace of conscience; and it turneth the labors of writing, and reading of controversies, into treaties of mortification and devotion',
   'chapter': 'Unknown',
   'paragraph_id': 1,
   'sentence_id_in_para': 67,
   'score': 0.3643411219120026,
   'vector_similarity': 0.3286822438240051,
   'graph_degree': 103,
   'context_coherence': 0.0,
   'metadata': {}}],
 'How does the system work?': [{'sentence_id': 15,
   'text': 'First he breathed light, upon t

In [8]:
# FastAPI endpoints test (TestClient)
# Exercises /health, /stats and /query in-process and asserts on both the HTTP
# status and the payload of each response.
from fastapi.testclient import TestClient
from api.server import app
import api.server as api_srv

# Reuse qp from previous cell by assigning it to the server module's global.
api_srv.query_pipeline = qp

# NOTE(review): the client is not entered as a context manager, so app
# startup/lifespan handlers do not run; presumably this keeps the injected
# pipeline from being replaced — confirm against api.server.
client = TestClient(app)

# /health
health_resp = client.get('/health')
# Check the status first so an error response fails here, not as a KeyError below.
assert health_resp.status_code == 200, health_resp.text
h = health_resp.json()
print('health:', h)
assert h['pipeline_initialized'] is True

# /stats
stats_resp = client.get('/stats')
assert stats_resp.status_code == 200, stats_resp.text
s = stats_resp.json()
print('vectors:', s['faiss_index']['num_vectors'], 'sentences:', s['sentence_store']['num_sentences'])
# Same invariant the ingestion cell asserts: one indexed vector per stored sentence.
assert s['faiss_index']['num_vectors'] == s['sentence_store']['num_sentences'], (
    "faiss index and sentence store report different counts"
)

# /query
payload = {"query": "What is vector search?", "top_k": 3, "initial_k": 10}
query_resp = client.post('/query', json=payload)
assert query_resp.status_code == 200, query_resp.text
r = query_resp.json()
print('query results:', r['num_candidates'])
# Compare against the requested limit instead of repeating the literal.
assert r['num_candidates'] <= payload['top_k']

"OK"


health: {'status': 'healthy', 'pipeline_initialized': True}
vectors: 104 sentences: 104
query results: 3


'OK'