# Query Expansion

In [67]:
import requests
from __future__ import annotations
from typing import Any, Dict, List, Optional
from collections import defaultdict
import requests
import uuid


# import qdrant_client
from qdrant_client import QdrantClient, models
import importlib.metadata as im
import qdrant_client
import inspect
import time
import sys
import json

%reload_ext autoreload
%autoreload 2

In [2]:
sys.path.append("../src") 

from retrieval.evaluator import (
      run_table_eval_and_print,
      hybrid_search_sec_docs_bge_m3,
      cap_per_group,
      table_group_key,
      rerank_with_minilm_l6,
      dedupe_scored_points,
      dense_search_points,
      dense_search_sec_docs,
      embed_query_qwen3,
      format_passage_for_rerank,
      rerank_with_bge_reranker_large,
      get_bge_reranker_large_model,
      get_gte_multilingual_reranker_base,
      rerank_with_gte_multilingual_reranker_base,
      get_granite_reranker_english_r2_model,
      rerank_with_granite_english_r2,
      get_qwen3_reranker_model,
      rerank_with_qwen3_reranker,
  )

In [113]:
def print_retrieved_results(results: list):
    """Print a ranked, human-readable summary of retrieved hits to stdout.

    Args:
        results: Sequence of hit objects exposing ``.score`` (a number) and
            ``.payload`` (a dict or ``None``). The payload keys read here are
            ``doc_type``, ``doc_id``, ``section_title``, ``table_index`` and
            ``content``; missing keys are printed as ``None`` (or an empty
            string for ``content``).

    Returns:
        None. One block is printed per hit, with ranks starting at 1.
    """
    for rank, hit in enumerate(results, start=1):
        payload = hit.payload or {}  # tolerate hits returned without a payload
        print("rank: ", rank)
        print(f"score={hit.score:.4f}  doc_type={payload.get('doc_type')}  doc_id={payload.get('doc_id')}")
        print("section:", payload.get("section_title"))
        # This line used to be labelled "section:" too, which made the table
        # index look like a second section title in the output.
        print("table_index:", payload.get("table_index"))
        print("content:", (payload.get("content") or ""), "...\n")
    return

### Step 1. Prompt

In [94]:
from retrieval.accounting_terms import accounting_terms_file_to_llm_digest
from retrieval.ollama_client import chat_with_ollama
from retrieval.query_expansion import expand_query_with_ollama


In [97]:
# Question that the rest of the notebook tries to answer.
user_query = "What was Apple’s total debt (short-term plus long-term) at year-end 2024?"

# Digest built from the accounting-terms config file; it is handed to the
# expander as its allowed line items.
line_items = accounting_terms_file_to_llm_digest("../data/config/SEC_accounting_terms.json")

# Expand the question with expand_query_with_ollama. The flags ask it to keep
# the original question in the result and to de-duplicate the returned list.
expanded_queries = expand_query_with_ollama(
    user_query,
    allowed_line_items=line_items,
    model="qwen3:4b-instruct",
    include_original=True,
    dedupe=True,
)
print(expanded_queries)

['What was Apple’s total debt (short-term plus long-term) at year-end 2024?', 'Total liabilities', 'Total term debt', 'Commercial paper', 'Less: Current portion of term debt', 'Total non-current liabilities']


### Parallel Search

In [98]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import os
from huggingface_hub import snapshot_download
from FlagEmbedding import BGEM3FlagModel
from retrieval.evaluator import (
  build_sec_filter,
  embed_query_bge_m3,
  rrf_fuse,
  _point_key,
  _scored_point_with_score,
  dedupe_scored_points,
  doc_id_table_key,
  table_group_key,
  multi_query_hybrid_search_bge_m3, 
  get_bge_m3_model
)

In [99]:
# Connect to the Qdrant instance expected on localhost:6333.
client = QdrantClient(host="localhost", port=6333)
# Search with the expanded query list produced by the query-expansion cell.
queries = expanded_queries

In [112]:
# Load the BGE-M3 embedder once; the same instance is passed to the search call below.
bge = get_bge_m3_model(
    model_name="BAAI/bge-m3",
    use_fp16=False,  # or True on CUDA
)

# Run every expanded query against the hybrid collection, restricted to
# Apple's FY2024 10-K table/table_row documents, with fusion enabled (fuse=True, rrf_k=60).
fused, hits_by_query = multi_query_hybrid_search_bge_m3(
    queries,
    client=client,
    collection_name="sec_docs_hybrid",
    top_k=20,
    ticker="AAPL",
    fiscal_year=2024,
    form_type="10-K",
    doc_types=["table", "table_row"],
    bge_model=bge,
    fuse=True,
    rrf_k=60,
)

In [125]:
# Report how many fused hits remain after de-duplication keyed by doc_id_table_key.
print("before dedupe: ", len(fused))
# presumably collapses hits that share the same document/table key — confirm in retrieval.evaluator
deduped = dedupe_scored_points(fused, key_fn=doc_id_table_key)
print("after dedupe: ", len(deduped))
print_retrieved_results(deduped)

before dedupe:  20
after dedupe:  7
rank:  1
score=0.0463  doc_type=table_row  doc_id=AAPL_10-K_2024::table::32::row::10
section: Term Debt
section: 32
content: Less: Current portion of term debt: Current portion of total term debt as of the end of the fiscal year 2024 and 2023. ...

rank:  2
score=0.0452  doc_type=table_row  doc_id=AAPL_10-K_2024::table::12::row::24
section: CONSOLIDATED BALANCE SHEETS
section: 12
content: Term debt – Non-current liabilities: Non-current liabilities representing term debt as of September 28, 2024 and September 30, 2023. ...

rank:  3
score=0.0449  doc_type=table  doc_id=AAPL_10-K_2024::table::24
section: Other Non-Current Liabilities
section: 24
content: Table summary: The table provides details on other non-current liabilities for Apple Inc., including income taxes payable and total other non-current liabilities, for the fiscal years 2024 and 2023.
Rows: Income taxes payable: Amount of income taxes payable as of the end of fiscal years 2024 and 2023.

In [102]:
# _ = bge.encode(["warmup"], return_dense=True, return_sparse=True)

In [103]:
# # 1) Load ONE embedder and pre-encode sequentially (no threads here)
# bge = BGEM3FlagModel("BAAI/bge-m3", use_fp16=False, devices=["cpu"])  # use_fp16=True if on CUDA
# embs = {q: embed_query_bge_m3(q, bge_model=bge, sparse_top_k=256) for q in queries}

# qfilter = build_sec_filter(
#   doc_types=["table", "table_row"],
#   ticker="AAPL",
#   fiscal_year=2024,
#   form_type="10-K",
# )

# def one_qdrant_search(q: str):
#   dense_vec, sparse_vec = embs[q]
#   resp = client.query_points(
#       collection_name="sec_docs_hybrid",
#       prefetch=[
#           models.Prefetch(query=sparse_vec, using="bge_m3_sparse", limit=200, filter=qfilter),
#           models.Prefetch(query=dense_vec,  using="bge_m3_dense",  limit=200, filter=qfilter),
#       ],
#       query=models.FusionQuery(fusion=models.Fusion.RRF),
#       limit=10,
#       with_payload=True,
#       with_vectors=False,
#       query_filter=qfilter,
#   )
#   return list(resp.points)

# # 2) Parallelize only the Qdrant calls
# hits_by_query = {}
# with ThreadPoolExecutor(max_workers=min(8, len(queries))) as ex:
#   futs = {ex.submit(one_qdrant_search, q): q for q in queries}
#   for fut in as_completed(futs):
#       hits_by_query[futs[fut]] = fut.result()

# # # 3) Optional: fuse the per-query lists client-side (RRF)
# ranked_lists = [hits_by_query[q] for q in queries]
# fused_scores, _ = rrf_fuse(ranked_lists)

# by_id = {}
# for lst in ranked_lists:
#   for p in lst:
#       by_id.setdefault(_point_key(p.id), p)

# top_n = len(queries)*5;
# ordered = sorted(fused_scores, key=lambda pid: fused_scores[pid], reverse=True)[:top_n]
# final = [_scored_point_with_score(by_id[pid], fused_scores[pid]) for pid in ordered]

### Apply Reranking

In [63]:
# model = get_bge_reranker_large_model(model_name="BAAI/bge-reranker-v2-m3")
# reranked = rerank_with_bge_reranker_large(user_query, deduped, top_k=10, model=model, max_passage_chars=4000)
# i = 1
# for h in reranked:
#     print("rank: ", i)
#     p = h.payload or {}
#     print(f"score={h.score:.4f}  doc_type={p.get('doc_type')}  doc_id={p.get('doc_id')}")
#     print("section:", p.get("section_title"))
#     print("section:", p.get("table_index"))
#     print("content:", (p.get("content") or ""), "...\n")
#     i += 1

We found that reranking on row summaries did not improve retrieval, so we rerank on table summaries instead.

#### Fetch table summaries for reranking

In [120]:
from retrieval.rerank_enricher import fetch_table_summaries_for_candidates, enrich_point_for_rerank, enrich_candidates_with_table_summaries

In [118]:
# Build a single reranker query: the first entry of `queries` (the original
# question) followed by the remaining expansion terms in parentheses.
original_query, *expansion_terms = queries
if expansion_terms:
    query_for_reranking = f"{original_query} ({', '.join(expansion_terms)})"
else:
    # No expansion terms came back: avoid appending a dangling " ()" suffix.
    query_for_reranking = original_query
print(query_for_reranking)

What was Apple’s total debt (short-term plus long-term) at year-end 2024? (Total liabilities, Total term debt, Commercial paper, Less: Current portion of term debt, Total non-current liabilities)


In [122]:
# Enrich the de-duplicated candidates via enrich_candidates_with_table_summaries
# (reads from the same Qdrant collection) before handing them to the reranker.
enriched_cands = enrich_candidates_with_table_summaries(
    deduped,
    client=client,
    collection_name="sec_docs_hybrid",
)

In [124]:
# Load the reranker explicitly so this cell does not depend on a `model`
# variable left over from an earlier, now commented-out, cell. Without this,
# a fresh Restart & Run All fails with NameError on `model`.
model = get_bge_reranker_large_model(model_name="BAAI/bge-reranker-v2-m3")

# Rerank the table-summary-enriched candidates against the combined query
# and keep the five best.
reranked = rerank_with_bge_reranker_large(
    query_for_reranking,
    enriched_cands,
    top_k=5,
    model=model,
    max_passage_chars=4000,
)
print_retrieved_results(reranked)

rank:  1
score=1.3926  doc_type=table_row  doc_id=AAPL_10-K_2024::table::32::row::10
section: Term Debt
section: 32
content: Table summary: The table details term debt maturities and related financial information for Apple Inc., covering the years 2023 and 2024, including fixed-rate notes and their effective interest rates.
Rows: 2013 – 2023 debt issuances: – Fixed-rate 0.000% – 4.850% notes: Details of fixed-rate term debt issued between 2013 and 2023, including the maturity period from 2024 to 2062. 2013 – 2023 debt issuances: – Fixed-rate 0.000% – 4.850% notes: Amount of fixed-rate term debt issued between 2013 and 2023, with maturities from 2024 to 2062. 2013 – 2023 debt issuances: – Fixed-rate 0.000% – 4.850% notes: Effective interest rates for the fixed-rate term debt issued between 2013 and 2023, with maturities from 2024 to 2062. 2013 – 2023 debt issuances: – Fixed-rate 0.000% – 4.850% notes: Details of fixed-rate term debt issued between 2013 and 2023, including the maturity p