# Query & Context Intelligence for Recommender Retrieval

This notebook sketches query- and context-side augmentation patterns used in
production recommender systems to improve recall and intent alignment without
modifying the core retrieval models:
- Session context compaction
- Intent and constraint extraction
- Candidate-set repair when retrieval underperforms

## 0) Setup


In [1]:
import re
import numpy as np
import pandas as pd
from numpy.random import default_rng

# Single seeded generator shared by every sampling helper below (titles,
# embeddings, user vectors, sessions); outputs depend on the order of draws.
rng = default_rng(21)

def tokenize(text):
    """Split *text* into lowercase alphanumeric tokens.

    Any run of characters outside ``a-z`` / ``0-9`` acts as a separator.
    Falsy input (``None`` or ``""``) yields an empty list.
    """
    lowered = text.lower() if text else ""
    return re.findall(r"[a-z0-9]+", lowered)

def l2_normalize(x, axis=-1, eps=1e-12):
    """Scale *x* to unit Euclidean norm along *axis*.

    Norms below *eps* are clamped to *eps*, so an all-zero vector comes
    back as zeros instead of NaNs.
    """
    norms = np.linalg.norm(x, axis=axis, keepdims=True)
    safe_norms = np.maximum(norms, eps)
    return x / safe_norms


## 1) Minimal catalog + baseline retrieval


In [2]:
# Size of the synthetic catalog and dimensionality of the item/user embeddings.
n_items = 2500
d = 32

# Product categories that make_title() samples from when generating titles.
topics = [
    "wireless mouse","gaming keyboard","ergonomic chair","standing desk",
    "noise cancelling headphones","usb c hub","4k monitor","webcam"
]

def make_title():
    """Return a random catalog title of the form ``"<brand> <topic> <attr>"``.

    Draws come from the module-level ``rng`` in a fixed order (topic, then
    brand, then attribute), which keeps the seeded random stream unchanged.
    """
    topic = rng.choice(topics)
    brand = rng.choice(["Aster","Norda","Vento","Orion"])
    suffix = rng.choice(["pro","mini","max","plus"])
    return " ".join((brand, topic, suffix))

# One random title per item; item_id equals the row position 0..n_items-1.
titles = [make_title() for _ in range(n_items)]
items = pd.DataFrame({"item_id": np.arange(n_items), "title": titles})

# Item embedding matrix: one unit-norm float32 row per item. The vectors are
# drawn independently of the titles, so they carry no information about them.
V = l2_normalize(rng.normal(size=(n_items, d)).astype(np.float32))

def retrieve_topk(user_vec, k=120):
    """Return the ``k`` highest-scoring items for ``user_vec``.

    Scores are dot products between the rows of the module-level item
    matrix ``V`` and ``user_vec``.

    Parameters
    ----------
    user_vec : array-like
        Query vector with the same dimensionality as the rows of ``V``.
    k : int, default 120
        Number of items to return. Values larger than the catalog are
        clamped to the catalog size; ``k <= 0`` yields empty results.

    Returns
    -------
    tuple of (ndarray, ndarray)
        Row indices into ``V`` and their float32 scores, both ordered by
        descending score.
    """
    scores = (V @ user_vec).astype(np.float32)

    # Clamp k: argpartition raises when k exceeds the number of scores, and
    # a slice of ``[-0:]`` would silently return the whole catalog for k == 0.
    k = min(int(k), scores.shape[0])
    if k <= 0:
        return np.empty(0, dtype=np.intp), np.empty(0, dtype=np.float32)

    # Partial selection of the top k, then a full sort of just those k.
    idx = np.argpartition(scores, -k)[-k:]
    idx = idx[np.argsort(scores[idx])[::-1]]
    return idx, scores[idx]

def sample_user():
    """Draw a random unit-norm user vector with ``d`` float32 components."""
    raw = rng.normal(size=(d,))
    return l2_normalize(raw.astype(np.float32))

# Preview the first rows of the synthetic catalog.
items.head()


Unnamed: 0,item_id,title
0,0,Orion ergonomic chair mini
1,1,Norda noise cancelling headphones max
2,2,Aster ergonomic chair mini
3,3,Vento usb c hub plus
4,4,Norda gaming keyboard plus


## 2) Session context compaction with LLM


In [3]:
def make_session(raw_query):
    """Build a synthetic session around *raw_query*.

    Each of three canned events is included independently with its own
    probability. One draw is taken from the module-level ``rng`` per event,
    in the order listed, whether or not the event ends up included.

    Returns a dict with keys ``"raw_query"`` and ``"events"``; each event is
    a dict with ``"type"`` and ``"text"``.
    """
    optional_events = (
        (0.7, "view", "ergonomic chair for home office"),
        (0.5, "view", "noise cancelling headset for calls"),
        (0.4, "search", "4k display screen"),
    )
    events = [
        {"type": kind, "text": text}
        for prob, kind, text in optional_events
        if rng.random() < prob
    ]
    return {"raw_query": raw_query, "events": events}

def context_compactor(session):
    """Compress a session into intent phrases and exclusion constraints.

    The raw query and every event text are tokenized. Keyword rules map the
    combined vocabulary onto at most three canonical intent phrases, and a
    ``"gaming"`` exclusion is emitted only when the session explicitly
    negates it (e.g. "not gaming", "non-gaming", "without gaming").

    Parameters
    ----------
    session : dict
        Must contain ``"raw_query"`` (str) and ``"events"`` (list of dicts
        with a ``"text"`` entry).

    Returns
    -------
    dict
        ``{"intent_phrases": [...], "constraints": {"exclude": [...]}}``.
    """
    texts = [session["raw_query"]] + [e["text"] for e in session["events"]]
    token_lists = [tokenize(t) for t in texts]
    vocab = {tok for toks in token_lists for tok in toks}

    intents = []
    if "chair" in vocab:
        intents.append("ergonomic chair")
    if "headphones" in vocab or "headset" in vocab:
        intents.append("noise cancelling headphones")
    if "monitor" in vocab or "display" in vocab or "screen" in vocab:
        intents.append("4k monitor")

    # A bare mention of "gaming" is a positive signal ("gaming keyboard"),
    # so only exclude when a negation word directly precedes it. The check
    # runs per text so a negator ending one text cannot pair with "gaming"
    # starting the next.
    negators = {"not", "no", "non", "without", "except", "excluding"}
    gaming_negated = any(
        prev in negators and tok == "gaming"
        for toks in token_lists
        for prev, tok in zip(toks, toks[1:])
    )

    constraints = {"exclude": []}
    if gaming_negated:
        constraints["exclude"].append("gaming")

    return {
        "intent_phrases": intents[:3],
        "constraints": constraints
    }

# Build one random demo session and compact it into intents + constraints.
sess = make_session("cordless headset for zoom calls, not gaming")
context = context_compactor(sess)
context


{'intent_phrases': ['noise cancelling headphones'],
 'constraints': {'exclude': ['gaming']}}

This layer reshapes user intent before retrieval, improving recall without
increasing ANN search cost or retraining embeddings.

## 3) Query construction from compacted context


In [4]:
def build_query(raw_query, context):
    """Merge the raw query with the context's intent phrases.

    Every phrase is tokenized; tokens are de-duplicated while keeping the
    position of their first occurrence, then joined with single spaces.
    """
    phrases = [raw_query, *context.get("intent_phrases", [])]
    # dict preserves insertion order, giving an ordered de-duplication.
    unique_tokens = dict.fromkeys(
        tok for phrase in phrases for tok in tokenize(phrase)
    )
    return " ".join(unique_tokens)

# Show the expanded query string for the demo session.
build_query(sess["raw_query"], context)


'cordless headset for zoom calls not gaming noise cancelling headphones'

## 4) Detect retrieval misses


In [5]:
def lexical_match(query, title):
    """Return the fraction of distinct query tokens that appear in *title*.

    The result lies in ``[0.0, 1.0]``; a query with no tokens scores ``0.0``.
    """
    query_tokens = set(tokenize(query))
    title_tokens = set(tokenize(title))
    if len(query_tokens) == 0:
        return 0.0
    shared = query_tokens.intersection(title_tokens)
    return len(shared) / len(query_tokens)

def detect_miss(query, cand_ids, threshold=0.1, top_n=30):
    """Flag a retrieval miss from the lexical overlap of the top candidates.

    Parameters
    ----------
    query : str
        Query text compared against candidate titles.
    cand_ids : sequence of int
        Candidate labels into the module-level ``items`` frame, best first.
    threshold : float, default 0.1
        A mean lexical match below this value counts as a miss.
    top_n : int, default 30
        Number of leading candidates inspected.

    Returns
    -------
    tuple of (float, numpy.bool_)
        The mean lexical match and whether it is below ``threshold``.
        An empty candidate list is reported as ``(0.0, True)``.
    """
    head = cand_ids[:top_n]
    if len(head) == 0:
        # Nothing retrieved is the clearest miss; also avoids the NaN (and
        # RuntimeWarning) that np.mean produces for an empty list.
        return 0.0, np.True_

    # One vectorised label lookup instead of a .loc call per candidate.
    head_titles = items.loc[head, "title"]
    scores = [lexical_match(query, title) for title in head_titles]
    mean_match = np.mean(scores)
    return float(mean_match), mean_match < threshold


## 5) Candidate-set repair via GenAI-style selection


In [6]:
def candidate_injector(query, exclude=None, inject_n=6):
    """Select catalog items whose titles lexically overlap with *query*.

    Parameters
    ----------
    query : str
        Query text; its distinct tokens are matched against title tokens.
    exclude : iterable of str, optional
        Terms that disqualify an item. Each term is tokenized like a title
        (lowercased, split on non-alphanumerics); an item is skipped when
        all tokens of any one term occur in its title.
    inject_n : int, default 6
        Maximum number of item labels to return.

    Returns
    -------
    ndarray of int
        Index labels of ``items``, ordered by descending token overlap and,
        within ties, by descending label.
    """
    query_tokens = set(tokenize(query))

    # Normalise exclusions through the same tokenizer as titles so that
    # "Gaming" or multi-word terms can match; drop terms with no tokens,
    # which would otherwise be a subset of every title.
    banned_terms = [set(tokenize(term)) for term in (exclude or [])]
    banned_terms = [term for term in banned_terms if term]

    scored = []
    # Iterate the title column once rather than doing a .loc lookup per row.
    for item_label, title in items["title"].items():
        title_tokens = set(tokenize(title))
        if any(term <= title_tokens for term in banned_terms):
            continue
        overlap = len(query_tokens & title_tokens)
        if overlap > 0:
            scored.append((overlap, item_label))

    scored.sort(reverse=True)
    return np.array([label for _, label in scored[:inject_n]], dtype=int)


In production, the compaction and injection steps are implemented
with small, cached LLM calls constrained by output schemas and latency budgets;
the rule-based functions in this notebook are lightweight stand-ins for those calls.


In [7]:
def retrieve_with_repair(raw_query, user_vec, context, k=120):
    """Retrieve candidates and repair the set when retrieval looks off-intent.

    Steps: expand the query with the context's intent phrases, take the
    top-``k`` embedding neighbours of ``user_vec``, measure how well their
    titles match the expanded query, and on a miss inject lexically matching
    items that respect the context's exclusion constraints.

    Parameters
    ----------
    raw_query : str
        The user's query text.
    user_vec : ndarray
        User embedding scored against the module-level item matrix ``V``.
    context : dict
        Output of ``context_compactor``; ``"intent_phrases"`` and
        ``"constraints" -> "exclude"`` are read if present.
    k : int, default 120
        Size of the candidate set.

    Returns
    -------
    dict
        ``query_used``, ``miss``, ``mean_lexical_match``, ``injected_n`` and
        ``top_items`` (titles of the five best-scoring final candidates).
    """
    q = build_query(raw_query, context)
    cand_ids, _ = retrieve_topk(user_vec, k=k)

    mean_match, miss = detect_miss(q, cand_ids)
    injected = np.empty(0, dtype=int)

    if miss:
        injected = candidate_injector(
            q,
            exclude=context.get("constraints",{}).get("exclude",[]),
            inject_n=8
        )
        if len(injected):
            # Any injected item that was not already retrieved scores below
            # every retrieved item, so sorting the union by score and cutting
            # at k would discard exactly the injections. Reserve slots for
            # them by trimming the tail of the retrieved list instead
            # (cand_ids is already ordered best-first).
            keep_n = max(k - len(injected), 0)
            retained = cand_ids[~np.isin(cand_ids, injected)][:keep_n]
            merged = np.concatenate([retained, injected])
            merged_scores = (V[merged] @ user_vec).astype(np.float32)
            order = np.argsort(merged_scores)[::-1]
            cand_ids = merged[order][:k]

    return {
        "query_used": q,
        "miss": miss,
        "mean_lexical_match": mean_match,
        "injected_n": len(injected),
        "top_items": items.loc[cand_ids[:5], "title"].tolist()
    }

# End-to-end demo. Arguments are evaluated left to right, so sample_user()
# draws from the shared rng before make_session() does; reordering them would
# change the random stream and therefore the result shown below.
demo = retrieve_with_repair(
    "screen for design work, not gaming",
    sample_user(),
    context_compactor(make_session("screen for design work, not gaming"))
)
demo


{'query_used': 'screen for design work not gaming ergonomic chair 4k monitor',
 'miss': np.True_,
 'mean_lexical_match': 0.060000000000000005,
 'injected_n': 8,
 'top_items': ['Norda wireless mouse pro',
  'Vento noise cancelling headphones pro',
  'Orion 4k monitor plus',
  'Vento standing desk mini',
  'Aster standing desk mini']}