<a href="https://colab.research.google.com/github/tonyjosephsebastians/AI-Design-patterns/blob/main/GROUP_2_%E2%80%94_Duplicate_Requests_%26_Retries_Break_the_System_(Correctness_%2B_Cost_Safety).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Goal: Safe retries without duplicate work, inconsistent writes, or doubled token spend.

Patterns covered:

Command Pattern (GoF)

Idempotent Command Pattern

Request Deduplication Pattern

Content Hashing Pattern

Singleton (GoF — config/clients only)

Concurrency (why DB/Redis uniqueness is required)

In [1]:
import json, hashlib, time, random, threading
from dataclasses import dataclass, asdict
from typing import Optional, Dict, Tuple


In [12]:
JOBS: Dict[str, dict] = {}
TOKEN_SPEND = 0

def fake_llm_call(payload: dict) -> dict:
    """Simulate an LLM call with token spend."""
    global TOKEN_SPEND
    tokens = 1200 + random.randint(-150, 200)
    TOKEN_SPEND += tokens
    return {"output": "analysis-result", "tokens": tokens, "payload": payload}

def create_job(payload: dict) -> str:
    job_id = f"job_{int(time.time()*1000)}_{random.randint(100,999)}"
    JOBS[job_id] = {"status": "RUNNING", "payload": payload, "result": None}
    # simulate doing the work right away
    result = fake_llm_call(payload)
    JOBS[job_id]["status"] = "SUCCEEDED"
    JOBS[job_id]["result"] = result
    return job_id

def baseline_endpoint(payload: dict) -> dict:
    """What many systems accidentally do: create new work every time."""
    job_id = create_job(payload)
    return {"job_id": job_id, "status": JOBS[job_id]["status"]}

payload = {"user_id":"tony", "doc_id":"pdf123", "operation":"extract_clauses_v1", "params":{"lang":"en"}}

TOKEN_SPEND = 0
JOBS.clear()

r1 = baseline_endpoint(payload)
r2 = baseline_endpoint(payload)  # retry!
print("1st:", r1)
print("2nd:", r2)
print("Jobs created:", len(JOBS))
print("Token spend:", TOKEN_SPEND)


1st: {'job_id': 'job_1770607600876_584', 'status': 'SUCCEEDED'}
2nd: {'job_id': 'job_1770607600876_733', 'status': 'SUCCEEDED'}
Jobs created: 2
Token spend: 2453


Command Pattern (GoF): make the action a first-class object

In [13]:
@dataclass(frozen=True)
class AnalyzeDocumentCommand:
    user_id: str
    doc_id: str
    operation: str
    params: dict

cmd = AnalyzeDocumentCommand(
    user_id="tony",
    doc_id="pdf123",
    operation="extract_clauses_v1",
    params={"lang":"en"}
)

print(cmd)
print("As dict:", asdict(cmd))


AnalyzeDocumentCommand(user_id='tony', doc_id='pdf123', operation='extract_clauses_v1', params={'lang': 'en'})
As dict: {'user_id': 'tony', 'doc_id': 'pdf123', 'operation': 'extract_clauses_v1', 'params': {'lang': 'en'}}


Content Hashing Pattern: fingerprint the “work”

In [14]:
def command_hash(cmd: AnalyzeDocumentCommand) -> str:
    payload = asdict(cmd)
    canonical = json.dumps(payload, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()

h1 = command_hash(cmd)
h2 = command_hash(cmd)
print("Hash stable:", h1 == h2)
print("Hash:", h1[:24], "...")


Hash stable: True
Hash: d182f07ca44515165cd5e7b0 ...


Include in hash:

user_id (or tenant)

doc_id (or file hash)

operation version

params that affect output

Exclude from hash:

timestamps

request_id

tracing headers

Request Deduplication Pattern: reuse same job for same hash

In [16]:
DEDUP_BY_HASH: Dict[Tuple[str,str], str] = {}  # (user_id, cmd_hash) -> job_id

def dedup_endpoint(cmd: AnalyzeDocumentCommand) -> dict:
    payload = asdict(cmd)
    h = command_hash(cmd)
    key = (cmd.user_id, h)

    if key in DEDUP_BY_HASH:
        job_id = DEDUP_BY_HASH[key]
        return {"job_id": job_id, "deduped": True, "status": JOBS[job_id]["status"]}

    job_id = create_job(payload)
    DEDUP_BY_HASH[key] = job_id
    return {"job_id": job_id, "deduped": False, "status": JOBS[job_id]["status"]}

TOKEN_SPEND = 0
JOBS.clear()
DEDUP_BY_HASH.clear()

r1 = dedup_endpoint(cmd)
r2 = dedup_endpoint(cmd)  # retry!
print("1st:", r1)
print("2nd:", r2)
print("Jobs created:", len(JOBS))
print("Token spend:", TOKEN_SPEND)


1st: {'job_id': 'job_1770607854070_320', 'deduped': False, 'status': 'SUCCEEDED'}
2nd: {'job_id': 'job_1770607854070_320', 'deduped': True, 'status': 'SUCCEEDED'}
Jobs created: 1
Token spend: 1099


empotent Command Pattern: client defines “same intent”

In [17]:
IDEMPOTENCY_MAP: Dict[Tuple[str,str], str] = {}  # (user_id, idem_key) -> job_id

def idempotent_endpoint(cmd: AnalyzeDocumentCommand, idem_key: str) -> dict:
    payload = asdict(cmd)
    key = (cmd.user_id, idem_key)

    if key in IDEMPOTENCY_MAP:
        job_id = IDEMPOTENCY_MAP[key]
        return {"job_id": job_id, "deduped": True, "via": "idempotency_key"}

    job_id = create_job(payload)
    IDEMPOTENCY_MAP[key] = job_id
    return {"job_id": job_id, "deduped": False, "via": "new_job"}

TOKEN_SPEND = 0
JOBS.clear()
IDEMPOTENCY_MAP.clear()

print(idempotent_endpoint(cmd, "abc-123"))
print(idempotent_endpoint(cmd, "abc-123"))       # retry => same job
print(idempotent_endpoint(cmd, "different-key")) # new intent boundary => new job

print("Jobs created:", len(JOBS))
print("Token spend:", TOKEN_SPEND)


{'job_id': 'job_1770607926320_917', 'deduped': False, 'via': 'new_job'}
{'job_id': 'job_1770607926320_917', 'deduped': True, 'via': 'idempotency_key'}
{'job_id': 'job_1770607926320_971', 'deduped': False, 'via': 'new_job'}
Jobs created: 2
Token spend: 2418


Cell 7 — Best practice: combine BOTH (key + hash fallback)

“We do defense-in-depth: key first, hash fallback.”

In [19]:
DEDUP_HASH: Dict[Tuple[str,str], str] = {}
DEDUP_IDEM: Dict[Tuple[str,str], str] = {}

def best_practice_endpoint(cmd: AnalyzeDocumentCommand, idem_key: Optional[str] = None) -> dict:
    payload = asdict(cmd)
    h = command_hash(cmd)

    # 1) Idempotency key path
    if idem_key:
        ikey = (cmd.user_id, idem_key)
        if ikey in DEDUP_IDEM:
            job_id = DEDUP_IDEM[ikey]
            return {"job_id": job_id, "deduped": True, "via": "idempotency_key"}

    # 2) Hash fallback path
    hkey = (cmd.user_id, h)
    if hkey in DEDUP_HASH:
        job_id = DEDUP_HASH[hkey]
        return {"job_id": job_id, "deduped": True, "via": "command_hash"}

    # Create new
    job_id = create_job(payload)
    DEDUP_HASH[hkey] = job_id
    if idem_key:
        DEDUP_IDEM[(cmd.user_id, idem_key)] = job_id

    return {"job_id": job_id, "deduped": False, "via": "new_job"}

TOKEN_SPEND = 0
JOBS.clear()
DEDUP_HASH.clear()
DEDUP_IDEM.clear()

print(best_practice_endpoint(cmd, None))          # no idem
print(best_practice_endpoint(cmd, None))          # retry => hash dedupe
print(best_practice_endpoint(cmd, "retry-001"))   # idem maps too
print(best_practice_endpoint(cmd, "retry-001"))   # retry => idem dedupe

print("Jobs created:", len(JOBS))
print("Token spend:", TOKEN_SPEND)


{'job_id': 'job_1770608184110_228', 'deduped': False, 'via': 'new_job'}
{'job_id': 'job_1770608184110_228', 'deduped': True, 'via': 'command_hash'}
{'job_id': 'job_1770608184110_228', 'deduped': True, 'via': 'command_hash'}
{'job_id': 'job_1770608184110_228', 'deduped': True, 'via': 'command_hash'}
Jobs created: 1
Token spend: 1095


race conditions (two requests at same time)

Even perfect logic can fail without atomic guarantee.

In [20]:
TOKEN_SPEND = 0
JOBS.clear()
DEDUP_HASH.clear()
DEDUP_IDEM.clear()

def thread_task():
    print(best_practice_endpoint(cmd, None))

threads = [threading.Thread(target=thread_task) for _ in range(2)]
for t in threads: t.start()
for t in threads: t.join()

print("Jobs created:", len(JOBS))
print("Token spend:", TOKEN_SPEND)


{'job_id': 'job_1770608225100_542', 'deduped': False, 'via': 'new_job'}
{'job_id': 'job_1770608225100_542', 'deduped': True, 'via': 'command_hash'}
Jobs created: 1
Token spend: 1369


Fix race with a lock (simulation of Redis SETNX / DB constraint)

In [21]:
LOCK = threading.Lock()

def locked_endpoint(cmd: AnalyzeDocumentCommand) -> dict:
    payload = asdict(cmd)
    h = command_hash(cmd)
    hkey = (cmd.user_id, h)

    with LOCK:  # simulate atomicity
        if hkey in DEDUP_HASH:
            job_id = DEDUP_HASH[hkey]
            return {"job_id": job_id, "deduped": True, "via": "atomic_lock"}

        job_id = create_job(payload)
        DEDUP_HASH[hkey] = job_id
        return {"job_id": job_id, "deduped": False, "via": "atomic_lock"}

TOKEN_SPEND = 0
JOBS.clear()
DEDUP_HASH.clear()

threads = [threading.Thread(target=lambda: print(locked_endpoint(cmd))) for _ in range(2)]
for t in threads: t.start()
for t in threads: t.join()

print("Jobs created:", len(JOBS))
print("Token spend:", TOKEN_SPEND)


{'job_id': 'job_1770608290729_807', 'deduped': False, 'via': 'atomic_lock'}
{'job_id': 'job_1770608290729_807', 'deduped': True, 'via': 'atomic_lock'}
Jobs created: 1
Token spend: 1383


Singleton Pattern (GoF): ONLY for config/clients

In [22]:
class ConfigSingleton:
    _instance = None
    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance.llm_model = "gpt-5.2"
            cls._instance.timeout_s = 30
        return cls._instance

c1 = ConfigSingleton()
c2 = ConfigSingleton()
print("Same instance:", c1 is c2)
print("Model:", c1.llm_model)


Same instance: True
Model: gpt-5.2


Good for: config, db client, redis client
