# 07. Latency Optimization Lab

Guardrails add latency. In this notebook, we explore techniques to minimize this overhead.

In [None]:
import time
import asyncio
import sys
import os

# Add project root to path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from basics.input_validation import InputValidator
from intermediate.toxic_content_detection import ToxicDetector

## 1. Sequential vs. Parallel Execution
Running validators one by one means their latencies add up. Running them concurrently lets the waiting overlap, so the total time approaches that of the slowest single check.

In [None]:
# Build both guardrails once, up front, so that object construction is not
# part of the timed runs further down in this cell.
input_validator = InputValidator()
toxic_detector = ToxicDetector()

# Shared sample input passed to every validator call in this benchmark.
text = "This is a test sentence for benchmarking."

def run_sequential(delay: float = 0.01) -> None:
    """Run both guardrail checks back to back, each preceded by a blocking wait.

    The wait stands in for the network round trip each check would cost if it
    were a remote API call, so one pass takes roughly ``2 * delay`` plus the
    validators' own work.

    Args:
        delay: Simulated latency in seconds applied before each check.
            Defaults to 0.01, the value this lab originally hard-coded.

    Returns:
        None. The validators' results are discarded; only elapsed time matters.
    """
    # Blocking sleep: nothing else can progress while this "request" waits.
    time.sleep(delay)
    input_validator.validate(text)
    # The second wait only starts after the first check has fully finished.
    time.sleep(delay)
    toxic_detector.check(text)

async def run_parallel(delay: float = 0.01) -> None:
    """Run both guardrail checks concurrently, each behind a non-blocking wait.

    Each check is wrapped in a coroutine that first awaits a simulated network
    delay. ``asyncio.gather`` schedules both coroutines together, so the two
    waits overlap and one pass takes roughly ``delay`` rather than
    ``2 * delay``.

    Note that the validator calls themselves are ordinary synchronous calls:
    only the simulated waits overlap, not the validators' own work.

    Args:
        delay: Simulated latency in seconds awaited before each check.
            Defaults to 0.01, the value this lab originally hard-coded.

    Returns:
        None. The validators' results are discarded; only elapsed time matters.
    """
    async def check_input():
        # Yields to the event loop, letting the other check start its wait too.
        await asyncio.sleep(delay)
        return input_validator.validate(text)

    async def check_toxic():
        await asyncio.sleep(delay)
        return toxic_detector.check(text)

    await asyncio.gather(check_input(), check_toxic())

start = time.perf_counter()
run_sequential()
print(f"Sequential Time: {(time.perf_counter() - start)*1000:.2f}ms")

start = time.perf_counter()
await run_parallel()
print(f"Parallel Time:   {(time.perf_counter() - start)*1000:.2f}ms")

## 2. Semantic Caching
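If we have seen a query before, we can skip validation entirely and reuse the stored result. The example below uses a plain dictionary keyed on the exact input text as a minimal stand-in; a true semantic cache would also match queries that are merely similar in meaning.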

In [None]:
# Maps each exact input string to the verdict already computed for it.
cache = {}


def guarded_process(text):
    """Return the guardrail verdict for ``text``, reusing a stored one if present.

    On a miss, the (simulated) expensive validation runs and its verdict is
    stored under ``text`` before being returned. On a hit, the stored verdict
    is returned with " (Cached)" appended so the two paths are easy to tell
    apart in the printed output.
    """
    if text not in cache:
        # Miss: pay for the expensive validation, simulated by a 50 ms sleep.
        time.sleep(0.05)
        verdict = "Safe"
        cache[text] = verdict
        return verdict

    # Hit: no validation work at all, just a dictionary lookup.
    return cache[text] + " (Cached)"

# The cache was just reset above, so the first call misses and pays for
# validation; the identical second call is served from the cache.
first_outcome = guarded_process('Hello')
print(f"1st Run: {first_outcome}")
second_outcome = guarded_process('Hello')
print(f"2nd Run: {second_outcome}")