# dyncontext

**Dynamic Context Management for LLMs**
dyncontext is a Python package that provides intelligent, dynamic context window management for Large Language Models. Instead of replaying static conversation history, dyncontext lets you store many types of knowledge and automatically injects the context most relevant to the current query.
## Features

- **Dynamic Context Injection**: Automatically retrieve and inject relevant context based on the query
- **Multiple Retrieval Strategies**: Semantic (embedding-based), keyword, recency, and hybrid retrieval
- **Flexible Storage**: Store facts, documents, code snippets, and custom content types
- **Document Chunking**: Automatically chunk large documents for optimal retrieval
- **Token Management**: Smart token budget management for context windows
- **Multi-Provider Support**: Works with OpenAI, Anthropic, and 100+ providers via LiteLLM
- **Persistence**: Save and load context stores for reuse
- **Vector Store Backends**: Support for in-memory, ChromaDB, FAISS, and custom vector stores
- **Reranking**: Improve retrieval quality with cross-encoder and ensemble rerankers
- **Caching**: Cache embeddings, query results, and LLM responses for efficiency
- **Telemetry**: Built-in observability with metrics and event tracking
- **Context Compression**: Smart compression strategies when context exceeds limits
## Installation

```bash
# Basic installation
pip install dyncontext

# With OpenAI support
pip install dyncontext[openai]

# With Anthropic support
pip install dyncontext[anthropic]

# With all providers
pip install dyncontext[all]
```

## Quick Start

```python
from dyncontext import DynContext
# Initialize with OpenAI
ctx = DynContext(provider="openai", model="gpt-4")
# Add knowledge to the context store
ctx.add("Python was created by Guido van Rossum in 1991", tags=["python"])
ctx.add("FastAPI is a modern Python web framework", tags=["python", "web"])
ctx.add("React is a JavaScript library for building UIs", tags=["javascript"])
# Query - relevant context is automatically injected
response = ctx.complete("Who created Python?")
print(response.content)
# Output: Python was created by Guido van Rossum in 1991...
```

## How It Works

Unlike traditional RAG systems or static context management, dyncontext:
- Stores knowledge in a flexible context store with metadata, tags, and embeddings
- Retrieves relevant context using hybrid search (semantic + keyword)
- Injects context dynamically into each LLM request based on the query
- Manages token budgets to fit within context window limits (see the sketch after the diagram below)
```
┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
│   User Query    │───▶│    Retrieval    │───▶│  Context Block  │
└─────────────────┘    │   (Semantic +   │    │   (Injected)    │
                       │    Keyword)     │    └────────┬────────┘
                       └─────────────────┘             │
                                                       ▼
┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
│    Response     │◀───│       LLM       │◀───│ System + Query  │
└─────────────────┘    └─────────────────┘    └─────────────────┘
```
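The budgeting step, fitting retrieved items into `max_context_tokens`, can be pictured as greedy packing over a score-ranked list. Below is a minimal sketch of the idea using a tiktoken tokenizer; `fit_to_budget` and the item tuples are illustrative helpers, not part of the dyncontext API.

```python
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")

def fit_to_budget(ranked_items, max_context_tokens):
    """Greedily keep the highest-scoring items that fit the token budget."""
    kept, used = [], 0
    for score, text in ranked_items:  # assumed sorted by score, descending
        cost = len(enc.encode(text))
        if used + cost > max_context_tokens:
            continue  # skip items that would overflow the budget
        kept.append(text)
        used += cost
    return kept

context = fit_to_budget(
    [(0.92, "Python was created by Guido van Rossum in 1991"),
     (0.41, "React is a JavaScript library for building UIs")],
    max_context_tokens=2000,
)
```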
## Usage

### Content Types

```python
from dyncontext import DynContext, ContextType
ctx = DynContext(
provider="openai",
model="gpt-4",
max_context_tokens=2000,
)
# Add different types of content
ctx.add("Company was founded in 2020", context_type=ContextType.FACT)
ctx.add("def hello(): print('world')", context_type=ContextType.CODE)
# Query with automatic context injection
response = ctx.complete("When was the company founded?")# Add a document with automatic chunking
with open("manual.txt") as f:
ctx.add_document(
f.read(),
chunk_size=500,
chunk_overlap=50,
tags=["manual", "product"],
)
# Query the document
response = ctx.complete("How do I set up the device?")# Only retrieve context with specific tags
### Filtering Retrieval

```python
# Only retrieve context with specific tags
response = ctx.complete(
"What web frameworks are available?",
tags=["web"], # Only items tagged with 'web'
)
# Only retrieve specific types
response = ctx.complete(
"Show me code examples",
context_types=[ContextType.CODE],
)
```

### Streaming

```python
for chunk in ctx.stream("Explain Python decorators"):
    print(chunk, end="", flush=True)
```

### Inspecting Retrieval

```python
# See what context would be retrieved
results = ctx.retrieve("Python web framework", top_k=5)
for result in results:
    print(f"Score: {result.score:.3f}")
    print(f"Content: {result.item.content[:100]}...")
```

### Persistence

```python
# Save context store
ctx.save("knowledge_base.json")
# Load later
ctx.load("knowledge_base.json")
```

## Vector Store Backends

```python
from dyncontext import DynContext
from dyncontext.stores import InMemoryVectorStore, ChromaVectorStore
# Use in-memory store (default)
ctx = DynContext(
provider="openai",
vector_store=InMemoryVectorStore(),
)
# Use ChromaDB for persistent vector storage
ctx = DynContext(
provider="openai",
vector_store=ChromaVectorStore(
collection_name="my_knowledge",
persist_directory="./chroma_db",
),
)
# Use FAISS for high-performance similarity search
from dyncontext.stores import FAISSVectorStore
ctx = DynContext(
provider="openai",
vector_store=FAISSVectorStore(
dimension=1536, # OpenAI embedding dimension
index_type="flat", # "flat", "ivf", "hnsw", or "pq"
persist_directory="./faiss_index",
metric="cosine", # "cosine", "l2", or "ip"
),
)
# FAISS with approximate search for large datasets
ctx = DynContext(
provider="openai",
vector_store=FAISSVectorStore(
dimension=1536,
index_type="ivf", # Inverted file index for faster search
nlist=100, # Number of clusters
nprobe=10, # Number of clusters to search
),
)
```
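For intuition about `nlist` and `nprobe`: an IVF index clusters the vectors into `nlist` cells at train time, and each query scans only the `nprobe` nearest cells, trading recall for speed. The same setup in raw FAISS (independent of dyncontext) looks like this:

```python
import faiss
import numpy as np

d, nlist = 1536, 100
xb = np.random.rand(10_000, d).astype("float32")   # database vectors

quantizer = faiss.IndexFlatL2(d)                   # coarse quantizer over centroids
index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
index.train(xb)                                    # learn the nlist cluster centroids
index.add(xb)

index.nprobe = 10                                  # scan 10 of the 100 cells per query
distances, ids = index.search(xb[:1], 5)           # higher nprobe: better recall, slower
```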
```python
# Custom vector store - implement the BaseVectorStore interface
from dyncontext.stores import BaseVectorStore

class MyVectorStore(BaseVectorStore):
    def add(self, documents): ...
    def search(self, query_embedding, top_k): ...
    def delete(self, ids): ...
```
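Here is a fuller sketch of a custom in-memory backend using brute-force cosine similarity. It assumes the `VectorDocument` objects passed to `add` expose `id` and `embedding` attributes; check the actual base class for exact signatures before relying on this.

```python
import numpy as np
from dyncontext.stores import BaseVectorStore

class BruteForceVectorStore(BaseVectorStore):
    def __init__(self):
        self._docs = {}  # id -> VectorDocument

    def add(self, documents):
        for doc in documents:
            self._docs[doc.id] = doc

    def search(self, query_embedding, top_k):
        q = np.asarray(query_embedding, dtype="float32")
        scored = []
        for doc in self._docs.values():
            v = np.asarray(doc.embedding, dtype="float32")
            # Cosine similarity between the query and the stored vector
            score = float(q @ v) / (np.linalg.norm(q) * np.linalg.norm(v) + 1e-9)
            scored.append((score, doc))
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [doc for _, doc in scored[:top_k]]

    def delete(self, ids):
        for doc_id in ids:
            self._docs.pop(doc_id, None)
```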
## Reranking

Improve retrieval quality by reranking initial results with a more accurate model.

```python
from dyncontext import DynContext
from dyncontext.reranker import CrossEncoderReranker, CohereReranker, EnsembleReranker
# Use cross-encoder reranking (local model)
ctx = DynContext(
provider="openai",
reranker=CrossEncoderReranker(model_name="cross-encoder/ms-marco-MiniLM-L-6-v2"),
)
# Use Cohere reranking API
ctx = DynContext(
provider="openai",
reranker=CohereReranker(api_key="your-cohere-key"),
)
# Ensemble multiple rerankers
ctx = DynContext(
provider="openai",
reranker=EnsembleReranker([
(CrossEncoderReranker(), 0.6),
(CohereReranker(), 0.4),
]),
)
```
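For background: a cross-encoder scores each (query, document) pair jointly instead of comparing precomputed embeddings, which is slower per pair but usually more accurate, so it is applied only to the top candidates. A standalone illustration with the sentence-transformers library:

```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
candidates = [
    "FastAPI is a modern Python web framework",
    "React is a JavaScript library for building UIs",
]
# Each (query, document) pair is scored jointly by the model
scores = model.predict([("Python web framework", doc) for doc in candidates])
reranked = [doc for _, doc in sorted(zip(scores, candidates), reverse=True)]
```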
## Caching

Cache embeddings, query results, and LLM responses for improved performance.

```python
from dyncontext import DynContext
# Enable all caching
ctx = DynContext(
provider="openai",
enable_embedding_cache=True,
enable_query_cache=True,
enable_response_cache=True,
cache_ttl=3600, # Cache TTL in seconds
cache_max_size=1000, # Max items per cache
)
# Check cache statistics
stats = ctx.get_cache_stats()
print(f"Embedding cache hits: {stats['embedding']['hits']}")
print(f"Query cache hits: {stats['query']['hits']}")
print(f"Response cache hits: {stats['response']['hits']}")
# Clear caches when needed
ctx.clear_caches()
```
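The `cache_ttl` and `cache_max_size` knobs describe a standard TTL-bounded LRU policy: entries expire after `ttl` seconds, and the least recently used entry is evicted once the cache is full. A minimal sketch of that policy (illustrative only, not dyncontext's internal implementation):

```python
import time
from collections import OrderedDict

class TTLCache:
    def __init__(self, max_size=1000, ttl=3600):
        self.max_size, self.ttl = max_size, ttl
        self._data = OrderedDict()  # key -> (expires_at, value)

    def get(self, key):
        entry = self._data.get(key)
        if entry is None or entry[0] < time.monotonic():
            self._data.pop(key, None)   # miss, or expired entry cleaned up
            return None
        self._data.move_to_end(key)     # mark as most recently used
        return entry[1]

    def set(self, key, value):
        self._data[key] = (time.monotonic() + self.ttl, value)
        self._data.move_to_end(key)
        if len(self._data) > self.max_size:
            self._data.popitem(last=False)  # evict least recently used
```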
## Telemetry

Built-in observability for monitoring and debugging.

```python
from dyncontext import DynContext
# Enable telemetry
ctx = DynContext(
provider="openai",
enable_telemetry=True,
)
# Use context normally
ctx.add("Some knowledge")
response = ctx.complete("Query")
# Get metrics
metrics = ctx.get_telemetry_metrics()
print(f"Total retrievals: {metrics['retrievals']}")
print(f"Total completions: {metrics['completions']}")
print(f"Average retrieval time: {metrics['avg_retrieval_time']:.3f}s")
print(f"Total tokens used: {metrics['total_tokens']}")Automatically compress context when it exceeds token limits.
from dyncontext import DynContext
from dyncontext.compression import (
TruncationCompressor,
SentenceCompressor,
LLMCompressor,
HierarchicalCompressor,
AdaptiveCompressor,
)
# Simple truncation (default)
ctx = DynContext(
provider="openai",
compressor=TruncationCompressor(max_tokens=2000),
)
# Sentence-level compression (preserves complete sentences)
ctx = DynContext(
provider="openai",
compressor=SentenceCompressor(max_tokens=2000),
)
# LLM-based summarization compression
ctx = DynContext(
provider="openai",
compressor=LLMCompressor(
model="gpt-3.5-turbo",
target_ratio=0.5, # Compress to 50% of original
),
)
# Hierarchical compression (preserves structure)
ctx = DynContext(
provider="openai",
compressor=HierarchicalCompressor(
levels=["paragraph", "sentence"],
max_tokens=2000,
),
)
# Adaptive compression (automatically chooses strategy)
ctx = DynContext(
provider="openai",
compressor=AdaptiveCompressor(
max_tokens=2000,
prefer_quality=True,
),
)
```

## Configuration

For cleaner configuration, use the `DynContextConfig` dataclass.

```python
from dyncontext import DynContext, DynContextConfig
from dyncontext.stores import ChromaVectorStore
from dyncontext.reranker import CrossEncoderReranker
from dyncontext.compression import AdaptiveCompressor
# Create configuration
config = DynContextConfig(
provider="openai",
model="gpt-4",
max_context_tokens=4000,
vector_store=ChromaVectorStore(collection_name="docs"),
reranker=CrossEncoderReranker(),
enable_embedding_cache=True,
enable_query_cache=True,
enable_response_cache=True,
cache_ttl=3600,
enable_telemetry=True,
compressor=AdaptiveCompressor(max_tokens=4000),
)
# Create context manager from config
ctx = DynContext.from_config(config)
```

## API Reference

### DynContext

Main class for dynamic context management.

```python
DynContext(
provider="openai", # LLM provider: "openai", "anthropic", "litellm"
model="gpt-4", # Model name
embedder="openai", # Embedding provider (or None to disable semantic search)
max_context_tokens=4000, # Max tokens for injected context
system_prompt=None, # Default system prompt
enable_semantic=True, # Enable semantic retrieval
enable_keyword=True, # Enable keyword retrieval
semantic_weight=0.7, # Weight for semantic in hybrid search
keyword_weight=0.3, # Weight for keyword in hybrid search
# New parameters
vector_store=None, # Custom vector store backend
reranker=None, # Reranker for improving retrieval quality
compressor=None, # Context compressor
enable_embedding_cache=False,# Cache embeddings
enable_query_cache=False, # Cache query results
enable_response_cache=False, # Cache LLM responses
cache_ttl=3600, # Cache time-to-live in seconds
cache_max_size=1000, # Maximum items per cache
enable_telemetry=False, # Enable telemetry tracking
)
```
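When both retrievers are enabled, the hybrid score is a weighted blend of the semantic and keyword signals. A sketch of the default 0.7/0.3 blend, assuming both scores have already been normalized to [0, 1] (the exact normalization inside dyncontext may differ):

```python
def hybrid_score(semantic, keyword, semantic_weight=0.7, keyword_weight=0.3):
    """Blend normalized semantic and keyword scores into one ranking signal."""
    return semantic_weight * semantic + keyword_weight * keyword

hybrid_score(semantic=0.9, keyword=0.2)  # 0.69: the semantic match dominates
```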
**Methods:**

- `add(content, context_type, tags, metadata, priority)` - Add content to the store
- `add_many(contents, ...)` - Add multiple items
- `add_document(content, chunk_size, chunk_overlap, tags)` - Add a chunked document
- `complete(prompt, system, top_k, tags, ...)` - Generate a completion with context
- `acomplete(...)` - Async completion
- `stream(...)` - Streaming completion
- `retrieve(query, top_k, tags, ...)` - Manual context retrieval
- `save(path)` / `load(path)` - Persistence
- `clear_history()` - Clear conversation history
- `get_cache_stats()` - Get cache hit/miss statistics
- `clear_caches()` - Clear all caches
- `get_telemetry_metrics()` - Get telemetry metrics
- `from_config(config)` - Create an instance from a `DynContextConfig`

**Properties:**

- `vector_store` - Access the vector store backend
- `reranker` - Access the reranker
- `compressor` - Access the context compressor
### ContextItem

```python
ContextItem(
content="...", # The text content
context_type=ContextType.FACT, # Type of content
tags=["tag1", "tag2"], # Tags for filtering
metadata={"key": "value"}, # Custom metadata
priority=1.0, # Priority for ranking
)
```

### ContextType

- `DOCUMENT` - Long-form documents
- `CHUNK` - Document chunks
- `FACT` - Short factual statements
- `CONVERSATION` - Past conversation turns
- `INSTRUCTION` - System instructions
- `CODE` - Code snippets
- `CUSTOM` - User-defined content
### Retrievers

- `SemanticRetriever` - Embedding-based similarity search
- `KeywordRetriever` - BM25-style keyword matching
- `HybridRetriever` - Combines multiple retrievers
- `RecencyRetriever` - Time-based retrieval
- `TagRetriever` - Tag-based filtering
### Vector Stores

```python
from dyncontext.stores import InMemoryVectorStore, ChromaVectorStore, FAISSVectorStore
# In-memory (default, fast, non-persistent)
InMemoryVectorStore()
# ChromaDB (persistent, scalable)
ChromaVectorStore(
collection_name="my_collection",
persist_directory="./chroma_db",
embedding_function=None, # Uses default if not specified
)
# FAISS (high-performance, supports GPU)
FAISSVectorStore(
dimension=1536, # Vector dimension
index_type="flat", # "flat", "ivf", "hnsw", "pq"
persist_directory="./faiss_index",
use_gpu=False, # True for GPU acceleration
metric="cosine", # "cosine", "l2", "ip"
nlist=100, # Clusters for IVF
nprobe=10, # Search clusters for IVF
)
```

All vector stores implement the `BaseVectorStore` interface:
- `add(documents: List[VectorDocument])` - Add documents
- `search(query_embedding, top_k)` - Search by embedding
- `delete(ids: List[str])` - Delete by IDs
- `get(ids: List[str])` - Retrieve by IDs
- `count()` - Get document count
- `clear()` - Clear all documents
- `persist()` - Save to disk
### Rerankers

```python
from dyncontext.reranker import CrossEncoderReranker, CohereReranker, EnsembleReranker
# Cross-encoder (local, no API calls)
CrossEncoderReranker(
model_name="cross-encoder/ms-marco-MiniLM-L-6-v2",
top_k=10,
)
# Cohere API
CohereReranker(
api_key="...",
model="rerank-english-v2.0",
top_k=10,
)
# Ensemble (combine multiple rerankers)
EnsembleReranker(
rerankers=[(reranker1, weight1), (reranker2, weight2)],
fusion_method="rrf", # or "weighted"
)
```

### Caches

```python
from dyncontext.cache import EmbeddingCache, QueryCache, ResponseCache
# Embedding cache (caches vector embeddings)
EmbeddingCache(max_size=10000, ttl=3600)
# Query cache (caches retrieval results)
QueryCache(max_size=1000, ttl=3600)
# Response cache (caches LLM responses)
ResponseCache(max_size=500, ttl=3600)
```

### Compressors

```python
from dyncontext.compression import (
TruncationCompressor,
SentenceCompressor,
LLMCompressor,
HierarchicalCompressor,
AdaptiveCompressor,
)
# Truncation (simple, fast)
TruncationCompressor(max_tokens=2000, truncate_from="end")
# Sentence-level (preserves sentence boundaries)
SentenceCompressor(max_tokens=2000)
# LLM-based summarization
LLMCompressor(model="gpt-3.5-turbo", target_ratio=0.5)
# Hierarchical (preserves document structure)
HierarchicalCompressor(levels=["paragraph", "sentence"], max_tokens=2000)
# Adaptive (auto-selects best strategy)
AdaptiveCompressor(max_tokens=2000, prefer_quality=True)
```
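How might an adaptive compressor auto-select a strategy? One plausible heuristic (hypothetical, not necessarily dyncontext's actual logic) is to key the choice off the overflow ratio: cheap truncation for small overruns, summarization when the context is far over budget.

```python
def choose_strategy(token_count, max_tokens, prefer_quality=True):
    """Pick a compression strategy based on how far over budget the context is."""
    if token_count <= max_tokens:
        return "none"
    overflow = token_count / max_tokens
    if overflow < 1.2 and not prefer_quality:
        return "truncation"      # barely over: cheap cut from the end
    if overflow < 2.0:
        return "sentence"        # moderately over: drop whole sentences
    return "llm_summarization"   # far over: summarize with a smaller model

choose_strategy(5000, 2000)  # 'llm_summarization' (2.5x over budget)
```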
### Telemetry

```python
from dyncontext.telemetry import TelemetryManager

# Get metrics programmatically
telemetry = TelemetryManager()
telemetry.record_event("retrieval", {"query": "...", "results": 5})
metrics = telemetry.get_metrics()
```

### Environment Variables

- `OPENAI_API_KEY` - OpenAI API key
- `ANTHROPIC_API_KEY` - Anthropic API key
## Custom Embeddings

```python
from dyncontext import DynContext, SentenceTransformerEmbedding
# Use local embeddings (no API calls)
embedder = SentenceTransformerEmbedding(model_name="all-MiniLM-L6-v2")
ctx = DynContext(provider="openai", embedder=embedder)from dyncontext import DynContext, HybridRetriever, SemanticRetriever, KeywordRetriever
retriever = HybridRetriever([
(SemanticRetriever(embedding_fn=my_embed), 0.8),
(KeywordRetriever(), 0.2),
], fusion_method="rrf")
ctx = DynContext(provider="openai", retriever=retriever)Contributions are welcome! Please feel free to submit a Pull Request.
## Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

## License

MIT License - see the LICENSE file for details.