# Token Counter and Chunking Test Notebook

This notebook tests the token counting and chunking functionality for yellhorn-mcp.

In [1]:
# Install dependencies if needed
# !pip install tiktoken

In [2]:
import sys
import os
# Put the current working directory at the front of the module search path so
# it is searched first when importing `yellhorn_mcp` below
# (presumably the notebook is launched from the repository root — TODO confirm).
sys.path.insert(0, os.path.abspath('.'))

from yellhorn_mcp.token_counter import TokenCounter
from yellhorn_mcp.llm_manager import LLMManager, ChunkingStrategy

## Test Token Counter

In [5]:
# Counter instance shared by the remaining cells of this notebook
tc = TokenCounter()

# Three-line sample repeated 10,000 times to produce a large input
test_text = """This is a test sentence to count tokens. 
Let's see how many tokens this text contains for different models.
We'll test with both OpenAI and Gemini models.""" * 10000

# Models to compare
models = [
    "gpt-4o",
    "gpt-4o-mini",
    "o4-mini",
    "gemini-2.5-pro-preview-05-06",
]

print("Token counts for test text:")
print(f"Text length: {len(test_text)} characters\n")

for model in models:
    # Query the counter in a fixed order: token count, context limit, fit check
    token_count = tc.count_tokens(test_text, model)
    limit = tc.get_model_limit(model)
    fits = tc.can_fit_in_context(test_text, model)

    # The trailing empty entry yields the blank separator line between models
    report = [
        f"{model}:",
        f"  Tokens: {token_count}",
        f"  Model limit: {limit:,}",
        f"  Can fit: {fits}",
        "",
    ]
    print("\n".join(report))

Token counts for test text:
Text length: 1550000 characters

gpt-4o:
  Tokens: 310001
  Model limit: 128,000
  Can fit: False

gpt-4o-mini:
  Tokens: 310001
  Model limit: 128,000
  Can fit: False

o4-mini:
  Tokens: 310001
  Model limit: 65,000
  Can fit: False

gemini-2.5-pro-preview-05-06:
  Tokens: 330001
  Model limit: 1,048,576
  Can fit: True



## Test Chunking Strategy

In [None]:
# Create a longer text that needs chunking
long_text = """This is the first sentence of our test document. It contains multiple sentences that will be split into chunks. 
Each chunk should maintain context from the previous chunk through overlap.

This is a new paragraph. Paragraphs provide natural boundaries for splitting text. 
When we split by paragraphs, we try to keep related content together.

Here's another paragraph with more content. The chunking strategy should handle both sentence-level 
and paragraph-level splitting depending on the configuration. This ensures flexibility in how we process different types of content.

Let's add even more content to ensure we exceed token limits. This will force the chunking mechanism to activate. 
We want to see how well it preserves context across chunk boundaries. The overlap feature is crucial for maintaining 
continuity when processing long documents.
""" * 10  # Repeat to make it longer

model = "gpt-4o-mini"
max_tokens = 500  # Small limit to force chunking


def preview_chunks(chunks, counter, model_name, max_chunks=3, max_chars=200):
    """Print how many chunks were produced and a short preview of the first few.

    Args:
        chunks: Sliceable collection of text chunks to summarise.
        counter: Object exposing ``count_tokens(text, model)``.
        model_name: Model identifier forwarded to ``counter.count_tokens``.
        max_chunks: Number of leading chunks to preview.
        max_chars: Chunks longer than this are cut off and suffixed with "...".
    """
    print(f"Number of chunks: {len(chunks)}")
    for number, chunk in enumerate(chunks[:max_chunks], start=1):
        token_count = counter.count_tokens(chunk, model_name)
        print(f"\nChunk {number} ({token_count} tokens):")
        # Parenthesised so the truncate-or-keep choice is explicit
        print((chunk[:max_chars] + "...") if len(chunk) > max_chars else chunk)


# Test sentence-based chunking
print("\n=== Sentence-based Chunking ===")
sentence_chunks = ChunkingStrategy.split_by_sentences(
    long_text, max_tokens, tc, model, overlap_ratio=0.1
)
preview_chunks(sentence_chunks, tc, model)

# Test paragraph-based chunking
print("\n\n=== Paragraph-based Chunking ===")
para_chunks = ChunkingStrategy.split_by_paragraphs(
    long_text, max_tokens, tc, model, overlap_ratio=0.1
)
preview_chunks(para_chunks, tc, model)

## Test Edge Cases

In [None]:
# Test empty text
print("Empty text tokens:", tc.count_tokens("", "gpt-4o"))

# Test very long single word (10,000 characters, no whitespace)
long_word = "a" * 10000
print(f"\nVery long word ({len(long_word)} chars):", tc.count_tokens(long_word, "gpt-4o"))

# Test special characters and emojis.
# The emojis (waving hand U+1F44B, globe U+1F30D) are written as escapes so the
# literal stays intact even if the notebook file is re-encoded.
special_text = "Hello \U0001F44B World! \U0001F30D Special chars: @#$%^&*() \n\t\r"
print("\nSpecial characters tokens:", tc.count_tokens(special_text, "gpt-4o"))

# Test unknown model (should use default)
print("\nUnknown model limit:", tc.get_model_limit("unknown-model-xyz"))
print("Unknown model tokens:", tc.count_tokens("test", "unknown-model-xyz"))

## Test Token Estimation

In [None]:
# Prompts of increasing length fed to the response-size estimator
prompts = [
    "Short prompt",
    "This is a medium length prompt that contains more context and information for the model to process.",
    "A very long prompt " * 100,
]

print("Response Token Estimation:")
for prompt in prompts:
    prompt_tokens = tc.count_tokens(prompt, "gpt-4o")
    response_estimate = tc.estimate_response_tokens(prompt, "gpt-4o")

    # One line per figure; the leading "\n" separates consecutive prompts
    summary = (
        f"\nPrompt tokens: {prompt_tokens}",
        f"Estimated response tokens: {response_estimate}",
        f"Total estimated: {prompt_tokens + response_estimate}",
    )
    print(*summary, sep="\n")

## Performance Test

In [None]:
import time

# Test token counting performance
test_sizes = [100, 1000, 10000, 50000]
unit = "Hello world. "  # Repeated to build inputs of roughly the requested size
print("Token Counting Performance:")

for size in test_sizes:
    text = unit * (size // len(unit))  # Approximately 'size' characters

    # perf_counter is the high-resolution clock intended for measuring short
    # intervals; time.time() can report 0.0 elapsed for fast calls.
    start = time.perf_counter()
    tokens = tc.count_tokens(text, "gpt-4o")
    duration = time.perf_counter() - start

    print(f"\nText size: {len(text):,} chars")
    print(f"Tokens: {tokens:,}")
    print(f"Time: {duration:.3f} seconds")
    # Guard the division so an unmeasurably fast call cannot raise ZeroDivisionError
    if duration > 0:
        print(f"Speed: {len(text) / duration:,.0f} chars/second")
    else:
        print("Speed: n/a (elapsed time below timer resolution)")