In [3]:
# Copyright (c) 2026 Microsoft Corporation.
# Licensed under the MIT License.

## Example: using the chunker factory via the helper utility


The `create_chunker` factory function takes a configuration-driven approach to instantiating chunkers: it accepts a `ChunkingConfig` object that specifies the chunking strategy and its parameters. Separating chunker configuration from direct instantiation makes the code more flexible and maintainable.

In [4]:
import tiktoken
from graphrag_chunking.chunker_factory import create_chunker
from graphrag_chunking.chunking_config import ChunkingConfig

# Tokenizer whose encode/decode callables are handed to the factory below.
tokenizer = tiktoken.get_encoding("o200k_base")

# Declarative chunking settings: token strategy, 3 tokens per chunk, no overlap.
config = ChunkingConfig(strategy="tokens", size=3, overlap=0)  # type: ignore

# Build the chunker from the config rather than instantiating a class directly.
chunker = create_chunker(config, tokenizer.encode, tokenizer.decode)

# Chunk a short sample and print the resulting list of chunks.
sample_text = "This is a test. Another sentence. And another one."
chunks = chunker.chunk(sample_text)
print(chunks)

[TextChunk(original='This is a', text='This is a', index=0, start_char=0, end_char=8, token_count=3), TextChunk(original=' test. Another', text=' test. Another', index=1, start_char=9, end_char=22, token_count=3), TextChunk(original=' sentence. And', text=' sentence. And', index=2, start_char=23, end_char=36, token_count=3), TextChunk(original=' another one.', text=' another one.', index=3, start_char=37, end_char=49, token_count=3)]
