# Example of using sentence splitter chunking
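Compare splitting_1.txt (TokenTextSplitter) with splitting_2.txt (SentenceSplitter) and splitting_3.txt (NLTKTextSplitter) to see how each splitter chunks the same document.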

In [None]:
from llama_index.node_parser import TokenTextSplitter, LangchainNodeParser
from llama_index import SimpleDirectoryReader, Document, get_tokenizer
from langchain.text_splitter import (
    NLTKTextSplitter,
    SpacyTextSplitter,
    RecursiveCharacterTextSplitter,
)

# Take the first document returned for the "data" directory and chunk its text
# with a TokenTextSplitter built with default settings.
document = SimpleDirectoryReader("data").load_data()[0]
text_parser = TokenTextSplitter()  # use default settings
text_chunks = text_parser.split_text(document.text)
doc_chunks = [Document(text=t) for t in text_chunks]
tokenizer = get_tokenizer()

# Write every chunk preceded by a separator, its index and its token count.
# The encoding is pinned to UTF-8 so that non-ASCII text is written the same
# way on every platform instead of depending on the locale's default encoding.
with open("splitting_1.txt", "w", encoding="utf-8") as f:
    for idx, doc in enumerate(doc_chunks):
        f.write(
            "\n-------\n\n{}. Size: {} tokens\n".format(idx, len(tokenizer(doc.text)))
            + doc.text
        )

from llama_index.node_parser import SentenceSplitter

# Chunk the same document text again, this time with a SentenceSplitter built
# with default settings.
sentence_parser = SentenceSplitter()
text_chunks = sentence_parser.split_text(document.text)
doc_chunks = [Document(text=t) for t in text_chunks]

# Same report layout as splitting_1.txt so the two files can be diffed.
# The encoding is pinned to UTF-8 so that non-ASCII text is written the same
# way on every platform instead of depending on the locale's default encoding.
with open("splitting_2.txt", "w", encoding="utf-8") as f:
    for idx, doc in enumerate(doc_chunks):
        f.write(
            "\n-------\n\n{}. Size: {} tokens\n".format(idx, len(tokenizer(doc.text)))
            + doc.text
        )

# Chunk the same document text with LangChain's NLTKTextSplitter, wrapped in
# LangchainNodeParser.
nltk_parser = LangchainNodeParser(NLTKTextSplitter())
text_chunks = nltk_parser.split_text(document.text)
doc_chunks = [Document(text=t) for t in text_chunks]

# `tokenizer` is reused from the first split earlier in this cell; there is no
# need to fetch it again.
# The encoding is pinned to UTF-8 so that non-ASCII text is written the same
# way on every platform instead of depending on the locale's default encoding.
with open("splitting_3.txt", "w", encoding="utf-8") as f:
    for idx, doc in enumerate(doc_chunks):
        f.write(
            "\n-------\n\n{}. Size: {} tokens\n".format(idx, len(tokenizer(doc.text)))
            + doc.text
        )

## Testing with Chinese

In [None]:
from llama_index.text_splitter import SentenceSplitter
from llama_index.schema import Document
from llama_index.indices.service_context import ServiceContext
from llama_index.indices.vector_store import VectorStoreIndex
import wikipedia

In [None]:
# Switch the wikipedia client to the "zh" language edition and grab the text
# content of the page looked up for "美国".
wikipedia.set_lang("zh")
page = wikipedia.page("美国", auto_suggest=True).content

# Split the fetched text with a default SentenceSplitter; as the last
# expression of the cell, the resulting chunks are shown as the cell output.
sentence_parser = SentenceSplitter()
sentence_parser.split_text(page)

In [None]:
# Build a vector index over the fetched page, with a default SentenceSplitter
# supplied as the service context's text splitter.
text_splitter = SentenceSplitter()
service_context = ServiceContext.from_defaults(text_splitter=text_splitter)

# The page text is the only document in the index.
documents = [Document(text=page)]
index = VectorStoreIndex.from_documents(documents, service_context=service_context)