In [1]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.readers.wikipedia import WikipediaReader
from rich import print as rprint

In [2]:
# Load the Wikipedia page that every splitter below is run against.
reader = WikipediaReader()
documents = reader.load_data(
    pages=["Glossary of artificial intelligence"],
)

## Node splitters

In [3]:
from llama_index.core.schema import MetadataMode

### TokenTextSplitter

In [4]:
from llama_index.core.node_parser import TokenTextSplitter

# Token-based splitting: chunk_size=256 with a chunk_overlap of 20,
# using a single space as the separator.
splitter = TokenTextSplitter(chunk_size=256, chunk_overlap=20, separator=" ")
nodes = splitter.get_nodes_from_documents(documents)

# Cursor into `nodes`; the next cell prints nodes[i] and then advances it.
i = 0

In [5]:
# Print the current node's content using MetadataMode.LLM, then advance the
# cursor so that re-running this cell steps through the nodes one by one.
rprint(nodes[i].get_content(metadata_mode=MetadataMode.LLM))
i += 1

### SentenceSplitter

In [6]:
from llama_index.core.node_parser import SentenceSplitter

# Sentence-based splitting with the same chunk_size / chunk_overlap as the
# token splitter above; all other SentenceSplitter options stay at defaults.
node_parser = SentenceSplitter(
    chunk_size=256,
    chunk_overlap=20,
)
nodes = node_parser.get_nodes_from_documents(documents)

# Reset the cursor used by the printing cell that follows.
i = 0

In [7]:
# Print the current node's content using MetadataMode.EMBED, then advance the
# cursor so that re-running this cell steps through the nodes one by one.
rprint(nodes[i].get_content(metadata_mode=MetadataMode.EMBED))
i += 1

#### Improvement to sentence splitter


In [8]:
# Same chunk_size / chunk_overlap as before, but with a blank line ("\n\n")
# passed as both `separator` and `paragraph_separator`.
node_parser = SentenceSplitter(
    chunk_size=256,
    chunk_overlap=20,
    separator="\n\n",
    paragraph_separator="\n\n",
)
nodes = node_parser.get_nodes_from_documents(documents)

# Reset the cursor used by the printing cell that follows.
i = 0

In [9]:
# Print the current node's content using MetadataMode.EMBED, then advance the
# cursor so that re-running this cell steps through the nodes one by one.
rprint(nodes[i].get_content(metadata_mode=MetadataMode.EMBED))
i += 1

Other node parsers are listed here: https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/modules/