In [1]:
from pathlib import Path
from langchain_text_splitters import (
    Language,
    RecursiveCharacterTextSplitter,
)

In [2]:
# List the string value of every member of the Language enum.
[lang.value for lang in Language]

['cpp',
 'go',
 'java',
 'kotlin',
 'js',
 'ts',
 'php',
 'proto',
 'python',
 'rst',
 'ruby',
 'rust',
 'scala',
 'swift',
 'markdown',
 'latex',
 'html',
 'sol',
 'csharp',
 'cobol',
 'c',
 'lua',
 'perl',
 'haskell',
 'elixir',
 'powershell',
 'visualbasic6']

In [3]:
# Display the separator list that the splitter associates with Python source.
RecursiveCharacterTextSplitter.get_separators_for_language(Language.PYTHON)

['\nclass ', '\ndef ', '\n\tdef ', '\n\n', '\n', ' ', '']

### Python

In [None]:
# Read the Python example source. The encoding is given explicitly so the
# decoded text does not depend on the platform's default locale encoding.
file_path = Path("merkle_tree.py")
python_code = file_path.read_text(encoding="utf-8")

# Build a splitter from the Python language settings, with a chunk size of
# 1000 and no overlap between consecutive chunks.
py_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, chunk_size=1000, chunk_overlap=0
)

# Split the source text into Document objects and display them.
py_docs = py_splitter.create_documents([python_code])
py_docs

[Document(metadata={}, page_content='import argparse\nfrom datetime import datetime\nimport hashlib\nimport json\nfrom pathlib import Path\nfrom typing import Dict, List, Tuple\n\nfrom pymerkle import InmemoryTree as MerkleTree\nfrom pymerkle.hasher import MerkleHasher\n\n\ndef file_digest(file_path: Path):\n    return hashlib.sha256(file_path.read_bytes()).digest()\n\n\ndef build_snapshot(root: Path) -> Tuple[MerkleTree, Dict[str, bytes]]:\n    files = [p for p in root.rglob("*") if p.is_file() and ".git" not in p.parts]\n    files.sort(key=lambda p: str(p.relative_to(root)))\n\n    files_map: Dict[str, bytes] = {}\n    tree = MerkleTree()\n\n    for path in files:\n        relative_path = str(path.relative_to(root))\n        digest = file_digest(path)\n\n        files_map[relative_path] = digest\n\n        # incorporate both path and digest into the leaf hash\n        leaf_payload = relative_path.encode() + b"::" + digest\n\n        tree.append_entry(leaf_payload)\n\n    return tree,

### TypeScript

In [5]:
# Display the separator list that the splitter associates with TypeScript source.
RecursiveCharacterTextSplitter.get_separators_for_language(Language.TS)

['\nenum ',
 '\ninterface ',
 '\nnamespace ',
 '\ntype ',
 '\nclass ',
 '\nfunction ',
 '\nconst ',
 '\nlet ',
 '\nvar ',
 '\nif ',
 '\nfor ',
 '\nwhile ',
 '\nswitch ',
 '\ncase ',
 '\ndefault ',
 '\n\n',
 '\n',
 ' ',
 '']

In [7]:
# Read the TypeScript example source. The encoding is given explicitly so the
# decoded text does not depend on the platform's default locale encoding.
ts_file_path = Path("examples/circular_queue.ts")
ts_code = ts_file_path.read_text(encoding="utf-8")

# Build a splitter from the TypeScript language settings, with a chunk size of
# 1000 and no overlap between consecutive chunks.
ts_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.TS, chunk_size=1000, chunk_overlap=0
)

# Split the source text into Document objects and display them.
ts_docs = ts_splitter.create_documents([ts_code])
ts_docs

[Document(metadata={}, page_content='/**\n * Circular Queue implementation using array.\n *\n * @template T The type of the elements in the queue.\n * @param {T[]} queue The array that holds the elements of the queue.\n * @param {number} frontIndex The index of the front element of the queue.\n * @param {number} rearIndex The index of the rear element of the queue.\n * @param {number} size The size of the queue.\n */\nexport class CircularQueue<T> {\n  private queue: T[];\n  private frontIndex: number;\n  private rearIndex: number;\n  private size: number;\n\n  constructor(size: number) {\n    this.queue = new Array(size);\n    this.frontIndex = -1;\n    this.rearIndex = -1;\n    this.size = size;\n  }'),
 Document(metadata={}, page_content='/**\n   * Adds an item to the queue.\n   *\n   * @param item The item being added to the queue.\n   */\n  enqueue(item: T): void {\n    if (\n      (this.frontIndex == 0 && this.rearIndex == this.size - 1) ||\n      this.rearIndex == (this.frontInd

### React (TSX)

In [10]:
# Read the TSX example source. The encoding is given explicitly so the
# decoded text does not depend on the platform's default locale encoding.
tsx_file_path = Path("examples/page.tsx")
tsx_code = tsx_file_path.read_text(encoding="utf-8")

# No separate TSX entry appears among the Language values, so the TypeScript
# settings are applied to the .tsx file (chunk size 1000, no overlap).
tsx_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.TS, chunk_size=1000, chunk_overlap=0
)

# Split the source text into Document objects and display them.
tsx_docs = tsx_splitter.create_documents([tsx_code])
tsx_docs

[Document(metadata={}, page_content='import Image from "next/image";\nimport { socialLinks } from "./lib/config";'),
 Document(metadata={}, page_content='export default function Page() {\n  return (\n    <section>\n      <a href={socialLinks.twitter} target="_blank">\n        <Image\n          src="/profile.png"\n          alt="Profile photo"\n          className="rounded-full bg-gray-100 block lg:mt-5 mt-0 lg:mb-5 mb-10 mx-auto sm:float-right sm:ml-5 sm:mb-5 grayscale hover:grayscale-0"\n          unoptimized\n          width={160}\n          height={160}\n          priority\n        />\n      </a>\n      <h1 className="mb-8 text-2xl font-medium">Portfolio template!</h1>\n      <div className="prose prose-neutral dark:prose-invert">\n        <p>\n          A clean, fast, and lightweight portfolio template built with Next.js,\n          Vercel, and Tailwind CSS.\n        </p>\n        <p>\n          Nextfolio has everything you need for a portfolio: MDX blog, SEO, RSS,\n          Atom 