In [1]:
import tiktoken
from typing import List

enc = tiktoken.get_encoding("o200k_base")

In [2]:
# call tool. for example the firecrawl tool
import requests

# Define the API endpoint and parameters
url = "http://localhost:8084/scrape"
params = {
    "url": "https://www.espn.com/nba/scoreboard",
    "formats": ["markdown"],
    "force_fetch": False,
}

# Make the GET request
response = requests.get(url, params=params)

# Check if request was successful
if response.status_code == 200:
    # Get the markdown content
    result = response.json()
    markdown_content = result.get("markdown", "")
    print("Scraped content:")
    print(markdown_content)
else:
    print(f"Error: {response.status_code}")
    print(response.text)

Scraped content:
![](<Base64-Image-Removed>)

Skip to main contentSkip to navigation

Top EventsNBANCAAMNCAAWNHLTop SoccerMLBUFCPGA TourLPGA TourNBA G LeagueNCAA SoftballNCAA BaseballTennis (M)Tennis (W)Men's NCAA Ice HockeyPLL (Lacrosse)NFLNCAAFWNBAWomen's NCAA Ice HockeyNASCARNBA

Feb 15Feb 18Feb 19Feb 19

[Gamecast](https://www.espn.com/nba/game/_/gameId/401748704/hornets-lakers) [Tickets](https://www.vividseats.com/los-angeles-lakers-tickets-cryptocom-arena-1-9-2025--sports-nba-basketball/production/5159010?wsUser=717&wsVar=us~NBA~scoreboard,NBA,en)

[- ![](https://a.espncdn.com/combiner/i?img=/i/teamlogos/nba/500/scoreboard/cha.png&w=32&h=32&scale=crop&cquality=40&location=origin)\\
\\
\\
\\
\\
CHA\\
\\
\\
\\
\\
\\
13-39\\
\\
- ![](https://a.espncdn.com/combiner/i?img=/i/teamlogos/nba/500/scoreboard/lal.png&w=32&h=32&scale=crop&cquality=40&location=origin)\\
\\
\\
\\
\\
LAL\\
\\
\\
\\
\\
\\
32-20\\
\\
\\
Wed\\
\\
5:00 PM](https://www.espn.com/nba/game/_/gameId/401748704/hornets-la

In [3]:
len(markdown_content)

7437

In [4]:
len(enc.encode(markdown_content))


2476

In [5]:
def chunk_markdown_content(markdown_content: str, chunk_size: int = 200) -> List[str]:
    """
    Split markdown content into chunks while trying to preserve content boundaries.
    Cleans up content to reduce token usage.
    """
    # Initialize tokenizer
    enc = tiktoken.get_encoding("cl100k_base")

    # Clean up the content first
    # Replace multiple backslashes with a single newline
    cleaned_content = markdown_content.replace("\\\\", "\n")

    # Remove multiple newlines and excessive spaces
    cleaned_content = "\n".join(
        line.strip() for line in cleaned_content.splitlines() if line.strip()
    )

    # Split content into sections based on meaningful boundaries
    # Using headers, links, or other markdown elements as split points
    sections = []
    current_section = []

    for line in cleaned_content.splitlines():
        # Start new section on headers, links, or after significant content
        if (
            line.startswith("#")
            or line.startswith("[")
            or line.startswith("!")
            or len("".join(current_section)) > 500
        ):  # Character-based backup split
            if current_section:
                sections.append("\n".join(current_section))
                current_section = []
        current_section.append(line)

    # Add the last section if it exists
    if current_section:
        sections.append("\n".join(current_section))

    # Now chunk based on tokens
    chunks = []
    current_chunk = []
    current_chunk_tokens = 0

    for section in sections:
        section_tokens = enc.encode(section)
        section_token_count = len(section_tokens)

        if section_token_count > chunk_size:
            # If current section is too large, split it into smaller pieces
            words = section.split()
            temp_chunk = []
            temp_tokens = 0

            for word in words:
                word_tokens = len(enc.encode(word + " "))
                if temp_tokens + word_tokens > chunk_size:
                    if temp_chunk:
                        chunks.append(" ".join(temp_chunk))
                    temp_chunk = [word]
                    temp_tokens = word_tokens
                else:
                    temp_chunk.append(word)
                    temp_tokens += word_tokens

            if temp_chunk:
                chunks.append(" ".join(temp_chunk))

        elif current_chunk_tokens + section_token_count > chunk_size:
            chunks.append("\n".join(current_chunk))
            current_chunk = [section]
            current_chunk_tokens = section_token_count
        else:
            current_chunk.append(section)
            current_chunk_tokens += section_token_count

    if current_chunk:
        chunks.append("\n".join(current_chunk))

    return chunks

In [6]:
chunks = chunk_markdown_content(markdown_content)
print(f"Number of chunks: {len(chunks)}")

# Create the chunked content with markers
markdown_content_chunked = ""
for i, chunk in enumerate(chunks, 1):
    markdown_content_chunked += f"\n<start chunk {i}>\n"
    markdown_content_chunked += chunk
    markdown_content_chunked += f"\n<end chunk {i}>\n"

print("Total tokens in content:", len(enc.encode(markdown_content)))
print("Total tokens in chunked content:", len(enc.encode(markdown_content_chunked)))

Number of chunks: 15
Total tokens in content: 2476
Total tokens in chunked content: 2565


In [7]:
print(markdown_content_chunked)


<start chunk 1>
![](<Base64-Image-Removed>)
Skip to main contentSkip to navigation
Top EventsNBANCAAMNCAAWNHLTop SoccerMLBUFCPGA TourLPGA TourNBA G LeagueNCAA SoftballNCAA BaseballTennis (M)Tennis (W)Men's NCAA Ice HockeyPLL (Lacrosse)NFLNCAAFWNBAWomen's NCAA Ice HockeyNASCARNBA
Feb 15Feb 18Feb 19Feb 19
[Gamecast](https://www.espn.com/nba/game/_/gameId/401748704/hornets-lakers) [Tickets](https://www.vividseats.com/los-angeles-lakers-tickets-cryptocom-arena-1-9-2025--sports-nba-basketball/production/5159010?wsUser=717&wsVar=us~NBA~scoreboard,NBA,en)
<end chunk 1>

<start chunk 2>
[- ![](https://a.espncdn.com/combiner/i?img=/i/teamlogos/nba/500/scoreboard/cha.png&w=32&h=32&scale=crop&cquality=40&location=origin)
CHA
13-39
- ![](https://a.espncdn.com/combiner/i?img=/i/teamlogos/nba/500/scoreboard/lal.png&w=32&h=32&scale=crop&cquality=40&location=origin)
LAL
32-20
Wed
5:00 PM](https://www.espn.com/nba/game/_/gameId/401748704/hornets-lakers)
[Full Scoreboard »](https://www.espn.com/nba/sco

In [8]:
for i in chunks:
    print(len(enc.encode(i)))


189
167
191
159
37
167
170
183
135
135
140
190
146
188
182


In [9]:
print(
    'The ESPN NBA scoreboard currently highlights an upcoming game between the Charlotte Hornets and the Los Angeles Lakers. The game is scheduled for Wednesday at 5:00 PM. The Hornets have a record of 13-39, while the Lakers stand at 32-20. The game will take place at the crypto.com Arena in Los Angeles, CA, and has been rescheduled from January 9. The spread is LAL -13.5 with a total of 224.5 points.\n\nKey players to watch include:\n- M. Bridges from the Hornets, averaging 19.5 PPG, 7.7 RPG, and 3.8 APG.\n- L. James from the Lakers, averaging 24.3 PPG, 7.7 RPG, and 9.0 APG.\n\nFor more details, you can check the [Gamecast](https://www.espn.com/nba/game/_/gameId/401748704/hornets-lakers) or purchase [tickets](https://www.vividseats.com/los-angeles-lakers-tickets-cryptocom-arena-1-9-2025--sports-nba-basketball/production/5159010?wsUser=717&wsVar=us~nba~scoreboard,nba,en).\n\n```json\n{\n  "retain_chunks": {\n    "tool_call_id": "scrape_urls",\n    "chunkIds": [\n      "chunk_1",\n      "chunk_2",\n      "chunk_3",\n      "chunk_4",\n      "chunk_5",\n      "chunk_6",\n      "chunk_7",\n      "chunk_8"\n    ]\n  }\n}\n```'
)

The ESPN NBA scoreboard currently highlights an upcoming game between the Charlotte Hornets and the Los Angeles Lakers. The game is scheduled for Wednesday at 5:00 PM. The Hornets have a record of 13-39, while the Lakers stand at 32-20. The game will take place at the crypto.com Arena in Los Angeles, CA, and has been rescheduled from January 9. The spread is LAL -13.5 with a total of 224.5 points.

Key players to watch include:
- M. Bridges from the Hornets, averaging 19.5 PPG, 7.7 RPG, and 3.8 APG.
- L. James from the Lakers, averaging 24.3 PPG, 7.7 RPG, and 9.0 APG.

For more details, you can check the [Gamecast](https://www.espn.com/nba/game/_/gameId/401748704/hornets-lakers) or purchase [tickets](https://www.vividseats.com/los-angeles-lakers-tickets-cryptocom-arena-1-9-2025--sports-nba-basketball/production/5159010?wsUser=717&wsVar=us~nba~scoreboard,nba,en).

```json
{
  "retain_chunks": {
    "tool_call_id": "scrape_urls",
    "chunkIds": [
      "chunk_1",
      "chunk_2",
      