<center>
<h3></h3>
<img src="https://github.com/always-further/deepfabric/raw/main/assets/logo-light.png" width="20%">
<h3>DeepFabric Dataset Compatibility Checker</h3>
<!-- CTA Buttons -->
<p>
  <a href="https://github.com/always-further/deepfabric/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22">
    <img src="https://img.shields.io/badge/Contribute-Good%20First%20Issues-green?style=for-the-badge&logo=github" alt="Good First Issues"/>
  </a>
  &nbsp;
  <a href="https://discord.gg/pPcjYzGvbS">
    <img src="https://img.shields.io/badge/Chat-Join%20Discord-7289da?style=for-the-badge&logo=discord&logoColor=white" alt="Join Discord"/>
  </a>
</p>
<!-- Badges -->
<p>
  <a href="https://opensource.org/licenses/Apache-2.0">
    <img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="License"/>
  </a>
  <a href="https://github.com/always-further/deepfabric/actions/workflows/test.yml">
    <img src="https://github.com/always-further/deepfabric/actions/workflows/test.yml/badge.svg" alt="CI Status"/>
  </a>
  <a href="https://pypi.org/project/deepfabric/">
    <img src="https://img.shields.io/pypi/v/deepfabric.svg" alt="PyPI Version"/>
  </a>
  <a href="https://pepy.tech/project/deepfabric">
    <img src="https://static.pepy.tech/badge/deepfabric" alt="Downloads"/>
  </a>
  <a href="https://discord.gg/pPcjYzGvbS">
    <img src="https://img.shields.io/discord/1384081906773131274?color=7289da&label=Discord&logo=discord&logoColor=white" alt="Discord"/>
  </a>
</p>
</center>

Test if your DeepFabric dataset is compatible with a model's chat template.

If you run into problems, open an issue on the [DeepFabric GitHub](https://github.com/always-further/deepfabric/issues/new/) or ask for help on the [DeepFabric Discord](https://discord.gg/pPcjYzGvbS).

In [13]:
# Configuration
# Model identifier passed to `AutoTokenizer.from_pretrained`; its chat template is what gets tested.
model_name = "Qwen/Qwen3-4B-Thinking-2507"
# Dataset identifier passed to `load_dataset`.
dataset_name = "alwaysfurther/coding-test-dataset"
# Split of the dataset to load.
dataset_split = "train"

In [14]:
from datasets import load_dataset
from transformers import AutoTokenizer

# Load the configured dataset split and report how many rows it has.
print(f"Loading dataset: {dataset_name}")
dataset = load_dataset(dataset_name, split=dataset_split)
print(f"Loaded {len(dataset)} rows")

# Load the tokenizer for the configured model.
# NOTE(review): trust_remote_code=True allows custom code shipped with the model
# repository to run locally - only use model ids you trust, and confirm the flag
# is actually required for the model being tested.
print(f"\nLoading tokenizer: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
print("Tokenizer loaded")

Loading dataset: alwaysfurther/coding-test-dataset
Loaded 40 rows

Loading tokenizer: Qwen/Qwen3-4B-Thinking-2507
Tokenizer loaded


In [None]:
import json
import random


def clean_message(msg):
    """Return a copy of ``msg`` without ``None`` values or an empty ``tool_calls`` list.

    The input mapping is not modified; key order of the kept entries is preserved.
    """

    def should_keep(field, val):
        # Nulls are dropped for every field; an empty list is dropped only
        # when it sits under the "tool_calls" key.
        if val is None:
            return False
        return not (field == "tool_calls" and val == [])

    return {field: val for field, val in msg.items() if should_keep(field, val)}


def format_messages(messages):
    """Clean every message in ``messages`` and return them as a new list."""
    cleaned = []
    for message in messages:
        cleaned.append(clean_message(message))
    return cleaned


def quick_compatibility_test(dataset, tokenizer, sample_size=10, seed=None):
    """Sample rows and check that the tokenizer's chat template renders them.

    Args:
        dataset: Indexable collection of rows; each row is read with ``.get``
            for the ``"messages"`` and ``"tools"`` keys.
        tokenizer: Object exposing ``apply_chat_template``.
        sample_size: Maximum number of rows to sample (capped at ``len(dataset)``).
        seed: Optional seed for a private random generator so the same rows are
            sampled on every run. When ``None`` the module-level ``random``
            state is used, exactly as before this parameter existed.

    Returns:
        Tuple ``(passed, failed, errors)``. ``errors`` holds at most three
        ``(row_index, message)`` pairs with distinct messages; each message is
        the exception type name plus the first 100 characters of its text.
    """
    rng = random if seed is None else random.Random(seed)
    total = len(dataset)
    indices = rng.sample(range(total), min(sample_size, total))

    passed = 0
    failed = 0
    errors = []

    for idx in indices:
        row = dataset[idx]
        messages = row.get("messages", [])
        tools = row.get("tools")
        # Only forward `tools` when the row actually defines some.
        template_kwargs = {"tools": tools} if tools else {}

        try:
            formatted = format_messages(messages)
            tokenizer.apply_chat_template(formatted, tokenize=False, **template_kwargs)
            passed += 1
        except Exception as exc:
            # Any failure while formatting or rendering counts against the row.
            failed += 1
            if len(errors) < 3:  # Keep first 3 unique errors
                err_msg = f"{type(exc).__name__}: {str(exc)[:100]}"
                if all(err_msg != seen for _, seen in errors):
                    errors.append((idx, err_msg))

    return passed, failed, errors


# Run quick test
banner = "=" * 60
print(banner)
print("QUICK COMPATIBILITY TEST")
print(banner)
print(f"Model:   {model_name}")
print(f"Dataset: {dataset_name}")
print()

passed, failed, errors = quick_compatibility_test(dataset, tokenizer, sample_size=20)

# Report failures first; a clean run falls through to the PASS line.
if failed:
    print(f"FAIL - {failed}/{passed+failed} sampled rows failed")
    print()
    print("Errors encountered:")
    for idx, err in errors:
        print(f"  Row {idx}: {err}")
else:
    print(f"PASS - All {passed} sampled rows work with this chat template")

print(banner)

QUICK COMPATIBILITY TEST
Model:   Qwen/Qwen3-4B-Thinking-2507
Dataset: alwaysfurther/coding-test-dataset

PASS - All 20 sampled rows work with this chat template


---
## Detailed Analysis

Run the cells below for more detailed diagnostics.

In [None]:
def analyze_dataset(dataset):
    """Analyze dataset structure and capabilities, print a summary, return the counts.

    Args:
        dataset: Sized iterable of rows exposing ``column_names``; each row is
            read with ``.get`` for ``"messages"``, ``"tools"`` and ``"reasoning"``.

    Returns:
        Dict of counters. ``messages``, ``tools``, ``tool_calls``, ``reasoning``
        and ``thinking_tags`` count rows (each row contributes at most one to
        each key, and the keys are independent of one another).
        ``empty_tool_calls`` counts assistant messages whose ``tool_calls`` is
        an empty list.
    """
    total = len(dataset)

    counts = {
        "messages": 0,
        "tools": 0,
        "tool_calls": 0,
        "reasoning": 0,
        "thinking_tags": 0,
        "empty_tool_calls": 0,
    }

    for row in dataset:
        messages = row.get("messages") or []
        if messages:
            counts["messages"] += 1

        # Scan every assistant message without stopping early, so a row that has
        # both tool calls and <think> tags is counted for both capabilities.
        has_tool_calls = False
        has_thinking = False
        for msg in messages:
            if msg.get("role") != "assistant":
                continue
            tc = msg.get("tool_calls")
            if tc:
                has_tool_calls = True
            elif tc == []:
                counts["empty_tool_calls"] += 1
            if "<think>" in (msg.get("content") or ""):
                has_thinking = True

        if has_tool_calls:
            counts["tool_calls"] += 1
        if has_thinking:
            counts["thinking_tags"] += 1
        if row.get("tools"):
            counts["tools"] += 1
        if row.get("reasoning"):
            counts["reasoning"] += 1

    def percent(key):
        # An empty dataset reports 0% instead of dividing by zero.
        return 100 * counts[key] / total if total else 0.0

    print("Dataset Analysis")
    print("-" * 40)
    print(f"Total rows: {total}")
    print(f"Fields: {', '.join(dataset.column_names)}")
    print()
    print("Capabilities:")
    print(f"  Conversations:     {counts['messages']:>5} ({percent('messages'):.0f}%)")
    print(f"  Tool definitions:  {counts['tools']:>5} ({percent('tools'):.0f}%)")
    print(f"  Tool calls:        {counts['tool_calls']:>5} ({percent('tool_calls'):.0f}%)")
    print(f"  Reasoning:         {counts['reasoning']:>5} ({percent('reasoning'):.0f}%)")
    print(f"  Thinking tags:     {counts['thinking_tags']:>5} ({percent('thinking_tags'):.0f}%)")

    if counts["empty_tool_calls"] > 0:
        print()
        print(f"  [!] Empty tool_calls arrays: {counts['empty_tool_calls']}")
        print("      (Some models fail on empty tool_calls - these are stripped during formatting)")

    return counts


# Print the dataset summary and keep the returned counts dict in `analysis`.
analysis = analyze_dataset(dataset)

Dataset Analysis
----------------------------------------
Total rows: 40
Fields: messages, metadata, reasoning, tool_context, tools, agent_context

Capabilities:
  Conversations:        40 (100%)
  Tool definitions:     40 (100%)
  Tool calls:           40 (100%)
  Reasoning:            40 (100%)
  Thinking tags:         0 (0%)


In [None]:
def test_row(row_index):
    """Test a specific row and show detailed output.

    Reads the notebook-level ``dataset`` and ``tokenizer``. Prints a summary of
    the row, a preview of its first six messages, and then either the start of
    the rendered chat template or the error plus the formatted messages.

    Args:
        row_index: Zero-based index into ``dataset``. An out-of-range index
            prints an error and returns without doing anything else.
    """
    if row_index < 0 or row_index >= len(dataset):
        print(f"Error: index {row_index} out of range (0-{len(dataset)-1})")
        return

    row = dataset[row_index]
    messages = row.get("messages", [])
    tools = row.get("tools")

    print(f"Row {row_index}")
    print("-" * 40)

    # Summary
    print("Fields:", {k: type(v).__name__ if v else None for k, v in row.items()})
    print(f"Messages: {len(messages)}")
    if tools:
        print(f"Tools: {len(tools)}")
    print()

    # Show messages
    print("Conversation:")
    for i, msg in enumerate(messages[:6]):
        role = msg.get("role", "?")
        full_content = msg.get("content") or ""
        preview = full_content[:60]
        # Add the ellipsis only when text was actually cut off; content that is
        # exactly 60 characters long is shown as-is.
        if len(full_content) > 60:
            preview += "..."
        extras = []
        if msg.get("tool_calls"):
            extras.append(f"tool_calls={len(msg['tool_calls'])}")
        elif msg.get("tool_calls") == []:
            extras.append("tool_calls=[] (empty!)")
        if msg.get("tool_call_id"):
            extras.append(f"tool_call_id={msg['tool_call_id']}")
        extra_str = f" [{', '.join(extras)}]" if extras else ""
        print(f"  {i}. {role}: {preview}{extra_str}")
    if len(messages) > 6:
        print(f"  ... ({len(messages) - 6} more)")

    # Test template
    print()
    print("Applying chat template...")
    formatted = format_messages(messages)
    # Only forward `tools` when the row actually defines some.
    template_kwargs = {"tools": tools} if tools else {}

    try:
        result = tokenizer.apply_chat_template(formatted, tokenize=False, **template_kwargs)

        print("SUCCESS")
        print()
        print("Output preview:")
        print(result[:1500] + "..." if len(result) > 1500 else result)

    except Exception as e:
        # On failure, dump the cleaned messages that were handed to the template.
        print(f"FAILED: {type(e).__name__}: {e}")
        print()
        print("Formatted messages:")
        print(json.dumps(formatted, indent=2))


# Test row 0 by default; pass a different index to inspect another row.
test_row(0)

Row 0
----------------------------------------
Fields: {'messages': 'list', 'metadata': 'dict', 'reasoning': 'dict', 'tool_context': 'dict', 'tools': 'list', 'agent_context': 'dict'}
Messages: 17
Tools: 3

Conversation:
  0. system: You are an AI assistant with access to file system tools.
Wh...
  1. user: Can you help me identify the configuration files related to ...
  2. assistant:  [tool_calls=1]
  3. tool: ["Dockerfile","main.py","config.json","terraform/main.tf","s... [tool_call_id=mq6emkm13]
  4. assistant:  [tool_calls=1]
  5. tool: {
  "version": "1.0.0",
  "debug": true,
  "max_retries": 3
... [tool_call_id=futTCarvJ]
  ... (11 more)

Applying chat template...
SUCCESS

Output preview:
<|im_start|>system
You are an AI assistant with access to file system tools.
When given a task:
1. Analyze what files need to be read or modified
2. Execute file operations with proper paths
3. Interpret results and provide clear answers


# Tools

You may call one or more functions to assist wi

In [None]:
def find_example(dataset, capability):
    """Return the first row that demonstrates ``capability``, or ``None``.

    Recognised capabilities:
      * ``"basic"``      - the row has at least two messages
      * ``"tools"``      - the row has a non-empty ``tools`` value
      * ``"tool_calls"`` - some assistant message has non-empty ``tool_calls``
      * ``"reasoning"``  - the row has a non-empty ``reasoning`` value, or some
        message content contains ``<think>``

    Any other capability name matches nothing and yields ``None``.
    """

    def is_assistant_tool_call(message):
        return message.get("role") == "assistant" and bool(message.get("tool_calls"))

    def has_think_tag(message):
        return "<think>" in (message.get("content") or "")

    for candidate in dataset:
        turns = candidate.get("messages", [])

        if capability == "basic":
            matched = bool(turns) and len(turns) >= 2
        elif capability == "tools":
            matched = bool(candidate.get("tools"))
        elif capability == "tool_calls":
            matched = any(is_assistant_tool_call(turn) for turn in turns)
        elif capability == "reasoning":
            # The row-level field is checked first; messages are only scanned
            # when it is missing or empty.
            matched = bool(candidate.get("reasoning")) or any(
                has_think_tag(turn) for turn in turns
            )
        else:
            matched = False

        if matched:
            return candidate

    return None


def probe_capabilities(dataset, tokenizer):
    """Render one example row per capability and report the outcome.

    For each of "basic", "tools", "tool_calls" and "reasoning", the first
    matching row is looked up with ``find_example`` and passed through the
    tokenizer's chat template. Tool definitions are forwarded to the template
    only for the "tools" probe.

    Returns:
        Dict mapping capability name to ``"pass"``, ``"skip"`` (no example in
        the dataset) or ``"fail: <error>"``.
    """
    rule = "-" * 50
    results = {}

    print("Capability Probe")
    print(rule)
    print(f"{'Capability':<15} | {'In Dataset':<10} | {'Template':<10} | Status")
    print(rule)

    for cap in ("basic", "tools", "tool_calls", "reasoning"):
        example = find_example(dataset, cap)

        if example is None:
            results[cap] = "skip"
            print(f"{cap:<15} | {'No':<10} | {'N/A':<10} | SKIP")
            continue

        conversation = example.get("messages", [])
        tools = example.get("tools") if cap == "tools" else None
        formatted = format_messages(conversation)
        # Pass `tools` through only when there is something to pass.
        template_kwargs = {"tools": tools} if tools else {}

        try:
            tokenizer.apply_chat_template(formatted, tokenize=False, **template_kwargs)
            results[cap] = "pass"
            print(f"{cap:<15} | {'Yes':<10} | {'Yes':<10} | PASS")
        except Exception as exc:
            results[cap] = f"fail: {exc}"
            print(f"{cap:<15} | {'Yes':<10} | {'No':<10} | FAIL")

    print(rule)
    return results


# Probe each capability and keep the per-capability results in `probe_results`.
probe_results = probe_capabilities(dataset, tokenizer)

Capability Probe
--------------------------------------------------
Capability      | In Dataset | Template   | Status
--------------------------------------------------
basic           | Yes        | Yes        | PASS
tools           | Yes        | Yes        | PASS
tool_calls      | Yes        | Yes        | PASS
reasoning       | Yes        | Yes        | PASS
--------------------------------------------------


In [None]:
def show_example(capability):
    """Print the first two messages of an example row, raw and after cleaning.

    The row is the first one in the notebook-level ``dataset`` that
    ``find_example`` returns for ``capability``. When the row has tools, the
    first tool definition is printed as well. If no row matches, a short
    notice is printed instead.
    """
    example = find_example(dataset, capability)
    if not example:
        print(f"No {capability} example in dataset")
        return

    raw_messages = example.get("messages", [])
    cleaned_messages = format_messages(raw_messages)

    print(f"Example: {capability}")
    print("=" * 50)
    print()
    print("Raw (first 2 messages):")
    print(json.dumps(raw_messages[:2], indent=2))
    print()
    print("Formatted (first 2 messages):")
    print(json.dumps(cleaned_messages[:2], indent=2))

    tool_defs = example.get("tools")
    if tool_defs:
        print()
        print("Tools (first one):")
        print(json.dumps(tool_defs[:1], indent=2))


# Show an example - change to "tools", "tool_calls", or "reasoning".
# A short notice is printed instead when the dataset has no matching row.
show_example("basic")

Example: basic

Raw (first 2 messages):
[
  {
    "content": "You are an AI assistant with access to file system tools.\nWhen given a task:\n1. Analyze what files need to be read or modified\n2. Execute file operations with proper paths\n3. Interpret results and provide clear answers\n",
    "role": "system",
    "tool_calls": null,
    "tool_call_id": null
  },
  {
    "content": "Can you help me identify the configuration files related to the service mesh observability in my cloud native platform, and guide me on how to enable more observability features?",
    "role": "user",
    "tool_calls": null,
    "tool_call_id": null
  }
]

Formatted (first 2 messages):
[
  {
    "content": "You are an AI assistant with access to file system tools.\nWhen given a task:\n1. Analyze what files need to be read or modified\n2. Execute file operations with proper paths\n3. Interpret results and provide clear answers\n",
    "role": "system"
  },
  {
    "content": "Can you help me identify the conf