diff --git a/chatkit/__init__.py b/chatkit/__init__.py index e69de29..4c9d367 100644 --- a/chatkit/__init__.py +++ b/chatkit/__init__.py @@ -0,0 +1,168 @@ +""" +ChatKit Python SDK. + +ChatKit is a flexible backend SDK for building realtime chat experiences +with OpenAI integration, rich widgets, and comprehensive i18n/RTL support. +""" + +from .actions import Action, ActionConfig +from .agents import AgentContext, ThreadItemConverter, stream_agent_response +from .errors import CustomStreamError, ErrorCode, StreamError +from .server import ChatKitServer +from .store import AttachmentStore, NotFoundError, Store +from .types import ( + AssistantMessageItem, + Attachment, + ClientToolCallItem, + EndOfTurnItem, + HiddenContextItem, + Page, + Task, + TaskItem, + Thread, + ThreadItem, + ThreadMetadata, + UserMessageInput, + UserMessageItem, + WidgetItem, + Workflow, + WorkflowItem, +) +from .version import __version__ +from .widgets import ( + Badge, + Box, + Button, + Caption, + Card, + Chart, + Checkbox, + Col, + DatePicker, + Divider, + Form, + Icon, + Image, + Input, + Label, + ListView, + ListViewItem, + Markdown, + RadioGroup, + Row, + Select, + Spacer, + Text, + Textarea, + Title, + Transition, + WidgetComponent, + WidgetRoot, +) + +# i18n and RTL support +from .i18n import ( + auto_detect_and_set_direction, + calculate_rtl_ratio, + contains_arabic_characters, + contains_hebrew_characters, + contains_rtl_characters, + detect_text_direction, + get_language_direction, + guess_language_from_text, + is_mixed_direction_text, + is_rtl_language, +) +from .bidi import ( + TextSegment, + clean_bidi_markers, + fix_arabic_numbers_in_text, + normalize_mixed_text, + split_mixed_direction_text, + wrap_with_direction_markers, +) + +__all__ = [ + # Version + "__version__", + # Core + "ChatKitServer", + "Store", + "AttachmentStore", + "NotFoundError", + # Agents + "AgentContext", + "ThreadItemConverter", + "stream_agent_response", + # Actions + "Action", + "ActionConfig", + # 
Errors + "StreamError", + "CustomStreamError", + "ErrorCode", + # Types + "Thread", + "ThreadMetadata", + "ThreadItem", + "Page", + "UserMessageInput", + "UserMessageItem", + "AssistantMessageItem", + "ClientToolCallItem", + "EndOfTurnItem", + "HiddenContextItem", + "WidgetItem", + "TaskItem", + "Task", + "WorkflowItem", + "Workflow", + "Attachment", + # Widgets + "WidgetRoot", + "WidgetComponent", + "Card", + "ListView", + "ListViewItem", + "Text", + "Title", + "Caption", + "Markdown", + "Button", + "Input", + "Textarea", + "Label", + "Select", + "Checkbox", + "RadioGroup", + "DatePicker", + "Box", + "Row", + "Col", + "Form", + "Divider", + "Icon", + "Image", + "Badge", + "Spacer", + "Chart", + "Transition", + # i18n + "is_rtl_language", + "get_language_direction", + "contains_rtl_characters", + "contains_arabic_characters", + "contains_hebrew_characters", + "calculate_rtl_ratio", + "detect_text_direction", + "guess_language_from_text", + "is_mixed_direction_text", + "auto_detect_and_set_direction", + # bidi + "TextSegment", + "wrap_with_direction_markers", + "fix_arabic_numbers_in_text", + "split_mixed_direction_text", + "normalize_mixed_text", + "clean_bidi_markers", +] diff --git a/chatkit/bidi.py b/chatkit/bidi.py new file mode 100644 index 0000000..5683b8b --- /dev/null +++ b/chatkit/bidi.py @@ -0,0 +1,415 @@ +""" +Bidirectional text (bidi) utilities for ChatKit. + +This module provides utilities for handling Unicode Bidirectional Algorithm +and mixed-direction text, which is essential for proper RTL support. 
class TextSegment:
    """
    Represents a segment of text with a specific direction.

    Attributes:
        text: The text content
        direction: The text direction ('ltr' or 'rtl')
        start_index: Starting position in the original text
        end_index: Ending position in the original text. Defaults to
            ``start_index + len(text)`` when not given.
    """

    def __init__(
        self,
        text: str,
        direction: Literal["ltr", "rtl"],
        start_index: int = 0,
        end_index: int | None = None,
    ):
        self.text = text
        self.direction = direction
        self.start_index = start_index
        # Test against None explicitly: a caller-supplied end_index of 0 is a
        # real value, and the default has to be offset by start_index so the
        # span stays consistent for segments that do not start at 0.
        if end_index is None:
            end_index = start_index + len(text)
        self.end_index = end_index

    def __repr__(self) -> str:
        return f"TextSegment(text={self.text!r}, direction={self.direction!r})"

    def __eq__(self, other: object) -> bool:
        # Returning NotImplemented lets Python try the reflected comparison;
        # `segment == <other type>` still evaluates to False in the end.
        if not isinstance(other, TextSegment):
            return NotImplemented
        return (
            self.text == other.text
            and self.direction == other.direction
            and self.start_index == other.start_index
            and self.end_index == other.end_index
        )
def add_direction_mark(text: str, direction: Literal["ltr", "rtl"]) -> str:
    """
    Append a single directional mark to the end of text.

    Intended for inline text that needs a directional hint.

    Args:
        text: Text to mark
        direction: 'rtl' appends RLM; any other value appends LRM

    Returns:
        The text followed by the mark, or the text unchanged when it is empty
    """
    if text:
        return text + (RLM if direction == "rtl" else LRM)
    return text
def split_mixed_direction_text(text: str) -> list[TextSegment]:
    """
    Split text into segments based on direction changes.

    Whitespace is neutral: it never starts a segment and is excluded from
    the end of one, but whitespace between two characters of the same
    direction stays inside the segment. Every non-whitespace character that
    RTL_PATTERN does not match (including digits and punctuation) is treated
    as 'ltr'.

    Args:
        text: Text to split

    Returns:
        List of TextSegment objects in source order. For each segment,
        ``text[segment.start_index:segment.end_index] == segment.text``.

    Examples:
        >>> segments = split_mixed_direction_text('Hello مرحبا World')
        >>> len(segments)
        3
        >>> segments[1].direction
        'rtl'
        >>> (segments[0].start_index, segments[0].end_index)
        (0, 5)
    """
    if not text:
        return []

    # Function-level import, mirroring how i18n imports from this module;
    # presumably this avoids a circular import between the two modules.
    from .i18n import RTL_PATTERN

    segments: list[TextSegment] = []
    current_direction: Literal["ltr", "rtl"] | None = None
    segment_start = 0
    # Exclusive end of the current run: one past its last non-whitespace
    # character. Using this (rather than the index where the next run starts)
    # keeps end_index in step with the stripped segment text, so callers can
    # recover the whitespace between segments from the index gap.
    segment_end = 0

    for i, char in enumerate(text):
        if char.isspace():
            continue

        char_direction: Literal["ltr", "rtl"] = (
            "rtl" if RTL_PATTERN.match(char) else "ltr"
        )

        if current_direction is None:
            # First non-whitespace character opens the first segment
            current_direction = char_direction
            segment_start = i
        elif char_direction != current_direction:
            # Direction flipped: close the previous run and open a new one
            segments.append(
                TextSegment(
                    text[segment_start:segment_end],
                    current_direction,
                    segment_start,
                    segment_end,
                )
            )
            current_direction = char_direction
            segment_start = i

        segment_end = i + 1

    # Close the final run (absent only when the text is all whitespace)
    if current_direction is not None:
        segments.append(
            TextSegment(
                text[segment_start:segment_end],
                current_direction,
                segment_start,
                segment_end,
            )
        )

    return segments
# Translation table mapping every bidi control character to None so that
# str.translate deletes them all in a single pass.
_BIDI_CONTROL_TABLE = dict.fromkeys(
    (
        0x200E,  # LRM - Left-to-Right Mark
        0x200F,  # RLM - Right-to-Left Mark
        0x202A,  # LRE - Left-to-Right Embedding
        0x202B,  # RLE - Right-to-Left Embedding
        0x202C,  # PDF - Pop Directional Formatting
        0x202D,  # LRO - Left-to-Right Override
        0x202E,  # RLO - Right-to-Left Override
        0x061C,  # ALM - Arabic Letter Mark
        0x2066,  # LRI - Left-to-Right Isolate
        0x2067,  # RLI - Right-to-Left Isolate
        0x2068,  # FSI - First Strong Isolate
        0x2069,  # PDI - Pop Directional Isolate
    )
)


def clean_bidi_markers(text: str) -> str:
    """
    Remove all Unicode bidirectional markers from text.

    This is useful for getting the plain text content
    without formatting characters. Besides the marks, embeddings and
    overrides, the isolate controls (U+2066-U+2069) are removed as well.

    Args:
        text: Text with bidi markers

    Returns:
        Clean text without markers; empty input is returned unchanged

    Examples:
        >>> clean_bidi_markers('\\u202aHello\\u202c')
        'Hello'
    """
    if not text:
        return text

    return text.translate(_BIDI_CONTROL_TABLE)
def is_rtl_language(lang_code: str) -> bool:
    """
    Report whether a language code denotes a right-to-left language.

    Only the part before the first '-' or '_' is considered, compared
    case-insensitively against RTL_LANGUAGES (so 'ar-SA' is looked up as 'ar').

    Args:
        lang_code: Language code (e.g., 'ar', 'he', 'en', 'ar-SA')

    Returns:
        True if the base code is in RTL_LANGUAGES, False otherwise
        (including for an empty code)
    """
    if not lang_code:
        return False
    # Cut at the first hyphen, then at the first underscore of what remains
    hyphen_head, _, _ = lang_code.lower().partition("-")
    base_code, _, _ = hyphen_head.partition("_")
    return base_code in RTL_LANGUAGES
def calculate_rtl_ratio(text: str, sample_size: int = 100) -> float:
    """
    Calculate the ratio of RTL characters in the text.

    Only the first ``sample_size`` characters are inspected, and only
    alphanumeric characters are counted; whitespace and punctuation are
    ignored.

    Args:
        text: Text to analyze
        sample_size: Number of leading characters to sample (default: 100)

    Returns:
        Fraction of counted characters matched by RTL_PATTERN, or 0.0 when
        the text is empty or the sample holds no alphanumeric characters
    """
    if not text:
        return 0.0

    counted = 0
    rtl_counted = 0
    # Sample only the beginning of the text for performance
    for ch in text[:sample_size]:
        # isalnum() is False for whitespace and punctuation alike
        if not ch.isalnum():
            continue
        counted += 1
        if RTL_PATTERN.match(ch):
            rtl_counted += 1

    if counted == 0:
        return 0.0
    return rtl_counted / counted
def guess_language_from_text(text: str) -> str | None:
    """
    Guess a language code from the script used in the text.

    This is a script-detection heuristic only: any text containing a
    character matched by the Arabic pattern is reported as 'ar', and
    otherwise any text containing a Hebrew character as 'he'. Use an
    external library when accurate language detection is needed.

    Args:
        text: Text to analyze

    Returns:
        'ar', 'he', or None when the text is empty or neither script is found
    """
    if text:
        # Arabic is checked first, so it wins for text containing both scripts
        if contains_arabic_characters(text):
            return "ar"
        if contains_hebrew_characters(text):
            return "he"

    # Other languages cannot be told apart by script detection alone
    return None
def normalize_text_direction(text: str, force_direction: str | None = None) -> str:
    """
    Wrap text with the directional markers that match its direction.

    Args:
        text: Text to normalize
        force_direction: 'ltr' or 'rtl' to skip detection and force that
            direction. Any other value (None, 'auto', ...) means the
            direction is detected from the content.

    Returns:
        The text wrapped with directional markers, or the text unchanged
        when it is empty or its detected direction is 'auto'
    """
    if not text:
        return text

    # Function-level import, mirroring how bidi imports from this module;
    # presumably this avoids a circular import between the two modules.
    from .bidi import wrap_with_direction_markers

    # Only the two concrete directions may be forced. Passing anything else
    # through would make wrap_with_direction_markers treat it as 'ltr'.
    if force_direction in ("ltr", "rtl"):
        return wrap_with_direction_markers(text, force_direction)

    # Auto-detect and normalize
    direction = detect_text_direction(text)
    if direction != "auto":
        return wrap_with_direction_markers(text, direction)

    return text


# Convenience functions for common use cases
def auto_detect_and_set_direction(
    text: str,
) -> tuple[str, Literal["ltr", "rtl", "auto"], str | None]:
    """
    Automatically detect text direction and guess language.

    This is a convenience function that combines direction detection
    and language guessing.

    Args:
        text: Text to analyze

    Returns:
        Tuple of (normalized_text, direction, language_code).
        normalized_text is wrapped with directional markers when the
        direction is 'ltr' or 'rtl', and is the input unchanged when the
        direction is 'auto'.

    Examples:
        >>> auto_detect_and_set_direction('مرحبا')
        ('\\u202bمرحبا\\u202c', 'rtl', 'ar')
        >>> auto_detect_and_set_direction('Hello')
        ('\\u202aHello\\u202c', 'ltr', None)
    """
    direction = detect_text_direction(text)
    language = guess_language_from_text(text)

    if direction == "auto":
        # Nothing to wrap; skip normalize_text_direction, which would only
        # repeat the detection and hand the text back unchanged.
        normalized = text
    else:
        normalized = normalize_text_direction(text, direction)

    return normalized, direction, language
number of lines (line clamp).""" editable: Literal[False] | EditableProps | None = None """Enable inline editing for this text node.""" + dir: Literal["ltr", "rtl", "auto"] | None = None + """Text direction for RTL language support.""" + lang: str | None = None + """Language code (ISO 639-1) for proper text rendering and accessibility.""" class Title(WidgetComponentBase): @@ -343,6 +359,10 @@ class Title(WidgetComponentBase): """Truncate overflow with ellipsis.""" maxLines: int | None = None """Limit text to a maximum number of lines (line clamp).""" + dir: Literal["ltr", "rtl", "auto"] | None = None + """Text direction for RTL language support.""" + lang: str | None = None + """Language code for proper text rendering.""" class Caption(WidgetComponentBase): @@ -369,6 +389,10 @@ class Caption(WidgetComponentBase): """Truncate overflow with ellipsis.""" maxLines: int | None = None """Limit text to a maximum number of lines (line clamp).""" + dir: Literal["ltr", "rtl", "auto"] | None = None + """Text direction for RTL language support.""" + lang: str | None = None + """Language code for proper text rendering.""" class Badge(WidgetComponentBase): @@ -616,6 +640,10 @@ class Button(WidgetComponentBase): """Extend the button to 100% of the available width.""" disabled: bool | None = None """Disable interactions and apply disabled styles.""" + dir: Literal["ltr", "rtl", "auto"] | None = None + """Text direction for button label.""" + lang: str | None = None + """Language code for button label.""" class Spacer(WidgetComponentBase): @@ -753,6 +781,10 @@ class Input(WidgetComponentBase): """Controls gutter on the edges of the input; overrides value from `size`.""" pill: bool | None = None """Determines if the input should be fully rounded (pill).""" + dir: Literal["ltr", "rtl", "auto"] | None = None + """Text direction for input text.""" + lang: str | None = None + """Language code for keyboard layout and spell-check.""" class Label(WidgetComponentBase): @@ -777,6 +809,10 @@ 
class Label(WidgetComponentBase): Primitive color token: e.g. `red-100`, `blue-900`, `gray-500` """ + dir: Literal["ltr", "rtl", "auto"] | None = None + """Text direction for label text.""" + lang: str | None = None + """Language code for label text.""" class RadioOption(TypedDict): @@ -846,6 +882,10 @@ class Textarea(WidgetComponentBase): """Maximum number of rows when auto-resizing.""" allowAutofillExtensions: bool | None = None """Allow password managers / autofill extensions to appear.""" + dir: Literal["ltr", "rtl", "auto"] | None = None + """Text direction for textarea content.""" + lang: str | None = None + """Language code for keyboard layout and spell-check.""" class Transition(WidgetComponentBase): diff --git a/docs/i18n.md b/docs/i18n.md new file mode 100644 index 0000000..d67f446 --- /dev/null +++ b/docs/i18n.md @@ -0,0 +1,363 @@ +# Internationalization (i18n) & RTL Support + +ChatKit provides comprehensive support for Right-to-Left (RTL) languages including Arabic, Hebrew, Persian, and Urdu. + +## Quick Start + +### Basic RTL Text + +```python +from chatkit.widgets import Text + +# Arabic text with explicit RTL direction +text = Text( + value="مرحباً بك في ChatKit", + dir="rtl", + lang="ar" +) +``` + +### Automatic Direction Detection + +```python +from chatkit.i18n import detect_text_direction + +text = "مرحباً بك في ChatKit" +direction = detect_text_direction(text) # Returns "rtl" + +# Use in widget +widget = Text(value=text, dir=direction, lang="ar") +``` + +## Direction Options + +All text-based widgets support the `dir` property with three values: + +- **`"ltr"`**: Left-to-Right (English, French, etc.) +- **`"rtl"`**: Right-to-Left (Arabic, Hebrew, etc.) 
+- **`"auto"`**: Automatic detection based on content + +## Supported Widgets + +The following widgets support RTL: + +### Text Widgets +- `Text` - Plain text +- `Title` - Headlines +- `Caption` - Supporting text +- `Markdown` - Markdown content + +### Input Widgets +- `Input` - Single-line input +- `Textarea` - Multi-line input +- `Label` - Form labels +- `Button` - Button labels + +### Container Widgets +- `Card` - Card containers +- `ListView` - List views + +## Language Detection Functions + +### `is_rtl_language(lang_code: str) -> bool` + +Check if a language code represents an RTL language. + +```python +from chatkit.i18n import is_rtl_language + +is_rtl_language('ar') # True +is_rtl_language('he') # True +is_rtl_language('en') # False +``` + +### `detect_text_direction(text: str) -> Literal["ltr", "rtl", "auto"]` + +Automatically detect text direction based on content. + +```python +from chatkit.i18n import detect_text_direction + +detect_text_direction("مرحبا") # "rtl" +detect_text_direction("Hello") # "ltr" +detect_text_direction("مرحبا Hello") # "rtl" (RTL ratio 0.5 meets the default 0.3 threshold) +``` + +### `guess_language_from_text(text: str) -> str | None` + +Attempt to guess the language from text content. + +```python +from chatkit.i18n import guess_language_from_text + +guess_language_from_text("مرحبا") # "ar" +guess_language_from_text("שלום") # "he" +``` + +## Bidirectional Text Utilities + +### `wrap_with_direction_markers(text: str, direction: Literal["ltr", "rtl"]) -> str` + +Wrap text with Unicode directional markers. + +```python +from chatkit.bidi import wrap_with_direction_markers + +wrapped = wrap_with_direction_markers("مرحبا", "rtl") +# Adds Unicode RLE markers for proper rendering +``` + +### `fix_arabic_numbers_in_text(text: str) -> str` + +Fix the display of numbers in Arabic text. 
+ +```python +from chatkit.bidi import fix_arabic_numbers_in_text + +text = fix_arabic_numbers_in_text("عدد 123 شيء") +# Numbers are wrapped with LTR markers +``` + +### `normalize_mixed_text(text: str) -> str` + +Normalize mixed-direction text for proper display. + +```python +from chatkit.bidi import normalize_mixed_text + +normalized = normalize_mixed_text("Hello مرحبا World") +# Each segment is wrapped with appropriate markers +``` + +## Complete Examples + +### RTL Login Form + +```python +from chatkit.widgets import Card, Title, Caption, Col, Label, Input, Button, Row + +login_card = Card( + dir="rtl", + lang="ar", + children=[ + Title(value="تسجيل الدخول", dir="rtl"), + Caption(value="أدخل بيانات الدخول", dir="rtl"), + Col( + gap=4, + children=[ + Label(value="البريد الإلكتروني", fieldName="email", dir="rtl"), + Input( + name="email", + placeholder="أدخل بريدك", + dir="rtl", + lang="ar", + inputType="email" + ), + Label(value="كلمة المرور", fieldName="password", dir="rtl"), + Input( + name="password", + placeholder="أدخل كلمة المرور", + dir="rtl", + lang="ar", + inputType="password" + ), + Row( + gap=2, + justify="end", + children=[ + Button(label="إلغاء", dir="rtl", variant="outline"), + Button(label="دخول", dir="rtl", style="primary"), + ] + ) + ] + ) + ] +) +``` + +### Mixed Direction Content + +```python +from chatkit.widgets import Card, Text, Col + +card = Card( + dir="auto", # Auto-detect + children=[ + Col( + gap=2, + children=[ + Text(value="Welcome / مرحبا", dir="auto"), + Text(value="Features / المميزات", dir="auto"), + Text(value="Support / الدعم", dir="auto"), + ] + ) + ] +) +``` + +### RTL Markdown + +```python +from chatkit.widgets import Markdown + +markdown = Markdown( + value=""" +# مرحباً بك + +## المميزات + +- دعم كامل للعربية +- واجهة سهلة +- أداء عالي + +### مثال + +```python +text = Text(value="مرحبا", dir="rtl") +``` + """, + dir="rtl", + lang="ar" +) +``` + +## Best Practices + +### 1. 
Use Explicit Direction When Known + +```python +# Good: Explicit direction +Text(value="مرحبا", dir="rtl", lang="ar") + +# Avoid: Relying on auto-detection when you know the direction +Text(value="مرحبا") # Will work but less efficient +``` + +### 2. Specify Language Code + +Always specify the `lang` attribute for better browser support and accessibility: + +```python +Text(value="مرحبا", dir="rtl", lang="ar") +Text(value="שלום", dir="rtl", lang="he") +``` + +### 3. Keep Technical Content LTR + +Email addresses, URLs, and code should remain LTR even in RTL contexts: + +```python +Card( + dir="rtl", + children=[ + Label(value="البريد الإلكتروني", fieldName="email", dir="rtl"), + Input( + name="email", + placeholder="user@example.com", + dir="ltr", # Keep email LTR + inputType="email" + ) + ] +) +``` + +### 4. Use Auto for Mixed Content + +When content contains both RTL and LTR text, use `dir="auto"`: + +```python +Text( + value="مرحبا ChatKit مرحبا", + dir="auto" # Browser will handle the mixed content +) +``` + +### 5. 
Cache Detection Results + +For performance, cache direction detection results: + +```python +from chatkit.i18n import detect_text_direction + +# Cache the result +texts = ["مرحبا", "Hello", "שלום"] +directions = {text: detect_text_direction(text) for text in texts} + +# Reuse cached results +for text in texts: + widget = Text(value=text, dir=directions[text]) +``` + +## Supported Languages + +ChatKit supports the following RTL languages: + +| Language | Code | Example | +|----------|------|---------| +| Arabic | `ar` | مرحبا | +| Hebrew | `he` | שלום | +| Persian (Farsi) | `fa` | سلام | +| Urdu | `ur` | ہیلو | +| Yiddish | `yi` | העלא | +| Pashto | `ps` | سلام | + +## Unicode Bidirectional Markers + +ChatKit uses standard Unicode bidirectional control characters: + +- **RLM** (`\u200F`): Right-to-Left Mark +- **LRM** (`\u200E`): Left-to-Right Mark +- **RLE** (`\u202B`): Right-to-Left Embedding +- **LRE** (`\u202A`): Left-to-Right Embedding +- **PDF** (`\u202C`): Pop Directional Formatting + +These are automatically added by the `bidi` utilities when needed. 
+ +## Troubleshooting + +### Numbers Display Incorrectly + +Use `fix_arabic_numbers_in_text()` to fix number display in RTL text: + +```python +from chatkit.bidi import fix_arabic_numbers_in_text + +text = fix_arabic_numbers_in_text("السعر 99.99 دولار") +``` + +### Mixed Text Breaks Layout + +Use `normalize_mixed_text()` to properly handle mixed-direction text: + +```python +from chatkit.bidi import normalize_mixed_text + +text = normalize_mixed_text("Hello مرحبا World") +``` + +### Punctuation in Wrong Position + +This is usually handled automatically by browsers, but you can use the bidi utilities for more control: + +```python +from chatkit.bidi import wrap_with_direction_markers + +text = wrap_with_direction_markers("مرحبا!", "rtl") +``` + +## API Reference + +For detailed API documentation, see: + +- [i18n Module Reference](../api/chatkit/i18n.md) +- [bidi Module Reference](../api/chatkit/bidi.md) + +## Examples + +Complete working examples can be found in the `examples/` directory: + +- `examples/rtl_support_example.py` - Comprehensive RTL examples + +## Contributing + +We welcome contributions to improve RTL support! See our [Contributing Guide](../CONTRIBUTING.md) for details. diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..c4d0ee4 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,55 @@ +# ChatKit Python Examples + +This directory contains working examples demonstrating various ChatKit features. + +## Available Examples + +### RTL Support Example +**File**: `rtl_support_example.py` + +Demonstrates comprehensive Right-to-Left (RTL) language support including: +- Basic RTL text widgets +- Automatic direction detection +- Complete RTL forms and cards +- Mixed-direction content +- RTL Markdown +- Best practices and performance tips + +**Run**: +```bash +PYTHONPATH=. 
python examples/rtl_support_example.py +``` + +## Running Examples + +All examples can be run from the project root: + +```bash +# Make sure you're in the project root +cd /path/to/chatkit-python + +# Run an example +PYTHONPATH=. python examples/example_name.py +``` + +Or using `uv`: + +```bash +uv run python examples/example_name.py +``` + +## More Examples Coming Soon + +We're working on adding more examples: +- Basic chatbot integration +- Widget demonstrations +- Action handling +- Workflow examples +- Server integration with FastAPI +- Server integration with Flask +- Custom store implementation +- Authentication patterns + +## Contributing Examples + +Have a great example? We'd love to include it! See [CONTRIBUTING.md](../CONTRIBUTING.md) for guidelines. diff --git a/examples/rtl_support_example.py b/examples/rtl_support_example.py new file mode 100644 index 0000000..2793365 --- /dev/null +++ b/examples/rtl_support_example.py @@ -0,0 +1,376 @@ +""" +ChatKit RTL (Right-to-Left) Support Example. + +This example demonstrates how to use ChatKit with RTL languages like Arabic, +including automatic direction detection, mixed-direction text, and proper +widget configuration. 
+""" + +from chatkit.i18n import auto_detect_and_set_direction, detect_text_direction +from chatkit.widgets import ( + Button, + Card, + Caption, + Col, + Input, + Label, + Markdown, + Row, + Text, + Textarea, + Title, +) + + +def example_basic_rtl_text(): + """Example 1: Basic RTL text widget.""" + print("=" * 60) + print("Example 1: Basic RTL Text") + print("=" * 60) + + # Arabic text with explicit RTL direction + arabic_text = Text( + value="مرحباً بك في ChatKit!", + dir="rtl", + lang="ar", + size="lg", + ) + + print(f"Text value: {arabic_text.value}") + print(f"Direction: {arabic_text.dir}") + print(f"Language: {arabic_text.lang}") + print(f"JSON: {arabic_text.model_dump_json(exclude_none=True, indent=2)}") + print() + + +def example_auto_detection(): + """Example 2: Automatic direction detection.""" + print("=" * 60) + print("Example 2: Auto Direction Detection") + print("=" * 60) + + texts = [ + "مرحباً بك في ChatKit", # Arabic + "Hello World", # English + "مرحبا Hello العالم World", # Mixed + ] + + for text in texts: + direction = detect_text_direction(text) + normalized, detected_dir, lang = auto_detect_and_set_direction(text) + + print(f"Text: {text}") + print(f"Detected direction: {direction}") + print(f"Detected language: {lang}") + print(f"Normalized: {repr(normalized[:50])}...") + print("-" * 40) + + print() + + +def example_rtl_card(): + """Example 3: Complete RTL card with multiple widgets.""" + print("=" * 60) + print("Example 3: RTL Card with Multiple Widgets") + print("=" * 60) + + card = Card( + dir="rtl", + lang="ar", + size="md", + children=[ + Title( + value="نموذج تسجيل الدخول", + dir="rtl", + size="2xl", + ), + Caption( + value="الرجاء إدخال بيانات الدخول الخاصة بك", + dir="rtl", + color="secondary", + ), + Col( + gap=4, + children=[ + Label( + value="البريد الإلكتروني", + fieldName="email", + dir="rtl", + ), + Input( + name="email", + placeholder="أدخل بريدك الإلكتروني", + dir="rtl", + lang="ar", + inputType="email", + required=True, + ), + 
Label( + value="كلمة المرور", + fieldName="password", + dir="rtl", + ), + Input( + name="password", + placeholder="أدخل كلمة المرور", + dir="rtl", + lang="ar", + inputType="password", + required=True, + ), + Row( + gap=2, + justify="end", + children=[ + Button( + label="إلغاء", + dir="rtl", + variant="outline", + ), + Button( + label="تسجيل الدخول", + dir="rtl", + variant="solid", + style="primary", + ), + ], + ), + ], + ), + ], + ) + + print("RTL Login Card created successfully!") + print(f"Card direction: {card.dir}") + print(f"Card language: {card.lang}") + print(f"Number of children: {len(card.children)}") + print() + + +def example_mixed_content(): + """Example 4: Mixed Arabic and English content.""" + print("=" * 60) + print("Example 4: Mixed Direction Content") + print("=" * 60) + + # Card with mixed content + card = Card( + dir="auto", # Auto-detect based on content + children=[ + Title( + value="ChatKit Features / مميزات ChatKit", + dir="auto", + ), + Col( + gap=3, + children=[ + Text( + value="✅ دعم كامل للغة العربية - Full Arabic Support", + dir="auto", + ), + Text( + value="✅ كشف تلقائي للاتجاه - Automatic Direction Detection", + dir="auto", + ), + Text( + value="✅ نصوص مختلطة - Mixed Text Support", + dir="auto", + ), + ], + ), + ], + ) + + print("Mixed Content Card created successfully!") + print(f"Card has {len(card.children)} children") + print() + + +def example_markdown_rtl(): + """Example 5: RTL Markdown content.""" + print("=" * 60) + print("Example 5: RTL Markdown") + print("=" * 60) + + markdown_content = """ +# مرحباً بك في ChatKit + +## المميزات الرئيسية + +- دعم كامل للغة العربية +- واجهة سهلة الاستخدام +- أداء عالي وسريع + +### مثال على الكود + +```python +text = Text(value="مرحبا", dir="rtl", lang="ar") +``` + +### الأرقام والتواريخ + +اليوم هو 2025/10/7 والساعة 12:30 + """ + + markdown = Markdown( + value=markdown_content, + dir="rtl", + lang="ar", + ) + + print("Markdown widget created!") + print(f"Direction: {markdown.dir}") + 
print(f"Language: {markdown.lang}") + print(f"Content length: {len(markdown.value)} characters") + print() + + +def example_form_rtl(): + """Example 6: Complete RTL form.""" + print("=" * 60) + print("Example 6: Complete RTL Form") + print("=" * 60) + + form_card = Card( + dir="rtl", + lang="ar", + children=[ + Title(value="نموذج التواصل", dir="rtl"), + Caption(value="نسعد بتواصلكم معنا", dir="rtl"), + Col( + gap=4, + children=[ + # Name field + Label(value="الاسم الكامل", fieldName="name", dir="rtl"), + Input( + name="name", + placeholder="أدخل اسمك الكامل", + dir="rtl", + required=True, + ), + # Email field + Label(value="البريد الإلكتروني", fieldName="email", dir="rtl"), + Input( + name="email", + placeholder="example@email.com", + dir="ltr", # Email is LTR even in RTL form + inputType="email", + required=True, + ), + # Message field + Label(value="الرسالة", fieldName="message", dir="rtl"), + Textarea( + name="message", + placeholder="اكتب رسالتك هنا...", + dir="rtl", + lang="ar", + rows=5, + required=True, + ), + # Submit button + Button( + label="إرسال", + dir="rtl", + style="primary", + block=True, + ), + ], + ), + ], + ) + + print("Complete RTL Form created successfully!") + print("Form includes: name, email, message, and submit button") + print() + + +def example_rtl_best_practices(): + """Example 7: Best practices for RTL.""" + print("=" * 60) + print("Example 7: RTL Best Practices") + print("=" * 60) + + practices = [ + { + "title": "1. استخدم dir='auto' للنصوص المختلطة", + "desc": "Use dir='auto' for mixed-direction text", + "widget": Text(value="مرحبا ChatKit مرحبا", dir="auto"), + }, + { + "title": "2. حدد اللغة بوضوح", + "desc": "Always specify language code", + "widget": Text(value="مرحبا", dir="rtl", lang="ar"), + }, + { + "title": "3. البريد الإلكتروني يبقى LTR", + "desc": "Keep emails and URLs LTR", + "widget": Input(name="email", placeholder="user@example.com", dir="ltr"), + }, + { + "title": "4. 
استخدم الكشف التلقائي عند عدم التأكد", + "desc": "Use auto-detection when unsure", + "widget": Text(value="", dir="auto"), + }, + ] + + for i, practice in enumerate(practices, 1): + print(f"{practice['title']}") + print(f" {practice['desc']}") + print(f" Widget: {practice['widget'].type}, dir={practice['widget'].dir}") + if i < len(practices): + print() + + print() + + +def example_performance_tips(): + """Example 8: Performance tips for RTL.""" + print("=" * 60) + print("Example 8: Performance Tips") + print("=" * 60) + + tips = """ + نصائح الأداء / Performance Tips: + + 1. استخدم dir='rtl' بشكل صريح عندما تعرف الاتجاه + Use explicit dir='rtl' when you know the direction + + 2. تجنب الكشف التلقائي المتكرر للنصوص الكبيرة + Avoid repeated auto-detection for large texts + + 3. قم بتخزين النتائج المكتشفة مؤقتاً + Cache detected results when possible + + 4. استخدم lang= للحصول على دعم أفضل من المتصفح + Use lang= for better browser support + """ + + print(tips) + print() + + +def main(): + """Run all RTL examples.""" + print("\n") + print("=" * 60) + print("ChatKit RTL Support - Examples") + print("=" * 60) + print("\n") + + # Run all examples + example_basic_rtl_text() + example_auto_detection() + example_rtl_card() + example_mixed_content() + example_markdown_rtl() + example_form_rtl() + example_rtl_best_practices() + example_performance_tips() + + print("=" * 60) + print("All examples completed successfully! 
✅") + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/tests/helpers/mock_widget.py b/tests/helpers/mock_widget.py index c6ef892..72f0b3a 100644 --- a/tests/helpers/mock_widget.py +++ b/tests/helpers/mock_widget.py @@ -3,7 +3,7 @@ import uuid from datetime import datetime, timedelta from typing import Annotated, Any, AsyncIterator, Callable, Literal, assert_never - +from pathlib import Path from agents import Agent, Runner from anyio import sleep from pydantic import BaseModel, Field, TypeAdapter @@ -622,9 +622,11 @@ async def handle_action( else "Fetched widgets", ) yield ActionOutput.from_widget(self.render(next_state)) - + + elif action.type == "sample.draft_email": - with open("../README.md", "r") as f: + readme_path = Path(__file__).parent.parent.parent / "README.md" + with open(readme_path, "r") as f: readme = f.read() body_text = Runner.run_streamed( diff --git a/tests/test_i18n.py b/tests/test_i18n.py new file mode 100644 index 0000000..ec1d6c8 --- /dev/null +++ b/tests/test_i18n.py @@ -0,0 +1,397 @@ +""" +Tests for internationalization (i18n) and RTL support in ChatKit. 
+""" + +import pytest + +from chatkit.bidi import ( + RLE, + RLM, + TextSegment, + clean_bidi_markers, + fix_arabic_numbers_in_text, + get_visual_length, + normalize_mixed_text, + split_mixed_direction_text, + wrap_with_direction_markers, +) +from chatkit.i18n import ( + calculate_rtl_ratio, + contains_arabic_characters, + contains_hebrew_characters, + contains_rtl_characters, + detect_text_direction, + get_language_direction, + guess_language_from_text, + is_mixed_direction_text, + is_rtl_language, +) + + +class TestLanguageDetection: + """Tests for language and direction detection.""" + + def test_is_rtl_language_arabic(self): + assert is_rtl_language("ar") is True + assert is_rtl_language("ar-SA") is True + assert is_rtl_language("ar_EG") is True + + def test_is_rtl_language_hebrew(self): + assert is_rtl_language("he") is True + assert is_rtl_language("iw") is True # Old Hebrew code + + def test_is_rtl_language_persian(self): + assert is_rtl_language("fa") is True + + def test_is_rtl_language_ltr(self): + assert is_rtl_language("en") is False + assert is_rtl_language("fr") is False + assert is_rtl_language("de") is False + + def test_is_rtl_language_empty(self): + assert is_rtl_language("") is False + assert is_rtl_language(None) is False # type: ignore + + def test_get_language_direction_rtl(self): + assert get_language_direction("ar") == "rtl" + assert get_language_direction("he") == "rtl" + assert get_language_direction("fa") == "rtl" + + def test_get_language_direction_ltr(self): + assert get_language_direction("en") == "ltr" + assert get_language_direction("fr") == "ltr" + + +class TestCharacterDetection: + """Tests for character-level RTL detection.""" + + def test_contains_arabic_characters(self): + assert contains_arabic_characters("مرحبا") is True + assert contains_arabic_characters("Hello مرحبا") is True + assert contains_arabic_characters("Hello") is False + assert contains_arabic_characters("") is False + + def test_contains_hebrew_characters(self): 
+ assert contains_hebrew_characters("שלום") is True + assert contains_hebrew_characters("Hello שלום") is True + assert contains_hebrew_characters("Hello") is False + + def test_contains_rtl_characters(self): + assert contains_rtl_characters("مرحبا") is True + assert contains_rtl_characters("שלום") is True + assert contains_rtl_characters("Hello") is False + assert contains_rtl_characters("مرحبا Hello") is True + + def test_is_mixed_direction_text(self): + assert is_mixed_direction_text("مرحبا Hello") is True + assert is_mixed_direction_text("Hello مرحبا World") is True + assert is_mixed_direction_text("مرحبا") is False + assert is_mixed_direction_text("Hello") is False + + +class TestRTLRatioCalculation: + """Tests for RTL character ratio calculation.""" + + def test_calculate_rtl_ratio_pure_rtl(self): + ratio = calculate_rtl_ratio("مرحبا بك") + assert ratio == pytest.approx(1.0) + + def test_calculate_rtl_ratio_pure_ltr(self): + ratio = calculate_rtl_ratio("Hello World") + assert ratio == pytest.approx(0.0) + + def test_calculate_rtl_ratio_mixed(self): + ratio = calculate_rtl_ratio("مرحبا Hello") + assert 0.0 < ratio < 1.0 + + def test_calculate_rtl_ratio_empty(self): + assert calculate_rtl_ratio("") == 0.0 + + def test_calculate_rtl_ratio_with_numbers(self): + # Numbers should not affect RTL ratio + ratio = calculate_rtl_ratio("مرحبا 123") + assert ratio > 0.5 # Still predominantly RTL + + +class TestTextDirectionDetection: + """Tests for automatic text direction detection.""" + + def test_detect_text_direction_pure_arabic(self): + assert detect_text_direction("مرحبا بك في ChatKit") == "rtl" + + def test_detect_text_direction_pure_hebrew(self): + assert detect_text_direction("שלום עולם") == "rtl" + + def test_detect_text_direction_pure_english(self): + assert detect_text_direction("Hello World") == "ltr" + + def test_detect_text_direction_mixed(self): + result = detect_text_direction("مرحبا Hello العالم World") + # Mixed text with more Arabic should return 'rtl' 
or 'auto' + assert result in ["rtl", "auto"] + + def test_detect_text_direction_empty(self): + assert detect_text_direction("") == "auto" + + def test_detect_text_direction_with_numbers(self): + # Arabic with numbers + assert detect_text_direction("عدد 123 شيء") == "rtl" + + +class TestLanguageGuessing: + """Tests for language guessing from text.""" + + def test_guess_language_arabic(self): + assert guess_language_from_text("مرحبا") == "ar" + + def test_guess_language_hebrew(self): + assert guess_language_from_text("שלום") == "he" + + def test_guess_language_english(self): + # Cannot determine specific LTR language + assert guess_language_from_text("Hello") is None + + def test_guess_language_empty(self): + assert guess_language_from_text("") is None + + +class TestBidiMarkers: + """Tests for Unicode bidirectional markers.""" + + def test_wrap_with_direction_markers_rtl(self): + result = wrap_with_direction_markers("مرحبا", "rtl") + assert result.startswith(RLE) + assert "مرحبا" in result + + def test_wrap_with_direction_markers_ltr(self): + result = wrap_with_direction_markers("Hello", "ltr") + assert "Hello" in result + + def test_wrap_with_direction_markers_empty(self): + assert wrap_with_direction_markers("", "rtl") == "" + + def test_clean_bidi_markers(self): + marked_text = wrap_with_direction_markers("Hello", "ltr") + clean_text = clean_bidi_markers(marked_text) + assert clean_text == "Hello" + + def test_get_visual_length(self): + marked_text = wrap_with_direction_markers("مرحبا", "rtl") + assert get_visual_length(marked_text) == len("مرحبا") + + +class TestArabicNumberHandling: + """Tests for number handling in Arabic text.""" + + def test_fix_arabic_numbers_simple(self): + result = fix_arabic_numbers_in_text("عدد 123 شيء") + # Numbers should be wrapped with LTR markers + assert "123" in result + assert result != "عدد 123 شيء" # Should be different + + def test_fix_arabic_numbers_multiple(self): + result = fix_arabic_numbers_in_text("من 10 إلى 20") + 
assert "10" in result + assert "20" in result + + def test_fix_arabic_numbers_decimals(self): + result = fix_arabic_numbers_in_text("السعر 99.99 دولار") + assert "99.99" in result + + def test_fix_arabic_numbers_no_numbers(self): + text = "مرحبا بك" + result = fix_arabic_numbers_in_text(text) + # Text without numbers should remain unchanged + assert clean_bidi_markers(result) == text + + +class TestMixedDirectionText: + """Tests for mixed-direction text handling.""" + + def test_split_mixed_direction_simple(self): + segments = split_mixed_direction_text("Hello مرحبا World") + assert len(segments) >= 2 + assert any(seg.direction == "rtl" for seg in segments) + assert any(seg.direction == "ltr" for seg in segments) + + def test_split_mixed_direction_pure_rtl(self): + segments = split_mixed_direction_text("مرحبا بك") + assert len(segments) >= 1 + assert all(seg.direction == "rtl" for seg in segments) + + def test_split_mixed_direction_pure_ltr(self): + segments = split_mixed_direction_text("Hello World") + assert len(segments) >= 1 + assert all(seg.direction == "ltr" for seg in segments) + + def test_split_mixed_direction_empty(self): + segments = split_mixed_direction_text("") + assert segments == [] + + def test_normalize_mixed_text(self): + result = normalize_mixed_text("Hello مرحبا World") + # Result should contain directional markers + assert result != "Hello مرحبا World" + # Visual content should be preserved + clean = clean_bidi_markers(result) + assert "Hello" in clean + assert "مرحبا" in clean + assert "World" in clean + + +class TestTextSegment: + """Tests for TextSegment class.""" + + def test_text_segment_creation(self): + seg = TextSegment("مرحبا", "rtl", 0, 5) + assert seg.text == "مرحبا" + assert seg.direction == "rtl" + assert seg.start_index == 0 + assert seg.end_index == 5 + + def test_text_segment_equality(self): + seg1 = TextSegment("مرحبا", "rtl", 0, 5) + seg2 = TextSegment("مرحبا", "rtl", 0, 5) + seg3 = TextSegment("Hello", "ltr", 0, 5) + + 
assert seg1 == seg2 + assert seg1 != seg3 + + def test_text_segment_repr(self): + seg = TextSegment("مرحبا", "rtl") + repr_str = repr(seg) + assert "مرحبا" in repr_str + assert "rtl" in repr_str + + +class TestWidgetRTLSupport: + """Tests for RTL support in widgets.""" + + def test_text_widget_with_rtl(self): + from chatkit.widgets import Text + + widget = Text(value="مرحبا", dir="rtl", lang="ar") + assert widget.value == "مرحبا" + assert widget.dir == "rtl" + assert widget.lang == "ar" + + def test_text_widget_with_auto(self): + from chatkit.widgets import Text + + widget = Text(value="مرحبا Hello", dir="auto") + assert widget.dir == "auto" + + def test_markdown_widget_with_rtl(self): + from chatkit.widgets import Markdown + + widget = Markdown(value="# مرحبا", dir="rtl", lang="ar") + assert widget.dir == "rtl" + assert widget.lang == "ar" + + def test_title_widget_with_rtl(self): + from chatkit.widgets import Title + + widget = Title(value="عنوان", dir="rtl", lang="ar") + assert widget.value == "عنوان" + assert widget.dir == "rtl" + + def test_button_widget_with_rtl(self): + from chatkit.widgets import Button + + widget = Button(label="إرسال", dir="rtl", lang="ar") + assert widget.label == "إرسال" + assert widget.dir == "rtl" + + def test_input_widget_with_rtl(self): + from chatkit.widgets import Input + + widget = Input(name="name", placeholder="الاسم", dir="rtl", lang="ar") + assert widget.dir == "rtl" + assert widget.lang == "ar" + + def test_card_widget_with_rtl(self): + from chatkit.widgets import Card, Text + + widget = Card( + dir="rtl", + lang="ar", + children=[Text(value="محتوى", dir="rtl")], + ) + assert widget.dir == "rtl" + assert widget.lang == "ar" + + +class TestEdgeCases: + """Tests for edge cases and error handling.""" + + def test_empty_text_handling(self): + assert detect_text_direction("") == "auto" + assert calculate_rtl_ratio("") == 0.0 + assert split_mixed_direction_text("") == [] + + def test_whitespace_only_text(self): + assert 
detect_text_direction(" ") == "auto" + + def test_numbers_only_text(self): + # Numbers are weak characters + result = detect_text_direction("123 456") + assert result == "ltr" + + def test_punctuation_only_text(self): + result = detect_text_direction("!@#$%") + assert result in ["ltr", "auto"] + + def test_very_long_text(self): + long_arabic = "مرحبا " * 1000 + result = detect_text_direction(long_arabic) + assert result == "rtl" + + def test_mixed_with_emojis(self): + text = "مرحبا 😊 Hello" + result = detect_text_direction(text) + # Should still detect correctly despite emojis + assert result in ["rtl", "auto"] + + +class TestIntegration: + """Integration tests for complete workflows.""" + + def test_auto_detect_workflow_arabic(self): + from chatkit.i18n import auto_detect_and_set_direction + + text = "مرحبا بك في ChatKit" + normalized, direction, language = auto_detect_and_set_direction(text) + + assert direction == "rtl" + assert language == "ar" + assert "مرحبا" in normalized + + def test_auto_detect_workflow_mixed(self): + from chatkit.i18n import auto_detect_and_set_direction + + text = "مرحبا Hello" + normalized, direction, language = auto_detect_and_set_direction(text) + + # Mixed text can be detected as 'rtl' or 'auto' depending on ratio + assert direction in ["rtl", "auto"] + assert language == "ar" # Arabic detected + + def test_widget_serialization_with_rtl(self): + from chatkit.widgets import Text + + widget = Text(value="مرحبا", dir="rtl", lang="ar") + data = widget.model_dump(exclude_none=True) + + assert "dir" in data + assert data["dir"] == "rtl" + assert data["lang"] == "ar" + + def test_widget_serialization_without_rtl(self): + from chatkit.widgets import Text + + widget = Text(value="Hello") + data = widget.model_dump(exclude_none=True) + + # dir and lang should not be in output if None + assert "dir" not in data + assert "lang" not in data diff --git a/uv.lock b/uv.lock index 39c64c5..ca1fa6a 100644 --- a/uv.lock +++ b/uv.lock @@ -894,6 +894,15 
@@ wheels = [ { url = "https://files.pythonhosted.org/packages/90/96/04b8e52da071d28f5e21a805b19cb9390aa17a47462ac87f5e2696b9566d/paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591", size = 13746, upload-time = "2024-08-25T14:17:22.55Z" }, ] +[[package]] +name = "pathlib" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/aa/9b065a76b9af472437a0059f77e8f962fe350438b927cb80184c32f075eb/pathlib-1.0.1.tar.gz", hash = "sha256:6940718dfc3eff4258203ad5021090933e5c04707d5ca8cc9e73c94a7894ea9f", size = 49298, upload-time = "2014-09-03T15:41:57.18Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/f9/690a8600b93c332de3ab4a344a4ac34f00c8f104917061f779db6a918ed6/pathlib-1.0.1-py3-none-any.whl", hash = "sha256:f35f95ab8b0f59e6d354090350b44a80a80635d22efdedfa84c7ad1cf0a74147", size = 14363, upload-time = "2022-05-04T13:37:20.585Z" }, +] + [[package]] name = "pathspec" version = "0.12.1"