Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions picojson/examples/push_parser_demo.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Example demonstrating PushParser with SAX-style event handling

use picojson::{DefaultConfig, Event, PushParseError, PushParser, PushParserHandler};

/// Demo handler state for pretty-printing JSON events.
///
/// Holds the current nesting depth (used to build the indentation prefix)
/// and a running count of events.
struct JsonEventPrinter {
    /// Current nesting depth, measured in indentation units.
    indent: usize,
    /// Number of events counted so far.
    event_count: usize,
}

impl JsonEventPrinter {
    /// Text emitted once per nesting level.
    const INDENT_UNIT: &'static str = " ";

    /// Creates a printer at nesting depth zero with no events counted.
    fn new() -> Self {
        JsonEventPrinter {
            event_count: 0,
            indent: 0,
        }
    }

    /// Builds the indentation prefix for the current nesting depth.
    fn indent_str(&self) -> String {
        Self::INDENT_UNIT.repeat(self.indent)
    }
}

impl<'input, 'scratch> PushParserHandler<'input, 'scratch, String> for JsonEventPrinter {
    /// Prints one line describing `event`, prefixed by the current indentation.
    ///
    /// Every call increments `event_count`. Container starts are printed at the
    /// current depth and then deepen it; container ends first step back out
    /// (saturating at zero) so that they line up with their opener.
    ///
    /// Always returns `Ok(())`.
    fn handle_event(&mut self, event: Event<'input, 'scratch>) -> Result<(), String> {
        self.event_count += 1;

        // Closers are printed at their opener's depth, so dedent before printing.
        if matches!(event, Event::EndObject | Event::EndArray) {
            self.indent = self.indent.saturating_sub(1);
        }

        // Openers are printed at the current depth and deepen it afterwards;
        // record that now because `event` is consumed by the match below.
        let opens_container = matches!(event, Event::StartObject | Event::StartArray);
        let pad = self.indent_str();

        match event {
            Event::StartObject => println!("{}🏁 StartObject", pad),
            Event::EndObject => println!("{}🏁 EndObject", pad),
            Event::StartArray => println!("{}📋 StartArray", pad),
            Event::EndArray => println!("{}📋 EndArray", pad),
            Event::Key(key) => println!("{}🔑 Key: '{}'", pad, key.as_str()),
            Event::String(s) => println!("{}📝 String: '{}'", pad, s.as_str()),
            Event::Number(num) => println!("{}🔢 Number: {}", pad, num),
            Event::Bool(b) => println!("{}✅ Bool: {}", pad, b),
            Event::Null => println!("{}⭕ Null", pad),
            Event::EndDocument => println!("{}🏁 EndDocument", pad),
        }

        if opens_container {
            self.indent += 1;
        }
        Ok(())
    }
}

/// Runs the demo: prints the sample JSON, feeds it to a `PushParser` one chunk
/// at a time, and reports how many events the handler counted.
///
/// Errors from UTF-8 validation or from the parser are propagated with `?`.
fn main() -> Result<(), PushParseError<String>> {
    /// Size in bytes of the scratch buffer handed to the parser.
    const SCRATCH_SIZE: usize = 512;

    println!("🚀 PushParser Demo - SAX-style JSON Processing");
    println!("===============================================");
    println!();

    // One JSON document, split at arbitrary points (including mid-string)
    // so that each piece can be fed to the parser separately.
    let json_chunks: [&[u8]; 5] = [
        br#"{"name": "Pic"#,
        br#"oJSON", "version": 1.0, "#,
        br#""features": ["fast", "no_std""#,
        br#", "zero\u0041lloc"], "escapes": "hello\nworld", "#,
        br#""nested": {"data": [1, 2.5, true, null]}}"#,
    ];

    // Reassemble the whole document purely for display purposes.
    let full_json = json_chunks.concat();
    let json_str = std::str::from_utf8(&full_json)?;
    let chunk_total = json_chunks.len();

    println!("📄 Input JSON: {}", json_str);
    println!("📏 Total size: {} bytes", full_json.len());
    println!(
        "📦 Processing in {} chunks (simulates streaming)",
        chunk_total
    );
    println!();

    // The parser takes ownership of the handler and borrows the scratch buffer.
    let mut scratch = [0u8; SCRATCH_SIZE]; // Scratch buffer for escape processing
    let mut parser = PushParser::<_, DefaultConfig>::new(JsonEventPrinter::new(), &mut scratch);

    println!("🔄 Starting PushParser with incremental data feeding:");
    println!(" Buffer size: {} bytes", SCRATCH_SIZE);
    println!();

    // Feed the chunks in order; the handler prints events as they are produced.
    for (idx, chunk) in json_chunks.iter().enumerate() {
        let chunk_no = idx + 1;
        let chunk_text = std::str::from_utf8(chunk)?;
        println!("📨 Processing chunk {} ({} bytes):", chunk_no, chunk.len());
        println!(" Chunk data: {:?}", chunk_text);

        parser.write(chunk)?;
        println!();
    }

    // Signal end of input; `finish` hands the handler back.
    println!("🔚 Finishing parsing...");
    let printer = parser.finish()?;

    println!();
    println!(
        "✅ Successfully processed {} events with PushParser!",
        printer.event_count
    );

    Ok(())
}
21 changes: 21 additions & 0 deletions picojson/src/copy_on_escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,27 @@ impl<'a, 'b> CopyOnEscape<'a, 'b> {
Ok(String::Borrowed(borrowed_str))
}
}

/// DataSource support: reports whether unescaped content is available.
///
/// Returns the value of the `using_scratch` flag.
/// NOTE(review): presumably the flag is set once the current string has been
/// copied into the scratch buffer — confirm against the rest of this type.
pub fn has_unescaped_content(&self) -> bool {
self.using_scratch
}

/// Returns the `start..end` portion of the scratch buffer, borrowed for `'b`
/// so it can be handed out by a DataSource implementation.
///
/// # Errors
/// Returns `ParseError::Unexpected(UnexpectedState::InvalidSliceBounds)` when
/// `start..end` is not a valid range within the scratch buffer.
pub fn get_scratch_buffer_slice(
    &'b self,
    start: usize,
    end: usize,
) -> Result<&'b [u8], ParseError> {
    match self.scratch.get(start..end) {
        Some(bytes) => Ok(bytes),
        None => Err(ParseError::Unexpected(UnexpectedState::InvalidSliceBounds)),
    }
}

/// Returns the scratch buffer range for the current string.
///
/// The tuple is `(scratch_start, scratch_pos)`.
/// NOTE(review): presumably a half-open `start..end` range suitable for
/// `get_scratch_buffer_slice` — confirm with callers.
pub fn get_scratch_range(&self) -> (usize, usize) {
(self.scratch_start, self.scratch_pos)
}
}

#[cfg(test)]
Expand Down
74 changes: 54 additions & 20 deletions picojson/src/escape_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
use crate::parse_error::ParseError;
use crate::shared::{ContentRange, UnexpectedState};

/// Result type for Unicode escape sequence processing.
///
/// Tuple fields, in order:
/// 1. `Option<([u8; 4], usize)>` - optional UTF-8 byte array and its length
/// 2. `usize` - the start position of the escape sequence (`\uXXXX`)
/// 3. `Option<u32>` - the new pending high surrogate value, if any
type UnicodeEscapeResult = (Option<([u8; 4], usize)>, usize, Option<u32>);

/// Shared utilities for processing JSON escape sequences.
/// This module contains pure functions for escape processing that can be used
/// by both CopyOnEscape and StreamingBuffer components.
Expand Down Expand Up @@ -266,6 +274,21 @@ impl UnicodeEscapeCollector {
pub fn has_pending_high_surrogate(&self) -> bool {
self.pending_high_surrogate.is_some()
}

/// Get the pending high surrogate value.
///
/// Returns a copy of the stored value, or `None` when no high surrogate is pending.
pub fn get_pending_high_surrogate(&self) -> Option<u32> {
self.pending_high_surrogate
}

/// Set the pending high surrogate value.
///
/// Overwrites whatever was stored before; passing `None` clears it.
pub fn set_pending_high_surrogate(&mut self, surrogate: Option<u32>) {
self.pending_high_surrogate = surrogate;
}

/// Reports whether the collector currently holds partial state.
///
/// Returns `true` when `hex_pos` is greater than zero, or when a high
/// surrogate is pending.
pub fn is_in_progress(&self) -> bool {
    if self.hex_pos > 0 {
        return true;
    }
    self.has_pending_high_surrogate()
}
}

impl Default for UnicodeEscapeCollector {
Expand Down Expand Up @@ -645,51 +668,58 @@ mod tests {
/// Shared implementation for processing a Unicode escape sequence WITH surrogate pair support.
///
/// This function centralizes the logic for handling `\uXXXX` escapes, which is
/// common to both the pull-based and stream-based parsers. It uses a generic
/// `hex_slice_provider` to remain independent of the underlying buffer implementation
/// (`SliceInputBuffer` vs. `StreamBuffer`).
/// common to all parsers. It uses the generic `DataSource` trait to remain
/// independent of the underlying buffer implementation (`SliceInputBuffer` vs. `StreamBuffer`).
///
/// # Arguments
/// * `current_pos` - The parser's current position in the input buffer, right after the 4 hex digits.
/// * `unicode_escape_collector` - A mutable reference to the shared `UnicodeEscapeCollector`.
/// * `hex_slice_provider` - A closure that takes a start and end position and returns the hex digit slice.
/// * `utf8_buf` - A buffer to write the UTF-8 encoded result into.
/// * `current_pos` - The parser's current position, right after the 4 hex digits.
/// * `pending_high_surrogate` - The optional high surrogate from a previous escape.
/// * `source` - A `DataSource` implementation to provide the hex digit slice.
///
/// # Returns
/// A tuple containing:
/// - Optional UTF-8 byte slice (None if this is a high surrogate waiting for low surrogate)
/// - The start position of the escape sequence (`\uXXXX`)
pub(crate) fn process_unicode_escape_sequence<'a, F>(
/// - Optional UTF-8 byte array and its length.
/// - The start position of the escape sequence (`\uXXXX`).
/// - The new pending high surrogate value, if any.
pub(crate) fn process_unicode_escape_sequence<'input, 'scratch, D>(
current_pos: usize,
unicode_escape_collector: &mut UnicodeEscapeCollector,
mut hex_slice_provider: F,
) -> Result<(Option<([u8; 4], usize)>, usize), ParseError>
pending_high_surrogate: Option<u32>,
source: &'input D,
) -> Result<UnicodeEscapeResult, ParseError>
where
F: FnMut(usize, usize) -> Result<&'a [u8], ParseError>,
D: ?Sized + crate::shared::DataSource<'input, 'scratch>,
{
let (hex_start, hex_end, escape_start_pos) = ContentRange::unicode_escape_bounds(current_pos);

// Extract the 4 hex digits from the buffer using the provider
let hex_slice = hex_slice_provider(hex_start, hex_end)?;
// Extract the 4 hex digits from the buffer using the DataSource
let hex_slice = source.get_borrowed_slice(hex_start, hex_end)?;

if hex_slice.len() != 4 {
return Err(UnexpectedState::InvalidUnicodeEscape.into());
}

// Create a temporary collector to process the hex digits
let mut temp_collector = UnicodeEscapeCollector::new();
if let Some(surrogate) = pending_high_surrogate {
temp_collector.set_pending_high_surrogate(Some(surrogate));
}

// Feed hex digits to the shared collector
for &hex_digit in hex_slice {
unicode_escape_collector.add_hex_digit(hex_digit)?;
temp_collector.add_hex_digit(hex_digit)?;
}

// Check if we had a pending high surrogate before processing
let had_pending_high_surrogate = unicode_escape_collector.has_pending_high_surrogate();
let had_pending_high_surrogate = temp_collector.has_pending_high_surrogate();

// Create a local buffer for the UTF-8 result
let mut utf8_buf = [0u8; 4];

// Process the complete sequence to UTF-8 with surrogate support
let (utf8_bytes_opt, _surrogate_state_changed) =
unicode_escape_collector.process_to_utf8(&mut utf8_buf)?;
temp_collector.process_to_utf8(&mut utf8_buf)?;

let new_pending_high_surrogate = temp_collector.get_pending_high_surrogate();

// If we have a result, copy it to a new array to return by value
let result_by_value = utf8_bytes_opt.map(|bytes| {
Expand All @@ -708,5 +738,9 @@ where
escape_start_pos
};

Ok((result_by_value, final_escape_start_pos))
Ok((
result_by_value,
final_escape_start_pos,
new_pending_high_surrogate,
))
}
Loading
Loading