Skip to content
5 changes: 1 addition & 4 deletions picojson/src/escape_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ impl UnicodeEscapeCollector {
#[cfg(test)]
mod tests {
use super::*;
use crate::ujson::EventToken;

#[test]
fn test_simple_escapes() {
Expand Down Expand Up @@ -375,8 +376,6 @@ mod tests {

#[test]
fn test_token_to_escape_char() {
use crate::ujson::EventToken;

// Test all valid escape tokens
assert_eq!(
EscapeProcessor::token_to_escape_char(&EventToken::EscapeQuote).unwrap(),
Expand Down Expand Up @@ -420,8 +419,6 @@ mod tests {

#[test]
fn test_process_escape_token() {
use crate::ujson::EventToken;

// Test valid escape tokens that produce correct unescaped bytes
assert_eq!(
EscapeProcessor::process_escape_token(&EventToken::EscapeQuote).unwrap(),
Expand Down
38 changes: 30 additions & 8 deletions picojson/src/event_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//! This module extracts the common event handling patterns to reduce code duplication
//! while preserving the performance characteristics of each parser type.

use crate::shared::{ContentRange, State};
use crate::ujson::EventToken;
use crate::{Event, ParseError};

Expand All @@ -25,9 +26,9 @@ pub trait EscapeHandler {
Ok(())
}

/// Append a single literal byte (for per-byte accumulation patterns)
/// Default implementation is no-op - suitable for parsers that don't need per-byte processing
fn append_literal_byte(&mut self, _byte: u8) -> Result<(), crate::ParseError> {
/// Begin unicode escape sequence processing
/// Default implementation is no-op - suitable for parsers that don't need special handling
fn begin_unicode_escape(&mut self) -> Result<(), crate::ParseError> {
Ok(())
}
}
Expand All @@ -53,8 +54,6 @@ pub fn process_begin_events<C: ContentExtractor>(
event: &crate::ujson::Event,
content_extractor: &mut C,
) -> Option<EventResult<'static, 'static>> {
use crate::shared::{ContentRange, State};

match event {
// String/Key Begin events - nearly identical patterns
crate::ujson::Event::Begin(EventToken::Key) => {
Expand Down Expand Up @@ -132,6 +131,19 @@ pub trait ContentExtractor: EscapeHandler {
from_container_end: bool,
) -> Result<crate::Event<'_, '_>, crate::ParseError>;

/// Extract a completed number using shared number parsing logic
///
/// # Arguments
/// * `start_pos` - Position where the number started
/// * `from_container_end` - True if number was terminated by container delimiter
/// * `finished` - True if the parser has finished processing input (StreamParser-specific)
fn extract_number(
&mut self,
start_pos: usize,
from_container_end: bool,
finished: bool,
) -> Result<crate::Event<'_, '_>, crate::ParseError>;

/// Shared validation and extraction for string content
fn validate_and_extract_string(&mut self) -> Result<crate::Event<'_, '_>, crate::ParseError> {
let start_pos = match *self.parser_state() {
Expand Down Expand Up @@ -189,9 +201,9 @@ pub trait ContentExtractor: EscapeHandler {
///
/// This callback stores tokenizer events in the parser's event array, filling the first
/// available slot. This pattern is identical across both SliceParser and StreamParser.
pub fn create_tokenizer_callback<'a>(
event_storage: &'a mut [Option<crate::ujson::Event>; 2],
) -> impl FnMut(crate::ujson::Event, usize) + 'a {
pub fn create_tokenizer_callback(
event_storage: &mut [Option<crate::ujson::Event>; 2],
) -> impl FnMut(crate::ujson::Event, usize) + '_ {
|event, _len| {
for evt in event_storage.iter_mut() {
if evt.is_none() {
Expand Down Expand Up @@ -264,6 +276,7 @@ pub fn process_unicode_escape_events<C: ContentExtractor>(
match content_extractor.parser_state() {
crate::shared::State::String(_) | crate::shared::State::Key(_) => {
content_extractor.unicode_escape_collector_mut().reset();
content_extractor.begin_unicode_escape()?;
}
_ => {} // Ignore if not in string/key context
}
Expand Down Expand Up @@ -471,6 +484,15 @@ mod tests {
) -> Result<crate::Event<'_, '_>, crate::ParseError> {
unimplemented!("Mock doesn't need extraction")
}

fn extract_number(
&mut self,
_start_pos: usize,
_from_container_end: bool,
_finished: bool,
) -> Result<crate::Event<'_, '_>, crate::ParseError> {
unimplemented!("Mock doesn't need extraction")
}
}

#[test]
Expand Down
6 changes: 6 additions & 0 deletions picojson/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,16 @@ mod copy_on_escape;

mod escape_processor;

mod parser_core;

mod stream_buffer;

mod stream_content_builder;

mod stream_parser;

mod slice_content_builder;

mod slice_parser;

mod parse_error;
Expand Down
171 changes: 171 additions & 0 deletions picojson/src/parser_core.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
// SPDX-License-Identifier: Apache-2.0

//! Unified parser core that handles the common event processing loop.
//!
//! This module provides the `ParserCore` struct that consolidates the shared
//! event processing logic between SliceParser and StreamParser, eliminating
//! the duplication in their `next_event_impl` methods.

use crate::event_processor::{
finish_tokenizer, have_events, process_begin_escape_sequence_event, process_begin_events,
process_byte_through_tokenizer, process_simple_escape_event, process_simple_events,
process_unicode_escape_events, take_first_event, ContentExtractor, EventResult,
};
use crate::shared::{ByteProvider, Event, ParserState, UnexpectedState};
use crate::ujson::{EventToken, Tokenizer};
use crate::{ujson, ParseError};

/// Combined trait for parsers that provide both byte access and content extraction.
///
/// This is a pure marker/umbrella trait: it has no items of its own and exists only
/// so generic bounds can say `P: ParserProvider` instead of repeating
/// `ByteProvider + ContentExtractor` everywhere.
pub trait ParserProvider: ByteProvider + ContentExtractor {}
// Blanket impl: any type implementing both super-traits gets `ParserProvider`
// automatically, so no parser ever implements this trait by hand.
impl<T: ByteProvider + ContentExtractor> ParserProvider for T {}

/// The core parser logic that handles the unified event processing loop.
///
/// This struct contains all the shared state and logic that was previously
/// duplicated between SliceParser and StreamParser. It uses trait abstractions
/// to handle the differences in content building and byte providing.
///
/// `T` and `C` are the tokenizer's bit-bucket and depth-counter strategies;
/// they are forwarded verbatim to [`Tokenizer`].
pub struct ParserCore<T: ujson::BitBucket, C: ujson::DepthCounter> {
    /// The tokenizer that processes JSON tokens
    pub tokenizer: Tokenizer<T, C>,
    /// Parser state and event storage (holds the two-slot pending-event array
    /// that the tokenizer callback fills and the event loop drains)
    pub parser_state: ParserState,
}

impl<T: ujson::BitBucket, C: ujson::DepthCounter> ParserCore<T, C> {
    /// Create a new ParserCore with a fresh tokenizer and empty event storage.
    pub fn new() -> Self {
        Self {
            tokenizer: Tokenizer::new(),
            parser_state: ParserState::new(),
        }
    }

    /// Unified implementation that works with a single combined provider.
    /// This avoids borrowing conflicts by using a single object that implements both traits.
    ///
    /// Equivalent to [`Self::next_event_impl_unified_with_accumulator`] with a
    /// no-op byte accumulator (the SliceParser-style path, where bytes that
    /// produce no events need no extra buffering).
    ///
    /// # Errors
    /// Propagates any `ParseError` raised by the provider, the tokenizer, or
    /// the shared event processors.
    pub fn next_event_impl_unified<'a, P>(
        &mut self,
        provider: &'a mut P,
        escape_timing: EscapeTiming,
    ) -> Result<Event<'a, 'a>, ParseError>
    where
        P: ParserProvider,
    {
        self.next_event_impl_unified_with_accumulator(provider, escape_timing, |_, _| Ok(()))
    }

    /// Unified implementation with optional byte accumulation callback.
    /// This supports StreamParser-specific byte accumulation when no events are generated.
    ///
    /// # Arguments
    /// * `provider` - combined byte source + content extractor for this parser flavor
    /// * `escape_timing` - whether simple escapes are handled on Begin or End tokens
    /// * `byte_accumulator` - invoked with each byte that produced no tokenizer
    ///   events, so a streaming parser can buffer raw content bytes
    ///
    /// # Errors
    /// Propagates any `ParseError` from byte production, tokenization, the
    /// accumulator, or content extraction.
    pub fn next_event_impl_unified_with_accumulator<'a, P, F>(
        &mut self,
        provider: &'a mut P,
        escape_timing: EscapeTiming,
        mut byte_accumulator: F,
    ) -> Result<Event<'a, 'a>, ParseError>
    where
        P: ParserProvider,
        F: FnMut(&mut P, u8) -> Result<(), ParseError>,
    {
        // Outer loop: keep consuming bytes and internal tokenizer events until
        // one of them maps to a user-visible `Event` (or end of document).
        loop {
            // Pull bytes through the tokenizer until at least one event is queued.
            while !have_events(&self.parser_state.evts) {
                if let Some(byte) = provider.next_byte()? {
                    process_byte_through_tokenizer(
                        byte,
                        &mut self.tokenizer,
                        &mut self.parser_state.evts,
                    )?;

                    // Call byte accumulator if no events were generated (StreamParser-specific)
                    if !have_events(&self.parser_state.evts) {
                        byte_accumulator(provider, byte)?;
                    }
                } else {
                    // Handle end of stream - let the provider handle any cleanup
                    // For StreamParser, this is where finished flag gets set
                    finish_tokenizer(&mut self.tokenizer, &mut self.parser_state.evts)?;

                    // No trailing events at EOF means the document is complete.
                    if !have_events(&self.parser_state.evts) {
                        return Ok(Event::EndDocument);
                    }
                }
            }

            // The while-loop above guarantees at least one queued event, so a
            // `None` here indicates an internal invariant violation.
            let taken_event = take_first_event(&mut self.parser_state.evts);
            let Some(taken) = taken_event else {
                return Err(UnexpectedState::StateMismatch.into());
            };

            // Try shared event processors first
            if let Some(result) =
                process_simple_events(&taken).or_else(|| process_begin_events(&taken, provider))
            {
                match result {
                    EventResult::Complete(event) => return Ok(event),
                    EventResult::ExtractString => return provider.validate_and_extract_string(),
                    EventResult::ExtractKey => return provider.validate_and_extract_key(),
                    EventResult::ExtractNumber(from_container_end) => {
                        return provider.validate_and_extract_number(from_container_end)
                    }
                    EventResult::Continue => continue,
                }
            }

            // Handle parser-specific events based on escape timing
            match taken {
                ujson::Event::Begin(EventToken::EscapeSequence) => {
                    process_begin_escape_sequence_event(provider)?;
                }
                _ if process_unicode_escape_events(&taken, provider)? => {
                    // Unicode escape events handled by shared function
                }
                ujson::Event::Begin(
                    escape_token @ (EventToken::EscapeQuote
                    | EventToken::EscapeBackslash
                    | EventToken::EscapeSlash
                    | EventToken::EscapeBackspace
                    | EventToken::EscapeFormFeed
                    | EventToken::EscapeNewline
                    | EventToken::EscapeCarriageReturn
                    | EventToken::EscapeTab),
                ) if escape_timing == EscapeTiming::OnBegin => {
                    // SliceParser-specific: Handle simple escape sequences on Begin events
                    // because CopyOnEscape requires starting unescaping immediately when
                    // the escape token begins to maintain zero-copy optimization
                    process_simple_escape_event(&escape_token, provider)?;
                }
                ujson::Event::End(
                    escape_token @ (EventToken::EscapeQuote
                    | EventToken::EscapeBackslash
                    | EventToken::EscapeSlash
                    | EventToken::EscapeBackspace
                    | EventToken::EscapeFormFeed
                    | EventToken::EscapeNewline
                    | EventToken::EscapeCarriageReturn
                    | EventToken::EscapeTab),
                ) if escape_timing == EscapeTiming::OnEnd => {
                    // StreamParser-specific: Handle simple escape sequences on End events
                    // because StreamBuffer must wait until the token ends to accumulate
                    // all bytes before processing the complete escape sequence
                    process_simple_escape_event(&escape_token, provider)?;
                }
                _ => {
                    // All other events continue to next iteration
                }
            }
        }
    }
}

impl<T: ujson::BitBucket, C: ujson::DepthCounter> Default for ParserCore<T, C> {
fn default() -> Self {
Self::new()
}
}

/// Enum to specify when escape sequences should be processed.
///
/// The two parser flavors must unescape simple escape tokens at different
/// points in the token stream, so the shared event loop takes this as a
/// configuration value rather than branching on parser type.
// `Eq` is derived alongside `PartialEq` because equality on this fieldless
// enum is trivially total (clippy: derive_partial_eq_without_eq).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscapeTiming {
    /// Process simple escape sequences on Begin events (SliceParser)
    OnBegin,
    /// Process simple escape sequences on End events (StreamParser)
    OnEnd,
}
2 changes: 0 additions & 2 deletions picojson/src/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,12 @@ pub enum State {

/// Parser state and event storage
pub(super) struct ParserState {
pub state: State,
pub evts: [Option<crate::ujson::Event>; 2],
}

impl ParserState {
pub fn new() -> Self {
Self {
state: State::None,
evts: core::array::from_fn(|_| None),
}
}
Expand Down
Loading
Loading