From ed9cfe7a7ecc88ac86ad9623a91a69cea0bc9170 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Thu, 30 Nov 2023 18:59:32 +0000 Subject: [PATCH] optimize peak for efficiency --- src/jiter.rs | 69 ++++++++++++++++++++-------------- src/parse.rs | 102 ++++++++++++++++++++++++++++++-------------------- src/python.rs | 18 +++++---- src/value.rs | 16 ++++---- tests/main.rs | 44 +++++++++++----------- 5 files changed, 143 insertions(+), 106 deletions(-) diff --git a/src/jiter.rs b/src/jiter.rs index f8fecea..57844e0 100644 --- a/src/jiter.rs +++ b/src/jiter.rs @@ -1,4 +1,4 @@ -use crate::errors::{JiterError, JsonType, LinePosition, DEFAULT_RECURSION_LIMIT}; +use crate::errors::{json_error, JiterError, JsonType, LinePosition, DEFAULT_RECURSION_LIMIT}; use crate::number_decoder::{NumberAny, NumberFloat, NumberInt, NumberRange}; use crate::parse::{Parser, Peak}; use crate::string_decoder::{StringDecoder, StringDecoderRange, Tape}; @@ -110,13 +110,15 @@ impl<'j> Jiter<'j> { /// Knowing the next value is a number, parse it. pub fn known_number(&mut self, peak: Peak) -> JiterResult { - match peak { - Peak::Num(first) => self - .parser - .consume_number::(first, self.allow_inf_nan) - .map_err(Into::into), - _ => Err(self.wrong_type(JsonType::Int, peak)), - } + self.parser + .consume_number::(peak.into_inner(), self.allow_inf_nan) + .map_err(|e| { + if !peak.is_num() { + self.wrong_type(JsonType::Int, peak) + } else { + e.into() + } + }) } /// Assuming the next value is an integer, consume it. Error if it is not an integer, or is invalid JSON. @@ -127,13 +129,15 @@ impl<'j> Jiter<'j> { /// Knowing the next value is an integer, parse it. pub fn known_int(&mut self, peak: Peak) -> JiterResult { - match peak { - Peak::Num(first) => self - .parser - .consume_number::(first, self.allow_inf_nan) - .map_err(Into::into), - _ => Err(self.wrong_type(JsonType::Int, peak)), - } + self.parser + .consume_number::(peak.into_inner(), self.allow_inf_nan) + .map_err(|e| { + if !peak.is_num() { + self.wrong_type(JsonType::Int, peak) + } else { + e.into() + } + }) } /// Assuming the next value is a float, consume it. Error if it is not a float, or is invalid JSON. @@ -144,13 +148,15 @@ impl<'j> Jiter<'j> { /// Knowing the next value is a float, parse it. pub fn known_float(&mut self, peak: Peak) -> JiterResult { - match peak { - Peak::Num(first) => self - .parser - .consume_number::(first, self.allow_inf_nan) - .map_err(Into::into), - _ => Err(self.wrong_type(JsonType::Int, peak)), - } + self.parser + .consume_number::(peak.into_inner(), self.allow_inf_nan) + .map_err(|e| { + if !peak.is_num() { + self.wrong_type(JsonType::Float, peak) + } else { + e.into() + } + }) } /// Assuming the next value is a number, consume it and return bytes from the original JSON data. @@ -161,12 +167,18 @@ impl<'j> Jiter<'j> { /// Knowing the next value is a number, parse it and return bytes from the original JSON data. fn known_number_bytes(&mut self, peak: Peak) -> JiterResult<&[u8]> { - match peak { - Peak::Num(first) => { - let range = self.parser.consume_number::(first, self.allow_inf_nan)?; - Ok(&self.data[range]) + match self + .parser + .consume_number::(peak.into_inner(), self.allow_inf_nan) + { + Ok(range) => Ok(&self.data[range]), + Err(e) => { + if !peak.is_num() { + Err(self.wrong_type(JsonType::Float, peak)) + } else { + Err(e.into()) + } } - _ => Err(self.wrong_type(JsonType::Float, peak)), } } @@ -299,9 +311,10 @@ impl<'j> Jiter<'j> { Peak::True | Peak::False => JiterError::wrong_type(expected, JsonType::Bool, self.parser.index), Peak::Null => JiterError::wrong_type(expected, JsonType::Null, self.parser.index), Peak::String => JiterError::wrong_type(expected, JsonType::String, self.parser.index), - Peak::Num(first) => self.wrong_num(first, expected), Peak::Array => JiterError::wrong_type(expected, JsonType::Array, self.parser.index), Peak::Object => JiterError::wrong_type(expected, JsonType::Object, self.parser.index), + _ if peak.is_num() => self.wrong_num(peak.into_inner(), expected), + _ => json_error!(ExpectedSomeValue, self.parser.index).into(), } } diff --git a/src/parse.rs b/src/parse.rs index e1cf868..4d94b58 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -2,33 +2,60 @@ use crate::errors::{json_err, JsonResult, LinePosition}; use crate::number_decoder::AbstractNumberDecoder; use crate::string_decoder::{AbstractStringDecoder, Tape}; -/// Enum used to describe the next expected value in JSON. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Peak { - Null, - True, - False, - // we keep the first character of the number as we'll need it when decoding - Num(u8), - String, - Array, - Object, +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct Peak(u8); + +#[allow(non_upper_case_globals)] // while testing +impl Peak { + pub const Null: Self = Self(b'n'); + pub const True: Self = Self(b't'); + pub const False: Self = Self(b'f'); + pub const Zero: Self = Self(b'0'); + pub const One: Self = Self(b'1'); + pub const Two: Self = Self(b'2'); + pub const Three: Self = Self(b'3'); + pub const Four: Self = Self(b'4'); + pub const Five: Self = Self(b'5'); + pub const Six: Self = Self(b'6'); + pub const Seven: Self = Self(b'7'); + pub const Eight: Self = Self(b'8'); + pub const Nine: Self = Self(b'9'); + pub const Minus: Self = Self(b'-'); + pub const Plus: Self = Self(b'+'); + pub const Infinity: Self = Self(b'I'); + pub const NaN: Self = Self(b'N'); + pub const String: Self = Self(b'"'); + pub const Array: Self = Self(b'['); + pub const Object: Self = Self(b'{'); } impl Peak { - fn new(next: u8) -> Option { - match next { - b'[' => Some(Self::Array), - b'{' => Some(Self::Object), - b'"' => Some(Self::String), - b't' => Some(Self::True), - b'f' => Some(Self::False), - b'n' => Some(Self::Null), - b'0'..=b'9' => Some(Self::Num(next)), - // `-` negative, `I` Infinity, `N` NaN - b'-' | b'I' | b'N' => Some(Self::Num(next)), - _ => None, - } + const fn new(next: u8) -> Self { + Self(next) + } + + pub const fn is_num(self) -> bool { + matches!( + self, + Self::Zero + | Self::One + | Self::Two + | Self::Three + | Self::Four + | Self::Five + | Self::Six + | Self::Seven + | Self::Eight + | Self::Nine + | Self::Minus + | Self::Plus + | Self::Infinity + | Self::NaN + ) + } + + pub const fn into_inner(self) -> u8 { + self.0 } } @@ -57,10 +84,7 @@ impl<'j> Parser<'j> { pub fn peak(&mut self) -> JsonResult { if let Some(next) = self.eat_whitespace() { - match Peak::new(next) { - Some(p) => Ok(p), - None => json_err!(ExpectedSomeValue, self.index), - } + Ok(Peak::new(next)) } else { json_err!(EofWhileParsingValue, self.index) } @@ -73,7 +97,7 @@ impl<'j> Parser<'j> { self.index += 1; Ok(None) } else { - self.array_peak() + Ok(Some(Peak::new(next))) } } else { json_err!(EofWhileParsingList, self.index) @@ -85,7 +109,12 @@ impl<'j> Parser<'j> { match next { b',' => { self.index += 1; - self.array_peak() + let next = self.array_peak()?; + if next.is_none() { + json_err!(TrailingComma, self.index) + } else { + Ok(next) + } } b']' => { self.index += 1; @@ -216,16 +245,9 @@ impl<'j> Parser<'j> { fn array_peak(&mut self) -> JsonResult> { if let Some(next) = self.eat_whitespace() { - match Peak::new(next) { - Some(p) => Ok(Some(p)), - None => { - // if next is a `]`, we have a "trailing comma" error - if next == b']' { - json_err!(TrailingComma, self.index) - } else { - json_err!(ExpectedSomeValue, self.index) - } - } + match next { + b']' => Ok(None), + _ => Ok(Some(Peak::new(next))), } } else { json_err!(EofWhileParsingValue, self.index) diff --git a/src/python.rs b/src/python.rs index 3391457..a35c648 100644 --- a/src/python.rs +++ b/src/python.rs @@ -75,14 +75,6 @@ impl<'j> PythonParser<'j> { let s = self.parser.consume_string::(&mut self.tape)?; Ok(StringCache::get(py, s.as_str())) } - Peak::Num(first) => { - let n = self.parser.consume_number::(first, self.allow_inf_nan)?; - match n { - NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)), - NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)), - NumberAny::Float(float) => Ok(float.to_object(py)), - } - } Peak::Array => { let list = if let Some(peak_first) = self.parser.array_first()? { let mut vec: SmallVec<[PyObject; 8]> = SmallVec::with_capacity(8); @@ -125,6 +117,16 @@ impl<'j> PythonParser<'j> { } Ok(dict.to_object(py)) } + _ => { + let n = self + .parser + .consume_number::(peak.into_inner(), self.allow_inf_nan)?; + match n { + NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)), + NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)), + NumberAny::Float(float) => Ok(float.to_object(py)), + } + } } } diff --git a/src/value.rs b/src/value.rs index 448eed4..833f7a8 100644 --- a/src/value.rs +++ b/src/value.rs @@ -97,14 +97,6 @@ pub(crate) fn take_value( let s = parser.consume_string::(tape)?; Ok(JsonValue::Str(s.into())) } - Peak::Num(first) => { - let n = parser.consume_number::(first, allow_inf_nan)?; - match n { - NumberAny::Int(NumberInt::Int(int)) => Ok(JsonValue::Int(int)), - NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(JsonValue::BigInt(big_int)), - NumberAny::Float(float) => Ok(JsonValue::Float(float)), - } - } Peak::Array => { // we could do something clever about guessing the size of the array let mut array: SmallVec<[JsonValue; 8]> = SmallVec::new(); @@ -144,5 +136,13 @@ pub(crate) fn take_value( Ok(JsonValue::Object(Arc::new(object))) } + _ => { + let n = parser.consume_number::(peak.into_inner(), allow_inf_nan)?; + match n { + NumberAny::Int(NumberInt::Int(int)) => Ok(JsonValue::Int(int)), + NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(JsonValue::BigInt(big_int)), + NumberAny::Float(float) => Ok(JsonValue::Float(float)), + } + } } } diff --git a/tests/main.rs b/tests/main.rs index 74a1a54..8a7e704 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -36,10 +36,6 @@ fn json_vec(jiter: &mut Jiter, peak: Option) -> JiterResult> { let str = jiter.known_str()?; v.push(format!("String({str}) @ {position}")); } - Peak::Num(_) => { - let s = display_number(peak, jiter)?; - v.push(s); - } Peak::Array => { v.push(format!("[ @ {position}")); if let Some(peak) = jiter.known_array()? { @@ -66,6 +62,10 @@ fn json_vec(jiter: &mut Jiter, peak: Option) -> JiterResult> { } v.push("}".to_string()); } + _ => { + let s = display_number(peak, jiter)?; + v.push(s); + } }; Ok(v) } @@ -349,7 +349,7 @@ fn invalid_unicode_code() { fn nan_disallowed() { let json = r#"[NaN]"#; let mut jiter = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Num(b'N')); + assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::NaN); let e = jiter.next_number().unwrap_err(); assert_eq!( e.error_type, @@ -363,7 +363,7 @@ fn nan_disallowed() { fn inf_disallowed() { let json = r#"[Infinity]"#; let mut jiter = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Num(b'I')); + assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Infinity); let e = jiter.next_number().unwrap_err(); assert_eq!( e.error_type, @@ -377,7 +377,7 @@ fn inf_disallowed() { fn inf_neg_disallowed() { let json = r#"[-Infinity]"#; let mut jiter = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Num(b'-')); + assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Minus); let e = jiter.next_number().unwrap_err(); assert_eq!(e.error_type, JiterErrorType::JsonError(JsonErrorType::InvalidNumber)); assert_eq!(e.index, 2); @@ -388,7 +388,7 @@ fn inf_neg_disallowed() { fn nan_disallowed_wrong_type() { let json = r#"[NaN]"#; let mut jiter = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Num(b'N')); + assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::NaN); let e = jiter.next_str().unwrap_err(); assert_eq!( e.error_type, @@ -630,9 +630,9 @@ fn jiter_object() { assert_eq!(jiter.next_object().unwrap(), Some("foo")); assert_eq!(jiter.next_str().unwrap(), "bar"); assert_eq!(jiter.next_key().unwrap(), Some("spam")); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::Num(b'1'))); + assert_eq!(jiter.next_array().unwrap(), Some(Peak::One)); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1)); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'-'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::Minus)); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(-2)); assert_eq!(jiter.array_step().unwrap(), Some(Peak::String)); assert_eq!(jiter.next_bytes().unwrap(), b"x"); @@ -644,11 +644,11 @@ fn jiter_object() { #[test] fn jiter_inf() { let mut jiter = Jiter::new(b"[Infinity, -Infinity, NaN]", true); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::Num(b'I'))); + assert_eq!(jiter.next_array().unwrap(), Some(Peak::Infinity)); assert_eq!(jiter.next_float().unwrap(), f64::INFINITY); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'-'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::Minus)); assert_eq!(jiter.next_float().unwrap(), f64::NEG_INFINITY); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'N'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::NaN)); assert_eq!(jiter.next_float().unwrap().to_string(), "NaN"); assert_eq!(jiter.array_step().unwrap(), None); jiter.finish().unwrap(); @@ -681,20 +681,20 @@ fn jiter_bytes() { #[test] fn jiter_number() { let mut jiter = Jiter::new(br#" [1, 2.2, 3, 4.1, 5.67]"#, false); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::Num(b'1'))); + assert_eq!(jiter.next_array().unwrap(), Some(Peak::One)); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1)); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'2'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::Two)); assert_eq!(jiter.next_float().unwrap(), 2.2); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'3'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::Three)); let n = jiter.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(3))); let n_float: f64 = n.into(); assert_eq!(n_float, 3.0); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'4'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::Four)); assert_eq!(jiter.next_number().unwrap(), NumberAny::Float(4.1)); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'5'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::Five)); assert_eq!(jiter.next_number_bytes().unwrap(), b"5.67"); assert_eq!(jiter.array_step().unwrap(), None); jiter.finish().unwrap(); @@ -726,7 +726,7 @@ fn jiter_empty_array() { #[test] fn jiter_trailing_bracket() { let mut jiter = Jiter::new(b"[1]]", false); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::Num(b'1'))); + assert_eq!(jiter.next_array().unwrap(), Some(Peak::One)); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1)); assert!(jiter.array_step().unwrap().is_none()); let e = jiter.finish().unwrap_err(); @@ -914,17 +914,17 @@ fn readme_jiter() { fn jiter_clone() { let json = r#"[1, 2]"#; let mut jiter1 = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter1.next_array().unwrap().unwrap(), Peak::Num(b'1')); + assert_eq!(jiter1.next_array().unwrap().unwrap(), Peak::One); let n = jiter1.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(1))); let mut jiter2 = jiter1.clone(); - assert_eq!(jiter1.array_step().unwrap().unwrap(), Peak::Num(b'2')); + assert_eq!(jiter1.array_step().unwrap().unwrap(), Peak::Two); let n = jiter1.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(2))); - assert_eq!(jiter2.array_step().unwrap().unwrap(), Peak::Num(b'2')); + assert_eq!(jiter2.array_step().unwrap().unwrap(), Peak::Two); let n = jiter2.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(2)));