From ae43dc16d46465313fd109dd5b48f3af2520027c Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 17 Jan 2024 09:28:15 +0000 Subject: [PATCH 1/4] fix with python parsing seen on pydantic-core --- src/python.rs | 26 +++++++++++++------------- tests/main.rs | 1 + tests/python.rs | 12 ++++++++++++ 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/python.rs b/src/python.rs index 68009b4..af11600 100644 --- a/src/python.rs +++ b/src/python.rs @@ -59,6 +59,10 @@ struct PythonParser<'j> { impl<'j> PythonParser<'j> { fn py_take_value(&mut self, py: Python, peek: Peek) -> JsonResult { match peek { + Peek::Null => { + self.parser.consume_null()?; + Ok(py.None()) + } Peek::True => { self.parser.consume_true()?; Ok(true.to_object(py)) @@ -67,9 +71,15 @@ impl<'j> PythonParser<'j> { self.parser.consume_false()?; Ok(false.to_object(py)) } - Peek::Null => { - self.parser.consume_null()?; - Ok(py.None()) + Peek::Minus | Peek::Infinity | Peek::NaN => { + let n = self + .parser + .consume_number::(peek.into_inner(), self.allow_inf_nan)?; + match n { + NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)), + NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)), + NumberAny::Float(float) => Ok(float.to_object(py)), + } } Peek::String => { let s = self.parser.consume_string::(&mut self.tape)?; @@ -117,16 +127,6 @@ impl<'j> PythonParser<'j> { } Ok(dict.to_object(py)) } - _ => { - let n = self - .parser - .consume_number::(peek.into_inner(), self.allow_inf_nan)?; - match n { - NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)), - NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)), - NumberAny::Float(float) => Ok(float.to_object(py)), - } - } } } diff --git a/tests/main.rs b/tests/main.rs index 88128d6..6b3ccb0 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -230,6 +230,7 @@ single_tests! { second_line: err => "[1\nx]", "ExpectedListCommaOrEnd @ 2:1"; floats_error: err => "06", "InvalidNumber @ 1:2"; unexpect_value: err => "[\u{16}\u{8}", "ExpectedSomeValue @ 1:2"; + unexpect_value_xx: err => "xx", "ExpectedSomeValue @ 1:1"; } #[test] diff --git a/tests/python.rs b/tests/python.rs index 97ac682..c1a124c 100644 --- a/tests/python.rs +++ b/tests/python.rs @@ -126,3 +126,15 @@ fn test_recursion_limit_incr() { assert_eq!(v.as_ref(py).len().unwrap(), 2000); }); } + +#[test] +fn test_exected_value_error() { + let json = "xx"; + let bytes = json.as_bytes(); + + Python::with_gil(|py| { + let r = python_parse(py, bytes, false, true); + let e = r.map_err(|e| map_json_error(bytes, &e)).unwrap_err(); + assert_eq!(e.to_string(), "ValueError: expected value at line 1 column 1"); + }) +} From 47683a8cd9658bd551a18e1f816dcfb09e611eeb Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 17 Jan 2024 10:24:46 +0000 Subject: [PATCH 2/4] python parsing, add python to clippy --- .pre-commit-config.yaml | 2 +- src/python.rs | 24 ++++++++++++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e7f13a4..68e70b0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: pass_filenames: false - id: clippy name: Clippy - entry: cargo clippy -- -D warnings -A incomplete_features -W clippy::dbg_macro -W clippy::print_stdout + entry: cargo clippy -F python -- -D warnings -A incomplete_features -W clippy::dbg_macro -W clippy::print_stdout types: [rust] language: system pass_filenames: false diff --git a/src/python.rs b/src/python.rs index af11600..132b5c6 100644 --- a/src/python.rs +++ b/src/python.rs @@ -71,16 +71,8 @@ impl<'j> PythonParser<'j> { self.parser.consume_false()?; Ok(false.to_object(py)) } - Peek::Minus | Peek::Infinity | Peek::NaN => { - let n = self - .parser - .consume_number::(peek.into_inner(), self.allow_inf_nan)?; - match n { - NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)), - NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)), - NumberAny::Float(float) => Ok(float.to_object(py)), - } - } + Peek::Minus | Peek::Infinity | Peek::NaN => self._take_num(py, peek), + _ if peek.is_num() => self._take_num(py, peek), Peek::String => { let s = self.parser.consume_string::(&mut self.tape)?; Ok(StringCache::get(py, s.as_str())) @@ -127,6 +119,18 @@ impl<'j> PythonParser<'j> { } Ok(dict.to_object(py)) } + _ => json_err!(ExpectedSomeValue, self.parser.index), + } + } + + fn _take_num(&mut self, py: Python, peek: Peek) -> JsonResult { + let n = self + .parser + .consume_number::(peek.into_inner(), self.allow_inf_nan)?; + match n { + NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)), + NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)), + NumberAny::Float(float) => Ok(float.to_object(py)), } } From 5ceb41e290b8efcafee54588837244aba3c81985 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 17 Jan 2024 10:27:43 +0000 Subject: [PATCH 3/4] simplify change --- src/python.rs | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/python.rs b/src/python.rs index 132b5c6..bb60d28 100644 --- a/src/python.rs +++ b/src/python.rs @@ -71,8 +71,16 @@ impl<'j> PythonParser<'j> { self.parser.consume_false()?; Ok(false.to_object(py)) } - Peek::Minus | Peek::Infinity | Peek::NaN => self._take_num(py, peek), - _ if peek.is_num() => self._take_num(py, peek), + _ if peek.is_num() => { + let n = self + .parser + .consume_number::(peek.into_inner(), self.allow_inf_nan)?; + match n { + NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)), + NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)), + NumberAny::Float(float) => Ok(float.to_object(py)), + } + } Peek::String => { let s = self.parser.consume_string::(&mut self.tape)?; Ok(StringCache::get(py, s.as_str())) @@ -123,17 +131,6 @@ impl<'j> PythonParser<'j> { } } - fn _take_num(&mut self, py: Python, peek: Peek) -> JsonResult { - let n = self - .parser - .consume_number::(peek.into_inner(), self.allow_inf_nan)?; - match n { - NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)), - NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)), - NumberAny::Float(float) => Ok(float.to_object(py)), - } - } - fn _check_take_value(&mut self, py: Python, peek: Peek) -> JsonResult { self.recursion_limit = match self.recursion_limit.checked_sub(1) { Some(limit) => limit, From 9479c0768c6fbf10cdb2b8bfad91ffcade4c669a Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 17 Jan 2024 10:31:27 +0000 Subject: [PATCH 4/4] revert change, use Jiter solution --- src/python.rs | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/python.rs b/src/python.rs index bb60d28..e2e11af 100644 --- a/src/python.rs +++ b/src/python.rs @@ -9,7 +9,7 @@ use pyo3::{ffi, AsPyPointer}; use hashbrown::hash_map::{HashMap, RawEntryMut}; use smallvec::SmallVec; -use crate::errors::{json_err, JsonError, JsonResult, DEFAULT_RECURSION_LIMIT}; +use crate::errors::{json_err, json_error, JsonError, JsonResult, DEFAULT_RECURSION_LIMIT}; use crate::number_decoder::{NumberAny, NumberInt}; use crate::parse::{Parser, Peek}; use crate::string_decoder::{StringDecoder, Tape}; @@ -71,16 +71,6 @@ impl<'j> PythonParser<'j> { self.parser.consume_false()?; Ok(false.to_object(py)) } - _ if peek.is_num() => { - let n = self - .parser - .consume_number::(peek.into_inner(), self.allow_inf_nan)?; - match n { - NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)), - NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)), - NumberAny::Float(float) => Ok(float.to_object(py)), - } - } Peek::String => { let s = self.parser.consume_string::(&mut self.tape)?; Ok(StringCache::get(py, s.as_str())) @@ -127,7 +117,23 @@ impl<'j> PythonParser<'j> { } Ok(dict.to_object(py)) } - _ => json_err!(ExpectedSomeValue, self.parser.index), + _ => { + let n = self + .parser + .consume_number::(peek.into_inner(), self.allow_inf_nan); + match n { + Ok(NumberAny::Int(NumberInt::Int(int))) => Ok(int.to_object(py)), + Ok(NumberAny::Int(NumberInt::BigInt(big_int))) => Ok(big_int.to_object(py)), + Ok(NumberAny::Float(float)) => Ok(float.to_object(py)), + Err(e) => { + if !peek.is_num() { + Err(json_error!(ExpectedSomeValue, self.parser.index)) + } else { + Err(e) + } + } + } + } } }