From c48345be7281a996b87beaf97b78d3a9ded23350 Mon Sep 17 00:00:00 2001 From: Roma Sokolov Date: Sat, 12 Dec 2015 00:07:12 +0000 Subject: [PATCH 1/5] Add helper function to parse stream of json values Many modern web APIs return their data as a stream of (sometimes newline-delimited) JSON values. This commit adds a handy function `parse_stream` that returns an Iterator over parsed values. --- json/src/de.rs | 51 +++++++++++++++++++++++++++++++++++ json/src/lib.rs | 4 ++- json_tests/tests/test_json.rs | 18 +++++++++++++ 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/json/src/de.rs b/json/src/de.rs index 08b8ebf96..d7886f978 100644 --- a/json/src/de.rs +++ b/json/src/de.rs @@ -6,6 +6,7 @@ use std::char; use std::i32; use std::io; use std::str; +use core::marker::PhantomData; use serde::de; use serde::iter::LineColIterator; @@ -828,3 +829,53 @@ pub fn from_str(s: &str) -> Result { from_slice(s.as_bytes()) } + +/// Iterator over JSON values +pub struct JSONStream + where Iter: Iterator>, + T: de::Deserialize +{ + deser: Deserializer, + _marker: PhantomData, +} + +impl JSONStream + where Iter:Iterator>, + T: de::Deserialize { + fn new(i: Iter) -> JSONStream { + JSONStream { + deser: Deserializer::new(i), + _marker: PhantomData + } + } +} + +impl Iterator for JSONStream + where Iter:Iterator>, + T: de::Deserialize { + type Item = Result; + fn next(&mut self) -> Option> { + match de::Deserialize::deserialize(&mut self.deser) { + Ok(v) => Some(Ok(v)), + Err(e) => { + match e { + Error::SyntaxError( + ErrorCode::EOFWhileParsingValue, _, _) => + match self.deser.end() { + Ok(_) => None, + Err(e) => Some(Err(e)) + }, + _ => Some(Err(e)) + } + } + } + } +} + +/// Returns Iterator of decoded JSON value from an iterator over +/// `Iterator>`. 
+pub fn parse_stream(i: Iter) -> JSONStream + where Iter: Iterator>, + T: de::Deserialize { + JSONStream::new(i) +} diff --git a/json/src/lib.rs b/json/src/lib.rs index 815d06486..782915b8f 100644 --- a/json/src/lib.rs +++ b/json/src/lib.rs @@ -115,8 +115,8 @@ //! ``` #![deny(missing_docs)] - extern crate num; +extern crate core; extern crate serde; pub use self::de::{ @@ -125,6 +125,8 @@ pub use self::de::{ from_reader, from_slice, from_str, + parse_stream, + JSONStream, }; pub use self::error::{Error, ErrorCode, Result}; pub use self::ser::{ diff --git a/json_tests/tests/test_json.rs b/json_tests/tests/test_json.rs index d5ad4c9a9..5465b3849 100644 --- a/json_tests/tests/test_json.rs +++ b/json_tests/tests/test_json.rs @@ -14,6 +14,8 @@ use serde_json::{ Value, from_str, from_value, + parse_stream, + JSONStream, to_value, }; @@ -1393,3 +1395,19 @@ fn test_byte_buf_de() { let v: ByteBuf = serde_json::from_str("[1, 2, 3]").unwrap(); assert_eq!(v, bytes); } + +#[test] +fn test_parse_stream() { + let stream = "{\"x\":40}{\"x\":41}\n{\"x\":42}".to_string(); + let mut parsed:JSONStream = parse_stream(stream + .as_bytes() + .iter() + .map(|byte| Ok(*byte))); + assert_eq!(parsed.next().unwrap().ok().unwrap().lookup("x").unwrap(), + &Value::U64(40)); + assert_eq!(parsed.next().unwrap().ok().unwrap().lookup("x").unwrap(), + &Value::U64(41)); + assert_eq!(parsed.next().unwrap().ok().unwrap().lookup("x").unwrap(), + &Value::U64(42)); + assert!(parsed.next().is_none()); +} From e54f13c54d020f7c7e9a65791eaf67fa2f1bd0b5 Mon Sep 17 00:00:00 2001 From: Roma Sokolov Date: Wed, 13 Jan 2016 19:20:32 +0000 Subject: [PATCH 2/5] Apply review comments: * `parse_stream` removed in favor of direct `JSONStream` usage; * truncated input no longer silently ignored; * added test for errors; * style violation fixed. 
--- json/src/de.rs | 43 ++++++++++++++--------------------- json/src/lib.rs | 7 +++--- json_tests/tests/test_json.rs | 26 ++++++++++++++------- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/json/src/de.rs b/json/src/de.rs index d7886f978..6d6077373 100644 --- a/json/src/de.rs +++ b/json/src/de.rs @@ -6,7 +6,7 @@ use std::char; use std::i32; use std::io; use std::str; -use core::marker::PhantomData; +use std::marker::PhantomData; use serde::de; use serde::iter::LineColIterator; @@ -830,7 +830,7 @@ pub fn from_str(s: &str) -> Result from_slice(s.as_bytes()) } -/// Iterator over JSON values +/// Iterator over parsed JSON values pub struct JSONStream where Iter: Iterator>, T: de::Deserialize @@ -839,10 +839,13 @@ pub struct JSONStream _marker: PhantomData, } -impl JSONStream +/// Iterator over parsed JSON values +impl JSONStream where Iter:Iterator>, T: de::Deserialize { - fn new(i: Iter) -> JSONStream { + /// Returns Iterator of decoded JSON value from an iterator over + /// `Iterator>`. + pub fn new(i: Iter) -> JSONStream { JSONStream { deser: Deserializer::new(i), _marker: PhantomData @@ -850,32 +853,20 @@ impl JSONStream } } -impl Iterator for JSONStream +impl Iterator for JSONStream where Iter:Iterator>, T: de::Deserialize { type Item = Result; fn next(&mut self) -> Option> { - match de::Deserialize::deserialize(&mut self.deser) { - Ok(v) => Some(Ok(v)), - Err(e) => { - match e { - Error::SyntaxError( - ErrorCode::EOFWhileParsingValue, _, _) => - match self.deser.end() { - Ok(_) => None, - Err(e) => Some(Err(e)) - }, - _ => Some(Err(e)) - } - } + match self.deser.eof() { + Ok(true) => None, + Ok(false) => match de::Deserialize::deserialize(&mut self.deser) { + Ok(v) => Some(Ok(v)), + // EOF is handled beforehand, so report any error + Err(e) => Some(Err(e)) + }, + // Should not happen, seek .eof() + Err(e) => Some(Err(e)) } } } - -/// Returns Iterator of decoded JSON value from an iterator over -/// `Iterator>`. 
-pub fn parse_stream(i: Iter) -> JSONStream - where Iter: Iterator>, - T: de::Deserialize { - JSONStream::new(i) -} diff --git a/json/src/lib.rs b/json/src/lib.rs index 782915b8f..f28c5ae45 100644 --- a/json/src/lib.rs +++ b/json/src/lib.rs @@ -53,14 +53,14 @@ //! Address: Address, //! PhoneNumbers: Vec //! } -//! +//! //! #[derive(Serialize, Deserialize)] //! struct Address { //! Street: String, //! City: String, //! Country: String //! } -//! ``` +//! ``` //! //! # Type-based Serialization and Deserialization //! @@ -121,12 +121,11 @@ extern crate serde; pub use self::de::{ Deserializer, + JSONStream, from_iter, from_reader, from_slice, from_str, - parse_stream, - JSONStream, }; pub use self::error::{Error, ErrorCode, Result}; pub use self::ser::{ diff --git a/json_tests/tests/test_json.rs b/json_tests/tests/test_json.rs index 5465b3849..7c9336163 100644 --- a/json_tests/tests/test_json.rs +++ b/json_tests/tests/test_json.rs @@ -12,10 +12,9 @@ use serde::bytes::{ByteBuf, Bytes}; use serde_json::{ self, Value, + JSONStream, from_str, from_value, - parse_stream, - JSONStream, to_value, }; @@ -1397,12 +1396,12 @@ fn test_byte_buf_de() { } #[test] -fn test_parse_stream() { - let stream = "{\"x\":40}{\"x\":41}\n{\"x\":42}".to_string(); - let mut parsed:JSONStream = parse_stream(stream - .as_bytes() - .iter() - .map(|byte| Ok(*byte))); +fn test_json_stream_newlines() { + let stream = "{\"x\":39} {\"x\":40}{\"x\":41}\n{\"x\":42}".to_string(); + let mut parsed:JSONStream = JSONStream::new( + stream.as_bytes().iter().map(|byte| Ok(*byte))); + assert_eq!(parsed.next().unwrap().ok().unwrap().lookup("x").unwrap(), + &Value::U64(39)); assert_eq!(parsed.next().unwrap().ok().unwrap().lookup("x").unwrap(), &Value::U64(40)); assert_eq!(parsed.next().unwrap().ok().unwrap().lookup("x").unwrap(), @@ -1411,3 +1410,14 @@ fn test_parse_stream() { &Value::U64(42)); assert!(parsed.next().is_none()); } + +#[test] +fn test_json_stream_truncated() { + let stream = 
"{\"x\":40}\n{\"x\":".to_string(); + let mut parsed:JSONStream = JSONStream::new( + stream.as_bytes().iter().map(|byte| Ok(*byte))); + assert_eq!(parsed.next().unwrap().ok().unwrap().lookup("x").unwrap(), + &Value::U64(40)); + assert!(parsed.next().unwrap().is_err()); + assert!(parsed.next().is_none()); +} From 8f62d2f3229c1d89e9447c3a10419e2fcdf52e45 Mon Sep 17 00:00:00 2001 From: Roma Sokolov Date: Sat, 30 Jan 2016 19:43:08 +0000 Subject: [PATCH 3/5] [fix] Proper handling of trailing whitespaces --- json/src/de.rs | 18 +++++++++++------- json_tests/tests/test_json.rs | 10 ++++++++++ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/json/src/de.rs b/json/src/de.rs index 6d6077373..86631bfde 100644 --- a/json/src/de.rs +++ b/json/src/de.rs @@ -858,15 +858,19 @@ impl Iterator for JSONStream T: de::Deserialize { type Item = Result; fn next(&mut self) -> Option> { - match self.deser.eof() { - Ok(true) => None, - Ok(false) => match de::Deserialize::deserialize(&mut self.deser) { + // skip whitespaces, if any + // this helps with trailing whitespaces, since whitespaces between + // values are handled for us. 
+ let _:Result<()> = self.deser.parse_whitespace(); + // Since Deserializer.eof() always return Ok(_), it's safe to + // call .ok() here + if self.deser.eof().ok().unwrap() { + None + } else { + match de::Deserialize::deserialize(&mut self.deser) { Ok(v) => Some(Ok(v)), - // EOF is handled beforehand, so report any error Err(e) => Some(Err(e)) - }, - // Should not happen, seek .eof() - Err(e) => Some(Err(e)) + } } } } diff --git a/json_tests/tests/test_json.rs b/json_tests/tests/test_json.rs index 7c9336163..cb338d64f 100644 --- a/json_tests/tests/test_json.rs +++ b/json_tests/tests/test_json.rs @@ -1411,6 +1411,16 @@ fn test_json_stream_newlines() { assert!(parsed.next().is_none()); } +#[test] +fn test_json_stream_trailing_whitespaces() { + let stream = "{\"x\":42} \t\n".to_string(); + let mut parsed:JSONStream = JSONStream::new( + stream.as_bytes().iter().map(|byte| Ok(*byte))); + assert_eq!(parsed.next().unwrap().ok().unwrap().lookup("x").unwrap(), + &Value::U64(42)); + assert!(parsed.next().is_none()); +} + #[test] fn test_json_stream_truncated() { let stream = "{\"x\":40}\n{\"x\":".to_string(); From 34b3a1e9ce4762e1d652c0ab8af85b75d35c6c2b Mon Sep 17 00:00:00 2001 From: Roma Sokolov Date: Sat, 30 Jan 2016 19:43:19 +0000 Subject: [PATCH 4/5] [add] test for empty input --- json_tests/tests/test_json.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/json_tests/tests/test_json.rs b/json_tests/tests/test_json.rs index cb338d64f..b1ceeedf4 100644 --- a/json_tests/tests/test_json.rs +++ b/json_tests/tests/test_json.rs @@ -1431,3 +1431,11 @@ fn test_json_stream_truncated() { assert!(parsed.next().unwrap().is_err()); assert!(parsed.next().is_none()); } + +#[test] +fn test_json_stream_empty() { + let stream = "".to_string(); + let mut parsed:JSONStream = JSONStream::new( + stream.as_bytes().iter().map(|byte| Ok(*byte))); + assert!(parsed.next().is_none()); +} From 32f643f98686c79f373a1c6830be7666bb810dc0 Mon Sep 17 00:00:00 2001 From: Roma Sokolov Date: Fri, 
5 Feb 2016 10:26:59 +0000 Subject: [PATCH 5/5] Fixed error handling .eof() can return Err, so do not try to unwrap() it unconditionally. Also, check the result of .parse_whitespace() and report any errors. (thanks, @nixpulvis) --- json/src/de.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/json/src/de.rs b/json/src/de.rs index 86631bfde..9f526e479 100644 --- a/json/src/de.rs +++ b/json/src/de.rs @@ -861,16 +861,16 @@ impl Iterator for JSONStream // skip whitespaces, if any // this helps with trailing whitespaces, since whitespaces between // values are handled for us. - let _:Result<()> = self.deser.parse_whitespace(); - // Since Deserializer.eof() always return Ok(_), it's safe to - // call .ok() here - if self.deser.eof().ok().unwrap() { - None - } else { - match de::Deserialize::deserialize(&mut self.deser) { + if let Err(e) = self.deser.parse_whitespace() { + return Some(Err(e)) + }; + match self.deser.eof() { + Ok(true) => None, + Ok(false) => match de::Deserialize::deserialize(&mut self.deser) { Ok(v) => Some(Ok(v)), Err(e) => Some(Err(e)) - } + }, + Err(e) => Some(Err(e)) } } }