Add an OutputMode type parameter to drive parser results (#1631)

* add initial traits for GAT integration: The Mode trait will be used to adapt the production of output or error values depending on the call site. If we do not care about the actual value or error, and just want to know if a parser was successful or not, then the Mode trait allows us to signal it without producing the actual type. In Err, the Error and Failure variants get different types, because we usually want to get the failure variant, while the error variant might change independently. The OutputMode trait also carries information about the streaming or complete status, and in the same way defines which type of parser we want, directly at the call site. * convert the Parser trait: a new `process` method is used to handle the new OutputMode trait. The `OutputM` supporting structure is used to carry which modes we want to use, and depending on the case, we can call an inner parser using directly the mode we're in (example: Map), or we can convert it to Emit for Output (ex: MapRes, because we have to apply the mapping function on the parser output and check for errors). We keep the `parse` method with `Emit` for both output and error, which allows us to convert combinators gradually: the ones using `parse` will work but won't benefit directly from these traits; also, parent and child combinators in a series of parsers may support it. * various optimizations: it turns out that rustc is generating separate functions for each process() implementation, which tends to slow things down. * raise minimal version to 1.65
rust-bakery · Jun 10, 2023 · 90d78d6 · 90d78d6
1 parent 71ba24f
commit 90d78d6
Show file tree

Hide file tree

Showing 53 changed files with 5,799 additions and 1,867 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -3,7 +3,7 @@ name: CI
 on: [push, pull_request]
 
 env:
-  RUST_MINVERSION: 1.56.0
+  RUST_MINVERSION: 1.65.0
   CARGO_INCREMENTAL: 0
   CARGO_NET_RETRY: 10
 
@@ -18,7 +18,7 @@ jobs:
           - stable
           - beta
           - nightly
-          - 1.56.0
+          - 1.65.0
 
         features:
           - ''

diff --git a/Cargo.toml b/Cargo.toml
@@ -115,6 +115,11 @@ name = "json"
 required-features = ["alloc"]
 path = "examples/json.rs"
 
+[[example]]
+name = "json2"
+required-features = ["alloc"]
+path = "examples/json2.rs"
+
 [[example]]
 name = "json_iterator"
 required-features = ["alloc"]

diff --git a/README.md b/README.md
@@ -46,6 +46,7 @@ use nom::{
   combinator::map_res,
   sequence::Tuple,
   IResult,
+  Parser,
 };
 
 #[derive(Debug, PartialEq)]
@@ -64,7 +65,10 @@ fn is_hex_digit(c: char) -> bool {
 }
 
 fn hex_primary(input: &str) -> IResult<&str, u8> {
-  map_res(take_while_m_n(2, 2, is_hex_digit), from_hex)(input)
+  map_res(
+    take_while_m_n(2, 2, is_hex_digit),
+    from_hex
+  ).parse(input)
 }
 
 fn hex_color(input: &str) -> IResult<&str, Color> {

diff --git a/benchmarks/benches/arithmetic.rs b/benchmarks/benches/arithmetic.rs
@@ -11,7 +11,7 @@ use nom::{
   combinator::map_res,
   multi::fold,
   sequence::{delimited, pair},
-  IResult,
+  IResult, Parser,
 };
 
 // Parser definition
@@ -29,7 +29,8 @@ fn factor(input: &[u8]) -> IResult<&[u8], i64> {
       delimited(char('('), expr, char(')')),
     )),
     space0,
-  )(input)
+  )
+  .parse(input)
 }
 
 // We read an initial factor and for each time we find
@@ -48,7 +49,8 @@ fn term(input: &[u8]) -> IResult<&[u8], i64> {
         acc / val
       }
     },
-  )(input)
+  )
+  .parse_complete(input)
 }
 
 fn expr(input: &[u8]) -> IResult<&[u8], i64> {
@@ -64,7 +66,8 @@ fn expr(input: &[u8]) -> IResult<&[u8], i64> {
         acc - val
       }
     },
-  )(input)
+  )
+  .parse_complete(input)
 }
 
 #[allow(clippy::eq_op, clippy::erasing_op)]

diff --git a/benchmarks/benches/http.rs b/benchmarks/benches/http.rs
@@ -4,7 +4,7 @@
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
 use criterion::*;
-use nom::{IResult, bytes::complete::{tag, take_while1}, character::complete::{line_ending, char}, multi::many};
+use nom::{IResult, bytes::{tag, take_while1}, character:: char, multi::many, OutputMode, Parser, PResult, error::Error, Mode, sequence::{preceded, delimited, separated_pair, terminated, pair}, OutputM, Emit, Complete};
 
 #[cfg_attr(rustfmt, rustfmt_skip)]
 #[derive(Debug)]
@@ -67,54 +67,40 @@ fn is_version(c: u8) -> bool {
   c >= b'0' && c <= b'9' || c == b'.'
 }
 
-fn request_line(input: &[u8]) -> IResult<&[u8], Request<'_>> {
-  let (input, method) = take_while1(is_token)(input)?;
-  let (input, _) = take_while1(is_space)(input)?;
-  let (input, uri) = take_while1(is_not_space)(input)?;
-  let (input, _) = take_while1(is_space)(input)?;
-  let (input, version) = http_version(input)?;
-  let (input, _) = line_ending(input)?;
-
-  Ok((input, Request {method, uri, version}))
+fn line_ending<'a>()-> impl Parser<&'a[u8], Output=&'a[u8], Error=Error<&'a[u8]>>  {
+  tag("\n").or(tag("\r\n"))
 }
 
-fn http_version(input: &[u8]) -> IResult<&[u8], &[u8]> {
-  let (input, _) = tag("HTTP/")(input)?;
-  let (input, version) = take_while1(is_version)(input)?;
-
-  Ok((input, version))
+fn request_line<'a>()-> impl Parser<&'a[u8], Output=Request<'a>, Error=Error<&'a[u8]>> {
+  (take_while1(is_token),  preceded(take_while1(is_space), take_while1(is_not_space)), delimited(take_while1(is_space), http_version(), line_ending()))
+  .map(|(method, uri, version)| Request {method, uri, version})
 }
 
-fn message_header_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
-  let (input, _) = take_while1(is_horizontal_space)(input)?;
-  let (input, data) = take_while1(not_line_ending)(input)?;
-  let (input, _) = line_ending(input)?;
+fn http_version<'a>() -> impl Parser<&'a[u8], Output=&'a[u8], Error=Error<&'a[u8]>> {
 
-  Ok((input, data))
+  preceded(tag("HTTP/"), take_while1(is_version))
 }
 
-fn message_header(input: &[u8]) -> IResult<&[u8], Header<'_>> {
-  let (input, name) = take_while1(is_token)(input)?;
-  let (input, _) = char(':')(input)?;
-  let (input, value) = many(1.., message_header_value)(input)?;
+fn message_header_value<'a>() -> impl Parser<&'a[u8], Output=&'a[u8], Error=Error<&'a[u8]>> {
 
-  Ok((input, Header{ name, value }))
+  delimited(take_while1(is_horizontal_space),  take_while1(not_line_ending), line_ending())
 }
 
-fn request(input: &[u8]) -> IResult<&[u8], (Request<'_>, Vec<Header<'_>>)> {
-  let (input, req) = request_line(input)?;
-  let (input, h) = many(1.., message_header)(input)?;
-  let (input, _) = line_ending(input)?;
+fn message_header<'a>() ->  impl Parser<&'a[u8], Output=Header<'a>, Error=Error<&'a[u8]> >{
+  separated_pair(take_while1(is_token), char(':'), many(1.., message_header_value()))
+    .map(|(name, value)|Header{ name, value })
+}
 
-  Ok((input, (req, h)))
+fn request<'a>() -> impl Parser<&'a[u8], Output=(Request<'a>, Vec<Header<'a>>), Error=Error<&'a[u8]> > {
+  pair(request_line(), terminated(many(1.., message_header()), line_ending()))
 }
 
 
 fn parse(data: &[u8]) -> Option<Vec<(Request<'_>, Vec<Header<'_>>)>> {
   let mut buf = &data[..];
   let mut v = Vec::new();
   loop {
-    match request(buf) {
+    match request().process::<OutputM<Emit, Emit, Complete>>(buf) {
       Ok((b, r)) => {
         buf = b;
         v.push(r);

diff --git a/benchmarks/benches/http_streaming.rs b/benchmarks/benches/http_streaming.rs
@@ -4,7 +4,7 @@
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
 use criterion::*;
-use nom::{IResult, bytes::streaming::{tag, take_while1}, character::streaming::{line_ending, char}, multi::many};
+use nom::{IResult, bytes::streaming::{tag, take_while1}, character::streaming::{line_ending, char}, multi::many, Parser};
 
 #[cfg_attr(rustfmt, rustfmt_skip)]
 #[derive(Debug)]
@@ -96,15 +96,15 @@ fn message_header_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
 fn message_header(input: &[u8]) -> IResult<&[u8], Header<'_>> {
   let (input, name) = take_while1(is_token)(input)?;
   let (input, _) = char(':')(input)?;
-  let (input, value) = many(1.., message_header_value)(input)?;
+  let (input, value) = many(1.., message_header_value).parse(input)?;
 
   Ok((input, Header{ name, value }))
 }
 
 fn request(input: &[u8]) -> IResult<&[u8], (Request<'_>, Vec<Header<'_>>)> {
   let (input, req) = request_line(input)?;
-  let (input, h) = many(1.., message_header)(input)?;
-  let (input, _) = line_ending(input)?;
+  let (input, h) = many(1.., message_header).parse(input)?;
+    let (input, _) = line_ending(input)?;
 
   Ok((input, (req, h)))
 }

diff --git a/benchmarks/benches/ini.rs b/benchmarks/benches/ini.rs
@@ -11,7 +11,7 @@ use nom::{
   combinator::{map_res, opt},
   multi::many,
   sequence::{delimited, pair, separated_pair, terminated, tuple},
-  IResult,
+  IResult, Parser,
 };
 use std::collections::HashMap;
 use std::str;
@@ -20,14 +20,16 @@ fn category(i: &[u8]) -> IResult<&[u8], &str> {
   map_res(
     delimited(char('['), take_while(|c| c != b']'), char(']')),
     str::from_utf8,
-  )(i)
+  )
+  .parse_complete(i)
 }
 
 fn key_value(i: &[u8]) -> IResult<&[u8], (&str, &str)> {
-  let (i, key) = map_res(alphanumeric, str::from_utf8)(i)?;
-  let (i, _) = tuple((opt(space), char('='), opt(space)))(i)?;
-  let (i, val) = map_res(take_while(|c| c != b'\n' && c != b';'), str::from_utf8)(i)?;
-  let (i, _) = opt(pair(char(';'), take_while(|c| c != b'\n')))(i)?;
+  let (i, key) = map_res(alphanumeric, str::from_utf8).parse_complete(i)?;
+  let (i, _) = tuple((opt(space), char('='), opt(space))).parse_complete(i)?;
+  let (i, val) =
+    map_res(take_while(|c| c != b'\n' && c != b';'), str::from_utf8).parse_complete(i)?;
+  let (i, _) = opt(pair(char(';'), take_while(|c| c != b'\n'))).parse_complete(i)?;
   Ok((i, (key, val)))
 }
 
@@ -39,7 +41,8 @@ fn categories(i: &[u8]) -> IResult<&[u8], HashMap<&str, HashMap<&str, &str>>> {
       opt(multispace),
       many(0.., terminated(key_value, opt(multispace))),
     ),
-  )(i)
+  )
+  .parse_complete(i)
 }
 
 fn bench_ini(c: &mut Criterion) {
@@ -67,7 +70,7 @@ file=payroll.dat
 \0";
 
   fn acc(i: &[u8]) -> IResult<&[u8], Vec<(&str, &str)>> {
-    many(0.., key_value)(i)
+    many(0.., key_value).parse_complete(i)
   }
 
   let mut group = c.benchmark_group("ini keys and values");

diff --git a/benchmarks/benches/ini_str.rs b/benchmarks/benches/ini_str.rs
@@ -9,7 +9,7 @@ use nom::{
   combinator::opt,
   multi::many,
   sequence::{delimited, pair, terminated, tuple},
-  IResult,
+  IResult, Parser,
 };
 
 use std::collections::HashMap;
@@ -26,29 +26,30 @@ fn category(i: &str) -> IResult<&str, &str> {
   terminated(
     delimited(char('['), take_while(|c| c != ']'), char(']')),
     opt(is_a(" \r\n")),
-  )(i)
+  )
+  .parse(i)
 }
 
 fn key_value(i: &str) -> IResult<&str, (&str, &str)> {
   let (i, key) = alphanumeric(i)?;
   let (i, _) = tuple((opt(space), tag("="), opt(space)))(i)?;
   let (i, val) = take_till(is_line_ending_or_comment)(i)?;
-  let (i, _) = opt(space)(i)?;
-  let (i, _) = opt(pair(tag(";"), not_line_ending))(i)?;
-  let (i, _) = opt(space_or_line_ending)(i)?;
+  let (i, _) = opt(space).parse_complete(i)?;
+  let (i, _) = opt(pair(tag(";"), not_line_ending)).parse_complete(i)?;
+  let (i, _) = opt(space_or_line_ending).parse_complete(i)?;
   Ok((i, (key, val)))
 }
 
 fn keys_and_values(input: &str) -> IResult<&str, HashMap<&str, &str>> {
-  many(0.., key_value)(input)
+  many(0.., key_value).parse_complete(input)
 }
 
 fn category_and_keys(i: &str) -> IResult<&str, (&str, HashMap<&str, &str>)> {
-  pair(category, keys_and_values)(i)
+  pair(category, keys_and_values).parse_complete(i)
 }
 
 fn categories(input: &str) -> IResult<&str, HashMap<&str, HashMap<&str, &str>>> {
-  many(0.., category_and_keys)(input)
+  many(0.., category_and_keys).parse_complete(input)
 }
 
 fn bench_ini_str(c: &mut Criterion) {