Skip to content

Commit

Permalink
Add an OutputMode type parameter to drive parser results (#1631)
Browse files Browse the repository at this point in the history
* add initial traits for GAT integration

The Mode trait will be used to adapt the production of output or error
values depending on the call site. If we do not care about the actual
value or error, and just want to know if a parser was successful or not,
then the Mode trait allows us to signal it without producing the actual
value.

In Err, the Error and Failure variants get different types, because we
usually want to get the failure variant, while the error variant might
change independently.

The OutputMode trait also carries information about the streaming or
complete status, and in the same way defines which type of parser we
want, directly at the call site.

* convert the Parser trait

a new `process` method is used to handle the new OutputMode trait.
The `OutputM` supporting structure is used to carry which modes we want
to use, and depending on the case, we can call an inner parser using
directly the mode we're in (example: Map), or we can convert it to Emit
for Output (ex: MapRes because we have to apply the mapping function on
the parser output and check for errors).

We keep the `parse` method with `Emit` for both output and error, which
allows us to convert combinators gradually: the ones using `parse` will
work but won't benefit directly from these traits, even though parent and
child combinators in a series of parsers may support them.

* various optimizations

it turns out that rustc is generating separate functions for each
process() implementation, which tends to slow things down

* raise minimal version to 1.65
  • Loading branch information
Geal authored Jun 10, 2023
1 parent 71ba24f commit 90d78d6
Show file tree
Hide file tree
Showing 53 changed files with 5,799 additions and 1,867 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: CI
on: [push, pull_request]

env:
RUST_MINVERSION: 1.56.0
RUST_MINVERSION: 1.65.0
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10

Expand All @@ -18,7 +18,7 @@ jobs:
- stable
- beta
- nightly
- 1.56.0
- 1.65.0

features:
- ''
Expand Down
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ name = "json"
required-features = ["alloc"]
path = "examples/json.rs"

[[example]]
name = "json2"
required-features = ["alloc"]
path = "examples/json2.rs"

[[example]]
name = "json_iterator"
required-features = ["alloc"]
Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ use nom::{
combinator::map_res,
sequence::Tuple,
IResult,
Parser,
};

#[derive(Debug, PartialEq)]
Expand All @@ -64,7 +65,10 @@ fn is_hex_digit(c: char) -> bool {
}

fn hex_primary(input: &str) -> IResult<&str, u8> {
map_res(take_while_m_n(2, 2, is_hex_digit), from_hex)(input)
map_res(
take_while_m_n(2, 2, is_hex_digit),
from_hex
).parse(input)
}

fn hex_color(input: &str) -> IResult<&str, Color> {
Expand Down
11 changes: 7 additions & 4 deletions benchmarks/benches/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use nom::{
combinator::map_res,
multi::fold,
sequence::{delimited, pair},
IResult,
IResult, Parser,
};

// Parser definition
Expand All @@ -29,7 +29,8 @@ fn factor(input: &[u8]) -> IResult<&[u8], i64> {
delimited(char('('), expr, char(')')),
)),
space0,
)(input)
)
.parse(input)
}

// We read an initial factor and for each time we find
Expand All @@ -48,7 +49,8 @@ fn term(input: &[u8]) -> IResult<&[u8], i64> {
acc / val
}
},
)(input)
)
.parse_complete(input)
}

fn expr(input: &[u8]) -> IResult<&[u8], i64> {
Expand All @@ -64,7 +66,8 @@ fn expr(input: &[u8]) -> IResult<&[u8], i64> {
acc - val
}
},
)(input)
)
.parse_complete(input)
}

#[allow(clippy::eq_op, clippy::erasing_op)]
Expand Down
48 changes: 17 additions & 31 deletions benchmarks/benches/http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;

use criterion::*;
use nom::{IResult, bytes::complete::{tag, take_while1}, character::complete::{line_ending, char}, multi::many};
use nom::{IResult, bytes::{tag, take_while1}, character:: char, multi::many, OutputMode, Parser, PResult, error::Error, Mode, sequence::{preceded, delimited, separated_pair, terminated, pair}, OutputM, Emit, Complete};

#[cfg_attr(rustfmt, rustfmt_skip)]
#[derive(Debug)]
Expand Down Expand Up @@ -67,54 +67,40 @@ fn is_version(c: u8) -> bool {
c >= b'0' && c <= b'9' || c == b'.'
}

fn request_line(input: &[u8]) -> IResult<&[u8], Request<'_>> {
let (input, method) = take_while1(is_token)(input)?;
let (input, _) = take_while1(is_space)(input)?;
let (input, uri) = take_while1(is_not_space)(input)?;
let (input, _) = take_while1(is_space)(input)?;
let (input, version) = http_version(input)?;
let (input, _) = line_ending(input)?;

Ok((input, Request {method, uri, version}))
fn line_ending<'a>()-> impl Parser<&'a[u8], Output=&'a[u8], Error=Error<&'a[u8]>> {
tag("\n").or(tag("\r\n"))
}

fn http_version(input: &[u8]) -> IResult<&[u8], &[u8]> {
let (input, _) = tag("HTTP/")(input)?;
let (input, version) = take_while1(is_version)(input)?;

Ok((input, version))
fn request_line<'a>()-> impl Parser<&'a[u8], Output=Request<'a>, Error=Error<&'a[u8]>> {
(take_while1(is_token), preceded(take_while1(is_space), take_while1(is_not_space)), delimited(take_while1(is_space), http_version(), line_ending()))
.map(|(method, uri, version)| Request {method, uri, version})
}

fn message_header_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
let (input, _) = take_while1(is_horizontal_space)(input)?;
let (input, data) = take_while1(not_line_ending)(input)?;
let (input, _) = line_ending(input)?;
fn http_version<'a>() -> impl Parser<&'a[u8], Output=&'a[u8], Error=Error<&'a[u8]>> {

Ok((input, data))
preceded(tag("HTTP/"), take_while1(is_version))
}

fn message_header(input: &[u8]) -> IResult<&[u8], Header<'_>> {
let (input, name) = take_while1(is_token)(input)?;
let (input, _) = char(':')(input)?;
let (input, value) = many(1.., message_header_value)(input)?;
fn message_header_value<'a>() -> impl Parser<&'a[u8], Output=&'a[u8], Error=Error<&'a[u8]>> {

Ok((input, Header{ name, value }))
delimited(take_while1(is_horizontal_space), take_while1(not_line_ending), line_ending())
}

fn request(input: &[u8]) -> IResult<&[u8], (Request<'_>, Vec<Header<'_>>)> {
let (input, req) = request_line(input)?;
let (input, h) = many(1.., message_header)(input)?;
let (input, _) = line_ending(input)?;
fn message_header<'a>() -> impl Parser<&'a[u8], Output=Header<'a>, Error=Error<&'a[u8]> >{
separated_pair(take_while1(is_token), char(':'), many(1.., message_header_value()))
.map(|(name, value)|Header{ name, value })
}

Ok((input, (req, h)))
fn request<'a>() -> impl Parser<&'a[u8], Output=(Request<'a>, Vec<Header<'a>>), Error=Error<&'a[u8]> > {
pair(request_line(), terminated(many(1.., message_header()), line_ending()))
}


fn parse(data: &[u8]) -> Option<Vec<(Request<'_>, Vec<Header<'_>>)>> {
let mut buf = &data[..];
let mut v = Vec::new();
loop {
match request(buf) {
match request().process::<OutputM<Emit, Emit, Complete>>(buf) {
Ok((b, r)) => {
buf = b;
v.push(r);
Expand Down
8 changes: 4 additions & 4 deletions benchmarks/benches/http_streaming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;

use criterion::*;
use nom::{IResult, bytes::streaming::{tag, take_while1}, character::streaming::{line_ending, char}, multi::many};
use nom::{IResult, bytes::streaming::{tag, take_while1}, character::streaming::{line_ending, char}, multi::many, Parser};

#[cfg_attr(rustfmt, rustfmt_skip)]
#[derive(Debug)]
Expand Down Expand Up @@ -96,15 +96,15 @@ fn message_header_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
fn message_header(input: &[u8]) -> IResult<&[u8], Header<'_>> {
let (input, name) = take_while1(is_token)(input)?;
let (input, _) = char(':')(input)?;
let (input, value) = many(1.., message_header_value)(input)?;
let (input, value) = many(1.., message_header_value).parse(input)?;

Ok((input, Header{ name, value }))
}

fn request(input: &[u8]) -> IResult<&[u8], (Request<'_>, Vec<Header<'_>>)> {
let (input, req) = request_line(input)?;
let (input, h) = many(1.., message_header)(input)?;
let (input, _) = line_ending(input)?;
let (input, h) = many(1.., message_header).parse(input)?;
let (input, _) = line_ending(input)?;

Ok((input, (req, h)))
}
Expand Down
19 changes: 11 additions & 8 deletions benchmarks/benches/ini.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use nom::{
combinator::{map_res, opt},
multi::many,
sequence::{delimited, pair, separated_pair, terminated, tuple},
IResult,
IResult, Parser,
};
use std::collections::HashMap;
use std::str;
Expand All @@ -20,14 +20,16 @@ fn category(i: &[u8]) -> IResult<&[u8], &str> {
map_res(
delimited(char('['), take_while(|c| c != b']'), char(']')),
str::from_utf8,
)(i)
)
.parse_complete(i)
}

fn key_value(i: &[u8]) -> IResult<&[u8], (&str, &str)> {
let (i, key) = map_res(alphanumeric, str::from_utf8)(i)?;
let (i, _) = tuple((opt(space), char('='), opt(space)))(i)?;
let (i, val) = map_res(take_while(|c| c != b'\n' && c != b';'), str::from_utf8)(i)?;
let (i, _) = opt(pair(char(';'), take_while(|c| c != b'\n')))(i)?;
let (i, key) = map_res(alphanumeric, str::from_utf8).parse_complete(i)?;
let (i, _) = tuple((opt(space), char('='), opt(space))).parse_complete(i)?;
let (i, val) =
map_res(take_while(|c| c != b'\n' && c != b';'), str::from_utf8).parse_complete(i)?;
let (i, _) = opt(pair(char(';'), take_while(|c| c != b'\n'))).parse_complete(i)?;
Ok((i, (key, val)))
}

Expand All @@ -39,7 +41,8 @@ fn categories(i: &[u8]) -> IResult<&[u8], HashMap<&str, HashMap<&str, &str>>> {
opt(multispace),
many(0.., terminated(key_value, opt(multispace))),
),
)(i)
)
.parse_complete(i)
}

fn bench_ini(c: &mut Criterion) {
Expand Down Expand Up @@ -67,7 +70,7 @@ file=payroll.dat
\0";

fn acc(i: &[u8]) -> IResult<&[u8], Vec<(&str, &str)>> {
many(0.., key_value)(i)
many(0.., key_value).parse_complete(i)
}

let mut group = c.benchmark_group("ini keys and values");
Expand Down
17 changes: 9 additions & 8 deletions benchmarks/benches/ini_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use nom::{
combinator::opt,
multi::many,
sequence::{delimited, pair, terminated, tuple},
IResult,
IResult, Parser,
};

use std::collections::HashMap;
Expand All @@ -26,29 +26,30 @@ fn category(i: &str) -> IResult<&str, &str> {
terminated(
delimited(char('['), take_while(|c| c != ']'), char(']')),
opt(is_a(" \r\n")),
)(i)
)
.parse(i)
}

fn key_value(i: &str) -> IResult<&str, (&str, &str)> {
let (i, key) = alphanumeric(i)?;
let (i, _) = tuple((opt(space), tag("="), opt(space)))(i)?;
let (i, val) = take_till(is_line_ending_or_comment)(i)?;
let (i, _) = opt(space)(i)?;
let (i, _) = opt(pair(tag(";"), not_line_ending))(i)?;
let (i, _) = opt(space_or_line_ending)(i)?;
let (i, _) = opt(space).parse_complete(i)?;
let (i, _) = opt(pair(tag(";"), not_line_ending)).parse_complete(i)?;
let (i, _) = opt(space_or_line_ending).parse_complete(i)?;
Ok((i, (key, val)))
}

fn keys_and_values(input: &str) -> IResult<&str, HashMap<&str, &str>> {
many(0.., key_value)(input)
many(0.., key_value).parse_complete(input)
}

fn category_and_keys(i: &str) -> IResult<&str, (&str, HashMap<&str, &str>)> {
pair(category, keys_and_values)(i)
pair(category, keys_and_values).parse_complete(i)
}

fn categories(input: &str) -> IResult<&str, HashMap<&str, HashMap<&str, &str>>> {
many(0.., category_and_keys)(input)
many(0.., category_and_keys).parse_complete(input)
}

fn bench_ini_str(c: &mut Criterion) {
Expand Down
Loading

0 comments on commit 90d78d6

Please sign in to comment.