Merge branch 'main' into trailing-comma

rust-bakery · May 5, 2024 · 7b47f77 · 7b47f77
2 parents a6b9fcf + 996efb8
commit 7b47f77
Show file tree

Hide file tree

Showing 39 changed files with 511 additions and 382 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -190,9 +190,10 @@ jobs:
         uses: actions-rs/cargo@v1
         with:
           command: tarpaulin
-          args: --output-dir coverage --out Lcov
+          args: --output-dir coverage --out xml --workspace --exclude benchmarks
 
-      - name: Publish to Coveralls
-        uses: coverallsapp/github-action@master
+      - name: Upload coverage reports to Codecov
+        uses: codecov/codecov-action@v4.0.1
         with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
+          token: ${{ secrets.CODECOV_TOKEN }}
+          slug: rust-bakery/nom
diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml
@@ -0,0 +1,26 @@
+name: CIFuzz
+on: [pull_request]
+jobs:
+  Fuzzing:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Build Fuzzers
+      id: build
+      uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+      with:
+        oss-fuzz-project-name: 'nom'
+        dry-run: false
+        language: rust
+    - name: Run Fuzzers
+      uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+      with:
+        oss-fuzz-project-name: 'nom'
+        fuzz-seconds: 300
+        dry-run: false
+        language: rust
+    - name: Upload Crash
+      uses: actions/upload-artifact@v3
+      if: failure() && steps.build.outcome == 'success'
+      with:
+        name: artifacts
+        path: ./out/artifacts
diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
@@ -0,0 +1,32 @@
+name: codspeed-benchmarks
+
+on:
+  push:
+    branches:
+      - "main"
+  pull_request:
+  # `workflow_dispatch` allows CodSpeed to trigger backtest
+  # performance analysis in order to generate initial data.
+  workflow_dispatch:
+
+jobs:
+  benchmarks:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup rust toolchain, cache and cargo-codspeed binary
+        uses: moonrepo/setup-rust@v0
+        with:
+          channel: stable
+          cache-target: release
+          bins: cargo-codspeed
+
+      - name: Build the benchmark target(s)
+        run: cargo codspeed build -p benchmarks
+
+      - name: Run the benchmarks
+        uses: CodSpeedHQ/action@v2
+        with:
+          run: cargo codspeed run -p benchmarks
+          token: ${{ secrets.CODSPEED_TOKEN }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,10 @@
 
 ### Thanks
 
+### Removed
+
+- `nom::bits::*` is no longer re-exported at the crate root. This export caused frequent confusion, since e.g. `nom::complete::tag` referred to `nom::bits::complete::tag` instead of the much more commonly used `nom::bytes::complete::tag`. To migrate, change any imports of `nom::{complete::*, streaming::*, bits, bytes}` to the corresponding `nom::bits::{...}` paths (for example, `nom::complete::tag` becomes `nom::bits::complete::tag`).
+
 ### Changed
 
 ## 7.1.2 - 2023-01-01

diff --git a/Cargo.toml b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 
 name = "nom"
-version = "7.1.2"
+version = "8.0.0-alpha1"
 authors = ["contact@geoffroycouprie.com"]
 description = "A byte-oriented, zero-copy, parser combinators library"
 license = "MIT"
@@ -30,14 +30,10 @@ include = [
 
 [features]
 alloc = []
-std = ["alloc", "memchr/std", "minimal-lexical/std"]
+std = ["alloc", "memchr/std"]
 default = ["std"]
 docsrs = []
 
-[dependencies.minimal-lexical]
-version = "0.2.0"
-default-features = false
-
 [dependencies.memchr]
 version = "2.3"
 default-features = false

diff --git a/README.md b/README.md
@@ -158,7 +158,6 @@ parsing, and construct an AST in place.
 Example projects:
 
 - [PHP VM](https://github.com/tagua-vm/parser)
-- eve language prototype
 - [xshade shading language](https://github.com/xshade-lang/xshade)
 
 ### Streaming formats
@@ -266,7 +265,7 @@ Here is a (non exhaustive) list of known projects using nom:
 [proto files](https://github.com/tafia/protobuf-parser),
 [Fountain screenplay markup](https://github.com/adamchalmers/fountain-rs),
 [vimwiki](https://github.com/chipsenkbeil/vimwiki-rs/tree/master/vimwiki) & [vimwiki_macros](https://github.com/chipsenkbeil/vimwiki-rs/tree/master/vimwiki_macros),
-[Kconfig language](https://github.com/Mcdostone/nom-kconfig)
+[Kconfig language](https://github.com/Mcdostone/nom-kconfig), [Askama templates](https://crates.io/crates/askama_parser/)
 - Programming languages:
 [PHP](https://github.com/tagua-vm/parser),
 [Basic Calculator](https://github.com/balajisivaraman/basic_calculator_rs),
@@ -289,7 +288,8 @@ Here is a (non exhaustive) list of known projects using nom:
 [MIDI](https://github.com/derekdreery/nom-midi-rs),
 [SWF](https://github.com/open-flash/swf-parser),
 [WAVE](https://github.com/Noise-Labs/wave),
-[Matroska (MKV)](https://github.com/rust-av/matroska)
+[Matroska (MKV)](https://github.com/rust-av/matroska),
+[Exif/Metadata parser for JPEG/HEIF/HEIC/MOV/MP4](https://github.com/mindeng/nom-exif)
 - Document formats:
 [TAR](https://github.com/Keruspe/tar-parser.rs),
 [GZ](https://github.com/nharward/nom-gzip),
@@ -328,7 +328,8 @@ Here is a (non exhaustive) list of known projects using nom:
 [MySQL binary log](https://github.com/PrivateRookie/boxercrab),
 [URI](https://github.com/Skasselbard/nom-uri),
 [Furigana](https://github.com/sachaarbonel/furigana.rs),
-[Wordle Result](https://github.com/Fyko/wordle-stats/tree/main/parser)
+[Wordle Result](https://github.com/Fyko/wordle-stats/tree/main/parser),
+[NBT](https://github.com/phoenixr-codes/mcnbt)
 
 Want to create a new parser using `nom`? A list of not yet implemented formats is available [here](https://github.com/rust-bakery/nom/issues/14).
 

diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml
@@ -6,8 +6,8 @@ edition = "2018"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-criterion = "0.3.5"
-jemallocator = "0.3.2"
+criterion = "0.5.0"
+jemallocator = "0.5.4"
 nom = { path = "../" }
 
 [lib]
@@ -52,4 +52,7 @@ harness = false
 [[bench]]
 name = "json_streaming"
 path = "benches/json_streaming.rs"
-harness = false
+harness = false
+
+[dev-dependencies]
+codspeed-criterion-compat = "2.4.1"
diff --git a/benchmarks/benches/arithmetic.rs b/benchmarks/benches/arithmetic.rs
@@ -1,10 +1,7 @@
-#[macro_use]
-extern crate criterion;
-
 #[global_allocator]
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
-use criterion::Criterion;
+use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion};
 use nom::{
   branch::alt,
   character::complete::{char, digit1, one_of, space0},

diff --git a/benchmarks/benches/http.rs b/benchmarks/benches/http.rs
@@ -3,7 +3,7 @@
 #[global_allocator]
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
-use criterion::*;
+use codspeed_criterion_compat::*;
 use nom::{IResult, bytes::{tag, take_while1}, character:: char, multi::many, OutputMode, Parser, PResult, error::Error, Mode, sequence::{preceded, delimited, separated_pair, terminated, pair}, OutputM, Emit, Complete};
 
 #[cfg_attr(rustfmt, rustfmt_skip)]

diff --git a/benchmarks/benches/http_streaming.rs b/benchmarks/benches/http_streaming.rs
@@ -3,7 +3,7 @@
 #[global_allocator]
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
-use criterion::*;
+use codspeed_criterion_compat::*;
 use nom::{IResult, bytes::streaming::{tag, take_while1}, character::streaming::{line_ending, char}, multi::many, Parser};
 
 #[cfg_attr(rustfmt, rustfmt_skip)]

diff --git a/benchmarks/benches/ini.rs b/benchmarks/benches/ini.rs
@@ -1,7 +1,7 @@
 #[global_allocator]
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
-use criterion::*;
+use codspeed_criterion_compat::*;
 
 use nom::{
   bytes::complete::take_while,

diff --git a/benchmarks/benches/ini_str.rs b/benchmarks/benches/ini_str.rs
@@ -1,7 +1,7 @@
 #[global_allocator]
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
-use criterion::*;
+use codspeed_criterion_compat::*;
 
 use nom::{
   bytes::complete::{is_a, tag, take_till, take_while},

diff --git a/benchmarks/benches/json.rs b/benchmarks/benches/json.rs
@@ -1,10 +1,7 @@
-#[macro_use]
-extern crate criterion;
-
 #[global_allocator]
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
-use criterion::Criterion;
+use codspeed_criterion_compat::*;
 use nom::{
   branch::alt,
   bytes::{tag, take},
@@ -15,7 +12,7 @@ use nom::{
   number::double,
   number::recognize_float,
   sequence::{delimited, preceded, separated_pair},
-  Complete, Emit, IResult, Mode, OutputM, Parser,
+  Check, Complete, Emit, IResult, Mode, OutputM, Parser,
 };
 
 use std::{collections::HashMap, marker::PhantomData, num::ParseIntError};
@@ -274,6 +271,28 @@ fn json_bench(c: &mut Criterion) {
   });
 }
 
+/// Benchmarks `json` on a small inline document using `OutputM<Emit, Check, Complete>`.
+fn json_bench_error_check(c: &mut Criterion) {
+  let data = "  { \"a\"\t: 42,
+  \"b\": [ \"x\", \"y\", 12 ,\"\\u2014\", \"\\uD83D\\uDE10\"] ,
+  \"c\": { \"hello\" : \"world\"
+  }
+  }  ";
+
+  // test once to make sure it parses correctly
+  json::<Error<&str>>()
+    .process::<OutputM<Emit, Check, Complete>>(data)
+    .unwrap();
+
+  c.bench_function("json error check", |b| {
+    b.iter(|| {
+      json::<Error<&str>>()
+        .process::<OutputM<Emit, Check, Complete>>(data)
+        .unwrap()
+    });
+  });
+}
+
 static CANADA: &str = include_str!("../canada.json");
 fn canada_json(c: &mut Criterion) {
   // test once to make sure it parses correctly
@@ -304,7 +323,7 @@ fn verbose_json(c: &mut Criterion) {
     .unwrap();
 
   // println!("data:\n{:?}", json(data));
-  c.bench_function("json vebose", |b| {
+  c.bench_function("json verbose", |b| {
     b.iter(|| {
       json::<VerboseError<&str>>()
         .process::<OutputM<Emit, Emit, Complete>>(data)
@@ -402,6 +421,7 @@ fn std_float_bytes(c: &mut Criterion) {
 criterion_group!(
   benches,
   json_bench,
+  json_bench_error_check,
   verbose_json,
   canada_json,
   verbose_canada_json,

diff --git a/benchmarks/benches/json_streaming.rs b/benchmarks/benches/json_streaming.rs
@@ -1,10 +1,7 @@
-#[macro_use]
-extern crate criterion;
-
 #[global_allocator]
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
-use criterion::Criterion;
+use codspeed_criterion_compat::*;
 use nom::{
   branch::alt,
   bytes::streaming::{tag, take},

diff --git a/benchmarks/benches/number.rs b/benchmarks/benches/number.rs
@@ -1,10 +1,7 @@
-#[macro_use]
-extern crate criterion;
-
 #[global_allocator]
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
-use criterion::Criterion;
+use codspeed_criterion_compat::*;
 use nom::number::complete;
 
 fn parser(i: &[u8]) -> nom::IResult<&[u8], u64> {

diff --git a/doc/choosing_a_combinator.md b/doc/choosing_a_combinator.md
@@ -38,7 +38,7 @@ Those are used to recognize the lowest level elements of your grammar, like, "he
 | [terminated](https://docs.rs/nom/latest/nom/sequence/fn.terminated.html) | `terminated(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", "ab"))` ||
 | [pair](https://docs.rs/nom/latest/nom/sequence/fn.pair.html) | `pair(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", ("ab", "XY")))` ||
 | [separated_pair](https://docs.rs/nom/latest/nom/sequence/fn.separated_pair.html) | `separated_pair(tag("hello"), char(','), tag("world"))` | `"hello,world!"` | `Ok(("!", ("hello", "world")))` ||
-| [tuple](https://docs.rs/nom/latest/nom/sequence/fn.tuple.html) | `((tag("ab"), tag("XY"), take(1)))` | `"abXYZ!"` | `Ok(("!", ("ab", "XY", "Z")))` | Chains parsers and assemble the sub results in a tuple. You can use as many child parsers as you can put elements in a tuple|
+| [tuple](https://docs.rs/nom/latest/nom/sequence/fn.tuple.html) | `tuple((tag("ab"), tag("XY"), take(1)))` | `"abXYZ!"` | `Ok(("!", ("ab", "XY", "Z")))` | Chains parsers and assembles the sub-results in a tuple. You can use as many child parsers as you can put elements in a tuple|
 
 ## Applying a parser multiple times
 

diff --git a/examples/json.rs b/examples/json.rs
@@ -97,7 +97,8 @@ fn string<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
   context(
     "string",
     preceded(char('\"'), cut(terminated(parse_str, char('\"')))),
-  )(i)
+  )
+  .parse(i)
 }
 
 /// some combinators, like `separated_list0` or `many0`, will call a parser repeatedly,
@@ -116,7 +117,8 @@ fn array<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
         preceded(sp, char(']')),
       )),
     ),
-  )(i)
+  )
+  .parse(i)
 }
 
 fn key_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
@@ -150,7 +152,8 @@ fn hash<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
         preceded(sp, char('}')),
       )),
     ),
-  )(i)
+  )
+  .parse(i)
 }
 
 /// here, we apply the space parser before trying to parse a value

diff --git a/examples/json_iterator.rs b/examples/json_iterator.rs
@@ -217,7 +217,8 @@ fn string(i: &str) -> IResult<&str, &str> {
   context(
     "string",
     preceded(char('\"'), cut(terminated(parse_str, char('\"')))),
-  )(i)
+  )
+  .parse(i)
 }
 
 fn boolean(input: &str) -> IResult<&str, bool> {
@@ -234,7 +235,8 @@ fn array(i: &str) -> IResult<&str, ()> {
         preceded(sp, char(']')),
       )),
     ),
-  )(i)
+  )
+  .parse(i)
 }
 
 fn key_value(i: &str) -> IResult<&str, (&str, ())> {
@@ -251,7 +253,8 @@ fn hash(i: &str) -> IResult<&str, ()> {
         preceded(sp, char('}')),
       )),
     ),
-  )(i)
+  )
+  .parse(i)
 }
 
 fn value(i: &str) -> IResult<&str, ()> {