Skip to content

Commit

Permalink
fuzz: use structured fuzzer input
Browse files Browse the repository at this point in the history
This makes a couple of the fuzzer targets a bit nicer by just asking for
structured data instead of trying to manifest it ourselves out of a
&[u8].

Closes #821
  • Loading branch information
5225225 authored and BurntSushi committed May 18, 2023
1 parent 34de1ac commit 7638671
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 35 deletions.
2 changes: 1 addition & 1 deletion fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ edition = "2021"
cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.4.1"
libfuzzer-sys = { version = "0.4.1", features = ["arbitrary-derive"] }
regex = { path = ".." }
regex-automata = { path = "../regex-automata" }
regex-lite = { path = "../regex-lite" }
Expand Down
71 changes: 54 additions & 17 deletions fuzz/fuzz_targets/fuzz_regex_lite_match.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,60 @@
#![no_main]

use libfuzzer_sys::fuzz_target;
use libfuzzer_sys::{arbitrary, fuzz_target};

fuzz_target!(|data: &[u8]| {
let _ = run(data);
});
/// Structured input for the `regex_lite` match fuzz target.
///
/// The fuzz target in this file takes a `FuzzCase` directly (via the derived
/// `Arbitrary` impl) instead of splitting a raw `&[u8]` into a pattern and a
/// haystack by hand. Every flag below is forwarded to the identically named
/// `regex_lite::RegexBuilder` method by the fuzz target.
#[derive(arbitrary::Arbitrary)]
struct FuzzCase<'a> {
/// Pattern handed to `regex_lite::RegexBuilder::new`.
pattern: &'a str,
/// Text that the compiled regex is run against with `is_match`.
haystack: &'a str,
/// Forwarded to `RegexBuilder::case_insensitive`.
case_insensitive: bool,
/// Forwarded to `RegexBuilder::multi_line`.
multi_line: bool,
/// Forwarded to `RegexBuilder::crlf`.
crlf: bool,
/// Forwarded to `RegexBuilder::dot_matches_new_line`.
dot_matches_new_line: bool,
/// Forwarded to `RegexBuilder::swap_greed`.
swap_greed: bool,
/// Forwarded to `RegexBuilder::ignore_whitespace`.
ignore_whitespace: bool,
}

// Hand-written `Debug`: instead of a field dump, a case is printed as a Rust
// snippet that rebuilds the same `regex_lite::RegexBuilder` with the same
// flags and calls `is_match` on the same haystack, mirroring the body of the
// fuzz target in this file. Each field is interpolated with `{:?}`, so strings
// appear as quoted, escaped literals.
impl std::fmt::Debug for FuzzCase<'_> {
fn fmt(
&self,
fmt: &mut std::fmt::Formatter,
) -> Result<(), std::fmt::Error> {
let FuzzCase {
pattern,
case_insensitive,
multi_line,
crlf,
dot_matches_new_line,
swap_greed,
ignore_whitespace,
haystack,
} = self;

fn run(data: &[u8]) -> Option<()> {
if data.len() < 2 {
return None;
write!(
fmt,
r#"
let Ok(re) = regex_lite::RegexBuilder::new({pattern:?})
.case_insensitive({case_insensitive:?})
.multi_line({multi_line:?})
.crlf({crlf:?})
.dot_matches_new_line({dot_matches_new_line:?})
.swap_greed({swap_greed:?})
.ignore_whitespace({ignore_whitespace:?})
.build() else {{ return }};
re.is_match({haystack:?});
"#
)
}
let mut split_at = usize::from(data[0]);
let data = std::str::from_utf8(&data[1..]).ok()?;
// Split data into a regex and haystack to search.
let len = usize::try_from(data.chars().count()).ok()?;
split_at = std::cmp::max(split_at, 1) % len;
let char_index = data.char_indices().nth(split_at)?.0;
let (pattern, input) = data.split_at(char_index);
let re = regex_lite::Regex::new(pattern).ok()?;
re.is_match(input);
Some(())
}

fuzz_target!(|case: FuzzCase| {
    // Compile the fuzzer-supplied pattern with every fuzzer-chosen flag
    // applied to the builder.
    let built = regex_lite::RegexBuilder::new(case.pattern)
        .case_insensitive(case.case_insensitive)
        .multi_line(case.multi_line)
        .crlf(case.crlf)
        .dot_matches_new_line(case.dot_matches_new_line)
        .swap_greed(case.swap_greed)
        .ignore_whitespace(case.ignore_whitespace)
        .build();

    // A pattern that does not compile ends the case. Otherwise run a search;
    // its boolean result is deliberately discarded, only the execution of
    // the search matters here.
    if let Ok(re) = built {
        re.is_match(case.haystack);
    }
});
75 changes: 58 additions & 17 deletions fuzz/fuzz_targets/fuzz_regex_match.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,64 @@
#![no_main]

use libfuzzer_sys::fuzz_target;
use libfuzzer_sys::{arbitrary, fuzz_target};

fuzz_target!(|data: &[u8]| {
let _ = run(data);
});
/// Structured input for the `regex` match fuzz target.
///
/// The fuzz target in this file takes a `FuzzCase` directly (via the derived
/// `Arbitrary` impl) instead of splitting a raw `&[u8]` into a pattern and a
/// haystack by hand. Every flag below is forwarded to the identically named
/// `regex::RegexBuilder` method by the fuzz target.
#[derive(arbitrary::Arbitrary)]
struct FuzzCase<'a> {
/// Pattern handed to `regex::RegexBuilder::new`.
pattern: &'a str,
/// Text that the compiled regex is run against with `is_match`.
haystack: &'a str,
/// Forwarded to `RegexBuilder::case_insensitive`.
case_insensitive: bool,
/// Forwarded to `RegexBuilder::multi_line`.
multi_line: bool,
/// Forwarded to `RegexBuilder::dot_matches_new_line`.
dot_matches_new_line: bool,
/// Forwarded to `RegexBuilder::swap_greed`.
swap_greed: bool,
/// Forwarded to `RegexBuilder::ignore_whitespace`.
ignore_whitespace: bool,
/// Forwarded to `RegexBuilder::unicode`.
unicode: bool,
/// Forwarded to `RegexBuilder::octal`.
octal: bool,
}

// Hand-written `Debug`: instead of a field dump, a case is printed as a Rust
// snippet that rebuilds the same `regex::RegexBuilder` with the same flags and
// calls `is_match` on the same haystack, mirroring the body of the fuzz target
// in this file. Each field is interpolated with `{:?}`, so strings appear as
// quoted, escaped literals.
impl std::fmt::Debug for FuzzCase<'_> {
fn fmt(
&self,
fmt: &mut std::fmt::Formatter,
) -> Result<(), std::fmt::Error> {
let FuzzCase {
pattern,
case_insensitive,
multi_line,
dot_matches_new_line,
swap_greed,
ignore_whitespace,
unicode,
octal,
haystack,
} = self;

fn run(data: &[u8]) -> Option<()> {
if data.len() < 2 {
return None;
write!(
fmt,
r#"
let Ok(re) = regex::RegexBuilder::new({pattern:?})
.case_insensitive({case_insensitive:?})
.multi_line({multi_line:?})
.dot_matches_new_line({dot_matches_new_line:?})
.swap_greed({swap_greed:?})
.ignore_whitespace({ignore_whitespace:?})
.unicode({unicode:?})
.octal({octal:?})
.build() else {{ return }};
re.is_match({haystack:?});
"#
)
}
let mut split_at = usize::from(data[0]);
let data = std::str::from_utf8(&data[1..]).ok()?;
// Split data into a regex and haystack to search.
let len = usize::try_from(data.chars().count()).ok()?;
split_at = std::cmp::max(split_at, 1) % len;
let char_index = data.char_indices().nth(split_at)?.0;
let (pattern, input) = data.split_at(char_index);
let re = regex::Regex::new(pattern).ok()?;
re.is_match(input);
Some(())
}

fuzz_target!(|case: FuzzCase| {
    // Compile the fuzzer-supplied pattern with every fuzzer-chosen flag
    // applied to the builder.
    let built = regex::RegexBuilder::new(case.pattern)
        .case_insensitive(case.case_insensitive)
        .multi_line(case.multi_line)
        .dot_matches_new_line(case.dot_matches_new_line)
        .swap_greed(case.swap_greed)
        .ignore_whitespace(case.ignore_whitespace)
        .unicode(case.unicode)
        .octal(case.octal)
        .build();

    // A pattern that does not compile ends the case. Otherwise run a search;
    // its boolean result is deliberately discarded, only the execution of
    // the search matters here.
    if let Ok(re) = built {
        re.is_match(case.haystack);
    }
});

0 comments on commit 7638671

Please sign in to comment.