diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2122cc5502..48fffb52dd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: include: - build: pinned os: ubuntu-18.04 - rust: 1.28.0 + rust: 1.41.1 - build: stable os: ubuntu-18.04 rust: stable diff --git a/Cargo.toml b/Cargo.toml index 1d89911630..8f2ffb864b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ finite automata and guarantees linear time matching on all inputs. categories = ["text-processing"] autotests = false exclude = ["/scripts/*", "/.github/*"] +edition = "2018" [workspace] members = [ @@ -105,12 +106,12 @@ pattern = [] # For very fast prefix literal matching. [dependencies.aho-corasick] -version = "0.7.6" +version = "0.7.18" optional = true # For skipping along search text quickly when a leading byte is known. [dependencies.memchr] -version = "2.2.1" +version = "2.4.0" optional = true # For parsing regular expressions. diff --git a/PERFORMANCE.md b/PERFORMANCE.md index b4aeb89c1b..7904e1f6bc 100644 --- a/PERFORMANCE.md +++ b/PERFORMANCE.md @@ -62,9 +62,7 @@ on how your program is structured. Thankfully, the [`lazy_static`](https://crates.io/crates/lazy_static) crate provides an answer that works well: - #[macro_use] extern crate lazy_static; - extern crate regex; - + use lazy_static::lazy_static; use regex::Regex; fn some_helper_function(text: &str) -> bool { diff --git a/README.md b/README.md index f7a2554555..3554e3e7e7 100644 --- a/README.md +++ b/README.md @@ -27,13 +27,7 @@ Add this to your `Cargo.toml`: ```toml [dependencies] -regex = "1" -``` - -and this to your crate root (if you're using Rust 2015): - -```rust -extern crate regex; +regex = "1.5" ``` Here's a simple example that matches a date in YYYY-MM-DD format and prints the @@ -228,7 +222,7 @@ The full set of features one can disable are ### Minimum Rust version policy -This crate's minimum supported `rustc` version is `1.28.0`. 
+This crate's minimum supported `rustc` version is `1.41.1`. The current **tentative** policy is that the minimum Rust version required to use this crate can be increased in minor version updates. For example, if diff --git a/bench/Cargo.toml b/bench/Cargo.toml index 0c08a74c5c..9e61fd0469 100644 --- a/bench/Cargo.toml +++ b/bench/Cargo.toml @@ -10,6 +10,7 @@ homepage = "https://github.com/rust-lang/regex" description = "Regex benchmarks for Rust's and other engines." build = "build.rs" workspace = ".." +edition = "2018" [dependencies] docopt = "1" @@ -20,8 +21,7 @@ libpcre-sys = { version = "0.2", optional = true } memmap = "0.6.2" regex = { version = "1", path = ".." } regex-syntax = { version = "0.6", path = "../regex-syntax" } -serde = "1" -serde_derive = "1" +serde = { version = "1", features = ["derive"] } cfg-if = "0.1" [build-dependencies] diff --git a/bench/build.rs b/bench/build.rs index 5d44849745..d16cc644d2 100644 --- a/bench/build.rs +++ b/bench/build.rs @@ -1,6 +1,3 @@ -extern crate cc; -extern crate pkg_config; - use std::env; use std::process; diff --git a/bench/log/08-new-memmem/rust-after-01 b/bench/log/08-new-memmem/rust-after-01 new file mode 100644 index 0000000000..521e935f43 --- /dev/null +++ b/bench/log/08-new-memmem/rust-after-01 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 1) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 2) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 1) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 19 ns/iter (+/- 0) = 1368 MB/s +test misc::easy0_1K ... bench: 15 ns/iter (+/- 2) = 70066 MB/s +test misc::easy0_1MB ... bench: 22 ns/iter (+/- 0) = 47663772 MB/s +test misc::easy0_32 ... bench: 14 ns/iter (+/- 2) = 4214 MB/s +test misc::easy0_32K ... bench: 15 ns/iter (+/- 1) = 2186333 MB/s +test misc::easy1_1K ... 
bench: 40 ns/iter (+/- 2) = 26100 MB/s +test misc::easy1_1MB ... bench: 44 ns/iter (+/- 5) = 23831727 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 5) = 1333 MB/s +test misc::easy1_32K ... bench: 41 ns/iter (+/- 3) = 799707 MB/s +test misc::hard_1K ... bench: 50 ns/iter (+/- 7) = 21020 MB/s +test misc::hard_1MB ... bench: 55 ns/iter (+/- 6) = 19065509 MB/s +test misc::hard_32 ... bench: 50 ns/iter (+/- 7) = 1180 MB/s +test misc::hard_32K ... bench: 50 ns/iter (+/- 2) = 655900 MB/s +test misc::is_match_set ... bench: 60 ns/iter (+/- 2) = 416 MB/s +test misc::literal ... bench: 12 ns/iter (+/- 1) = 4250 MB/s +test misc::long_needle1 ... bench: 3,252 ns/iter (+/- 168) = 30750 MB/s +test misc::long_needle2 ... bench: 355,576 ns/iter (+/- 34,074) = 281 MB/s +test misc::match_class ... bench: 67 ns/iter (+/- 2) = 1208 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::match_class_unicode ... bench: 256 ns/iter (+/- 36) = 628 MB/s +test misc::matches_set ... bench: 458 ns/iter (+/- 65) = 54 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 1) = 70133 MB/s +test misc::medium_1MB ... bench: 23 ns/iter (+/- 2) = 45591478 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 0) = 4000 MB/s +test misc::medium_32K ... bench: 15 ns/iter (+/- 0) = 2186400 MB/s +test misc::no_exponential ... bench: 406 ns/iter (+/- 32) = 246 MB/s +test misc::not_literal ... bench: 90 ns/iter (+/- 12) = 566 MB/s +test misc::one_pass_long_prefix ... bench: 53 ns/iter (+/- 1) = 490 MB/s +test misc::one_pass_long_prefix_not ... bench: 51 ns/iter (+/- 7) = 509 MB/s +test misc::one_pass_short ... bench: 37 ns/iter (+/- 1) = 459 MB/s +test misc::one_pass_short_not ... bench: 39 ns/iter (+/- 5) = 435 MB/s +test misc::reallyhard2_1K ... bench: 75 ns/iter (+/- 2) = 13866 MB/s +test misc::reallyhard_1K ... bench: 1,591 ns/iter (+/- 227) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,576,602 ns/iter (+/- 204,573) = 665 MB/s +test misc::reallyhard_32 ... 
bench: 102 ns/iter (+/- 7) = 578 MB/s +test misc::reallyhard_32K ... bench: 49,327 ns/iter (+/- 4,812) = 664 MB/s +test misc::replace_all ... bench: 132 ns/iter (+/- 13) +test misc::reverse_suffix_no_quadratic ... bench: 4,190 ns/iter (+/- 581) = 1909 MB/s +test misc::short_haystack_1000000x ... bench: 132,982 ns/iter (+/- 18,045) = 60158 MB/s +test misc::short_haystack_100000x ... bench: 14,720 ns/iter (+/- 946) = 54348 MB/s +test misc::short_haystack_10000x ... bench: 5,993 ns/iter (+/- 381) = 13350 MB/s +test misc::short_haystack_1000x ... bench: 476 ns/iter (+/- 58) = 16829 MB/s +test misc::short_haystack_100x ... bench: 227 ns/iter (+/- 22) = 3572 MB/s +test misc::short_haystack_10x ... bench: 211 ns/iter (+/- 13) = 431 MB/s +test misc::short_haystack_1x ... bench: 204 ns/iter (+/- 29) = 93 MB/s +test misc::short_haystack_2x ... bench: 206 ns/iter (+/- 7) = 131 MB/s +test misc::short_haystack_3x ... bench: 212 ns/iter (+/- 16) = 165 MB/s +test misc::short_haystack_4x ... bench: 207 ns/iter (+/- 29) = 207 MB/s +test regexdna::find_new_lines ... bench: 12,053,740 ns/iter (+/- 393,644) = 421 MB/s +test regexdna::subst1 ... bench: 786,112 ns/iter (+/- 91,136) = 6466 MB/s +test regexdna::subst10 ... bench: 831,353 ns/iter (+/- 67,293) = 6114 MB/s +test regexdna::subst11 ... bench: 784,021 ns/iter (+/- 28,112) = 6483 MB/s +test regexdna::subst2 ... bench: 785,838 ns/iter (+/- 108,510) = 6468 MB/s +test regexdna::subst3 ... bench: 791,789 ns/iter (+/- 37,364) = 6420 MB/s +test regexdna::subst4 ... bench: 784,224 ns/iter (+/- 23,802) = 6482 MB/s +test regexdna::subst5 ... bench: 788,368 ns/iter (+/- 75,171) = 6448 MB/s +test regexdna::subst6 ... bench: 784,730 ns/iter (+/- 48,594) = 6477 MB/s +test regexdna::subst7 ... bench: 788,067 ns/iter (+/- 88,333) = 6450 MB/s +test regexdna::subst8 ... bench: 810,784 ns/iter (+/- 111,836) = 6269 MB/s +test regexdna::subst9 ... bench: 788,854 ns/iter (+/- 66,496) = 6444 MB/s +test regexdna::variant1 ... 
bench: 2,238,677 ns/iter (+/- 144,752) = 2270 MB/s +test regexdna::variant2 ... bench: 3,258,761 ns/iter (+/- 205,012) = 1559 MB/s +test regexdna::variant3 ... bench: 3,818,146 ns/iter (+/- 254,877) = 1331 MB/s +test regexdna::variant4 ... bench: 3,837,323 ns/iter (+/- 349,373) = 1324 MB/s +test regexdna::variant5 ... bench: 2,698,901 ns/iter (+/- 111,145) = 1883 MB/s +test regexdna::variant6 ... bench: 2,687,854 ns/iter (+/- 184,039) = 1891 MB/s +test regexdna::variant7 ... bench: 3,291,211 ns/iter (+/- 220,992) = 1544 MB/s +test regexdna::variant8 ... bench: 3,359,262 ns/iter (+/- 185,610) = 1513 MB/s +test regexdna::variant9 ... bench: 3,293,953 ns/iter (+/- 245,454) = 1543 MB/s +test rust_compile::compile_huge ... bench: 95,142 ns/iter (+/- 10,195) +test rust_compile::compile_huge_bytes ... bench: 5,650,680 ns/iter (+/- 252,936) +test rust_compile::compile_huge_full ... bench: 10,867,986 ns/iter (+/- 275,259) +test rust_compile::compile_simple ... bench: 3,751 ns/iter (+/- 310) +test rust_compile::compile_simple_bytes ... bench: 3,664 ns/iter (+/- 172) +test rust_compile::compile_simple_full ... bench: 22,078 ns/iter (+/- 3,259) +test rust_compile::compile_small ... bench: 8,499 ns/iter (+/- 942) +test rust_compile::compile_small_bytes ... bench: 151,196 ns/iter (+/- 16,322) +test rust_compile::compile_small_full ... bench: 309,597 ns/iter (+/- 32,622) +test sherlock::before_after_holmes ... bench: 917,591 ns/iter (+/- 55,643) = 648 MB/s +test sherlock::before_holmes ... bench: 62,726 ns/iter (+/- 8,861) = 9484 MB/s +test sherlock::everything_greedy ... bench: 2,036,050 ns/iter (+/- 152,461) = 292 MB/s +test sherlock::everything_greedy_nl ... bench: 796,690 ns/iter (+/- 71,089) = 746 MB/s +test sherlock::holmes_cochar_watson ... bench: 106,258 ns/iter (+/- 8,294) = 5598 MB/s +test sherlock::holmes_coword_watson ... bench: 481,086 ns/iter (+/- 60,212) = 1236 MB/s +test sherlock::ing_suffix ... 
bench: 322,033 ns/iter (+/- 8,912) = 1847 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,067,523 ns/iter (+/- 89,630) = 557 MB/s +test sherlock::letters ... bench: 22,745,932 ns/iter (+/- 428,787) = 26 MB/s +test sherlock::letters_lower ... bench: 22,228,365 ns/iter (+/- 495,287) = 26 MB/s +test sherlock::letters_upper ... bench: 1,775,941 ns/iter (+/- 158,985) = 334 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,327 ns/iter (+/- 49,085) = 663 MB/s +test sherlock::name_alt1 ... bench: 32,008 ns/iter (+/- 4,011) = 18587 MB/s +test sherlock::name_alt2 ... bench: 86,850 ns/iter (+/- 5,463) = 6850 MB/s +test sherlock::name_alt3 ... bench: 98,359 ns/iter (+/- 14,052) = 6048 MB/s +test sherlock::name_alt3_nocase ... bench: 381,147 ns/iter (+/- 16,996) = 1560 MB/s +test sherlock::name_alt4 ... bench: 121,025 ns/iter (+/- 16,654) = 4915 MB/s +test sherlock::name_alt4_nocase ... bench: 188,972 ns/iter (+/- 26,145) = 3148 MB/s +test sherlock::name_alt5 ... bench: 91,832 ns/iter (+/- 6,188) = 6478 MB/s +test sherlock::name_alt5_nocase ... bench: 351,422 ns/iter (+/- 49,084) = 1692 MB/s +test sherlock::name_holmes ... bench: 33,405 ns/iter (+/- 3,113) = 17809 MB/s +test sherlock::name_holmes_nocase ... bench: 134,899 ns/iter (+/- 10,883) = 4410 MB/s +test sherlock::name_sherlock ... bench: 22,455 ns/iter (+/- 2,027) = 26494 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,283 ns/iter (+/- 2,281) = 26698 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,492 ns/iter (+/- 6,496) = 6102 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,627 ns/iter (+/- 8,442) = 6221 MB/s +test sherlock::name_whitespace ... bench: 30,702 ns/iter (+/- 4,194) = 19377 MB/s +test sherlock::no_match_common ... bench: 19,616 ns/iter (+/- 2,677) = 30328 MB/s +test sherlock::no_match_really_common ... bench: 25,601 ns/iter (+/- 2,506) = 23238 MB/s +test sherlock::no_match_uncommon ... 
bench: 19,641 ns/iter (+/- 2,175) = 30290 MB/s +test sherlock::quotes ... bench: 369,048 ns/iter (+/- 25,898) = 1612 MB/s +test sherlock::repeated_class_negation ... bench: 75,780,396 ns/iter (+/- 1,032,817) = 7 MB/s +test sherlock::the_lower ... bench: 327,762 ns/iter (+/- 48,769) = 1815 MB/s +test sherlock::the_nocase ... bench: 532,075 ns/iter (+/- 40,117) = 1118 MB/s +test sherlock::the_upper ... bench: 45,197 ns/iter (+/- 1,621) = 13163 MB/s +test sherlock::the_whitespace ... bench: 819,239 ns/iter (+/- 81,388) = 726 MB/s +test sherlock::word_ending_n ... bench: 1,716,625 ns/iter (+/- 120,247) = 346 MB/s +test sherlock::words ... bench: 8,690,764 ns/iter (+/- 322,915) = 68 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 114.31s + diff --git a/bench/log/08-new-memmem/rust-after-02 b/bench/log/08-new-memmem/rust-after-02 new file mode 100644 index 0000000000..60d057836c --- /dev/null +++ b/bench/log/08-new-memmem/rust-after-02 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 2) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 1) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 1) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 19 ns/iter (+/- 2) = 1368 MB/s +test misc::easy0_1K ... bench: 15 ns/iter (+/- 1) = 70066 MB/s +test misc::easy0_1MB ... bench: 22 ns/iter (+/- 1) = 47663772 MB/s +test misc::easy0_32 ... bench: 14 ns/iter (+/- 1) = 4214 MB/s +test misc::easy0_32K ... bench: 15 ns/iter (+/- 1) = 2186333 MB/s +test misc::easy1_1K ... bench: 39 ns/iter (+/- 4) = 26769 MB/s +test misc::easy1_1MB ... bench: 43 ns/iter (+/- 3) = 24385953 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 4) = 1333 MB/s +test misc::easy1_32K ... bench: 39 ns/iter (+/- 3) = 840717 MB/s +test misc::hard_1K ... bench: 50 ns/iter (+/- 5) = 21020 MB/s +test misc::hard_1MB ... 
bench: 55 ns/iter (+/- 7) = 19065509 MB/s +test misc::hard_32 ... bench: 50 ns/iter (+/- 5) = 1180 MB/s +test misc::hard_32K ... bench: 50 ns/iter (+/- 6) = 655900 MB/s +test misc::is_match_set ... bench: 60 ns/iter (+/- 4) = 416 MB/s +test misc::literal ... bench: 12 ns/iter (+/- 0) = 4250 MB/s +test misc::long_needle1 ... bench: 3,251 ns/iter (+/- 333) = 30760 MB/s +test misc::long_needle2 ... bench: 355,576 ns/iter (+/- 24,612) = 281 MB/s +test misc::match_class ... bench: 66 ns/iter (+/- 1) = 1227 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 1) = 5785 MB/s +test misc::match_class_unicode ... bench: 254 ns/iter (+/- 25) = 633 MB/s +test misc::matches_set ... bench: 456 ns/iter (+/- 17) = 54 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 0) = 70133 MB/s +test misc::medium_1MB ... bench: 23 ns/iter (+/- 2) = 45591478 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 2) = 4000 MB/s +test misc::medium_32K ... bench: 15 ns/iter (+/- 2) = 2186400 MB/s +test misc::no_exponential ... bench: 403 ns/iter (+/- 55) = 248 MB/s +test misc::not_literal ... bench: 90 ns/iter (+/- 12) = 566 MB/s +test misc::one_pass_long_prefix ... bench: 51 ns/iter (+/- 7) = 509 MB/s +test misc::one_pass_long_prefix_not ... bench: 51 ns/iter (+/- 5) = 509 MB/s +test misc::one_pass_short ... bench: 38 ns/iter (+/- 5) = 447 MB/s +test misc::one_pass_short_not ... bench: 39 ns/iter (+/- 1) = 435 MB/s +test misc::reallyhard2_1K ... bench: 75 ns/iter (+/- 2) = 13866 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 148) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,576,299 ns/iter (+/- 142,145) = 665 MB/s +test misc::reallyhard_32 ... bench: 103 ns/iter (+/- 8) = 572 MB/s +test misc::reallyhard_32K ... bench: 49,326 ns/iter (+/- 3,202) = 664 MB/s +test misc::replace_all ... bench: 132 ns/iter (+/- 16) +test misc::reverse_suffix_no_quadratic ... bench: 4,168 ns/iter (+/- 227) = 1919 MB/s +test misc::short_haystack_1000000x ... 
bench: 132,733 ns/iter (+/- 18,141) = 60271 MB/s +test misc::short_haystack_100000x ... bench: 14,468 ns/iter (+/- 1,777) = 55295 MB/s +test misc::short_haystack_10000x ... bench: 6,316 ns/iter (+/- 360) = 12667 MB/s +test misc::short_haystack_1000x ... bench: 474 ns/iter (+/- 69) = 16900 MB/s +test misc::short_haystack_100x ... bench: 229 ns/iter (+/- 32) = 3541 MB/s +test misc::short_haystack_10x ... bench: 212 ns/iter (+/- 18) = 429 MB/s +test misc::short_haystack_1x ... bench: 205 ns/iter (+/- 28) = 92 MB/s +test misc::short_haystack_2x ... bench: 207 ns/iter (+/- 20) = 130 MB/s +test misc::short_haystack_3x ... bench: 213 ns/iter (+/- 7) = 164 MB/s +test misc::short_haystack_4x ... bench: 208 ns/iter (+/- 9) = 206 MB/s +test regexdna::find_new_lines ... bench: 12,050,847 ns/iter (+/- 346,484) = 421 MB/s +test regexdna::subst1 ... bench: 817,689 ns/iter (+/- 104,629) = 6216 MB/s +test regexdna::subst10 ... bench: 788,728 ns/iter (+/- 66,497) = 6445 MB/s +test regexdna::subst11 ... bench: 787,188 ns/iter (+/- 49,158) = 6457 MB/s +test regexdna::subst2 ... bench: 787,143 ns/iter (+/- 108,541) = 6458 MB/s +test regexdna::subst3 ... bench: 792,452 ns/iter (+/- 32,963) = 6414 MB/s +test regexdna::subst4 ... bench: 820,043 ns/iter (+/- 71,037) = 6198 MB/s +test regexdna::subst5 ... bench: 790,043 ns/iter (+/- 39,234) = 6434 MB/s +test regexdna::subst6 ... bench: 785,007 ns/iter (+/- 18,701) = 6475 MB/s +test regexdna::subst7 ... bench: 789,393 ns/iter (+/- 51,525) = 6439 MB/s +test regexdna::subst8 ... bench: 784,190 ns/iter (+/- 90,675) = 6482 MB/s +test regexdna::subst9 ... bench: 789,021 ns/iter (+/- 88,256) = 6442 MB/s +test regexdna::variant1 ... bench: 2,237,592 ns/iter (+/- 146,174) = 2271 MB/s +test regexdna::variant2 ... bench: 3,255,382 ns/iter (+/- 179,473) = 1561 MB/s +test regexdna::variant3 ... bench: 3,812,799 ns/iter (+/- 210,786) = 1333 MB/s +test regexdna::variant4 ... bench: 3,853,476 ns/iter (+/- 263,442) = 1319 MB/s +test regexdna::variant5 ... 
bench: 2,696,756 ns/iter (+/- 161,353) = 1885 MB/s +test regexdna::variant6 ... bench: 2,683,221 ns/iter (+/- 149,650) = 1894 MB/s +test regexdna::variant7 ... bench: 3,289,426 ns/iter (+/- 209,217) = 1545 MB/s +test regexdna::variant8 ... bench: 3,362,858 ns/iter (+/- 274,273) = 1511 MB/s +test regexdna::variant9 ... bench: 3,287,253 ns/iter (+/- 188,894) = 1546 MB/s +test rust_compile::compile_huge ... bench: 94,912 ns/iter (+/- 12,311) +test rust_compile::compile_huge_bytes ... bench: 5,534,281 ns/iter (+/- 192,069) +test rust_compile::compile_huge_full ... bench: 10,969,970 ns/iter (+/- 312,230) +test rust_compile::compile_simple ... bench: 3,523 ns/iter (+/- 525) +test rust_compile::compile_simple_bytes ... bench: 3,564 ns/iter (+/- 355) +test rust_compile::compile_simple_full ... bench: 19,887 ns/iter (+/- 1,885) +test rust_compile::compile_small ... bench: 8,294 ns/iter (+/- 1,123) +test rust_compile::compile_small_bytes ... bench: 153,070 ns/iter (+/- 20,825) +test rust_compile::compile_small_full ... bench: 313,318 ns/iter (+/- 28,271) +test sherlock::before_after_holmes ... bench: 907,585 ns/iter (+/- 86,027) = 655 MB/s +test sherlock::before_holmes ... bench: 62,765 ns/iter (+/- 6,413) = 9478 MB/s +test sherlock::everything_greedy ... bench: 2,033,519 ns/iter (+/- 97,963) = 292 MB/s +test sherlock::everything_greedy_nl ... bench: 796,514 ns/iter (+/- 48,247) = 746 MB/s +test sherlock::holmes_cochar_watson ... bench: 107,788 ns/iter (+/- 15,545) = 5519 MB/s +test sherlock::holmes_coword_watson ... bench: 482,686 ns/iter (+/- 49,033) = 1232 MB/s +test sherlock::ing_suffix ... bench: 322,901 ns/iter (+/- 46,329) = 1842 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,067,799 ns/iter (+/- 57,022) = 557 MB/s +test sherlock::letters ... bench: 22,823,246 ns/iter (+/- 472,094) = 26 MB/s +test sherlock::letters_lower ... bench: 22,137,278 ns/iter (+/- 443,188) = 26 MB/s +test sherlock::letters_upper ... 
bench: 1,773,598 ns/iter (+/- 96,994) = 335 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,623 ns/iter (+/- 48,509) = 662 MB/s +test sherlock::name_alt1 ... bench: 31,882 ns/iter (+/- 3,354) = 18660 MB/s +test sherlock::name_alt2 ... bench: 86,500 ns/iter (+/- 7,997) = 6877 MB/s +test sherlock::name_alt3 ... bench: 98,159 ns/iter (+/- 6,106) = 6060 MB/s +test sherlock::name_alt3_nocase ... bench: 383,858 ns/iter (+/- 19,224) = 1549 MB/s +test sherlock::name_alt4 ... bench: 122,489 ns/iter (+/- 17,271) = 4857 MB/s +test sherlock::name_alt4_nocase ... bench: 192,081 ns/iter (+/- 10,999) = 3097 MB/s +test sherlock::name_alt5 ... bench: 91,396 ns/iter (+/- 6,399) = 6509 MB/s +test sherlock::name_alt5_nocase ... bench: 354,804 ns/iter (+/- 26,158) = 1676 MB/s +test sherlock::name_holmes ... bench: 33,569 ns/iter (+/- 4,647) = 17722 MB/s +test sherlock::name_holmes_nocase ... bench: 136,387 ns/iter (+/- 14,005) = 4362 MB/s +test sherlock::name_sherlock ... bench: 22,468 ns/iter (+/- 1,144) = 26479 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,279 ns/iter (+/- 1,563) = 26703 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 98,003 ns/iter (+/- 10,978) = 6070 MB/s +test sherlock::name_sherlock_nocase ... bench: 96,130 ns/iter (+/- 4,373) = 6188 MB/s +test sherlock::name_whitespace ... bench: 30,532 ns/iter (+/- 3,125) = 19485 MB/s +test sherlock::no_match_common ... bench: 19,644 ns/iter (+/- 2,118) = 30285 MB/s +test sherlock::no_match_really_common ... bench: 25,374 ns/iter (+/- 1,538) = 23446 MB/s +test sherlock::no_match_uncommon ... bench: 19,602 ns/iter (+/- 427) = 30350 MB/s +test sherlock::quotes ... bench: 369,657 ns/iter (+/- 52,406) = 1609 MB/s +test sherlock::repeated_class_negation ... bench: 76,922,839 ns/iter (+/- 1,261,770) = 7 MB/s +test sherlock::the_lower ... bench: 326,221 ns/iter (+/- 35,683) = 1823 MB/s +test sherlock::the_nocase ... bench: 525,254 ns/iter (+/- 26,000) = 1132 MB/s +test sherlock::the_upper ... 
bench: 44,702 ns/iter (+/- 5,012) = 13308 MB/s +test sherlock::the_whitespace ... bench: 814,494 ns/iter (+/- 66,715) = 730 MB/s +test sherlock::word_ending_n ... bench: 1,705,139 ns/iter (+/- 97,420) = 348 MB/s +test sherlock::words ... bench: 8,632,437 ns/iter (+/- 278,177) = 68 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 106.01s + diff --git a/bench/log/08-new-memmem/rust-before-01 b/bench/log/08-new-memmem/rust-before-01 new file mode 100644 index 0000000000..1316e6d695 --- /dev/null +++ b/bench/log/08-new-memmem/rust-before-01 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 1) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 17 ns/iter (+/- 1) = 22941 MB/s +test misc::anchored_literal_short_match ... bench: 16 ns/iter (+/- 2) = 1625 MB/s +test misc::anchored_literal_short_non_match ... bench: 17 ns/iter (+/- 2) = 1529 MB/s +test misc::easy0_1K ... bench: 12 ns/iter (+/- 1) = 87583 MB/s +test misc::easy0_1MB ... bench: 15 ns/iter (+/- 0) = 69906866 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 0) = 5363 MB/s +test misc::easy0_32K ... bench: 12 ns/iter (+/- 2) = 2732916 MB/s +test misc::easy1_1K ... bench: 39 ns/iter (+/- 5) = 26769 MB/s +test misc::easy1_1MB ... bench: 40 ns/iter (+/- 6) = 26214900 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 3) = 1333 MB/s +test misc::easy1_32K ... bench: 39 ns/iter (+/- 5) = 840717 MB/s +test misc::hard_1K ... bench: 49 ns/iter (+/- 1) = 21448 MB/s +test misc::hard_1MB ... bench: 52 ns/iter (+/- 2) = 20165442 MB/s +test misc::hard_32 ... bench: 49 ns/iter (+/- 2) = 1204 MB/s +test misc::hard_32K ... bench: 49 ns/iter (+/- 3) = 669285 MB/s +test misc::is_match_set ... bench: 59 ns/iter (+/- 2) = 423 MB/s +test misc::literal ... bench: 11 ns/iter (+/- 1) = 4636 MB/s +test misc::long_needle1 ... bench: 1,161 ns/iter (+/- 54) = 86133 MB/s +test misc::long_needle2 ... 
bench: 680,687 ns/iter (+/- 63,713) = 146 MB/s +test misc::match_class ... bench: 69 ns/iter (+/- 4) = 1173 MB/s +test misc::match_class_in_range ... bench: 13 ns/iter (+/- 0) = 6230 MB/s +test misc::match_class_unicode ... bench: 253 ns/iter (+/- 9) = 636 MB/s +test misc::matches_set ... bench: 453 ns/iter (+/- 65) = 55 MB/s +test misc::medium_1K ... bench: 13 ns/iter (+/- 0) = 80923 MB/s +test misc::medium_1MB ... bench: 17 ns/iter (+/- 2) = 61682588 MB/s +test misc::medium_32 ... bench: 13 ns/iter (+/- 0) = 4615 MB/s +test misc::medium_32K ... bench: 13 ns/iter (+/- 0) = 2522769 MB/s +test misc::no_exponential ... bench: 330 ns/iter (+/- 47) = 303 MB/s +test misc::not_literal ... bench: 88 ns/iter (+/- 9) = 579 MB/s +test misc::one_pass_long_prefix ... bench: 50 ns/iter (+/- 7) = 520 MB/s +test misc::one_pass_long_prefix_not ... bench: 50 ns/iter (+/- 2) = 520 MB/s +test misc::one_pass_short ... bench: 36 ns/iter (+/- 4) = 472 MB/s +test misc::one_pass_short_not ... bench: 39 ns/iter (+/- 2) = 435 MB/s +test misc::reallyhard2_1K ... bench: 55 ns/iter (+/- 7) = 18909 MB/s +test misc::reallyhard_1K ... bench: 1,590 ns/iter (+/- 225) = 661 MB/s +test misc::reallyhard_1MB ... bench: 1,580,163 ns/iter (+/- 224,935) = 663 MB/s +test misc::reallyhard_32 ... bench: 100 ns/iter (+/- 6) = 590 MB/s +test misc::reallyhard_32K ... bench: 49,318 ns/iter (+/- 6,046) = 664 MB/s +test misc::replace_all ... bench: 127 ns/iter (+/- 14) +test misc::reverse_suffix_no_quadratic ... bench: 4,240 ns/iter (+/- 117) = 1886 MB/s +test misc::short_haystack_1000000x ... bench: 89,004 ns/iter (+/- 2,927) = 89883 MB/s +test misc::short_haystack_100000x ... bench: 10,349 ns/iter (+/- 334) = 77303 MB/s +test misc::short_haystack_10000x ... bench: 5,835 ns/iter (+/- 700) = 13712 MB/s +test misc::short_haystack_1000x ... bench: 563 ns/iter (+/- 33) = 14229 MB/s +test misc::short_haystack_100x ... bench: 260 ns/iter (+/- 21) = 3119 MB/s +test misc::short_haystack_10x ... 
bench: 221 ns/iter (+/- 31) = 411 MB/s +test misc::short_haystack_1x ... bench: 211 ns/iter (+/- 30) = 90 MB/s +test misc::short_haystack_2x ... bench: 213 ns/iter (+/- 19) = 126 MB/s +test misc::short_haystack_3x ... bench: 212 ns/iter (+/- 7) = 165 MB/s +test misc::short_haystack_4x ... bench: 221 ns/iter (+/- 26) = 194 MB/s +test regexdna::find_new_lines ... bench: 12,035,248 ns/iter (+/- 362,122) = 422 MB/s +test regexdna::subst1 ... bench: 787,853 ns/iter (+/- 29,667) = 6452 MB/s +test regexdna::subst10 ... bench: 750,718 ns/iter (+/- 103,118) = 6771 MB/s +test regexdna::subst11 ... bench: 749,377 ns/iter (+/- 103,312) = 6783 MB/s +test regexdna::subst2 ... bench: 748,785 ns/iter (+/- 83,175) = 6788 MB/s +test regexdna::subst3 ... bench: 755,004 ns/iter (+/- 75,589) = 6732 MB/s +test regexdna::subst4 ... bench: 747,617 ns/iter (+/- 70,600) = 6799 MB/s +test regexdna::subst5 ... bench: 752,458 ns/iter (+/- 86,154) = 6755 MB/s +test regexdna::subst6 ... bench: 749,801 ns/iter (+/- 102,642) = 6779 MB/s +test regexdna::subst7 ... bench: 760,975 ns/iter (+/- 105,159) = 6680 MB/s +test regexdna::subst8 ... bench: 749,002 ns/iter (+/- 82,082) = 6786 MB/s +test regexdna::subst9 ... bench: 751,248 ns/iter (+/- 100,152) = 6766 MB/s +test regexdna::variant1 ... bench: 2,211,035 ns/iter (+/- 150,147) = 2299 MB/s +test regexdna::variant2 ... bench: 3,210,193 ns/iter (+/- 161,942) = 1583 MB/s +test regexdna::variant3 ... bench: 3,793,641 ns/iter (+/- 203,795) = 1339 MB/s +test regexdna::variant4 ... bench: 3,799,721 ns/iter (+/- 140,933) = 1337 MB/s +test regexdna::variant5 ... bench: 2,652,750 ns/iter (+/- 185,489) = 1916 MB/s +test regexdna::variant6 ... bench: 2,633,257 ns/iter (+/- 211,323) = 1930 MB/s +test regexdna::variant7 ... bench: 3,268,111 ns/iter (+/- 176,273) = 1555 MB/s +test regexdna::variant8 ... bench: 3,331,333 ns/iter (+/- 264,431) = 1525 MB/s +test regexdna::variant9 ... 
bench: 3,268,398 ns/iter (+/- 298,223) = 1555 MB/s +test rust_compile::compile_huge ... bench: 94,562 ns/iter (+/- 2,194) +test rust_compile::compile_huge_bytes ... bench: 5,611,428 ns/iter (+/- 202,365) +test rust_compile::compile_huge_full ... bench: 10,933,505 ns/iter (+/- 325,078) +test rust_compile::compile_simple ... bench: 3,496 ns/iter (+/- 156) +test rust_compile::compile_simple_bytes ... bench: 3,572 ns/iter (+/- 389) +test rust_compile::compile_simple_full ... bench: 20,283 ns/iter (+/- 1,894) +test rust_compile::compile_small ... bench: 8,475 ns/iter (+/- 1,008) +test rust_compile::compile_small_bytes ... bench: 157,446 ns/iter (+/- 11,319) +test rust_compile::compile_small_full ... bench: 316,041 ns/iter (+/- 23,620) +test sherlock::before_after_holmes ... bench: 906,578 ns/iter (+/- 129,507) = 656 MB/s +test sherlock::before_holmes ... bench: 64,715 ns/iter (+/- 9,107) = 9193 MB/s +test sherlock::everything_greedy ... bench: 2,065,017 ns/iter (+/- 156,855) = 288 MB/s +test sherlock::everything_greedy_nl ... bench: 810,672 ns/iter (+/- 100,547) = 733 MB/s +test sherlock::holmes_cochar_watson ... bench: 106,124 ns/iter (+/- 10,948) = 5606 MB/s +test sherlock::holmes_coword_watson ... bench: 488,503 ns/iter (+/- 63,243) = 1217 MB/s +test sherlock::ing_suffix ... bench: 384,936 ns/iter (+/- 25,316) = 1545 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,060,294 ns/iter (+/- 152,263) = 561 MB/s +test sherlock::letters ... bench: 22,127,059 ns/iter (+/- 413,502) = 26 MB/s +test sherlock::letters_lower ... bench: 21,535,012 ns/iter (+/- 463,835) = 27 MB/s +test sherlock::letters_upper ... bench: 1,758,480 ns/iter (+/- 130,352) = 338 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,329 ns/iter (+/- 96,625) = 663 MB/s +test sherlock::name_alt1 ... bench: 31,585 ns/iter (+/- 2,796) = 18835 MB/s +test sherlock::name_alt2 ... bench: 86,223 ns/iter (+/- 9,553) = 6899 MB/s +test sherlock::name_alt3 ... 
bench: 97,177 ns/iter (+/- 11,479) = 6122 MB/s +test sherlock::name_alt3_nocase ... bench: 381,511 ns/iter (+/- 55,025) = 1559 MB/s +test sherlock::name_alt4 ... bench: 121,672 ns/iter (+/- 9,253) = 4889 MB/s +test sherlock::name_alt4_nocase ... bench: 187,887 ns/iter (+/- 26,932) = 3166 MB/s +test sherlock::name_alt5 ... bench: 90,732 ns/iter (+/- 7,251) = 6557 MB/s +test sherlock::name_alt5_nocase ... bench: 352,388 ns/iter (+/- 50,408) = 1688 MB/s +test sherlock::name_holmes ... bench: 33,836 ns/iter (+/- 3,388) = 17582 MB/s +test sherlock::name_holmes_nocase ... bench: 133,068 ns/iter (+/- 7,602) = 4470 MB/s +test sherlock::name_sherlock ... bench: 62,719 ns/iter (+/- 8,927) = 9485 MB/s +test sherlock::name_sherlock_holmes ... bench: 24,688 ns/iter (+/- 2,482) = 24098 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,793 ns/iter (+/- 12,078) = 6083 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,772 ns/iter (+/- 13,713) = 6211 MB/s +test sherlock::name_whitespace ... bench: 70,942 ns/iter (+/- 5,565) = 8386 MB/s +test sherlock::no_match_common ... bench: 14,645 ns/iter (+/- 1,430) = 40623 MB/s +test sherlock::no_match_really_common ... bench: 239,346 ns/iter (+/- 17,292) = 2485 MB/s +test sherlock::no_match_uncommon ... bench: 14,637 ns/iter (+/- 1,360) = 40645 MB/s +test sherlock::quotes ... bench: 367,945 ns/iter (+/- 35,370) = 1616 MB/s +test sherlock::repeated_class_negation ... bench: 74,367,046 ns/iter (+/- 1,114,875) = 7 MB/s +test sherlock::the_lower ... bench: 463,888 ns/iter (+/- 67,551) = 1282 MB/s +test sherlock::the_nocase ... bench: 520,822 ns/iter (+/- 76,131) = 1142 MB/s +test sherlock::the_upper ... bench: 37,354 ns/iter (+/- 4,110) = 15926 MB/s +test sherlock::the_whitespace ... bench: 922,312 ns/iter (+/- 95,082) = 645 MB/s +test sherlock::word_ending_n ... bench: 1,679,343 ns/iter (+/- 165,580) = 354 MB/s +test sherlock::words ... bench: 8,280,082 ns/iter (+/- 290,280) = 71 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 113.49s + diff --git a/bench/log/08-new-memmem/rust-before-02 b/bench/log/08-new-memmem/rust-before-02 new file mode 100644 index 0000000000..5d75102189 --- /dev/null +++ b/bench/log/08-new-memmem/rust-before-02 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 17 ns/iter (+/- 0) = 22941 MB/s +test misc::anchored_literal_long_non_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_short_match ... bench: 16 ns/iter (+/- 2) = 1625 MB/s +test misc::anchored_literal_short_non_match ... bench: 17 ns/iter (+/- 2) = 1529 MB/s +test misc::easy0_1K ... bench: 12 ns/iter (+/- 0) = 87583 MB/s +test misc::easy0_1MB ... bench: 14 ns/iter (+/- 1) = 74900214 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 1) = 5363 MB/s +test misc::easy0_32K ... bench: 12 ns/iter (+/- 1) = 2732916 MB/s +test misc::easy1_1K ... bench: 38 ns/iter (+/- 5) = 27473 MB/s +test misc::easy1_1MB ... bench: 40 ns/iter (+/- 5) = 26214900 MB/s +test misc::easy1_32 ... bench: 38 ns/iter (+/- 1) = 1368 MB/s +test misc::easy1_32K ... bench: 38 ns/iter (+/- 1) = 862842 MB/s +test misc::hard_1K ... bench: 49 ns/iter (+/- 4) = 21448 MB/s +test misc::hard_1MB ... bench: 52 ns/iter (+/- 7) = 20165442 MB/s +test misc::hard_32 ... bench: 49 ns/iter (+/- 1) = 1204 MB/s +test misc::hard_32K ... bench: 49 ns/iter (+/- 6) = 669285 MB/s +test misc::is_match_set ... bench: 59 ns/iter (+/- 2) = 423 MB/s +test misc::literal ... bench: 11 ns/iter (+/- 0) = 4636 MB/s +test misc::long_needle1 ... bench: 1,179 ns/iter (+/- 92) = 84818 MB/s +test misc::long_needle2 ... bench: 680,418 ns/iter (+/- 27,142) = 146 MB/s +test misc::match_class ... bench: 68 ns/iter (+/- 6) = 1191 MB/s +test misc::match_class_in_range ... bench: 13 ns/iter (+/- 1) = 6230 MB/s +test misc::match_class_unicode ... bench: 253 ns/iter (+/- 33) = 636 MB/s +test misc::matches_set ... 
bench: 453 ns/iter (+/- 65) = 55 MB/s +test misc::medium_1K ... bench: 13 ns/iter (+/- 1) = 80923 MB/s +test misc::medium_1MB ... bench: 17 ns/iter (+/- 2) = 61682588 MB/s +test misc::medium_32 ... bench: 13 ns/iter (+/- 0) = 4615 MB/s +test misc::medium_32K ... bench: 13 ns/iter (+/- 0) = 2522769 MB/s +test misc::no_exponential ... bench: 330 ns/iter (+/- 47) = 303 MB/s +test misc::not_literal ... bench: 88 ns/iter (+/- 12) = 579 MB/s +test misc::one_pass_long_prefix ... bench: 50 ns/iter (+/- 6) = 520 MB/s +test misc::one_pass_long_prefix_not ... bench: 50 ns/iter (+/- 7) = 520 MB/s +test misc::one_pass_short ... bench: 36 ns/iter (+/- 2) = 472 MB/s +test misc::one_pass_short_not ... bench: 38 ns/iter (+/- 5) = 447 MB/s +test misc::reallyhard2_1K ... bench: 55 ns/iter (+/- 7) = 18909 MB/s +test misc::reallyhard_1K ... bench: 1,590 ns/iter (+/- 64) = 661 MB/s +test misc::reallyhard_1MB ... bench: 1,581,975 ns/iter (+/- 126,709) = 662 MB/s +test misc::reallyhard_32 ... bench: 100 ns/iter (+/- 4) = 590 MB/s +test misc::reallyhard_32K ... bench: 49,323 ns/iter (+/- 7,063) = 664 MB/s +test misc::replace_all ... bench: 127 ns/iter (+/- 5) +test misc::reverse_suffix_no_quadratic ... bench: 4,171 ns/iter (+/- 624) = 1918 MB/s +test misc::short_haystack_1000000x ... bench: 88,960 ns/iter (+/- 7,710) = 89928 MB/s +test misc::short_haystack_100000x ... bench: 10,193 ns/iter (+/- 952) = 78486 MB/s +test misc::short_haystack_10000x ... bench: 5,798 ns/iter (+/- 636) = 13799 MB/s +test misc::short_haystack_1000x ... bench: 418 ns/iter (+/- 60) = 19165 MB/s +test misc::short_haystack_100x ... bench: 258 ns/iter (+/- 21) = 3143 MB/s +test misc::short_haystack_10x ... bench: 216 ns/iter (+/- 21) = 421 MB/s +test misc::short_haystack_1x ... bench: 205 ns/iter (+/- 29) = 92 MB/s +test misc::short_haystack_2x ... bench: 206 ns/iter (+/- 22) = 131 MB/s +test misc::short_haystack_3x ... bench: 205 ns/iter (+/- 29) = 170 MB/s +test misc::short_haystack_4x ... 
bench: 214 ns/iter (+/- 6) = 200 MB/s +test regexdna::find_new_lines ... bench: 12,039,715 ns/iter (+/- 410,515) = 422 MB/s +test regexdna::subst1 ... bench: 750,454 ns/iter (+/- 65,358) = 6773 MB/s +test regexdna::subst10 ... bench: 748,321 ns/iter (+/- 93,416) = 6793 MB/s +test regexdna::subst11 ... bench: 747,906 ns/iter (+/- 92,141) = 6796 MB/s +test regexdna::subst2 ... bench: 755,082 ns/iter (+/- 88,044) = 6732 MB/s +test regexdna::subst3 ... bench: 753,496 ns/iter (+/- 70,987) = 6746 MB/s +test regexdna::subst4 ... bench: 747,103 ns/iter (+/- 102,992) = 6804 MB/s +test regexdna::subst5 ... bench: 750,805 ns/iter (+/- 72,572) = 6770 MB/s +test regexdna::subst6 ... bench: 748,419 ns/iter (+/- 47,272) = 6792 MB/s +test regexdna::subst7 ... bench: 752,556 ns/iter (+/- 95,329) = 6754 MB/s +test regexdna::subst8 ... bench: 756,009 ns/iter (+/- 78,049) = 6724 MB/s +test regexdna::subst9 ... bench: 749,278 ns/iter (+/- 70,259) = 6784 MB/s +test regexdna::variant1 ... bench: 2,215,182 ns/iter (+/- 114,543) = 2294 MB/s +test regexdna::variant2 ... bench: 3,207,983 ns/iter (+/- 184,419) = 1584 MB/s +test regexdna::variant3 ... bench: 3,791,716 ns/iter (+/- 192,185) = 1340 MB/s +test regexdna::variant4 ... bench: 3,809,934 ns/iter (+/- 222,872) = 1334 MB/s +test regexdna::variant5 ... bench: 2,651,345 ns/iter (+/- 183,673) = 1917 MB/s +test regexdna::variant6 ... bench: 2,635,566 ns/iter (+/- 170,288) = 1928 MB/s +test regexdna::variant7 ... bench: 3,265,519 ns/iter (+/- 234,923) = 1556 MB/s +test regexdna::variant8 ... bench: 3,340,830 ns/iter (+/- 183,129) = 1521 MB/s +test regexdna::variant9 ... bench: 3,267,141 ns/iter (+/- 185,543) = 1555 MB/s +test rust_compile::compile_huge ... bench: 94,368 ns/iter (+/- 13,293) +test rust_compile::compile_huge_bytes ... bench: 5,616,594 ns/iter (+/- 243,462) +test rust_compile::compile_huge_full ... bench: 10,862,100 ns/iter (+/- 260,207) +test rust_compile::compile_simple ... 
bench: 3,463 ns/iter (+/- 350) +test rust_compile::compile_simple_bytes ... bench: 3,542 ns/iter (+/- 504) +test rust_compile::compile_simple_full ... bench: 20,562 ns/iter (+/- 3,117) +test rust_compile::compile_small ... bench: 8,325 ns/iter (+/- 641) +test rust_compile::compile_small_bytes ... bench: 153,450 ns/iter (+/- 11,174) +test rust_compile::compile_small_full ... bench: 315,871 ns/iter (+/- 33,828) +test sherlock::before_after_holmes ... bench: 906,423 ns/iter (+/- 34,801) = 656 MB/s +test sherlock::before_holmes ... bench: 64,457 ns/iter (+/- 8,343) = 9229 MB/s +test sherlock::everything_greedy ... bench: 2,058,675 ns/iter (+/- 208,885) = 288 MB/s +test sherlock::everything_greedy_nl ... bench: 810,638 ns/iter (+/- 39,955) = 733 MB/s +test sherlock::holmes_cochar_watson ... bench: 106,048 ns/iter (+/- 8,158) = 5610 MB/s +test sherlock::holmes_coword_watson ... bench: 482,243 ns/iter (+/- 30,955) = 1233 MB/s +test sherlock::ing_suffix ... bench: 385,767 ns/iter (+/- 24,902) = 1542 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,060,762 ns/iter (+/- 94,273) = 560 MB/s +test sherlock::letters ... bench: 22,127,007 ns/iter (+/- 467,539) = 26 MB/s +test sherlock::letters_lower ... bench: 21,719,871 ns/iter (+/- 459,587) = 27 MB/s +test sherlock::letters_upper ... bench: 1,753,028 ns/iter (+/- 172,914) = 339 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,091 ns/iter (+/- 109,954) = 663 MB/s +test sherlock::name_alt1 ... bench: 31,636 ns/iter (+/- 2,323) = 18805 MB/s +test sherlock::name_alt2 ... bench: 85,898 ns/iter (+/- 10,486) = 6926 MB/s +test sherlock::name_alt3 ... bench: 97,104 ns/iter (+/- 8,851) = 6126 MB/s +test sherlock::name_alt3_nocase ... bench: 381,487 ns/iter (+/- 14,829) = 1559 MB/s +test sherlock::name_alt4 ... bench: 121,301 ns/iter (+/- 17,178) = 4904 MB/s +test sherlock::name_alt4_nocase ... bench: 187,262 ns/iter (+/- 17,478) = 3177 MB/s +test sherlock::name_alt5 ... 
bench: 90,773 ns/iter (+/- 2,791) = 6554 MB/s +test sherlock::name_alt5_nocase ... bench: 351,900 ns/iter (+/- 40,408) = 1690 MB/s +test sherlock::name_holmes ... bench: 34,767 ns/iter (+/- 3,334) = 17112 MB/s +test sherlock::name_holmes_nocase ... bench: 132,953 ns/iter (+/- 15,747) = 4474 MB/s +test sherlock::name_sherlock ... bench: 66,566 ns/iter (+/- 6,822) = 8937 MB/s +test sherlock::name_sherlock_holmes ... bench: 24,481 ns/iter (+/- 2,330) = 24301 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,531 ns/iter (+/- 12,331) = 6099 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,808 ns/iter (+/- 13,250) = 6209 MB/s +test sherlock::name_whitespace ... bench: 71,342 ns/iter (+/- 9,877) = 8339 MB/s +test sherlock::no_match_common ... bench: 14,704 ns/iter (+/- 1,241) = 40460 MB/s +test sherlock::no_match_really_common ... bench: 238,731 ns/iter (+/- 31,179) = 2492 MB/s +test sherlock::no_match_uncommon ... bench: 14,620 ns/iter (+/- 1,250) = 40693 MB/s +test sherlock::quotes ... bench: 367,740 ns/iter (+/- 10,107) = 1617 MB/s +test sherlock::repeated_class_negation ... bench: 76,315,217 ns/iter (+/- 940,903) = 7 MB/s +test sherlock::the_lower ... bench: 464,322 ns/iter (+/- 14,654) = 1281 MB/s +test sherlock::the_nocase ... bench: 519,069 ns/iter (+/- 59,161) = 1146 MB/s +test sherlock::the_upper ... bench: 37,575 ns/iter (+/- 2,455) = 15833 MB/s +test sherlock::the_whitespace ... bench: 939,412 ns/iter (+/- 60,941) = 633 MB/s +test sherlock::word_ending_n ... bench: 1,681,192 ns/iter (+/- 156,265) = 353 MB/s +test sherlock::words ... bench: 8,213,141 ns/iter (+/- 322,533) = 72 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 94.52s + diff --git a/bench/src/bench.rs b/bench/src/bench.rs index ec0e5478ab..a1b03deb14 100644 --- a/bench/src/bench.rs +++ b/bench/src/bench.rs @@ -1,29 +1,21 @@ // Enable the benchmarking harness. 
#![feature(test)] +// It's too annoying to carefully define macros based on which regex engines +// have which benchmarks, so just ignore these warnings. +#![allow(unused_macros)] -#[macro_use] -extern crate lazy_static; -#[macro_use] -extern crate cfg_if; - -#[cfg(not(any(feature = "re-rust", feature = "re-rust-bytes")))] -extern crate libc; - -extern crate regex_syntax; extern crate test; +use cfg_if::cfg_if; + cfg_if! { if #[cfg(feature = "re-pcre1")] { - extern crate libpcre_sys; pub use ffi::pcre1::Regex; } else if #[cfg(feature = "re-onig")] { - extern crate onig; pub use ffi::onig::Regex; } else if #[cfg(any(feature = "re-rust"))] { - extern crate regex; pub use regex::{Regex, RegexSet}; } else if #[cfg(feature = "re-rust-bytes")] { - extern crate regex; pub use regex::bytes::{Regex, RegexSet}; } else if #[cfg(feature = "re-re2")] { pub use ffi::re2::Regex; @@ -51,7 +43,7 @@ cfg_if! { // native and dynamic regexes. macro_rules! regex { ($re:expr) => { - ::Regex::new(&$re.to_owned()).unwrap() + crate::Regex::new(&$re.to_owned()).unwrap() }; } @@ -72,7 +64,7 @@ cfg_if! { // regex accepts in its `is_match` and `find_iter` methods. macro_rules! text { ($text:expr) => {{ - use ffi::tcl::Text; + use crate::ffi::tcl::Text; Text::new($text) }} } @@ -148,6 +140,7 @@ macro_rules! bench_is_match { ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { #[bench] fn $name(b: &mut Bencher) { + use lazy_static::lazy_static; use std::sync::Mutex; // Why do we use lazy_static here? It seems sensible to just @@ -192,6 +185,7 @@ macro_rules! bench_find { ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => { #[bench] fn $name(b: &mut Bencher) { + use lazy_static::lazy_static; use std::sync::Mutex; lazy_static! { @@ -224,6 +218,7 @@ macro_rules! bench_captures { #[cfg(feature = "re-rust")] #[bench] fn $name(b: &mut Bencher) { + use lazy_static::lazy_static; use std::sync::Mutex; lazy_static! { @@ -246,7 +241,9 @@ macro_rules! 
bench_is_match_set { ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { #[bench] fn $name(b: &mut Bencher) { + use lazy_static::lazy_static; use std::sync::Mutex; + lazy_static! { static ref RE: Mutex = Mutex::new($re); static ref TEXT: Mutex = Mutex::new(text!($haystack)); @@ -272,7 +269,9 @@ macro_rules! bench_matches_set { ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { #[bench] fn $name(b: &mut Bencher) { + use lazy_static::lazy_static; use std::sync::Mutex; + lazy_static! { static ref RE: Mutex = Mutex::new($re); static ref TEXT: Mutex = Mutex::new(text!($haystack)); diff --git a/bench/src/ffi/pcre2.rs b/bench/src/ffi/pcre2.rs index 59b9cb2614..6fc5f6de0b 100644 --- a/bench/src/ffi/pcre2.rs +++ b/bench/src/ffi/pcre2.rs @@ -4,7 +4,7 @@ use std::fmt; use std::ptr; use std::str; -use libc::{c_int, c_void, size_t, uint32_t, uint8_t}; +use libc::{c_int, c_void, size_t}; pub struct Regex { code: *mut code, @@ -142,10 +142,10 @@ impl fmt::Debug for Error { // PCRE2 FFI. We only wrap the bits we need. 
-const PCRE2_UCP: uint32_t = 0x00020000; -const PCRE2_UTF: uint32_t = 0x00080000; -const PCRE2_NO_UTF_CHECK: uint32_t = 0x40000000; -const PCRE2_JIT_COMPLETE: uint32_t = 0x00000001; +const PCRE2_UCP: u32 = 0x00020000; +const PCRE2_UTF: u32 = 0x00080000; +const PCRE2_NO_UTF_CHECK: u32 = 0x40000000; +const PCRE2_JIT_COMPLETE: u32 = 0x00000001; const PCRE2_ERROR_NOMATCH: c_int = -1; type code = c_void; @@ -160,9 +160,9 @@ type match_context = c_void; // unused extern "C" { fn pcre2_compile_8( - pattern: *const uint8_t, + pattern: *const u8, len: size_t, - options: uint32_t, + options: u32, error_code: *mut c_int, error_offset: *mut size_t, context: *mut compile_context, @@ -180,21 +180,21 @@ extern "C" { fn pcre2_get_ovector_pointer_8(match_data: *mut match_data) -> *mut size_t; - fn pcre2_jit_compile_8(code: *const code, options: uint32_t) -> c_int; + fn pcre2_jit_compile_8(code: *const code, options: u32) -> c_int; fn pcre2_jit_match_8( code: *const code, - subject: *const uint8_t, + subject: *const u8, length: size_t, startoffset: size_t, - options: uint32_t, + options: u32, match_data: *mut match_data, match_context: *mut match_context, ) -> c_int; fn pcre2_get_error_message_8( error_code: c_int, - buf: *mut uint8_t, + buf: *mut u8, buflen: size_t, ) -> c_int; } diff --git a/bench/src/main.rs b/bench/src/main.rs index b8bab8d537..7da99107d1 100644 --- a/bench/src/main.rs +++ b/bench/src/main.rs @@ -1,18 +1,3 @@ -extern crate docopt; -extern crate libc; -#[cfg(feature = "re-pcre1")] -extern crate libpcre_sys; -extern crate memmap; -#[cfg(feature = "re-onig")] -extern crate onig; -#[cfg(any(feature = "re-rust", feature = "re-rust-bytes",))] -extern crate regex; -#[cfg(feature = "re-rust")] -extern crate regex_syntax; -extern crate serde; -#[macro_use] -extern crate serde_derive; - use std::fs::File; use std::str; @@ -39,7 +24,7 @@ Options: -h, --help Show this usage message. 
"; -#[derive(Debug, Deserialize)] +#[derive(Debug, serde::Deserialize)] struct Args { arg_pattern: String, arg_file: String, diff --git a/bench/src/misc.rs b/bench/src/misc.rs index 9e7b2c9c76..50bcef0fbc 100644 --- a/bench/src/misc.rs +++ b/bench/src/misc.rs @@ -5,8 +5,8 @@ use std::iter::repeat; use test::Bencher; #[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))] -use RegexSet; -use {Regex, Text}; +use crate::RegexSet; +use crate::{Regex, Text}; #[cfg(not(feature = "re-onig"))] #[cfg(not(feature = "re-pcre1"))] diff --git a/bench/src/regexdna.rs b/bench/src/regexdna.rs index 45ace9e297..7a35aa4563 100644 --- a/bench/src/regexdna.rs +++ b/bench/src/regexdna.rs @@ -1,6 +1,6 @@ use test::Bencher; -use {Regex, Text}; +use crate::{Regex, Text}; // USAGE: dna!(name, pattern, count) // diff --git a/bench/src/sherlock.rs b/bench/src/sherlock.rs index e7328df573..8feb5862ef 100644 --- a/bench/src/sherlock.rs +++ b/bench/src/sherlock.rs @@ -1,6 +1,6 @@ use test::Bencher; -use {Regex, Text}; +use crate::{Regex, Text}; // USAGE: sherlock!(name, pattern, count) // diff --git a/examples/shootout-regex-dna-bytes.rs b/examples/shootout-regex-dna-bytes.rs index ac4c1153b6..773fd9ba8d 100644 --- a/examples/shootout-regex-dna-bytes.rs +++ b/examples/shootout-regex-dna-bytes.rs @@ -5,8 +5,6 @@ // contributed by TeXitoi // contributed by BurntSushi -extern crate regex; - use std::io::{self, Read}; use std::sync::Arc; use std::thread; diff --git a/examples/shootout-regex-dna-cheat.rs b/examples/shootout-regex-dna-cheat.rs index c7395b77be..1bde7ab1ff 100644 --- a/examples/shootout-regex-dna-cheat.rs +++ b/examples/shootout-regex-dna-cheat.rs @@ -10,8 +10,6 @@ // replacing them with a single linear scan. i.e., it re-implements // `replace_all`. As a result, this is around 25% faster. 
---AG -extern crate regex; - use std::io::{self, Read}; use std::sync::Arc; use std::thread; diff --git a/examples/shootout-regex-dna-replace.rs b/examples/shootout-regex-dna-replace.rs index 681e077ba3..20694e06f3 100644 --- a/examples/shootout-regex-dna-replace.rs +++ b/examples/shootout-regex-dna-replace.rs @@ -1,5 +1,3 @@ -extern crate regex; - use std::io::{self, Read}; macro_rules! regex { diff --git a/examples/shootout-regex-dna-single-cheat.rs b/examples/shootout-regex-dna-single-cheat.rs index 04ed05a031..70a979c6d4 100644 --- a/examples/shootout-regex-dna-single-cheat.rs +++ b/examples/shootout-regex-dna-single-cheat.rs @@ -5,8 +5,6 @@ // contributed by TeXitoi // contributed by BurntSushi -extern crate regex; - use std::io::{self, Read}; macro_rules! regex { diff --git a/examples/shootout-regex-dna-single.rs b/examples/shootout-regex-dna-single.rs index a70c711011..b474059600 100644 --- a/examples/shootout-regex-dna-single.rs +++ b/examples/shootout-regex-dna-single.rs @@ -5,8 +5,6 @@ // contributed by TeXitoi // contributed by BurntSushi -extern crate regex; - use std::io::{self, Read}; macro_rules! regex { diff --git a/examples/shootout-regex-dna.rs b/examples/shootout-regex-dna.rs index 4527422714..b96518e4c4 100644 --- a/examples/shootout-regex-dna.rs +++ b/examples/shootout-regex-dna.rs @@ -5,8 +5,6 @@ // contributed by TeXitoi // contributed by BurntSushi -extern crate regex; - use std::io::{self, Read}; use std::sync::Arc; use std::thread; diff --git a/regex-capi/Cargo.toml b/regex-capi/Cargo.toml index 6d9807fed1..47938092d8 100644 --- a/regex-capi/Cargo.toml +++ b/regex-capi/Cargo.toml @@ -11,6 +11,7 @@ description = """ A C API for Rust's regular expression library. """ workspace = ".." 
+edition = "2018" [lib] name = "rure" diff --git a/regex-capi/src/error.rs b/regex-capi/src/error.rs index 95c9fdb8ef..2ca3fae804 100644 --- a/regex-capi/src/error.rs +++ b/regex-capi/src/error.rs @@ -36,7 +36,7 @@ impl Error { } impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.kind { ErrorKind::None => write!(f, "no error"), ErrorKind::Str(ref e) => e.fmt(f), diff --git a/regex-capi/src/lib.rs b/regex-capi/src/lib.rs index 8a952b9896..aa03c60aff 100644 --- a/regex-capi/src/lib.rs +++ b/regex-capi/src/lib.rs @@ -1,10 +1,7 @@ -extern crate libc; -extern crate regex; - #[macro_use] mod macros; mod error; mod rure; -pub use error::*; -pub use rure::*; +pub use crate::error::*; +pub use crate::rure::*; diff --git a/regex-capi/src/rure.rs b/regex-capi/src/rure.rs index 467c8aefa4..dc49b20a33 100644 --- a/regex-capi/src/rure.rs +++ b/regex-capi/src/rure.rs @@ -8,7 +8,7 @@ use std::str; use libc::{c_char, size_t}; use regex::bytes; -use error::{Error, ErrorKind}; +use crate::error::{Error, ErrorKind}; const RURE_FLAG_CASEI: u32 = 1 << 0; const RURE_FLAG_MULTI: u32 = 1 << 1; diff --git a/regex-debug/Cargo.toml b/regex-debug/Cargo.toml index d174b47e37..34a62f87bf 100644 --- a/regex-debug/Cargo.toml +++ b/regex-debug/Cargo.toml @@ -9,10 +9,10 @@ documentation = "https://docs.rs/regex" homepage = "https://github.com/rust-lang/regex" description = "A tool useful for debugging regular expressions." workspace = ".." +edition = "2018" [dependencies] docopt = "1" regex = { version = "1.1", path = ".." 
} regex-syntax = { version = "0.6", path = "../regex-syntax" } -serde = "1" -serde_derive = "1" +serde = { version = "1", features = ["derive"] } diff --git a/regex-debug/src/main.rs b/regex-debug/src/main.rs index f6fd63c7e7..a7dd453e1f 100644 --- a/regex-debug/src/main.rs +++ b/regex-debug/src/main.rs @@ -1,10 +1,3 @@ -extern crate docopt; -extern crate regex; -extern crate regex_syntax as syntax; -extern crate serde; -#[macro_use] -extern crate serde_derive; - use std::error; use std::io::{self, Write}; use std::process; @@ -12,8 +5,8 @@ use std::result; use docopt::Docopt; use regex::internal::{Compiler, LiteralSearcher}; -use syntax::hir::literal::Literals; -use syntax::hir::Hir; +use regex_syntax::hir::literal::Literals; +use regex_syntax::hir::Hir; const USAGE: &'static str = " Usage: @@ -54,7 +47,7 @@ Options: --quiet Show less output. "; -#[derive(Deserialize)] +#[derive(serde::Deserialize)] struct Args { cmd_ast: bool, cmd_hir: bool, @@ -127,7 +120,7 @@ fn run(args: &Args) -> Result<()> { } fn cmd_ast(args: &Args) -> Result<()> { - use syntax::ast::parse::Parser; + use regex_syntax::ast::parse::Parser; let mut parser = Parser::new(); let ast = parser.parse(&args.arg_pattern)?; @@ -136,7 +129,7 @@ fn cmd_ast(args: &Args) -> Result<()> { } fn cmd_hir(args: &Args) -> Result<()> { - use syntax::ParserBuilder; + use regex_syntax::ParserBuilder; let mut parser = ParserBuilder::new().allow_invalid_utf8(false).build(); let hir = parser.parse(&args.arg_pattern)?; @@ -225,9 +218,9 @@ fn cmd_compile(args: &Args) -> Result<()> { } fn cmd_utf8_ranges(args: &Args) -> Result<()> { - use syntax::hir::{self, HirKind}; - use syntax::utf8::Utf8Sequences; - use syntax::ParserBuilder; + use regex_syntax::hir::{self, HirKind}; + use regex_syntax::utf8::Utf8Sequences; + use regex_syntax::ParserBuilder; let hir = ParserBuilder::new() .build() @@ -258,9 +251,9 @@ fn cmd_utf8_ranges(args: &Args) -> Result<()> { } fn cmd_utf8_ranges_rev(args: &Args) -> Result<()> { - use 
syntax::hir::{self, HirKind}; - use syntax::utf8::Utf8Sequences; - use syntax::ParserBuilder; + use regex_syntax::hir::{self, HirKind}; + use regex_syntax::utf8::Utf8Sequences; + use regex_syntax::ParserBuilder; let hir = ParserBuilder::new() .build() @@ -334,7 +327,7 @@ impl Args { } fn parse(re: &str) -> Result { - use syntax::ParserBuilder; + use regex_syntax::ParserBuilder; ParserBuilder::new() .allow_invalid_utf8(true) .build() diff --git a/regex-syntax/Cargo.toml b/regex-syntax/Cargo.toml index 84611783a5..283377793a 100644 --- a/regex-syntax/Cargo.toml +++ b/regex-syntax/Cargo.toml @@ -8,6 +8,7 @@ documentation = "https://docs.rs/regex-syntax" homepage = "https://github.com/rust-lang/regex" description = "A regular expression parser." workspace = ".." +edition = "2018" # Features are documented in the "Crate features" section of the crate docs: # https://docs.rs/regex-syntax/*/#crate-features diff --git a/regex-syntax/benches/bench.rs b/regex-syntax/benches/bench.rs index ba7f81c018..d4703d4fc1 100644 --- a/regex-syntax/benches/bench.rs +++ b/regex-syntax/benches/bench.rs @@ -1,6 +1,5 @@ #![feature(test)] -extern crate regex_syntax; extern crate test; use regex_syntax::Parser; diff --git a/regex-syntax/src/ast/mod.rs b/regex-syntax/src/ast/mod.rs index 7179f2d403..9b9127b1fc 100644 --- a/regex-syntax/src/ast/mod.rs +++ b/regex-syntax/src/ast/mod.rs @@ -6,7 +6,7 @@ use std::cmp::Ordering; use std::error; use std::fmt; -pub use ast::visitor::{visit, Visitor}; +pub use crate::ast::visitor::{visit, Visitor}; pub mod parse; pub mod print; @@ -220,13 +220,13 @@ impl error::Error for Error { } impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - ::error::Formatter::from(self).fmt(f) + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + crate::error::Formatter::from(self).fmt(f) } } impl fmt::Display for ErrorKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> 
fmt::Result { use self::ErrorKind::*; match *self { CaptureLimitExceeded => write!( @@ -328,7 +328,7 @@ pub struct Span { } impl fmt::Debug for Span { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "Span({:?}, {:?})", self.start, self.end) } } @@ -361,7 +361,7 @@ pub struct Position { } impl fmt::Debug for Position { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "Position(o: {:?}, l: {:?}, c: {:?})", @@ -542,8 +542,8 @@ impl Ast { /// This implementation uses constant stack space and heap space proportional /// to the size of the `Ast`. impl fmt::Display for Ast { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use ast::print::Printer; + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use crate::ast::print::Printer; Printer::new().print(self, f) } } diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index 55c5f79898..e62a7c249f 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -7,10 +7,10 @@ use std::cell::{Cell, RefCell}; use std::mem; use std::result; -use ast::{self, Ast, Position, Span}; -use either::Either; +use crate::ast::{self, Ast, Position, Span}; +use crate::either::Either; -use is_meta_character; +use crate::is_meta_character; type Result = result::Result; @@ -58,10 +58,10 @@ impl Primitive { /// then return an error. fn into_class_set_item>( self, - p: &ParserI

, + p: &ParserI<'_, P>, ) -> Result { use self::Primitive::*; - use ast::ClassSetItem; + use crate::ast::ClassSetItem; match self { Literal(lit) => Ok(ClassSetItem::Literal(lit)), @@ -79,7 +79,7 @@ impl Primitive { /// dot), then return an error. fn into_class_literal>( self, - p: &ParserI

, + p: &ParserI<'_, P>, ) -> Result { use self::Primitive::*; @@ -2137,7 +2137,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// A type that traverses a fully parsed Ast and checks whether its depth /// exceeds the specified nesting limit. If it does, then an error is returned. #[derive(Debug)] -struct NestLimiter<'p, 's: 'p, P: 'p + 's> { +struct NestLimiter<'p, 's, P> { /// The parser that is checking the nest limit. p: &'p ParserI<'s, P>, /// The current depth while walking an Ast. @@ -2312,7 +2312,7 @@ mod tests { use std::ops::Range; use super::{Parser, ParserBuilder, ParserI, Primitive}; - use ast::{self, Ast, Position, Span}; + use crate::ast::{self, Ast, Position, Span}; // Our own assert_eq, which has slightly better formatting (but honestly // still kind of crappy). @@ -2357,21 +2357,24 @@ mod tests { str.to_string() } - fn parser(pattern: &str) -> ParserI { + fn parser(pattern: &str) -> ParserI<'_, Parser> { ParserI::new(Parser::new(), pattern) } - fn parser_octal(pattern: &str) -> ParserI { + fn parser_octal(pattern: &str) -> ParserI<'_, Parser> { let parser = ParserBuilder::new().octal(true).build(); ParserI::new(parser, pattern) } - fn parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI { + fn parser_nest_limit( + pattern: &str, + nest_limit: u32, + ) -> ParserI<'_, Parser> { let p = ParserBuilder::new().nest_limit(nest_limit).build(); ParserI::new(p, pattern) } - fn parser_ignore_whitespace(pattern: &str) -> ParserI { + fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> { let p = ParserBuilder::new().ignore_whitespace(true).build(); ParserI::new(p, pattern) } diff --git a/regex-syntax/src/ast/print.rs b/regex-syntax/src/ast/print.rs index 1b9bc41306..283ce4c579 100644 --- a/regex-syntax/src/ast/print.rs +++ b/regex-syntax/src/ast/print.rs @@ -4,8 +4,8 @@ This module provides a regular expression printer for `Ast`. 
use std::fmt; -use ast::visitor::{self, Visitor}; -use ast::{self, Ast}; +use crate::ast::visitor::{self, Visitor}; +use crate::ast::{self, Ast}; /// A builder for constructing a printer. /// @@ -86,7 +86,7 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { } fn visit_post(&mut self, ast: &Ast) -> fmt::Result { - use ast::Class; + use crate::ast::Class; match *ast { Ast::Empty(_) => Ok(()), @@ -126,7 +126,7 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { &mut self, ast: &ast::ClassSetItem, ) -> Result<(), Self::Err> { - use ast::ClassSetItem::*; + use crate::ast::ClassSetItem::*; match *ast { Empty(_) => Ok(()), @@ -155,7 +155,7 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { impl<'p, W: fmt::Write> Writer<'p, W> { fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result { - use ast::GroupKind::*; + use crate::ast::GroupKind::*; match ast.kind { CaptureIndex(_) => self.wtr.write_str("("), CaptureName(ref x) => { @@ -178,7 +178,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> { } fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result { - use ast::RepetitionKind::*; + use crate::ast::RepetitionKind::*; match ast.op.kind { ZeroOrOne if ast.greedy => self.wtr.write_str("?"), ZeroOrOne => self.wtr.write_str("??"), @@ -200,7 +200,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> { &mut self, ast: &ast::RepetitionRange, ) -> fmt::Result { - use ast::RepetitionRange::*; + use crate::ast::RepetitionRange::*; match *ast { Exactly(x) => write!(self.wtr, "{{{}}}", x), AtLeast(x) => write!(self.wtr, "{{{},}}", x), @@ -209,7 +209,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> { } fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result { - use ast::LiteralKind::*; + use crate::ast::LiteralKind::*; match ast.kind { Verbatim => self.wtr.write_char(ast.c), @@ -256,7 +256,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> { } fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result { - use ast::AssertionKind::*; + use crate::ast::AssertionKind::*; match ast.kind 
{ StartLine => self.wtr.write_str("^"), EndLine => self.wtr.write_str("$"), @@ -275,7 +275,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> { } fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result { - use ast::{Flag, FlagsItemKind}; + use crate::ast::{Flag, FlagsItemKind}; for item in &ast.items { match item.kind { @@ -315,7 +315,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> { &mut self, ast: &ast::ClassSetBinaryOpKind, ) -> fmt::Result { - use ast::ClassSetBinaryOpKind::*; + use crate::ast::ClassSetBinaryOpKind::*; match *ast { Intersection => self.wtr.write_str("&&"), Difference => self.wtr.write_str("--"), @@ -324,7 +324,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> { } fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result { - use ast::ClassPerlKind::*; + use crate::ast::ClassPerlKind::*; match ast.kind { Digit if ast.negated => self.wtr.write_str(r"\D"), Digit => self.wtr.write_str(r"\d"), @@ -336,7 +336,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> { } fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result { - use ast::ClassAsciiKind::*; + use crate::ast::ClassAsciiKind::*; match ast.kind { Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"), Alnum => self.wtr.write_str("[:alnum:]"), @@ -370,8 +370,8 @@ impl<'p, W: fmt::Write> Writer<'p, W> { } fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result { - use ast::ClassUnicodeKind::*; - use ast::ClassUnicodeOpKind::*; + use crate::ast::ClassUnicodeKind::*; + use crate::ast::ClassUnicodeOpKind::*; if ast.negated { self.wtr.write_str(r"\P")?; @@ -397,7 +397,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> { #[cfg(test)] mod tests { use super::Printer; - use ast::parse::ParserBuilder; + use crate::ast::parse::ParserBuilder; fn roundtrip(given: &str) { roundtrip_with(|b| b, given); diff --git a/regex-syntax/src/ast/visitor.rs b/regex-syntax/src/ast/visitor.rs index 3eaa4b0a46..a0d1e7dd5d 100644 --- a/regex-syntax/src/ast/visitor.rs +++ b/regex-syntax/src/ast/visitor.rs @@ -1,6 +1,6 @@ 
use std::fmt; -use ast::{self, Ast}; +use crate::ast::{self, Ast}; /// A trait for visiting an abstract syntax tree (AST) in depth first order. /// @@ -478,7 +478,7 @@ impl<'a> ClassInduct<'a> { } impl<'a> fmt::Debug for ClassFrame<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let x = match *self { ClassFrame::Union { .. } => "Union", ClassFrame::Binary { .. } => "Binary", @@ -490,7 +490,7 @@ impl<'a> fmt::Debug for ClassFrame<'a> { } impl<'a> fmt::Debug for ClassInduct<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let x = match *self { ClassInduct::Item(it) => match *it { ast::ClassSetItem::Empty(_) => "Item(Empty)", diff --git a/regex-syntax/src/error.rs b/regex-syntax/src/error.rs index 93c2b0dd93..71cfa426a8 100644 --- a/regex-syntax/src/error.rs +++ b/regex-syntax/src/error.rs @@ -3,8 +3,8 @@ use std::error; use std::fmt; use std::result; -use ast; -use hir; +use crate::ast; +use crate::hir; /// A type alias for dealing with errors returned by this crate. pub type Result = result::Result; @@ -52,7 +52,7 @@ impl error::Error for Error { } impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Error::Parse(ref x) => x.fmt(f), Error::Translate(ref x) => x.fmt(f), @@ -67,7 +67,7 @@ impl fmt::Display for Error { /// readable format. Most of its complexity is from interspersing notational /// markers pointing out the position where an error occurred. #[derive(Debug)] -pub struct Formatter<'e, E: 'e> { +pub struct Formatter<'e, E> { /// The original regex pattern in which the error occurred. pattern: &'e str, /// The error kind. It must impl fmt::Display. 
@@ -102,7 +102,7 @@ impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> { } impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let spans = Spans::from_formatter(self); if self.pattern.contains('\n') { let divider = repeat_char('~', 79); @@ -286,7 +286,7 @@ fn repeat_char(c: char, count: usize) -> String { #[cfg(test)] mod tests { - use ast::parse::Parser; + use crate::ast::parse::Parser; fn assert_panic_message(pattern: &str, expected_msg: &str) -> () { let result = Parser::new().parse(pattern); diff --git a/regex-syntax/src/hir/interval.rs b/regex-syntax/src/hir/interval.rs index 51eed52595..cfaa2cb45e 100644 --- a/regex-syntax/src/hir/interval.rs +++ b/regex-syntax/src/hir/interval.rs @@ -4,7 +4,7 @@ use std::fmt::Debug; use std::slice; use std::u8; -use unicode; +use crate::unicode; // This module contains an *internal* implementation of interval sets. // @@ -60,7 +60,7 @@ impl IntervalSet { /// Return an iterator over all intervals in this set. /// /// The iterator yields intervals in ascending order. - pub fn iter(&self) -> IntervalSetIter { + pub fn iter(&self) -> IntervalSetIter<'_, I> { IntervalSetIter(self.ranges.iter()) } @@ -322,7 +322,7 @@ impl IntervalSet { /// An iterator over intervals. #[derive(Debug)] -pub struct IntervalSetIter<'a, I: 'a>(slice::Iter<'a, I>); +pub struct IntervalSetIter<'a, I>(slice::Iter<'a, I>); impl<'a, I> Iterator for IntervalSetIter<'a, I> { type Item = &'a I; diff --git a/regex-syntax/src/hir/literal/mod.rs b/regex-syntax/src/hir/literal/mod.rs index 3ba225c657..25ee88b065 100644 --- a/regex-syntax/src/hir/literal/mod.rs +++ b/regex-syntax/src/hir/literal/mod.rs @@ -8,7 +8,7 @@ use std::iter; use std::mem; use std::ops; -use hir::{self, Hir, HirKind}; +use crate::hir::{self, Hir, HirKind}; /// A set of literal byte strings extracted from a regular expression. 
/// @@ -838,7 +838,7 @@ fn alternate_literals( } impl fmt::Debug for Literals { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Literals") .field("lits", &self.lits) .field("limit_size", &self.limit_size) @@ -882,7 +882,7 @@ impl PartialOrd for Literal { } impl fmt::Debug for Literal { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if self.is_cut() { write!(f, "Cut({})", escape_unicode(&self.v)) } else { @@ -977,8 +977,8 @@ mod tests { use std::fmt; use super::{escape_bytes, Literal, Literals}; - use hir::Hir; - use ParserBuilder; + use crate::hir::Hir; + use crate::ParserBuilder; // To make test failures easier to read. #[derive(Debug, Eq, PartialEq)] @@ -1017,7 +1017,7 @@ mod tests { } impl fmt::Debug for ULiteral { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if self.is_cut() { write!(f, "Cut({})", self.v) } else { diff --git a/regex-syntax/src/hir/mod.rs b/regex-syntax/src/hir/mod.rs index 53d90b8425..4969f1265a 100644 --- a/regex-syntax/src/hir/mod.rs +++ b/regex-syntax/src/hir/mod.rs @@ -8,12 +8,12 @@ use std::fmt; use std::result; use std::u8; -use ast::Span; -use hir::interval::{Interval, IntervalSet, IntervalSetIter}; -use unicode; +use crate::ast::Span; +use crate::hir::interval::{Interval, IntervalSet, IntervalSetIter}; +use crate::unicode; -pub use hir::visitor::{visit, Visitor}; -pub use unicode::CaseFoldError; +pub use crate::hir::visitor::{visit, Visitor}; +pub use crate::unicode::CaseFoldError; mod interval; pub mod literal; @@ -123,13 +123,13 @@ impl error::Error for Error { } impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - ::error::Formatter::from(self).fmt(f) + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + crate::error::Formatter::from(self).fmt(f) } } impl 
fmt::Display for ErrorKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // TODO: Remove this on the next breaking semver release. #[allow(deprecated)] f.write_str(self.description()) @@ -727,8 +727,8 @@ impl HirKind { /// This implementation uses constant stack space and heap space proportional /// to the size of the `Hir`. impl fmt::Display for Hir { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use hir::print::Printer; + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use crate::hir::print::Printer; Printer::new().print(self, f) } } @@ -859,7 +859,7 @@ impl ClassUnicode { /// Return an iterator over all ranges in this class. /// /// The iterator yields ranges in ascending order. - pub fn iter(&self) -> ClassUnicodeIter { + pub fn iter(&self) -> ClassUnicodeIter<'_> { ClassUnicodeIter(self.set.iter()) } @@ -972,7 +972,7 @@ pub struct ClassUnicodeRange { } impl fmt::Debug for ClassUnicodeRange { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let start = if !self.start.is_whitespace() && !self.start.is_control() { self.start.to_string() @@ -1102,7 +1102,7 @@ impl ClassBytes { /// Return an iterator over all ranges in this class. /// /// The iterator yields ranges in ascending order. 
- pub fn iter(&self) -> ClassBytesIter { + pub fn iter(&self) -> ClassBytesIter<'_> { ClassBytesIter(self.set.iter()) } @@ -1258,7 +1258,7 @@ impl ClassBytesRange { } impl fmt::Debug for ClassBytesRange { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut debug = f.debug_struct("ClassBytesRange"); if self.start <= 0x7F { debug.field("start", &(self.start as char)); diff --git a/regex-syntax/src/hir/print.rs b/regex-syntax/src/hir/print.rs index eb44b9381f..ff18c6e92d 100644 --- a/regex-syntax/src/hir/print.rs +++ b/regex-syntax/src/hir/print.rs @@ -4,9 +4,9 @@ This module provides a regular expression printer for `Hir`. use std::fmt; -use hir::visitor::{self, Visitor}; -use hir::{self, Hir, HirKind}; -use is_meta_character; +use crate::hir::visitor::{self, Visitor}; +use crate::hir::{self, Hir, HirKind}; +use crate::is_meta_character; /// A builder for constructing a printer. /// @@ -239,7 +239,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> { #[cfg(test)] mod tests { use super::Printer; - use ParserBuilder; + use crate::ParserBuilder; fn roundtrip(given: &str, expected: &str) { roundtrip_with(|b| b, given, expected); diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index 44d2813d62..99c9493022 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -5,9 +5,9 @@ Defines a translator that converts an `Ast` to an `Hir`. 
use std::cell::{Cell, RefCell}; use std::result; -use ast::{self, Ast, Span, Visitor}; -use hir::{self, Error, ErrorKind, Hir}; -use unicode::{self, ClassQuery}; +use crate::ast::{self, Ast, Span, Visitor}; +use crate::hir::{self, Error, ErrorKind, Hir}; +use crate::unicode::{self, ClassQuery}; type Result = result::Result; @@ -533,7 +533,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { &mut self, op: &ast::ClassSetBinaryOp, ) -> Result<()> { - use ast::ClassSetBinaryOpKind::*; + use crate::ast::ClassSetBinaryOpKind::*; if self.flags().unicode() { let mut rhs = self.pop().unwrap().unwrap_class_unicode(); @@ -819,7 +819,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { &self, ast_class: &ast::ClassUnicode, ) -> Result { - use ast::ClassUnicodeKind::*; + use crate::ast::ClassUnicodeKind::*; if !self.flags().unicode() { return Err( @@ -857,7 +857,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { &self, ast_class: &ast::ClassPerl, ) -> Result { - use ast::ClassPerlKind::*; + use crate::ast::ClassPerlKind::*; assert!(self.flags().unicode()); let result = match ast_class.kind { @@ -879,7 +879,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { &self, ast_class: &ast::ClassPerl, ) -> hir::ClassBytes { - use ast::ClassPerlKind::*; + use crate::ast::ClassPerlKind::*; assert!(!self.flags().unicode()); let mut class = match ast_class.kind { @@ -1077,7 +1077,7 @@ fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes { } fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] { - use ast::ClassAsciiKind::*; + use crate::ast::ClassAsciiKind::*; match *kind { Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')], Alpha => &[('A', 'Z'), ('a', 'z')], @@ -1105,10 +1105,10 @@ fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] { #[cfg(test)] mod tests { - use ast::parse::ParserBuilder; - use ast::{self, Ast, Position, Span}; - use hir::{self, Hir, HirKind}; - use unicode::{self, ClassQuery}; + use crate::ast::parse::ParserBuilder; + use crate::ast::{self, Ast, Position, 
Span}; + use crate::hir::{self, Hir, HirKind}; + use crate::unicode::{self, ClassQuery}; use super::{ascii_class, TranslatorBuilder}; @@ -1256,7 +1256,7 @@ mod tests { } #[allow(dead_code)] - fn hir_uclass_query(query: ClassQuery) -> Hir { + fn hir_uclass_query(query: ClassQuery<'_>) -> Hir { Hir::class(hir::Class::Unicode(unicode::class(query).unwrap())) } @@ -1315,7 +1315,7 @@ mod tests { #[allow(dead_code)] fn hir_union(expr1: Hir, expr2: Hir) -> Hir { - use hir::Class::{Bytes, Unicode}; + use crate::hir::Class::{Bytes, Unicode}; match (expr1.into_kind(), expr2.into_kind()) { (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => { @@ -1332,7 +1332,7 @@ mod tests { #[allow(dead_code)] fn hir_difference(expr1: Hir, expr2: Hir) -> Hir { - use hir::Class::{Bytes, Unicode}; + use crate::hir::Class::{Bytes, Unicode}; match (expr1.into_kind(), expr2.into_kind()) { (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => { diff --git a/regex-syntax/src/hir/visitor.rs b/regex-syntax/src/hir/visitor.rs index 81a9e9817c..4f5a70909c 100644 --- a/regex-syntax/src/hir/visitor.rs +++ b/regex-syntax/src/hir/visitor.rs @@ -1,4 +1,4 @@ -use hir::{self, Hir, HirKind}; +use crate::hir::{self, Hir, HirKind}; /// A trait for visiting the high-level IR (HIR) in depth first order. 
/// diff --git a/regex-syntax/src/lib.rs b/regex-syntax/src/lib.rs index 6be634a9db..9e9af756a8 100644 --- a/regex-syntax/src/lib.rs +++ b/regex-syntax/src/lib.rs @@ -158,9 +158,9 @@ The following features are available: #![warn(missing_debug_implementations)] #![forbid(unsafe_code)] -pub use error::{Error, Result}; -pub use parser::{Parser, ParserBuilder}; -pub use unicode::UnicodeWordError; +pub use crate::error::{Error, Result}; +pub use crate::parser::{Parser, ParserBuilder}; +pub use crate::unicode::UnicodeWordError; pub mod ast; mod either; diff --git a/regex-syntax/src/parser.rs b/regex-syntax/src/parser.rs index 00f1391642..eb363cae47 100644 --- a/regex-syntax/src/parser.rs +++ b/regex-syntax/src/parser.rs @@ -1,7 +1,7 @@ -use ast; -use hir; +use crate::ast; +use crate::hir; -use Result; +use crate::Result; /// A builder for a regular expression parser. /// diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs index 73df6987b5..24dd4f026b 100644 --- a/regex-syntax/src/unicode.rs +++ b/regex-syntax/src/unicode.rs @@ -2,7 +2,7 @@ use std::error; use std::fmt; use std::result; -use hir; +use crate::hir; /// A type alias for errors specific to Unicode handling of classes. 
pub type Result = result::Result; @@ -38,7 +38,7 @@ pub struct CaseFoldError(()); impl error::Error for CaseFoldError {} impl fmt::Display for CaseFoldError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "Unicode-aware case folding is not available \ @@ -58,7 +58,7 @@ pub struct UnicodeWordError(()); impl error::Error for UnicodeWordError {} impl fmt::Display for UnicodeWordError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "Unicode-aware \\w class is not available \ @@ -95,7 +95,7 @@ pub fn simple_fold( c: char, ) -> FoldResult, Option>> { - use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE; + use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE; Ok(CASE_FOLDING_SIMPLE .binary_search_by_key(&c, |&(c1, _)| c1) @@ -130,8 +130,8 @@ pub fn contains_simple_case_mapping( #[cfg(feature = "unicode-case")] fn imp(start: char, end: char) -> FoldResult { + use crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE; use std::cmp::Ordering; - use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE; assert!(start <= end); Ok(CASE_FOLDING_SIMPLE @@ -285,7 +285,7 @@ enum CanonicalClassQuery { /// Looks up a Unicode class given a query. If one doesn't exist, then /// `None` is returned. -pub fn class(query: ClassQuery) -> Result { +pub fn class(query: ClassQuery<'_>) -> Result { use self::CanonicalClassQuery::*; match query.canonicalize()? 
{ @@ -330,7 +330,7 @@ pub fn perl_word() -> Result { #[cfg(feature = "unicode-perl")] fn imp() -> Result { - use unicode_tables::perl_word::PERL_WORD; + use crate::unicode_tables::perl_word::PERL_WORD; Ok(hir_class(PERL_WORD)) } @@ -354,7 +354,7 @@ pub fn perl_space() -> Result { #[cfg(feature = "unicode-bool")] fn imp() -> Result { - use unicode_tables::property_bool::WHITE_SPACE; + use crate::unicode_tables::property_bool::WHITE_SPACE; Ok(hir_class(WHITE_SPACE)) } @@ -378,7 +378,7 @@ pub fn perl_digit() -> Result { #[cfg(feature = "unicode-gencat")] fn imp() -> Result { - use unicode_tables::general_category::DECIMAL_NUMBER; + use crate::unicode_tables::general_category::DECIMAL_NUMBER; Ok(hir_class(DECIMAL_NUMBER)) } @@ -405,9 +405,9 @@ pub fn is_word_character(c: char) -> result::Result { #[cfg(feature = "unicode-perl")] fn imp(c: char) -> result::Result { - use is_word_byte; + use crate::is_word_byte; + use crate::unicode_tables::perl_word::PERL_WORD; use std::cmp::Ordering; - use unicode_tables::perl_word::PERL_WORD; if c <= 0x7F as char && is_word_byte(c as u8) { return Ok(true); @@ -482,7 +482,7 @@ fn canonical_prop(normalized_name: &str) -> Result> { feature = "unicode-segment", ))] fn imp(name: &str) -> Result> { - use unicode_tables::property_names::PROPERTY_NAMES; + use crate::unicode_tables::property_names::PROPERTY_NAMES; Ok(PROPERTY_NAMES .binary_search_by_key(&name, |&(n, _)| n) @@ -539,7 +539,7 @@ fn property_values( feature = "unicode-segment", ))] fn imp(name: &'static str) -> Result> { - use unicode_tables::property_values::PROPERTY_VALUES; + use crate::unicode_tables::property_values::PROPERTY_VALUES; Ok(PROPERTY_VALUES .binary_search_by_key(&name, |&(n, _)| n) @@ -578,7 +578,7 @@ fn ages(canonical_age: &str) -> Result> { #[cfg(feature = "unicode-age")] fn imp(canonical_age: &str) -> Result> { - use unicode_tables::age; + use crate::unicode_tables::age; const AGES: &'static [(&'static str, Range)] = &[ ("V1_1", age::V1_1), @@ -631,7 +631,7 @@ 
fn gencat(canonical_name: &'static str) -> Result { #[cfg(feature = "unicode-gencat")] fn imp(name: &'static str) -> Result { - use unicode_tables::general_category::BY_NAME; + use crate::unicode_tables::general_category::BY_NAME; match name { "ASCII" => Ok(hir_class(&[('\0', '\x7F')])), "Any" => Ok(hir_class(&[('\0', '\u{10FFFF}')])), @@ -666,7 +666,7 @@ fn script(canonical_name: &'static str) -> Result { #[cfg(feature = "unicode-script")] fn imp(name: &'static str) -> Result { - use unicode_tables::script::BY_NAME; + use crate::unicode_tables::script::BY_NAME; property_set(BY_NAME, name) .map(hir_class) .ok_or(Error::PropertyValueNotFound) @@ -691,7 +691,7 @@ fn script_extension( #[cfg(feature = "unicode-script")] fn imp(name: &'static str) -> Result { - use unicode_tables::script_extension::BY_NAME; + use crate::unicode_tables::script_extension::BY_NAME; property_set(BY_NAME, name) .map(hir_class) .ok_or(Error::PropertyValueNotFound) @@ -715,7 +715,7 @@ fn bool_property(canonical_name: &'static str) -> Result { #[cfg(feature = "unicode-bool")] fn imp(name: &'static str) -> Result { - use unicode_tables::property_bool::BY_NAME; + use crate::unicode_tables::property_bool::BY_NAME; property_set(BY_NAME, name) .map(hir_class) .ok_or(Error::PropertyNotFound) @@ -743,7 +743,7 @@ fn gcb(canonical_name: &'static str) -> Result { #[cfg(feature = "unicode-segment")] fn imp(name: &'static str) -> Result { - use unicode_tables::grapheme_cluster_break::BY_NAME; + use crate::unicode_tables::grapheme_cluster_break::BY_NAME; property_set(BY_NAME, name) .map(hir_class) .ok_or(Error::PropertyValueNotFound) @@ -767,7 +767,7 @@ fn wb(canonical_name: &'static str) -> Result { #[cfg(feature = "unicode-segment")] fn imp(name: &'static str) -> Result { - use unicode_tables::word_break::BY_NAME; + use crate::unicode_tables::word_break::BY_NAME; property_set(BY_NAME, name) .map(hir_class) .ok_or(Error::PropertyValueNotFound) @@ -791,7 +791,7 @@ fn sb(canonical_name: &'static str) -> 
Result { #[cfg(feature = "unicode-segment")] fn imp(name: &'static str) -> Result { - use unicode_tables::sentence_break::BY_NAME; + use crate::unicode_tables::sentence_break::BY_NAME; property_set(BY_NAME, name) .map(hir_class) .ok_or(Error::PropertyValueNotFound) diff --git a/regex-syntax/src/utf8.rs b/regex-syntax/src/utf8.rs index 947ba7c5fe..dc055033e5 100644 --- a/regex-syntax/src/utf8.rs +++ b/regex-syntax/src/utf8.rs @@ -203,7 +203,7 @@ impl<'a> IntoIterator for &'a Utf8Sequence { } impl fmt::Debug for Utf8Sequence { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use self::Utf8Sequence::*; match *self { One(ref r) => write!(f, "{:?}", r), @@ -237,7 +237,7 @@ impl Utf8Range { } impl fmt::Debug for Utf8Range { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if self.start == self.end { write!(f, "[{:X}]", self.start) } else { @@ -331,7 +331,7 @@ struct ScalarRange { } impl fmt::Debug for ScalarRange { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "ScalarRange({:X}, {:X})", self.start, self.end) } } @@ -457,7 +457,7 @@ fn max_scalar_value(nbytes: usize) -> u32 { mod tests { use std::char; - use utf8::{Utf8Range, Utf8Sequences}; + use crate::utf8::{Utf8Range, Utf8Sequences}; fn rutf8(s: u8, e: u8) -> Utf8Range { Utf8Range::new(s, e) @@ -504,7 +504,7 @@ mod tests { #[test] fn bmp() { - use utf8::Utf8Sequence::*; + use crate::utf8::Utf8Sequence::*; let seqs = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect::>(); assert_eq!( @@ -538,7 +538,7 @@ mod tests { #[test] fn reverse() { - use utf8::Utf8Sequence::*; + use crate::utf8::Utf8Sequence::*; let mut s = One(rutf8(0xA, 0xB)); s.reverse(); diff --git a/src/backtrack.rs b/src/backtrack.rs index 6100c1730d..a3d25d6622 100644 --- a/src/backtrack.rs +++ b/src/backtrack.rs @@ -16,10 +16,10 @@ // the 
bitset has to be zeroed on each execution, which becomes quite expensive // on large bitsets. -use exec::ProgramCache; -use input::{Input, InputAt}; -use prog::{InstPtr, Program}; -use re_trait::Slot; +use crate::exec::ProgramCache; +use crate::input::{Input, InputAt}; +use crate::prog::{InstPtr, Program}; +use crate::re_trait::Slot; type Bits = u32; @@ -196,7 +196,7 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> { } fn step(&mut self, mut ip: InstPtr, mut at: InputAt) -> bool { - use prog::Inst::*; + use crate::prog::Inst::*; loop { // This loop is an optimization to avoid constantly pushing/popping // from the stack. Namely, if we're pushing a job only to run it diff --git a/src/compile.rs b/src/compile.rs index 9bbd464e0f..8904f15168 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -4,16 +4,16 @@ use std::iter; use std::result; use std::sync::Arc; -use syntax::hir::{self, Hir}; -use syntax::is_word_byte; -use syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences}; +use regex_syntax::hir::{self, Hir}; +use regex_syntax::is_word_byte; +use regex_syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences}; -use prog::{ +use crate::prog::{ EmptyLook, Inst, InstBytes, InstChar, InstEmptyLook, InstPtr, InstRanges, InstSave, InstSplit, Program, }; -use Error; +use crate::Error; type Result = result::Result; type ResultOrEmpty = result::Result, Error>; @@ -255,8 +255,8 @@ impl Compiler { /// Ok(None) is returned when an expression is compiled to no /// instruction, and so no patch.entry value makes sense. 
fn c(&mut self, expr: &Hir) -> ResultOrEmpty { - use prog; - use syntax::hir::HirKind::*; + use crate::prog; + use regex_syntax::hir::HirKind::*; self.check_size()?; match *expr.kind() { @@ -554,7 +554,7 @@ impl Compiler { } fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty { - use syntax::hir::RepetitionKind::*; + use regex_syntax::hir::RepetitionKind::*; match rep.kind { ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy), ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy), diff --git a/src/dfa.rs b/src/dfa.rs index 9ac0c2c393..4b60f4d19b 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -42,9 +42,9 @@ use std::iter::repeat; use std::mem; use std::sync::Arc; -use exec::ProgramCache; -use prog::{Inst, Program}; -use sparse::SparseSet; +use crate::exec::ProgramCache; +use crate::prog::{Inst, Program}; +use crate::sparse::SparseSet; /// Return true if and only if the given program can be executed by a DFA. /// @@ -55,7 +55,7 @@ use sparse::SparseSet; /// This function will also return false if the given program has any Unicode /// instructions (Char or Ranges) since the DFA operates on bytes only. pub fn can_exec(insts: &Program) -> bool { - use prog::Inst::*; + use crate::prog::Inst::*; // If for some reason we manage to allocate a regex program with more // than i32::MAX instructions, then we can't execute the DFA because we // use 32 bit instruction pointer deltas for memory savings. @@ -306,7 +306,7 @@ impl State { StateFlags(self.data[0]) } - fn inst_ptrs(&self) -> InstPtrs { + fn inst_ptrs(&self) -> InstPtrs<'_> { InstPtrs { base: 0, data: &self.data[1..] } } } @@ -894,7 +894,7 @@ impl<'a> Fsm<'a> { mut si: StatePtr, b: Byte, ) -> Option { - use prog::Inst::*; + use crate::prog::Inst::*; // Initialize a queue with the current DFA state's NFA states. 
qcur.clear(); @@ -1056,8 +1056,8 @@ impl<'a> Fsm<'a> { q: &mut SparseSet, flags: EmptyFlags, ) { - use prog::EmptyLook::*; - use prog::Inst::*; + use crate::prog::EmptyLook::*; + use crate::prog::Inst::*; // We need to traverse the NFA to follow epsilon transitions, so avoid // recursion with an explicit stack. @@ -1190,7 +1190,7 @@ impl<'a> Fsm<'a> { q: &SparseSet, state_flags: &mut StateFlags, ) -> Option { - use prog::Inst::*; + use crate::prog::Inst::*; // We need to build up enough information to recognize pre-built states // in the DFA. Generally speaking, this includes every instruction @@ -1754,7 +1754,7 @@ impl Byte { } impl fmt::Debug for State { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let ips: Vec = self.inst_ptrs().collect(); f.debug_struct("State") .field("flags", &self.flags()) @@ -1764,7 +1764,7 @@ impl fmt::Debug for State { } impl fmt::Debug for Transitions { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut fmtd = f.debug_map(); for si in 0..self.num_states() { let s = si * self.num_byte_classes; @@ -1778,7 +1778,7 @@ impl fmt::Debug for Transitions { struct TransitionsRow<'a>(&'a [StatePtr]); impl<'a> fmt::Debug for TransitionsRow<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut fmtd = f.debug_map(); for (b, si) in self.0.iter().enumerate() { match *si { @@ -1796,7 +1796,7 @@ impl<'a> fmt::Debug for TransitionsRow<'a> { } impl fmt::Debug for StateFlags { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("StateFlags") .field("is_match", &self.is_match()) .field("is_word", &self.is_word()) @@ -1889,7 +1889,6 @@ fn read_varu32(data: &[u8]) -> (u32, usize) { #[cfg(test)] mod tests { - extern crate rand; use super::{ push_inst_ptr, 
read_vari32, read_varu32, write_vari32, write_varu32, diff --git a/src/error.rs b/src/error.rs index 1c32c85b99..3e0ec75210 100644 --- a/src/error.rs +++ b/src/error.rs @@ -31,7 +31,7 @@ impl ::std::error::Error for Error { } impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Error::Syntax(ref err) => err.fmt(f), Error::CompiledTooBig(limit) => write!( @@ -49,7 +49,7 @@ impl fmt::Display for Error { // but the `Syntax` variant is already storing a `String` anyway, so we might // as well format it nicely. impl fmt::Debug for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Error::Syntax(ref err) => { let hr: String = repeat('~').take(79).collect(); diff --git a/src/exec.rs b/src/exec.rs index 3d5a52bead..3cec8bbe98 100644 --- a/src/exec.rs +++ b/src/exec.rs @@ -5,26 +5,26 @@ use std::sync::Arc; #[cfg(feature = "perf-literal")] use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind}; -use syntax::hir::literal::Literals; -use syntax::hir::Hir; -use syntax::ParserBuilder; +use regex_syntax::hir::literal::Literals; +use regex_syntax::hir::Hir; +use regex_syntax::ParserBuilder; -use backtrack; -use compile::Compiler; +use crate::backtrack; +use crate::compile::Compiler; #[cfg(feature = "perf-dfa")] -use dfa; -use error::Error; -use input::{ByteInput, CharInput}; -use literal::LiteralSearcher; -use pikevm; -use pool::{Pool, PoolGuard}; -use prog::Program; -use re_builder::RegexOptions; -use re_bytes; -use re_set; -use re_trait::{Locations, RegularExpression, Slot}; -use re_unicode; -use utf8::next_utf8; +use crate::dfa; +use crate::error::Error; +use crate::input::{ByteInput, CharInput}; +use crate::literal::LiteralSearcher; +use crate::pikevm; +use crate::pool::{Pool, PoolGuard}; +use crate::prog::Program; +use crate::re_builder::RegexOptions; +use crate::re_bytes; 
+use crate::re_set; +use crate::re_trait::{Locations, RegularExpression, Slot}; +use crate::re_unicode; +use crate::utf8::next_utf8; /// `Exec` manages the execution of a regular expression. /// @@ -739,7 +739,7 @@ impl<'c> ExecNoSync<'c> { text: &[u8], start: usize, ) -> dfa::Result<(usize, usize)> { - use dfa::Result::*; + use crate::dfa::Result::*; let end = match dfa::Fsm::forward( &self.ro.dfa, self.cache.value(), @@ -779,7 +779,7 @@ impl<'c> ExecNoSync<'c> { text: &[u8], start: usize, ) -> dfa::Result<(usize, usize)> { - use dfa::Result::*; + use crate::dfa::Result::*; match dfa::Fsm::reverse( &self.ro.dfa_reverse, self.cache.value(), @@ -835,7 +835,7 @@ impl<'c> ExecNoSync<'c> { text: &[u8], original_start: usize, ) -> Option> { - use dfa::Result::*; + use crate::dfa::Result::*; let lcs = self.ro.suffixes.lcs(); debug_assert!(lcs.len() >= 1); @@ -880,7 +880,7 @@ impl<'c> ExecNoSync<'c> { text: &[u8], start: usize, ) -> dfa::Result<(usize, usize)> { - use dfa::Result::*; + use crate::dfa::Result::*; let match_start = match self.exec_dfa_reverse_suffix(text, start) { None => return self.find_dfa_forward(text, start), @@ -1263,7 +1263,7 @@ impl<'c> ExecNoSyncStr<'c> { impl Exec { /// Get a searcher that isn't Sync. #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn searcher(&self) -> ExecNoSync { + pub fn searcher(&self) -> ExecNoSync<'_> { ExecNoSync { ro: &self.ro, // a clone is too expensive here! (and not needed) cache: self.pool.get(), @@ -1272,7 +1272,7 @@ impl Exec { /// Get a searcher that isn't Sync and can match on &str. #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn searcher_str(&self) -> ExecNoSyncStr { + pub fn searcher_str(&self) -> ExecNoSyncStr<'_> { ExecNoSyncStr(self.searcher()) } @@ -1550,7 +1550,7 @@ impl ProgramCacheInner { /// literals, and if so, returns them. Otherwise, this returns None. 
#[cfg(feature = "perf-literal")] fn alternation_literals(expr: &Hir) -> Option>> { - use syntax::hir::{HirKind, Literal}; + use regex_syntax::hir::{HirKind, Literal}; // This is pretty hacky, but basically, if `is_alternation_literal` is // true, then we can make several assumptions about the structure of our @@ -1602,7 +1602,7 @@ fn alternation_literals(expr: &Hir) -> Option>> { mod test { #[test] fn uppercut_s_backtracking_bytes_default_bytes_mismatch() { - use internal::ExecBuilder; + use crate::internal::ExecBuilder; let backtrack_bytes_re = ExecBuilder::new("^S") .bounded_backtracking() @@ -1630,7 +1630,7 @@ mod test { #[test] fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() { - use internal::ExecBuilder; + use crate::internal::ExecBuilder; let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)") .bounded_backtracking() diff --git a/src/expand.rs b/src/expand.rs index 70dbf91f42..fd9c2d05d4 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -1,12 +1,12 @@ use std::str; -use find_byte::find_byte; +use crate::find_byte::find_byte; -use re_bytes; -use re_unicode; +use crate::re_bytes; +use crate::re_unicode; pub fn expand_str( - caps: &re_unicode::Captures, + caps: &re_unicode::Captures<'_>, mut replacement: &str, dst: &mut String, ) { @@ -48,7 +48,7 @@ pub fn expand_str( } pub fn expand_bytes( - caps: &re_bytes::Captures, + caps: &re_bytes::Captures<'_>, mut replacement: &[u8], dst: &mut Vec, ) { @@ -125,7 +125,7 @@ impl From for Ref<'static> { /// starting at the beginning of `replacement`. /// /// If no such valid reference could be found, None is returned. 
-fn find_cap_ref(replacement: &[u8]) -> Option { +fn find_cap_ref(replacement: &[u8]) -> Option> { let mut i = 0; let rep: &[u8] = replacement.as_ref(); if rep.len() <= 1 || rep[0] != b'$' { @@ -157,7 +157,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option { }) } -fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option { +fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option> { let start = i; while rep.get(i).map_or(false, |&b| b != b'}') { i += 1; diff --git a/src/input.rs b/src/input.rs index 3afa2d0f6c..5d50ee3409 100644 --- a/src/input.rs +++ b/src/input.rs @@ -4,11 +4,9 @@ use std::fmt; use std::ops; use std::u32; -use syntax; - -use literal::LiteralSearcher; -use prog::InstEmptyLook; -use utf8::{decode_last_utf8, decode_utf8}; +use crate::literal::LiteralSearcher; +use crate::prog::InstEmptyLook; +use crate::utf8::{decode_last_utf8, decode_utf8}; /// Represents a location in the input. #[derive(Clone, Copy, Debug)] @@ -175,7 +173,7 @@ impl<'t> Input for CharInput<'t> { } fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { - use prog::EmptyLook::*; + use crate::prog::EmptyLook::*; match empty.look { StartLine => { let c = self.previous_char(at); @@ -268,7 +266,7 @@ impl<'t> Input for ByteInput<'t> { } fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { - use prog::EmptyLook::*; + use crate::prog::EmptyLook::*; match empty.look { StartLine => { let c = self.previous_char(at); @@ -348,7 +346,7 @@ impl<'t> Input for ByteInput<'t> { pub struct Char(u32); impl fmt::Debug for Char { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match char::from_u32(self.0) { None => write!(f, "Empty"), Some(c) => write!(f, "{:?}", c), @@ -379,7 +377,7 @@ impl Char { // available. However, our compiler ensures that if a Unicode word // boundary is used, then the data must also be available. If it isn't, // then the compiler returns an error. 
- char::from_u32(self.0).map_or(false, syntax::is_word_character) + char::from_u32(self.0).map_or(false, regex_syntax::is_word_character) } /// Returns true iff the byte is a word byte. @@ -387,7 +385,7 @@ impl Char { /// If the byte is absent, then false is returned. pub fn is_word_byte(self) -> bool { match char::from_u32(self.0) { - Some(c) if c <= '\u{7F}' => syntax::is_word_byte(c as u8), + Some(c) if c <= '\u{7F}' => regex_syntax::is_word_byte(c as u8), None | Some(_) => false, } } diff --git a/src/lib.rs b/src/lib.rs index 357ac0dd02..7f2dec815d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,12 +22,6 @@ used by adding `regex` to your dependencies in your project's `Cargo.toml`. regex = "1" ``` -If you're using Rust 2015, then you'll also need to add it to your crate root: - -```rust -extern crate regex; -``` - # Example: find a date General use of regular expressions in this package involves compiling an @@ -68,9 +62,7 @@ regular expressions are compiled exactly once. For example: ```rust -#[macro_use] extern crate lazy_static; -extern crate regex; - +use lazy_static::lazy_static; use regex::Regex; fn some_helper_function(text: &str) -> bool { @@ -94,7 +86,7 @@ matches. 
For example, to find all dates in a string and be able to access them by their component pieces: ```rust -# extern crate regex; use regex::Regex; +# use regex::Regex; # fn main() { let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap(); let text = "2012-03-14, 2013-01-01 and 2014-07-05"; @@ -119,7 +111,7 @@ clearer, we can *name* our capture groups and use those names as variables in our replacement text: ```rust -# extern crate regex; use regex::Regex; +# use regex::Regex; # fn main() { let re = Regex::new(r"(?P\d{4})-(?P\d{2})-(?P\d{2})").unwrap(); let before = "2012-03-14, 2013-01-01 and 2014-07-05"; @@ -136,7 +128,7 @@ Note that if your regex gets complicated, you can use the `x` flag to enable insignificant whitespace mode, which also lets you write comments: ```rust -# extern crate regex; use regex::Regex; +# use regex::Regex; # fn main() { let re = Regex::new(r"(?x) (?P\d{4}) # the year @@ -217,7 +209,7 @@ Unicode scalar values. This means you can use Unicode characters directly in your expression: ```rust -# extern crate regex; use regex::Regex; +# use regex::Regex; # fn main() { let re = Regex::new(r"(?i)Δ+").unwrap(); let mat = re.find("ΔδΔ").unwrap(); @@ -244,7 +236,7 @@ of boolean properties are available as character classes. For example, you can match a sequence of numerals, Greek or Cherokee letters: ```rust -# extern crate regex; use regex::Regex; +# use regex::Regex; # fn main() { let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap(); let mat = re.find("abcΔᎠβⅠᏴγδⅡxyz").unwrap(); @@ -391,7 +383,7 @@ Flags can be toggled within a pattern. 
Here's an example that matches case-insensitively for the first part but case-sensitively for the second part: ```rust -# extern crate regex; use regex::Regex; +# use regex::Regex; # fn main() { let re = Regex::new(r"(?i)a+(?-i)b+").unwrap(); let cap = re.captures("AaAaAbbBBBb").unwrap(); @@ -425,7 +417,7 @@ Here is an example that uses an ASCII word boundary instead of a Unicode word boundary: ```rust -# extern crate regex; use regex::Regex; +# use regex::Regex; # fn main() { let re = Regex::new(r"(?-u:\b).+(?-u:\b)").unwrap(); let cap = re.captures("$$abc$$").unwrap(); @@ -614,38 +606,30 @@ another matching engine with fixed memory requirements. */ #![deny(missing_docs)] -#![cfg_attr(test, deny(warnings))] #![cfg_attr(feature = "pattern", feature(pattern))] #![warn(missing_debug_implementations)] #[cfg(not(feature = "std"))] compile_error!("`std` feature is currently required to build this crate"); -#[cfg(feature = "perf-literal")] -extern crate aho_corasick; -// #[cfg(doctest)] -// extern crate doc_comment; -#[cfg(feature = "perf-literal")] -extern crate memchr; -#[cfg(test)] -#[cfg_attr(feature = "perf-literal", macro_use)] -extern crate quickcheck; -extern crate regex_syntax as syntax; - +// To check README's example +// TODO: Re-enable this once the MSRV is 1.43 or greater. 
+// See: https://github.com/rust-lang/regex/issues/684 +// See: https://github.com/rust-lang/regex/issues/685 // #[cfg(doctest)] // doc_comment::doctest!("../README.md"); #[cfg(feature = "std")] -pub use error::Error; +pub use crate::error::Error; #[cfg(feature = "std")] -pub use re_builder::set_unicode::*; +pub use crate::re_builder::set_unicode::*; #[cfg(feature = "std")] -pub use re_builder::unicode::*; +pub use crate::re_builder::unicode::*; #[cfg(feature = "std")] -pub use re_set::unicode::*; +pub use crate::re_set::unicode::*; #[cfg(feature = "std")] #[cfg(feature = "std")] -pub use re_unicode::{ +pub use crate::re_unicode::{ escape, CaptureLocations, CaptureMatches, CaptureNames, Captures, Locations, Match, Matches, NoExpand, Regex, Replacer, ReplacerRef, Split, SplitN, SubCaptureMatches, @@ -740,10 +724,10 @@ performance on `&str`. */ #[cfg(feature = "std")] pub mod bytes { - pub use re_builder::bytes::*; - pub use re_builder::set_bytes::*; - pub use re_bytes::*; - pub use re_set::bytes::*; + pub use crate::re_builder::bytes::*; + pub use crate::re_builder::set_bytes::*; + pub use crate::re_bytes::*; + pub use crate::re_set::bytes::*; } mod backtrack; @@ -754,8 +738,6 @@ mod error; mod exec; mod expand; mod find_byte; -#[cfg(feature = "perf-literal")] -mod freqs; mod input; mod literal; #[cfg(feature = "pattern")] @@ -777,9 +759,9 @@ mod utf8; #[doc(hidden)] #[cfg(feature = "std")] pub mod internal { - pub use compile::Compiler; - pub use exec::{Exec, ExecBuilder}; - pub use input::{Char, CharInput, Input, InputAt}; - pub use literal::LiteralSearcher; - pub use prog::{EmptyLook, Inst, InstRanges, Program}; + pub use crate::compile::Compiler; + pub use crate::exec::{Exec, ExecBuilder}; + pub use crate::input::{Char, CharInput, Input, InputAt}; + pub use crate::literal::LiteralSearcher; + pub use crate::prog::{EmptyLook, Inst, InstRanges, Program}; } diff --git a/src/literal/imp.rs b/src/literal/imp.rs index e4d04ed0a9..82f050a0da 100644 --- 
a/src/literal/imp.rs +++ b/src/literal/imp.rs @@ -1,11 +1,8 @@ -use std::cmp; use std::mem; use aho_corasick::{self, packed, AhoCorasick, AhoCorasickBuilder}; -use memchr::{memchr, memchr2, memchr3}; -use syntax::hir::literal::{Literal, Literals}; - -use freqs::BYTE_FREQUENCIES; +use memchr::{memchr, memchr2, memchr3, memmem}; +use regex_syntax::hir::literal::{Literal, Literals}; /// A prefix extracted from a compiled regular expression. /// @@ -15,8 +12,8 @@ use freqs::BYTE_FREQUENCIES; #[derive(Clone, Debug)] pub struct LiteralSearcher { complete: bool, - lcp: FreqyPacked, - lcs: FreqyPacked, + lcp: Memmem, + lcs: Memmem, matcher: Matcher, } @@ -26,10 +23,8 @@ enum Matcher { Empty, /// A set of four or more single byte literals. Bytes(SingleByteSet), - /// A single substring, find using memchr and frequency analysis. - FreqyPacked(FreqyPacked), - /// A single substring, find using Boyer-Moore. - BoyerMoore(BoyerMooreSearch), + /// A single substring, using vector accelerated routines when available. + Memmem(Memmem), /// An Aho-Corasick automaton. AC { ac: AhoCorasick, lits: Vec }, /// A packed multiple substring searcher, using SIMD. @@ -63,8 +58,8 @@ impl LiteralSearcher { let complete = lits.all_complete(); LiteralSearcher { complete: complete, - lcp: FreqyPacked::new(lits.longest_common_prefix().to_vec()), - lcs: FreqyPacked::new(lits.longest_common_suffix().to_vec()), + lcp: Memmem::new(lits.longest_common_prefix()), + lcs: Memmem::new(lits.longest_common_suffix()), matcher: matcher, } } @@ -86,8 +81,7 @@ impl LiteralSearcher { match self.matcher { Empty => Some((0, 0)), Bytes(ref sset) => sset.find(haystack).map(|i| (i, i + 1)), - FreqyPacked(ref s) => s.find(haystack).map(|i| (i, i + s.len())), - BoyerMoore(ref s) => s.find(haystack).map(|i| (i, i + s.len())), + Memmem(ref s) => s.find(haystack).map(|i| (i, i + s.len())), AC { ref ac, .. 
} => { ac.find(haystack).map(|m| (m.start(), m.end())) } @@ -124,24 +118,23 @@ impl LiteralSearcher { } /// Returns an iterator over all literals to be matched. - pub fn iter(&self) -> LiteralIter { + pub fn iter(&self) -> LiteralIter<'_> { match self.matcher { Matcher::Empty => LiteralIter::Empty, Matcher::Bytes(ref sset) => LiteralIter::Bytes(&sset.dense), - Matcher::FreqyPacked(ref s) => LiteralIter::Single(&s.pat), - Matcher::BoyerMoore(ref s) => LiteralIter::Single(&s.pattern), + Matcher::Memmem(ref s) => LiteralIter::Single(&s.finder.needle()), Matcher::AC { ref lits, .. } => LiteralIter::AC(lits), Matcher::Packed { ref lits, .. } => LiteralIter::Packed(lits), } } /// Returns a matcher for the longest common prefix of this matcher. - pub fn lcp(&self) -> &FreqyPacked { + pub fn lcp(&self) -> &Memmem { &self.lcp } /// Returns a matcher for the longest common suffix of this matcher. - pub fn lcs(&self) -> &FreqyPacked { + pub fn lcs(&self) -> &Memmem { &self.lcs } @@ -156,8 +149,7 @@ impl LiteralSearcher { match self.matcher { Empty => 0, Bytes(ref sset) => sset.dense.len(), - FreqyPacked(_) => 1, - BoyerMoore(_) => 1, + Memmem(_) => 1, AC { ref ac, .. } => ac.pattern_count(), Packed { ref lits, .. } => lits.len(), } @@ -169,8 +161,7 @@ impl LiteralSearcher { match self.matcher { Empty => 0, Bytes(ref sset) => sset.approximate_size(), - FreqyPacked(ref single) => single.approximate_size(), - BoyerMoore(ref single) => single.approximate_size(), + Memmem(ref single) => single.approximate_size(), AC { ref ac, .. } => ac.heap_bytes(), Packed { ref s, .. 
} => s.heap_bytes(), } @@ -205,12 +196,7 @@ impl Matcher { return Matcher::Bytes(sset); } if lits.literals().len() == 1 { - let lit = lits.literals()[0].to_vec(); - if BoyerMooreSearch::should_use(lit.as_slice()) { - return Matcher::BoyerMoore(BoyerMooreSearch::new(lit)); - } else { - return Matcher::FreqyPacked(FreqyPacked::new(lit)); - } + return Matcher::Memmem(Memmem::new(&lits.literals()[0])); } let pats = lits.literals().to_owned(); @@ -367,116 +353,27 @@ impl SingleByteSet { } } -/// Provides an implementation of fast subtring search using frequency -/// analysis. +/// A simple wrapper around the memchr crate's memmem implementation. /// -/// memchr is so fast that we do everything we can to keep the loop in memchr -/// for as long as possible. The easiest way to do this is to intelligently -/// pick the byte to send to memchr. The best byte is the byte that occurs -/// least frequently in the haystack. Since doing frequency analysis on the -/// haystack is far too expensive, we compute a set of fixed frequencies up -/// front and hard code them in src/freqs.rs. Frequency analysis is done via -/// scripts/frequencies.py. +/// The API this exposes mirrors the API of previous substring searchers that +/// this supplanted. #[derive(Clone, Debug)] -pub struct FreqyPacked { - /// The pattern. - pat: Vec, - /// The number of Unicode characters in the pattern. This is useful for - /// determining the effective length of a pattern when deciding which - /// optimizations to perform. A trailing incomplete UTF-8 sequence counts - /// as one character. +pub struct Memmem { + finder: memmem::Finder<'static>, char_len: usize, - /// The rarest byte in the pattern, according to pre-computed frequency - /// analysis. - rare1: u8, - /// The offset of the rarest byte in `pat`. - rare1i: usize, - /// The second rarest byte in the pattern, according to pre-computed - /// frequency analysis. (This may be equivalent to the rarest byte.) 
- /// - /// The second rarest byte is used as a type of guard for quickly detecting - /// a mismatch after memchr locates an instance of the rarest byte. This - /// is a hedge against pathological cases where the pre-computed frequency - /// analysis may be off. (But of course, does not prevent *all* - /// pathological cases.) - rare2: u8, - /// The offset of the second rarest byte in `pat`. - rare2i: usize, } -impl FreqyPacked { - fn new(pat: Vec) -> FreqyPacked { - if pat.is_empty() { - return FreqyPacked::empty(); - } - - // Find the rarest two bytes. Try to make them distinct (but it's not - // required). - let mut rare1 = pat[0]; - let mut rare2 = pat[0]; - for b in pat[1..].iter().cloned() { - if freq_rank(b) < freq_rank(rare1) { - rare1 = b; - } - } - for &b in &pat { - if rare1 == rare2 { - rare2 = b - } else if b != rare1 && freq_rank(b) < freq_rank(rare2) { - rare2 = b; - } - } - - // And find the offsets of their last occurrences. - let rare1i = pat.iter().rposition(|&b| b == rare1).unwrap(); - let rare2i = pat.iter().rposition(|&b| b == rare2).unwrap(); - - let char_len = char_len_lossy(&pat); - FreqyPacked { - pat: pat, - char_len: char_len, - rare1: rare1, - rare1i: rare1i, - rare2: rare2, - rare2i: rare2i, - } - } - - fn empty() -> FreqyPacked { - FreqyPacked { - pat: vec![], - char_len: 0, - rare1: 0, - rare1i: 0, - rare2: 0, - rare2i: 0, +impl Memmem { + fn new(pat: &[u8]) -> Memmem { + Memmem { + finder: memmem::Finder::new(pat).into_owned(), + char_len: char_len_lossy(pat), } } #[cfg_attr(feature = "perf-inline", inline(always))] pub fn find(&self, haystack: &[u8]) -> Option { - let pat = &*self.pat; - if haystack.len() < pat.len() || pat.is_empty() { - return None; - } - let mut i = self.rare1i; - while i < haystack.len() { - i += match memchr(self.rare1, &haystack[i..]) { - None => return None, - Some(i) => i, - }; - let start = i - self.rare1i; - let end = start + pat.len(); - if end > haystack.len() { - return None; - } - let aligned = 
&haystack[start..end]; - if aligned[self.rare2i] == self.rare2 && aligned == &*self.pat { - return Some(start); - } - i += 1; - } - None + self.finder.find(haystack) } #[cfg_attr(feature = "perf-inline", inline(always))] @@ -484,11 +381,11 @@ impl FreqyPacked { if text.len() < self.len() { return false; } - text[text.len() - self.len()..] == *self.pat + &text[text.len() - self.len()..] == self.finder.needle() } pub fn len(&self) -> usize { - self.pat.len() + self.finder.needle().len() } pub fn char_len(&self) -> usize { @@ -496,627 +393,10 @@ impl FreqyPacked { } fn approximate_size(&self) -> usize { - self.pat.len() * mem::size_of::() + self.finder.needle().len() * mem::size_of::() } } fn char_len_lossy(bytes: &[u8]) -> usize { String::from_utf8_lossy(bytes).chars().count() } - -/// An implementation of Tuned Boyer-Moore as laid out by -/// Andrew Hume and Daniel Sunday in "Fast String Searching". -/// O(n) in the size of the input. -/// -/// Fast string searching algorithms come in many variations, -/// but they can generally be described in terms of three main -/// components. -/// -/// The skip loop is where the string searcher wants to spend -/// as much time as possible. Exactly which character in the -/// pattern the skip loop examines varies from algorithm to -/// algorithm, but in the simplest case this loop repeated -/// looks at the last character in the pattern and jumps -/// forward in the input if it is not in the pattern. -/// Robert Boyer and J Moore called this the "fast" loop in -/// their original paper. -/// -/// The match loop is responsible for actually examining the -/// whole potentially matching substring. In order to fail -/// faster, the match loop sometimes has a guard test attached. -/// The guard test uses frequency analysis of the different -/// characters in the pattern to choose the least frequency -/// occurring character and use it to find match failures -/// as quickly as possible. 
-/// -/// The shift rule governs how the algorithm will shuffle its -/// test window in the event of a failure during the match loop. -/// Certain shift rules allow the worst-case run time of the -/// algorithm to be shown to be O(n) in the size of the input -/// rather than O(nm) in the size of the input and the size -/// of the pattern (as naive Boyer-Moore is). -/// -/// "Fast String Searching", in addition to presenting a tuned -/// algorithm, provides a comprehensive taxonomy of the many -/// different flavors of string searchers. Under that taxonomy -/// TBM, the algorithm implemented here, uses an unrolled fast -/// skip loop with memchr fallback, a forward match loop with guard, -/// and the mini Sunday's delta shift rule. To unpack that you'll have to -/// read the paper. -#[derive(Clone, Debug)] -pub struct BoyerMooreSearch { - /// The pattern we are going to look for in the haystack. - pattern: Vec, - - /// The skip table for the skip loop. - /// - /// Maps the character at the end of the input - /// to a shift. - skip_table: Vec, - - /// The guard character (least frequently occurring char). - guard: u8, - /// The reverse-index of the guard character in the pattern. - guard_reverse_idx: usize, - - /// Daniel Sunday's mini generalized delta2 shift table. - /// - /// We use a skip loop, so we only have to provide a shift - /// for the skip char (last char). This is why it is a mini - /// shift rule. - md2_shift: usize, -} - -impl BoyerMooreSearch { - /// Create a new string searcher, performing whatever - /// compilation steps are required. 
- fn new(pattern: Vec) -> Self { - debug_assert!(!pattern.is_empty()); - - let (g, gi) = Self::select_guard(pattern.as_slice()); - let skip_table = Self::compile_skip_table(pattern.as_slice()); - let md2_shift = Self::compile_md2_shift(pattern.as_slice()); - BoyerMooreSearch { - pattern: pattern, - skip_table: skip_table, - guard: g, - guard_reverse_idx: gi, - md2_shift: md2_shift, - } - } - - /// Find the pattern in `haystack`, returning the offset - /// of the start of the first occurrence of the pattern - /// in `haystack`. - #[inline] - fn find(&self, haystack: &[u8]) -> Option { - if haystack.len() < self.pattern.len() { - return None; - } - - let mut window_end = self.pattern.len() - 1; - - // Inspired by the grep source. It is a way - // to do correct loop unrolling without having to place - // a crashpad of terminating charicters at the end in - // the way described in the Fast String Searching paper. - const NUM_UNROLL: usize = 10; - // 1 for the initial position, and 1 for the md2 shift - let short_circut = (NUM_UNROLL + 2) * self.pattern.len(); - - if haystack.len() > short_circut { - // just 1 for the md2 shift - let backstop = - haystack.len() - ((NUM_UNROLL + 1) * self.pattern.len()); - loop { - window_end = - match self.skip_loop(haystack, window_end, backstop) { - Some(i) => i, - None => return None, - }; - if window_end >= backstop { - break; - } - - if self.check_match(haystack, window_end) { - return Some(window_end - (self.pattern.len() - 1)); - } else { - let skip = self.skip_table[haystack[window_end] as usize]; - window_end += - if skip == 0 { self.md2_shift } else { skip }; - continue; - } - } - } - - // now process the input after the backstop - while window_end < haystack.len() { - let mut skip = self.skip_table[haystack[window_end] as usize]; - if skip == 0 { - if self.check_match(haystack, window_end) { - return Some(window_end - (self.pattern.len() - 1)); - } else { - skip = self.md2_shift; - } - } - window_end += skip; - } - - None - } 
- - fn len(&self) -> usize { - return self.pattern.len(); - } - - /// The key heuristic behind which the BoyerMooreSearch lives. - /// - /// See `rust-lang/regex/issues/408`. - /// - /// Tuned Boyer-Moore is actually pretty slow! It turns out a handrolled - /// platform-specific memchr routine with a bit of frequency - /// analysis sprinkled on top actually wins most of the time. - /// However, there are a few cases where Tuned Boyer-Moore still - /// wins. - /// - /// If the haystack is random, frequency analysis doesn't help us, - /// so Boyer-Moore will win for sufficiently large needles. - /// Unfortunately, there is no obvious way to determine this - /// ahead of time. - /// - /// If the pattern itself consists of very common characters, - /// frequency analysis won't get us anywhere. The most extreme - /// example of this is a pattern like `eeeeeeeeeeeeeeee`. Fortunately, - /// this case is wholly determined by the pattern, so we can actually - /// implement the heuristic. - /// - /// A third case is if the pattern is sufficiently long. The idea - /// here is that once the pattern gets long enough the Tuned - /// Boyer-Moore skip loop will start making strides long enough - /// to beat the asm deep magic that is memchr. - fn should_use(pattern: &[u8]) -> bool { - // The minimum pattern length required to use TBM. - const MIN_LEN: usize = 9; - // The minimum frequency rank (lower is rarer) that every byte in the - // pattern must have in order to use TBM. That is, if the pattern - // contains _any_ byte with a lower rank, then TBM won't be used. - const MIN_CUTOFF: usize = 150; - // The maximum frequency rank for any byte. - const MAX_CUTOFF: usize = 255; - // The scaling factor used to determine the actual cutoff frequency - // to use (keeping in mind that the minimum frequency rank is bounded - // by MIN_CUTOFF). This scaling factor is an attempt to make TBM more - // likely to be used as the pattern grows longer. 
That is, longer - // patterns permit somewhat less frequent bytes than shorter patterns, - // under the assumption that TBM gets better as the pattern gets - // longer. - const LEN_CUTOFF_PROPORTION: usize = 4; - - let scaled_rank = pattern.len().wrapping_mul(LEN_CUTOFF_PROPORTION); - let cutoff = cmp::max( - MIN_CUTOFF, - MAX_CUTOFF - cmp::min(MAX_CUTOFF, scaled_rank), - ); - // The pattern must be long enough to be worthwhile. e.g., memchr will - // be faster on `e` because it is short even though e is quite common. - pattern.len() > MIN_LEN - // all the bytes must be more common than the cutoff. - && pattern.iter().all(|c| freq_rank(*c) >= cutoff) - } - - /// Check to see if there is a match at the given position - #[inline] - fn check_match(&self, haystack: &[u8], window_end: usize) -> bool { - // guard test - if haystack[window_end - self.guard_reverse_idx] != self.guard { - return false; - } - - // match loop - let window_start = window_end - (self.pattern.len() - 1); - for i in 0..self.pattern.len() { - if self.pattern[i] != haystack[window_start + i] { - return false; - } - } - - true - } - - /// Skip forward according to the shift table. - /// - /// Returns the offset of the next occurrence - /// of the last char in the pattern, or the none - /// if it never reappears. If `skip_loop` hits the backstop - /// it will leave early. - #[inline] - fn skip_loop( - &self, - haystack: &[u8], - mut window_end: usize, - backstop: usize, - ) -> Option { - let window_end_snapshot = window_end; - let skip_of = |we: usize| -> usize { - // Unsafe might make this faster, but the benchmarks - // were hard to interpret. 
- self.skip_table[haystack[we] as usize] - }; - - loop { - let mut skip = skip_of(window_end); - window_end += skip; - skip = skip_of(window_end); - window_end += skip; - if skip != 0 { - skip = skip_of(window_end); - window_end += skip; - skip = skip_of(window_end); - window_end += skip; - skip = skip_of(window_end); - window_end += skip; - if skip != 0 { - skip = skip_of(window_end); - window_end += skip; - skip = skip_of(window_end); - window_end += skip; - skip = skip_of(window_end); - window_end += skip; - if skip != 0 { - skip = skip_of(window_end); - window_end += skip; - skip = skip_of(window_end); - window_end += skip; - - // If ten iterations did not make at least 16 words - // worth of progress, we just fall back on memchr. - if window_end - window_end_snapshot - > 16 * mem::size_of::() - { - // Returning a window_end >= backstop will - // immediatly break us out of the inner loop in - // `find`. - if window_end >= backstop { - return Some(window_end); - } - - continue; // we made enough progress - } else { - // In case we are already there, and so that - // we will catch the guard char. - window_end = window_end - .checked_sub(1 + self.guard_reverse_idx) - .unwrap_or(0); - - match memchr(self.guard, &haystack[window_end..]) { - None => return None, - Some(g_idx) => { - return Some( - window_end - + g_idx - + self.guard_reverse_idx, - ); - } - } - } - } - } - } - - return Some(window_end); - } - } - - /// Compute the ufast skip table. - fn compile_skip_table(pattern: &[u8]) -> Vec { - let mut tab = vec![pattern.len(); 256]; - - // For every char in the pattern, we write a skip - // that will line us up with the rightmost occurrence. - // - // N.B. the sentinel (0) is written by the last - // loop iteration. - for (i, c) in pattern.iter().enumerate() { - tab[*c as usize] = (pattern.len() - 1) - i; - } - - tab - } - - /// Select the guard character based off of the precomputed - /// frequency table. 
- fn select_guard(pattern: &[u8]) -> (u8, usize) { - let mut rarest = pattern[0]; - let mut rarest_rev_idx = pattern.len() - 1; - for (i, c) in pattern.iter().enumerate() { - if freq_rank(*c) < freq_rank(rarest) { - rarest = *c; - rarest_rev_idx = (pattern.len() - 1) - i; - } - } - - (rarest, rarest_rev_idx) - } - - /// If there is another occurrence of the skip - /// char, shift to it, otherwise just shift to - /// the next window. - fn compile_md2_shift(pattern: &[u8]) -> usize { - let shiftc = *pattern.last().unwrap(); - - // For a pattern of length 1 we will never apply the - // shift rule, so we use a poison value on the principle - // that failing fast is a good thing. - if pattern.len() == 1 { - return 0xDEADBEAF; - } - - let mut i = pattern.len() - 2; - while i > 0 { - if pattern[i] == shiftc { - return (pattern.len() - 1) - i; - } - i -= 1; - } - - // The skip char never re-occurs in the pattern, so - // we can just shift the whole window length. - pattern.len() - 1 - } - - fn approximate_size(&self) -> usize { - (self.pattern.len() * mem::size_of::()) - + (256 * mem::size_of::()) // skip table - } -} - -fn freq_rank(b: u8) -> usize { - BYTE_FREQUENCIES[b as usize] as usize -} - -#[cfg(test)] -mod tests { - use super::{BoyerMooreSearch, FreqyPacked}; - - // - // Unit Tests - // - - // The "hello, world" of string searching - #[test] - fn bm_find_subs() { - let searcher = BoyerMooreSearch::new(Vec::from(&b"pattern"[..])); - let haystack = b"I keep seeing patterns in this text"; - assert_eq!(14, searcher.find(haystack).unwrap()); - } - - #[test] - fn bm_find_no_subs() { - let searcher = BoyerMooreSearch::new(Vec::from(&b"pattern"[..])); - let haystack = b"I keep seeing needles in this text"; - assert_eq!(None, searcher.find(haystack)); - } - - // - // Regression Tests - // - - #[test] - fn bm_skip_reset_bug() { - let haystack = vec![0, 0, 0, 0, 0, 1, 1, 0]; - let needle = vec![0, 1, 1, 0]; - - let searcher = BoyerMooreSearch::new(needle); - let offset = 
searcher.find(haystack.as_slice()).unwrap(); - assert_eq!(4, offset); - } - - #[test] - fn bm_backstop_underflow_bug() { - let haystack = vec![0, 0]; - let needle = vec![0, 0]; - - let searcher = BoyerMooreSearch::new(needle); - let offset = searcher.find(haystack.as_slice()).unwrap(); - assert_eq!(0, offset); - } - - #[test] - fn bm_naive_off_by_one_bug() { - let haystack = vec![91]; - let needle = vec![91]; - - let naive_offset = naive_find(&needle, &haystack).unwrap(); - assert_eq!(0, naive_offset); - } - - #[test] - fn bm_memchr_fallback_indexing_bug() { - let mut haystack = vec![ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]; - let needle = vec![1, 1, 1, 1, 32, 32, 87]; - let needle_start = haystack.len(); - haystack.extend(needle.clone()); - - let searcher = BoyerMooreSearch::new(needle); - assert_eq!(needle_start, searcher.find(haystack.as_slice()).unwrap()); - } - - #[test] - fn bm_backstop_boundary() { - let haystack = b"\ -// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -e_data.clone_created(entity_id, entity_to_add.entity_id); -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -" - .to_vec(); - let needle = b"clone_created".to_vec(); - - let searcher = BoyerMooreSearch::new(needle); - let result = searcher.find(&haystack); - assert_eq!(Some(43), result); - } - - #[test] - fn bm_win_gnu_indexing_bug() { - let haystack_raw = vec![ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]; - let needle = vec![1, 1, 1, 1, 1, 1, 1]; - let haystack = 
haystack_raw.as_slice(); - - BoyerMooreSearch::new(needle.clone()).find(haystack); - } - - // - // QuickCheck Properties - // - - use quickcheck::TestResult; - - fn naive_find(needle: &[u8], haystack: &[u8]) -> Option { - assert!(needle.len() <= haystack.len()); - - for i in 0..(haystack.len() - (needle.len() - 1)) { - if haystack[i] == needle[0] - && &haystack[i..(i + needle.len())] == needle - { - return Some(i); - } - } - - None - } - - quickcheck! { - fn qc_bm_equals_nieve_find(pile1: Vec, pile2: Vec) -> TestResult { - if pile1.len() == 0 || pile2.len() == 0 { - return TestResult::discard(); - } - - let (needle, haystack) = if pile1.len() < pile2.len() { - (pile1, pile2.as_slice()) - } else { - (pile2, pile1.as_slice()) - }; - - let searcher = BoyerMooreSearch::new(needle.clone()); - TestResult::from_bool( - searcher.find(haystack) == naive_find(&needle, haystack)) - } - - fn qc_bm_equals_single(pile1: Vec, pile2: Vec) -> TestResult { - if pile1.len() == 0 || pile2.len() == 0 { - return TestResult::discard(); - } - - let (needle, haystack) = if pile1.len() < pile2.len() { - (pile1, pile2.as_slice()) - } else { - (pile2, pile1.as_slice()) - }; - - let bm_searcher = BoyerMooreSearch::new(needle.clone()); - let freqy_memchr = FreqyPacked::new(needle); - TestResult::from_bool( - bm_searcher.find(haystack) == freqy_memchr.find(haystack)) - } - - fn qc_bm_finds_trailing_needle( - haystack_pre: Vec, - needle: Vec - ) -> TestResult { - if needle.len() == 0 { - return TestResult::discard(); - } - - let mut haystack = haystack_pre.clone(); - let searcher = BoyerMooreSearch::new(needle.clone()); - - if haystack.len() >= needle.len() && - searcher.find(haystack.as_slice()).is_some() { - return TestResult::discard(); - } - - haystack.extend(needle.clone()); - - // What if the the tail of the haystack can start the - // needle? 
- let start = haystack_pre.len() - .checked_sub(needle.len()) - .unwrap_or(0); - for i in 0..(needle.len() - 1) { - if searcher.find(&haystack[(i + start)..]).is_some() { - return TestResult::discard(); - } - } - - TestResult::from_bool( - searcher.find(haystack.as_slice()) - .map(|x| x == haystack_pre.len()) - .unwrap_or(false)) - } - - // qc_equals_* is only testing the negative case as @burntsushi - // pointed out in https://github.com/rust-lang/regex/issues/446. - // This quickcheck prop represents an effort to force testing of - // the positive case. qc_bm_finds_first and qc_bm_finds_trailing_needle - // already check some of the positive cases, but they don't cover - // cases where the needle is in the middle of haystack. This prop - // fills that hole. - fn qc_bm_finds_subslice( - haystack: Vec, - needle_start: usize, - needle_length: usize - ) -> TestResult { - if haystack.len() == 0 { - return TestResult::discard(); - } - - let needle_start = needle_start % haystack.len(); - let needle_length = needle_length % (haystack.len() - needle_start); - - if needle_length == 0 { - return TestResult::discard(); - } - - let needle = &haystack[needle_start..(needle_start + needle_length)]; - - let bm_searcher = BoyerMooreSearch::new(needle.to_vec()); - - let start = naive_find(&needle, &haystack); - match start { - None => TestResult::from_bool(false), - Some(nf_start) => - TestResult::from_bool( - nf_start <= needle_start - && bm_searcher.find(&haystack) == start - ) - } - } - - fn qc_bm_finds_first(needle: Vec) -> TestResult { - if needle.len() == 0 { - return TestResult::discard(); - } - - let mut haystack = needle.clone(); - let searcher = BoyerMooreSearch::new(needle.clone()); - haystack.extend(needle); - - TestResult::from_bool( - searcher.find(haystack.as_slice()) - .map(|x| x == 0) - .unwrap_or(false)) - } - } -} diff --git a/src/pikevm.rs b/src/pikevm.rs index 299087da82..9a14240860 100644 --- a/src/pikevm.rs +++ b/src/pikevm.rs @@ -17,11 +17,11 @@ use 
std::mem; -use exec::ProgramCache; -use input::{Input, InputAt}; -use prog::{InstPtr, Program}; -use re_trait::Slot; -use sparse::SparseSet; +use crate::exec::ProgramCache; +use crate::input::{Input, InputAt}; +use crate::prog::{InstPtr, Program}; +use crate::re_trait::Slot; +use crate::sparse::SparseSet; /// An NFA simulation matching engine. #[derive(Debug)] @@ -231,7 +231,7 @@ impl<'r, I: Input> Fsm<'r, I> { at: InputAt, at_next: InputAt, ) -> bool { - use prog::Inst::*; + use crate::prog::Inst::*; match self.prog[ip] { Match(match_slot) => { if match_slot < matches.len() { @@ -300,7 +300,7 @@ impl<'r, I: Input> Fsm<'r, I> { // traverse the set of states. We only push to the stack when we // absolutely need recursion (restoring captures or following a // branch). - use prog::Inst::*; + use crate::prog::Inst::*; loop { // Don't visit states we've already added. if nlist.set.contains(ip) { diff --git a/src/pool.rs b/src/pool.rs index a506ee9fab..6a6f15b194 100644 --- a/src/pool.rs +++ b/src/pool.rs @@ -154,7 +154,7 @@ pub struct Pool { unsafe impl Sync for Pool {} impl ::std::fmt::Debug for Pool { - fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { f.debug_struct("Pool") .field("stack", &self.stack) .field("owner", &self.owner) @@ -168,7 +168,7 @@ impl ::std::fmt::Debug for Pool { /// The purpose of the guard is to use RAII to automatically put the value back /// in the pool once it's dropped. #[derive(Debug)] -pub struct PoolGuard<'a, T: 'a + Send> { +pub struct PoolGuard<'a, T: Send> { /// The pool that this guard is attached to. pool: &'a Pool, /// This is None when the guard represents the special "owned" value. In @@ -193,7 +193,7 @@ impl Pool { /// the value to go back into the pool) and then calling get again is NOT /// guaranteed to return the same value received in the first get call. 
#[cfg_attr(feature = "perf-inline", inline(always))] - pub fn get(&self) -> PoolGuard { + pub fn get(&self) -> PoolGuard<'_, T> { // Our fast path checks if the caller is the thread that "owns" this // pool. Or stated differently, whether it is the first thread that // tried to extract a value from the pool. If it is, then we can return @@ -217,7 +217,7 @@ impl Pool { /// /// If the pool has no owner, then this will set the owner. #[cold] - fn get_slow(&self, caller: usize, owner: usize) -> PoolGuard { + fn get_slow(&self, caller: usize, owner: usize) -> PoolGuard<'_, T> { use std::sync::atomic::Ordering::Relaxed; if owner == 0 { @@ -284,7 +284,7 @@ mod tests { #[test] fn oibits() { - use exec::ProgramCache; + use crate::exec::ProgramCache; fn has_oibits() {} has_oibits::>(); diff --git a/src/prog.rs b/src/prog.rs index a2b89c99eb..475a8112aa 100644 --- a/src/prog.rs +++ b/src/prog.rs @@ -6,8 +6,8 @@ use std::ops::Deref; use std::slice; use std::sync::Arc; -use input::Char; -use literal::LiteralSearcher; +use crate::input::Char; +use crate::literal::LiteralSearcher; /// `InstPtr` represents the index of an instruction in a regex program. pub type InstPtr = usize; @@ -168,7 +168,7 @@ impl Deref for Program { } impl fmt::Debug for Program { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use self::Inst::*; fn with_goto(cur: usize, goto: usize, fmtd: String) -> String { diff --git a/src/re_builder.rs b/src/re_builder.rs index fc140f8bd0..ee6383690d 100644 --- a/src/re_builder.rs +++ b/src/re_builder.rs @@ -37,10 +37,10 @@ macro_rules! define_builder { ($name:ident, $regex_mod:ident, $only_utf8:expr) => { pub mod $name { use super::RegexOptions; - use error::Error; - use exec::ExecBuilder; + use crate::error::Error; + use crate::exec::ExecBuilder; - use $regex_mod::Regex; + use crate::$regex_mod::Regex; /// A configurable builder for a regular expression. /// @@ -235,10 +235,10 @@ macro_rules! 
define_set_builder { ($name:ident, $regex_mod:ident, $only_utf8:expr) => { pub mod $name { use super::RegexOptions; - use error::Error; - use exec::ExecBuilder; + use crate::error::Error; + use crate::exec::ExecBuilder; - use re_set::$regex_mod::RegexSet; + use crate::re_set::$regex_mod::RegexSet; /// A configurable builder for a set of regular expressions. /// diff --git a/src/re_bytes.rs b/src/re_bytes.rs index 204a70ad3e..ae55d6d256 100644 --- a/src/re_bytes.rs +++ b/src/re_bytes.rs @@ -6,13 +6,13 @@ use std::ops::{Index, Range}; use std::str::FromStr; use std::sync::Arc; -use find_byte::find_byte; +use crate::find_byte::find_byte; -use error::Error; -use exec::{Exec, ExecNoSync}; -use expand::expand_bytes; -use re_builder::bytes::RegexBuilder; -use re_trait::{self, RegularExpression, SubCapturesPosIter}; +use crate::error::Error; +use crate::exec::{Exec, ExecNoSync}; +use crate::expand::expand_bytes; +use crate::re_builder::bytes::RegexBuilder; +use crate::re_trait::{self, RegularExpression, SubCapturesPosIter}; /// Match represents a single match of a regex in a haystack. /// @@ -79,14 +79,14 @@ pub struct Regex(Exec); impl fmt::Display for Regex { /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.as_str()) } } impl fmt::Debug for Regex { /// Shows the original regular expression. 
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self, f) } } @@ -133,7 +133,7 @@ impl Regex { /// bytes: /// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # fn main() { /// let text = b"I categorically deny having triskaidekaphobia."; /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text)); @@ -156,7 +156,7 @@ impl Regex { /// ASCII word bytes: /// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # fn main() { /// let text = b"I categorically deny having triskaidekaphobia."; /// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap(); @@ -177,7 +177,7 @@ impl Regex { /// word bytes: /// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # fn main() { /// let text = b"Retroactively relinquishing remunerations is reprehensible."; /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) { @@ -205,7 +205,7 @@ impl Regex { /// year separately. 
/// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # fn main() { /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); /// let text = b"Not my favorite movie: 'Citizen Kane' (1941)."; @@ -227,7 +227,7 @@ impl Regex { /// We can make this example a bit clearer by using *named* capture groups: /// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # fn main() { /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") /// .unwrap(); @@ -271,7 +271,7 @@ impl Regex { /// some text, where the movie is formatted like "'Title' (xxxx)": /// /// ```rust - /// # extern crate regex; use std::str; use regex::bytes::Regex; + /// # use std::str; use regex::bytes::Regex; /// # fn main() { /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") /// .unwrap(); @@ -305,7 +305,7 @@ impl Regex { /// To split a string delimited by arbitrary amounts of spaces or tabs: /// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # fn main() { /// let re = Regex::new(r"[ \t]+").unwrap(); /// let fields: Vec<&[u8]> = re.split(b"a b \t c\td e").collect(); @@ -331,7 +331,7 @@ impl Regex { /// Get the first two words in some text: /// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # fn main() { /// let re = Regex::new(r"\W+").unwrap(); /// let fields: Vec<&[u8]> = re.splitn(b"Hey! 
How are you?", 3).collect(); @@ -379,7 +379,7 @@ impl Regex { /// In typical usage, this can just be a normal byte string: /// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # fn main() { /// let re = Regex::new("[^01]+").unwrap(); /// assert_eq!(re.replace(b"1078910", &b""[..]), &b"1010"[..]); @@ -392,7 +392,7 @@ impl Regex { /// group matches easily: /// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # use regex::bytes::Captures; fn main() { /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); /// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| { @@ -411,7 +411,7 @@ impl Regex { /// with named capture groups: /// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # fn main() { /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap(); /// let result = re.replace(b"Springsteen, Bruce", &b"$first $last"[..]); @@ -428,7 +428,7 @@ impl Regex { /// underscore: /// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # fn main() { /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap(); /// let result = re.replace(b"deep fried", &b"${first}_$second"[..]); @@ -445,7 +445,7 @@ impl Regex { /// byte string with `NoExpand`: /// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # fn main() { /// use regex::bytes::NoExpand; /// @@ -546,7 +546,7 @@ impl Regex { /// `a`. /// /// ```rust - /// # extern crate regex; use regex::bytes::Regex; + /// # use regex::bytes::Regex; /// # fn main() { /// let text = b"aaaaa"; /// let pos = Regex::new(r"a+").unwrap().shortest_match(text); @@ -658,7 +658,7 @@ impl Regex { } /// Returns an iterator over the capture names. 
- pub fn capture_names(&self) -> CaptureNames { + pub fn capture_names(&self) -> CaptureNames<'_> { CaptureNames(self.0.capture_names().iter()) } @@ -990,15 +990,15 @@ impl<'t> Captures<'t> { } impl<'t> fmt::Debug for Captures<'t> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("Captures").field(&CapturesDebug(self)).finish() } } -struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>); +struct CapturesDebug<'c, 't>(&'c Captures<'t>); impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn escape_bytes(bytes: &[u8]) -> String { let mut s = String::new(); for &b in bytes { @@ -1084,7 +1084,7 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> { /// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and /// the lifetime `'t` corresponds to the originally matched text. #[derive(Clone, Debug)] -pub struct SubCaptureMatches<'c, 't: 'c> { +pub struct SubCaptureMatches<'c, 't> { caps: &'c Captures<'t>, it: SubCapturesPosIter<'c>, } @@ -1116,7 +1116,7 @@ pub trait Replacer { /// /// For example, a no-op replacement would be /// `dst.extend(&caps[0])`. - fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>); + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>); /// Return a fixed unchanging replacement byte string. /// @@ -1159,10 +1159,10 @@ pub trait Replacer { /// /// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref). 
#[derive(Debug)] -pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R); +pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { - fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { self.0.replace_append(caps, dst) } fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { @@ -1171,56 +1171,56 @@ impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { } impl<'a> Replacer for &'a [u8] { - fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { caps.expand(*self, dst); } - fn no_expansion(&mut self) -> Option<Cow<[u8]>> { + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { no_expansion(self) } } impl<'a> Replacer for &'a Vec<u8> { - fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { caps.expand(*self, dst); } - fn no_expansion(&mut self) -> Option<Cow<[u8]>> { + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { no_expansion(self) } } impl Replacer for Vec<u8> { - fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { caps.expand(self, dst); } - fn no_expansion(&mut self) -> Option<Cow<[u8]>> { + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { no_expansion(self) } } impl<'a> Replacer for Cow<'a, [u8]> { - fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { caps.expand(self.as_ref(), dst); } - fn no_expansion(&mut self) -> Option<Cow<[u8]>> { + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { no_expansion(self) } } impl<'a> Replacer for &'a Cow<'a, [u8]> { - fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) { + fn 
replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { caps.expand(self.as_ref(), dst); } - fn no_expansion(&mut self) -> Option<Cow<[u8]>> { + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { no_expansion(self) } } -fn no_expansion<T: AsRef<[u8]>>(t: &T) -> Option<Cow<[u8]>> { +fn no_expansion<T: AsRef<[u8]>>(t: &T) -> Option<Cow<'_, [u8]>> { let s = t.as_ref(); match find_byte(b'$', s) { Some(_) => None, @@ -1230,10 +1230,10 @@ fn no_expansion<T: AsRef<[u8]>>(t: &T) -> Option<Cow<[u8]>> { impl<F, T> Replacer for F where - F: FnMut(&Captures) -> T, + F: FnMut(&Captures<'_>) -> T, T: AsRef<[u8]>, { - fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { dst.extend_from_slice((*self)(caps).as_ref()); } } @@ -1250,11 +1250,11 @@ where pub struct NoExpand<'t>(pub &'t [u8]); impl<'t> Replacer for NoExpand<'t> { - fn replace_append(&mut self, _: &Captures, dst: &mut Vec<u8>) { + fn replace_append(&mut self, _: &Captures<'_>, dst: &mut Vec<u8>) { dst.extend_from_slice(self.0); } - fn no_expansion(&mut self) -> Option<Cow<[u8]>> { + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { Some(Cow::Borrowed(self.0)) } } diff --git a/src/re_set.rs b/src/re_set.rs index 5cb47addb1..73d59532ed 100644 --- a/src/re_set.rs +++ b/src/re_set.rs @@ -7,10 +7,10 @@ macro_rules! define_set { use std::slice; use std::vec; - use error::Error; - use exec::Exec; - use re_builder::$builder_mod::RegexSetBuilder; - use re_trait::RegularExpression; + use crate::error::Error; + use crate::exec::Exec; + use crate::re_builder::$builder_mod::RegexSetBuilder; + use crate::re_trait::RegularExpression; /// Match multiple (possibly overlapping) regular expressions in a single scan. 
/// @@ -292,7 +292,7 @@ impl SetMatches { /// This will always produces matches in ascending order of index, where /// the index corresponds to the index of the regex that matched with /// respect to its position when initially building the set. - pub fn iter(&self) -> SetMatchesIter { + pub fn iter(&self) -> SetMatchesIter<'_> { SetMatchesIter((&*self.matches).into_iter().enumerate()) } } @@ -405,7 +405,7 @@ impl From<Exec> for RegexSet { } impl fmt::Debug for RegexSet { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "RegexSet({:?})", self.0.regex_strings()) } } diff --git a/src/re_trait.rs b/src/re_trait.rs index ea6be9c7c5..680aa54594 100644 --- a/src/re_trait.rs +++ b/src/re_trait.rs @@ -30,7 +30,7 @@ impl Locations { /// Creates an iterator of all the capture group positions in order of /// appearance in the regular expression. Positions are byte indices /// in terms of the original string matched. - pub fn iter(&self) -> SubCapturesPosIter { + pub fn iter(&self) -> SubCapturesPosIter<'_> { SubCapturesPosIter { idx: 0, locs: self } } @@ -138,13 +138,13 @@ pub trait RegularExpression: Sized + fmt::Debug { /// Returns an iterator over all non-overlapping successive leftmost-first /// matches. - fn find_iter(self, text: &Self::Text) -> Matches<Self> { + fn find_iter(self, text: &Self::Text) -> Matches<'_, Self> { Matches { re: self, text: text, last_end: 0, last_match: None } } /// Returns an iterator over all non-overlapping successive leftmost-first /// matches with captures. 
- fn captures_iter(self, text: &Self::Text) -> CaptureMatches<Self> { + fn captures_iter(self, text: &Self::Text) -> CaptureMatches<'_, Self> { CaptureMatches(self.find_iter(text)) } } diff --git a/src/re_unicode.rs b/src/re_unicode.rs index 1b478cdbad..142c78fb1c 100644 --- a/src/re_unicode.rs +++ b/src/re_unicode.rs @@ -6,21 +6,20 @@ use std::ops::{Index, Range}; use std::str::FromStr; use std::sync::Arc; -use find_byte::find_byte; -use syntax; +use crate::find_byte::find_byte; -use error::Error; -use exec::{Exec, ExecNoSyncStr}; -use expand::expand_str; -use re_builder::unicode::RegexBuilder; -use re_trait::{self, RegularExpression, SubCapturesPosIter}; +use crate::error::Error; +use crate::exec::{Exec, ExecNoSyncStr}; +use crate::expand::expand_str; +use crate::re_builder::unicode::RegexBuilder; +use crate::re_trait::{self, RegularExpression, SubCapturesPosIter}; /// Escapes all regular expression meta characters in `text`. /// /// The string returned may be safely used as a literal in a regular /// expression. pub fn escape(text: &str) -> String { - syntax::escape(text) + regex_syntax::escape(text) } /// Match represents a single match of a regex in a haystack. @@ -138,14 +137,14 @@ pub struct Regex(Exec); impl fmt::Display for Regex { /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.as_str()) } } impl fmt::Debug for Regex { /// Shows the original regular expression. 
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self, f) } } @@ -189,7 +188,7 @@ impl Regex { /// Unicode word characters: /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// let text = "I categorically deny having triskaidekaphobia."; /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text)); @@ -212,7 +211,7 @@ impl Regex { /// Unicode word characters: /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// let text = "I categorically deny having triskaidekaphobia."; /// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap(); @@ -234,7 +233,7 @@ impl Regex { /// word characters: /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// let text = "Retroactively relinquishing remunerations is reprehensible."; /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) { @@ -262,7 +261,7 @@ impl Regex { /// year separately. 
/// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); /// let text = "Not my favorite movie: 'Citizen Kane' (1941)."; @@ -284,7 +283,7 @@ impl Regex { /// We can make this example a bit clearer by using *named* capture groups: /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") /// .unwrap(); @@ -328,7 +327,7 @@ impl Regex { /// some text, where the movie is formatted like "'Title' (xxxx)": /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") /// .unwrap(); @@ -361,7 +360,7 @@ impl Regex { /// To split a string delimited by arbitrary amounts of spaces or tabs: /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// let re = Regex::new(r"[ \t]+").unwrap(); /// let fields: Vec<&str> = re.split("a b \t c\td e").collect(); @@ -385,7 +384,7 @@ impl Regex { /// Get the first two words in some text: /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// let re = Regex::new(r"\W+").unwrap(); /// let fields: Vec<&str> = re.splitn("Hey! 
How are you?", 3).collect(); @@ -432,7 +431,7 @@ impl Regex { /// In typical usage, this can just be a normal string: /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// let re = Regex::new("[^01]+").unwrap(); /// assert_eq!(re.replace("1078910", ""), "1010"); @@ -445,7 +444,7 @@ impl Regex { /// capturing group matches easily: /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # use regex::Captures; fn main() { /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| { @@ -461,7 +460,7 @@ impl Regex { /// with named capture groups: /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap(); /// let result = re.replace("Springsteen, Bruce", "$first $last"); @@ -478,7 +477,7 @@ impl Regex { /// underscore: /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap(); /// let result = re.replace("deep fried", "${first}_$second"); @@ -495,7 +494,7 @@ impl Regex { /// byte string with `NoExpand`: /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// use regex::NoExpand; /// @@ -605,7 +604,7 @@ impl Regex { /// `a`. /// /// ```rust - /// # extern crate regex; use regex::Regex; + /// # use regex::Regex; /// # fn main() { /// let text = "aaaaa"; /// let pos = Regex::new(r"a+").unwrap().shortest_match(text); @@ -717,7 +716,7 @@ impl Regex { } /// Returns an iterator over the capture names. 
- pub fn capture_names(&self) -> CaptureNames { + pub fn capture_names(&self) -> CaptureNames<'_> { CaptureNames(self.0.capture_names().iter()) } @@ -1001,15 +1000,15 @@ impl<'t> Captures<'t> { } impl<'t> fmt::Debug for Captures<'t> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("Captures").field(&CapturesDebug(self)).finish() } } -struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>); +struct CapturesDebug<'c, 't>(&'c Captures<'t>); impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // We'd like to show something nice here, even if it means an // allocation to build a reverse index. let slot_to_name: HashMap<&usize, &String> = @@ -1080,7 +1079,7 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> { /// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and /// the lifetime `'t` corresponds to the originally matched text. #[derive(Clone, Debug)] -pub struct SubCaptureMatches<'c, 't: 'c> { +pub struct SubCaptureMatches<'c, 't> { caps: &'c Captures<'t>, it: SubCapturesPosIter<'c>, } @@ -1158,7 +1157,7 @@ pub trait Replacer { /// /// For example, a no-op replacement would be /// `dst.push_str(caps.get(0).unwrap().as_str())`. - fn replace_append(&mut self, caps: &Captures, dst: &mut String); + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String); /// Return a fixed unchanging replacement string. /// @@ -1201,68 +1200,68 @@ pub trait Replacer { /// /// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref). 
#[derive(Debug)] -pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R); +pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { - fn replace_append(&mut self, caps: &Captures, dst: &mut String) { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { self.0.replace_append(caps, dst) } - fn no_expansion(&mut self) -> Option<Cow<str>> { + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { self.0.no_expansion() } } impl<'a> Replacer for &'a str { - fn replace_append(&mut self, caps: &Captures, dst: &mut String) { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { caps.expand(*self, dst); } - fn no_expansion(&mut self) -> Option<Cow<str>> { + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { no_expansion(self) } } impl<'a> Replacer for &'a String { - fn replace_append(&mut self, caps: &Captures, dst: &mut String) { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { self.as_str().replace_append(caps, dst) } - fn no_expansion(&mut self) -> Option<Cow<str>> { + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { no_expansion(self) } } impl Replacer for String { - fn replace_append(&mut self, caps: &Captures, dst: &mut String) { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { self.as_str().replace_append(caps, dst) } - fn no_expansion(&mut self) -> Option<Cow<str>> { + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { no_expansion(self) } } impl<'a> Replacer for Cow<'a, str> { - fn replace_append(&mut self, caps: &Captures, dst: &mut String) { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { self.as_ref().replace_append(caps, dst) } - fn no_expansion(&mut self) -> Option<Cow<str>> { + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { no_expansion(self) } } impl<'a> Replacer for &'a Cow<'a, str> { - fn replace_append(&mut self, caps: &Captures, dst: &mut String) { + fn replace_append(&mut 
self, caps: &Captures<'_>, dst: &mut String) { self.as_ref().replace_append(caps, dst) } - fn no_expansion(&mut self) -> Option<Cow<str>> { + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { no_expansion(self) } } -fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<str>> { +fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<'_, str>> { let s = t.as_ref(); match find_byte(b'$', s.as_bytes()) { Some(_) => None, @@ -1272,10 +1271,10 @@ fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<str>> { impl<F, T> Replacer for F where - F: FnMut(&Captures) -> T, + F: FnMut(&Captures<'_>) -> T, T: AsRef<str>, { - fn replace_append(&mut self, caps: &Captures, dst: &mut String) { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { dst.push_str((*self)(caps).as_ref()); } } @@ -1292,11 +1291,11 @@ where pub struct NoExpand<'t>(pub &'t str); impl<'t> Replacer for NoExpand<'t> { - fn replace_append(&mut self, _: &Captures, dst: &mut String) { + fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) { dst.push_str(self.0); } - fn no_expansion(&mut self) -> Option<Cow<str>> { + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { Some(Cow::Borrowed(self.0)) } } diff --git a/src/sparse.rs b/src/sparse.rs index 421d6b6f1e..98b726613d 100644 --- a/src/sparse.rs +++ b/src/sparse.rs @@ -62,7 +62,7 @@ impl SparseSet { } impl fmt::Debug for SparseSet { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "SparseSet({:?})", self.dense) } } diff --git a/tests/replace.rs b/tests/replace.rs index 700aff2430..1dc6106357 100644 --- a/tests/replace.rs +++ b/tests/replace.rs @@ -94,7 +94,7 @@ replace!( replace, r"([0-9]+)", "age: 26", - |captures: &Captures| { + |captures: &Captures<'_>| { match_text!(captures.get(1).unwrap())[0..1].to_owned() }, "age: 2" @@ -104,7 +104,7 @@ replace!( replace, r"[0-9]+", "age: 26", - |_captures: &Captures| t!("Z").to_owned(), + |_captures: &Captures<'_>| 
t!("Z").to_owned(), "age: Z" ); diff --git a/tests/test_backtrack.rs b/tests/test_backtrack.rs index 617185f46f..fb934e2d8f 100644 --- a/tests/test_backtrack.rs +++ b/tests/test_backtrack.rs @@ -1,8 +1,5 @@ #![cfg_attr(feature = "pattern", feature(pattern))] -extern crate rand; -extern crate regex; - macro_rules! regex_new { ($re:expr) => {{ use regex::internal::ExecBuilder; diff --git a/tests/test_backtrack_bytes.rs b/tests/test_backtrack_bytes.rs index 17df4d85e4..a59426c949 100644 --- a/tests/test_backtrack_bytes.rs +++ b/tests/test_backtrack_bytes.rs @@ -1,6 +1,3 @@ -extern crate rand; -extern crate regex; - macro_rules! regex_new { ($re:expr) => {{ use regex::internal::ExecBuilder; diff --git a/tests/test_backtrack_utf8bytes.rs b/tests/test_backtrack_utf8bytes.rs index 78a0135bd9..6d308e9e1c 100644 --- a/tests/test_backtrack_utf8bytes.rs +++ b/tests/test_backtrack_utf8bytes.rs @@ -1,8 +1,5 @@ #![cfg_attr(feature = "pattern", feature(pattern))] -extern crate rand; -extern crate regex; - macro_rules! regex_new { ($re:expr) => {{ use regex::internal::ExecBuilder; diff --git a/tests/test_crates_regex.rs b/tests/test_crates_regex.rs index d6976831ec..a681604727 100644 --- a/tests/test_crates_regex.rs +++ b/tests/test_crates_regex.rs @@ -1,6 +1,3 @@ -extern crate quickcheck; -extern crate regex; - /* * This test is a minimal version of <rofl_0> and <subdiff_0> * diff --git a/tests/test_default.rs b/tests/test_default.rs index af634a0c57..d4365fbb34 100644 --- a/tests/test_default.rs +++ b/tests/test_default.rs @@ -1,7 +1,6 @@ #![cfg_attr(feature = "pattern", feature(pattern))] -extern crate rand; -extern crate regex; +use regex; // Due to macro scoping rules, this definition only applies for the modules // defined below. 
Effectively, it allows us to use the same tests for both diff --git a/tests/test_default_bytes.rs b/tests/test_default_bytes.rs index e4a25dc408..f200596ba1 100644 --- a/tests/test_default_bytes.rs +++ b/tests/test_default_bytes.rs @@ -1,6 +1,3 @@ -extern crate rand; -extern crate regex; - macro_rules! regex_new { ($re:expr) => {{ use regex::bytes::Regex; diff --git a/tests/test_nfa.rs b/tests/test_nfa.rs index 05dad2311c..e5a67d180a 100644 --- a/tests/test_nfa.rs +++ b/tests/test_nfa.rs @@ -1,8 +1,5 @@ #![cfg_attr(feature = "pattern", feature(pattern))] -extern crate rand; -extern crate regex; - macro_rules! regex_new { ($re:expr) => {{ use regex::internal::ExecBuilder; diff --git a/tests/test_nfa_bytes.rs b/tests/test_nfa_bytes.rs index 104231852c..0a10e032a2 100644 --- a/tests/test_nfa_bytes.rs +++ b/tests/test_nfa_bytes.rs @@ -1,6 +1,3 @@ -extern crate rand; -extern crate regex; - macro_rules! regex_new { ($re:expr) => {{ use regex::internal::ExecBuilder; diff --git a/tests/test_nfa_utf8bytes.rs b/tests/test_nfa_utf8bytes.rs index 86487a1ee4..36a572b5fc 100644 --- a/tests/test_nfa_utf8bytes.rs +++ b/tests/test_nfa_utf8bytes.rs @@ -1,8 +1,5 @@ #![cfg_attr(feature = "pattern", feature(pattern))] -extern crate rand; -extern crate regex; - macro_rules! regex_new { ($re:expr) => {{ use regex::internal::ExecBuilder;