Skip to content

Commit

Permalink
Upgrade regex and regex-syntax
Browse files: browse the repository at this point in the history
Supersedes: #1334
Supersedes: #1343
  • Loading branch information
tmccombs committed Jul 18, 2023
1 parent 686d194 commit e2b8514
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 19 deletions.
24 changes: 15 additions & 9 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ argmax = "0.3.1"
atty = "0.2"
ignore = "0.4.20"
num_cpus = "1.15"
regex = "1.8.3"
regex-syntax = "0.6"
regex = "1.9.1"
regex-syntax = "0.7"
ctrlc = "3.2"
humantime = "2.1"
globset = "0.4"
Expand Down
21 changes: 13 additions & 8 deletions src/regex_helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use regex_syntax::ParserBuilder;

/// Determine if a regex pattern contains a literal uppercase character.
pub fn pattern_has_uppercase_char(pattern: &str) -> bool {
let mut parser = ParserBuilder::new().allow_invalid_utf8(true).build();
let mut parser = ParserBuilder::new().utf8(false).build();

parser
.parse(pattern)
Expand All @@ -16,16 +16,18 @@ fn hir_has_uppercase_char(hir: &Hir) -> bool {
use regex_syntax::hir::*;

match hir.kind() {
HirKind::Literal(Literal::Unicode(c)) => c.is_uppercase(),
HirKind::Literal(Literal::Byte(b)) => char::from(*b).is_uppercase(),
HirKind::Literal(Literal(bytes)) => match std::str::from_utf8(&bytes) {
Ok(s) => s.chars().any(|c| c.is_uppercase()),
Err(_) => bytes.iter().any(|b| char::from(*b).is_uppercase()),
},
HirKind::Class(Class::Unicode(ranges)) => ranges
.iter()
.any(|r| r.start().is_uppercase() || r.end().is_uppercase()),
HirKind::Class(Class::Bytes(ranges)) => ranges
.iter()
.any(|r| char::from(r.start()).is_uppercase() || char::from(r.end()).is_uppercase()),
HirKind::Group(Group { hir, .. }) | HirKind::Repetition(Repetition { hir, .. }) => {
hir_has_uppercase_char(hir)
HirKind::Capture(Capture { sub, .. }) | HirKind::Repetition(Repetition { sub, .. }) => {
hir_has_uppercase_char(sub)
}
HirKind::Concat(hirs) | HirKind::Alternation(hirs) => {
hirs.iter().any(hir_has_uppercase_char)
Expand All @@ -36,7 +38,7 @@ fn hir_has_uppercase_char(hir: &Hir) -> bool {

/// Determine if a regex pattern only matches strings starting with a literal dot (hidden files)
pub fn pattern_matches_strings_with_leading_dot(pattern: &str) -> bool {
let mut parser = ParserBuilder::new().allow_invalid_utf8(true).build();
let mut parser = ParserBuilder::new().utf8(false).build();

parser
.parse(pattern)
Expand All @@ -56,15 +58,18 @@ fn hir_matches_strings_with_leading_dot(hir: &Hir) -> bool {
HirKind::Concat(hirs) => {
let mut hirs = hirs.iter();
if let Some(hir) = hirs.next() {
if hir.kind() != &HirKind::Anchor(Anchor::StartText) {
if hir.kind() != &HirKind::Look(Look::Start) {
return false;
}
} else {
return false;
}

if let Some(hir) = hirs.next() {
hir.kind() == &HirKind::Literal(Literal::Unicode('.'))
match hir.kind() {
HirKind::Literal(Literal(bytes)) => bytes.starts_with(&[b'.']),
_ => false,
}
} else {
false
}
Expand Down

0 comments on commit e2b8514

Please sign in to comment.