refactor!: remove redundant escape regex & curly brace regex preprocessing

The regex-syntax crate now natively supports literal escapes for all ASCII characters except those in [0-9A-Za-z<>].
tree-sitter · Mar 10, 2024 · 14bbf68 · 14bbf68
1 parent b60b248
commit 14bbf68
Showing 1 changed file with 1 addition and 29 deletions.
diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs
@@ -4,7 +4,6 @@ use crate::generate::nfa::{CharacterSet, Nfa, NfaState};
 use crate::generate::rules::{Precedence, Rule};
 use anyhow::{anyhow, Context, Result};
 use lazy_static::lazy_static;
-use regex::Regex;
 use regex_syntax::ast::{
     parse, Ast, ClassPerlKind, ClassSet, ClassSetBinaryOpKind, ClassSetItem, ClassUnicodeKind,
     RepetitionKind, RepetitionRange,
@@ -13,8 +12,6 @@ use std::collections::HashMap;
 use std::i32;
 
 lazy_static! {
-    static ref CURLY_BRACE_REGEX: Regex =
-        Regex::new(r"(^|[^\\pP])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}").unwrap();
     static ref UNICODE_CATEGORIES: HashMap<&'static str, Vec<u32>> =
         serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap();
     static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec<u32>> =
@@ -29,7 +26,6 @@ const UNICODE_CATEGORIES_JSON: &str = include_str!("./unicode-categories.json");
 const UNICODE_PROPERTIES_JSON: &str = include_str!("./unicode-properties.json");
 const UNICODE_CATEGORY_ALIASES_JSON: &str = include_str!("./unicode-category-aliases.json");
 const UNICODE_PROPERTY_ALIASES_JSON: &str = include_str!("./unicode-property-aliases.json");
-const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];
 
 struct NfaBuilder {
     nfa: Nfa,
@@ -60,29 +56,6 @@ const fn get_completion_precedence(rule: &Rule) -> i32 {
     0
 }
 
-fn preprocess_regex(content: &str) -> String {
-    let content = CURLY_BRACE_REGEX.replace(content, "$1\\{$2\\}");
-    let mut result = String::with_capacity(content.len());
-    let mut is_escaped = false;
-    for c in content.chars() {
-        if is_escaped {
-            if !ALLOWED_REDUNDANT_ESCAPED_CHARS.contains(&c) {
-                result.push('\\');
-            }
-            result.push(c);
-            is_escaped = false;
-        } else if c == '\\' {
-            is_escaped = true;
-        } else {
-            result.push(c);
-        }
-    }
-    if is_escaped {
-        result.push('\\');
-    }
-    result
-}
-
 pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
     let mut builder = NfaBuilder {
         nfa: Nfa::new(),
@@ -138,8 +111,7 @@ impl NfaBuilder {
     fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
         match rule {
             Rule::Pattern(s, f) => {
-                let s = preprocess_regex(s);
-                let ast = parse::Parser::new().parse(&s)?;
+                let ast = parse::Parser::new().parse(s)?;
                 self.expand_regex(&ast, next_state_id, f.contains('i'))
             }
             Rule::String(s) => {