Skip to content

Commit

Permalink
refactor!: remove redundant escape regex & curly brace regex preproce…
Browse files Browse the repository at this point in the history
…ssing

The regex-syntax crate now natively supports literal escapes for all
ASCII characters except those in [0-9A-Za-z<>].
  • Loading branch information
CAD97 authored and amaanq committed Mar 10, 2024
1 parent b60b248 commit 14bbf68
Showing 1 changed file with 1 addition and 29 deletions.
30 changes: 1 addition & 29 deletions cli/src/generate/prepare_grammar/expand_tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use crate::generate::nfa::{CharacterSet, Nfa, NfaState};
use crate::generate::rules::{Precedence, Rule};
use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex::Regex;
use regex_syntax::ast::{
parse, Ast, ClassPerlKind, ClassSet, ClassSetBinaryOpKind, ClassSetItem, ClassUnicodeKind,
RepetitionKind, RepetitionRange,
Expand All @@ -13,8 +12,6 @@ use std::collections::HashMap;
use std::i32;

lazy_static! {
static ref CURLY_BRACE_REGEX: Regex =
Regex::new(r"(^|[^\\pP])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}").unwrap();
static ref UNICODE_CATEGORIES: HashMap<&'static str, Vec<u32>> =
serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap();
static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec<u32>> =
Expand All @@ -29,7 +26,6 @@ const UNICODE_CATEGORIES_JSON: &str = include_str!("./unicode-categories.json");
const UNICODE_PROPERTIES_JSON: &str = include_str!("./unicode-properties.json");
const UNICODE_CATEGORY_ALIASES_JSON: &str = include_str!("./unicode-category-aliases.json");
const UNICODE_PROPERTY_ALIASES_JSON: &str = include_str!("./unicode-property-aliases.json");
const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];

struct NfaBuilder {
nfa: Nfa,
Expand Down Expand Up @@ -60,29 +56,6 @@ const fn get_completion_precedence(rule: &Rule) -> i32 {
0
}

/// Rewrites a grammar regex so that it is accepted by the `regex_syntax` parser.
///
/// Two adjustments are made, in this order:
///
/// 1. `CURLY_BRACE_REGEX` is applied once, escaping the braces of a `{...}`
///    group that it matches (its pattern targets brace groups that are not
///    repetition quantifiers or `\p{..}` / `\P{..}` classes).
/// 2. Every backslash escape is inspected. If the escaped character is listed
///    in `ALLOWED_REDUNDANT_ESCAPED_CHARS`, the backslash is dropped and only
///    the character is kept; otherwise the escape is copied through unchanged.
///    A lone backslash at the very end of the input is preserved.
///
/// NOTE(review): `Regex::replace` only rewrites the first match, so a pattern
/// with several literal brace groups has just the first one escaped — confirm
/// whether `replace_all` was intended.
fn preprocess_regex(content: &str) -> String {
    let braces_escaped = CURLY_BRACE_REGEX.replace(content, "$1\\{$2\\}");
    let mut output = String::with_capacity(braces_escaped.len());

    let mut remaining = braces_escaped.chars();
    while let Some(current) = remaining.next() {
        // Anything that is not the start of an escape is copied verbatim.
        if current != '\\' {
            output.push(current);
            continue;
        }

        // A backslash: decide what to do based on the character it escapes.
        match remaining.next() {
            // Redundant escape: keep the character, drop the backslash.
            Some(escaped) if ALLOWED_REDUNDANT_ESCAPED_CHARS.contains(&escaped) => {
                output.push(escaped);
            }
            // Meaningful escape (including `\\`): keep both characters.
            Some(escaped) => {
                output.push('\\');
                output.push(escaped);
            }
            // Dangling backslash at end of input: keep it as-is.
            None => output.push('\\'),
        }
    }

    output
}

pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
let mut builder = NfaBuilder {
nfa: Nfa::new(),
Expand Down Expand Up @@ -138,8 +111,7 @@ impl NfaBuilder {
fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
match rule {
Rule::Pattern(s, f) => {
let s = preprocess_regex(s);
let ast = parse::Parser::new().parse(&s)?;
let ast = parse::Parser::new().parse(s)?;
self.expand_regex(&ast, next_state_id, f.contains('i'))
}
Rule::String(s) => {
Expand Down

0 comments on commit 14bbf68

Please sign in to comment.