From d4963505500e7ee1aa8ac561be2cab77f98dea5a Mon Sep 17 00:00:00 2001 From: "Peter M. Stahl" Date: Mon, 15 Mar 2021 22:06:38 +0100 Subject: [PATCH] Refactor colorization code again (#17) --- src/ast/format.rs | 106 ++++++-------- src/char/grapheme.rs | 95 ++++++------- src/regexp/component.rs | 170 +++++++++++++++++++++++ src/regexp/mod.rs | 2 + src/regexp/regexp.rs | 245 +++++++++++++++++---------------- tests/lib_integration_tests.rs | 13 ++ 6 files changed, 398 insertions(+), 233 deletions(-) create mode 100644 src/regexp/component.rs diff --git a/src/ast/format.rs b/src/ast/format.rs index ab1956e..587e4e1 100644 --- a/src/ast/format.rs +++ b/src/ast/format.rs @@ -16,7 +16,7 @@ use crate::ast::{Expression, Quantifier}; use crate::char::GraphemeCluster; -use crate::regexp::RegExpConfig; +use crate::regexp::{Component, RegExpConfig}; use itertools::Itertools; use std::collections::BTreeSet; use std::fmt::{Display, Formatter, Result}; @@ -52,33 +52,22 @@ fn format_alternation( options: &[Expression], config: &RegExpConfig, ) -> Result { - let left_parenthesis = if config.is_capturing_group_enabled() { - "(" - } else { - "(?:" - }; - let pipe = if config.is_output_colorized { - "\u{1b}[1;31m|\u{1b}[0m" - } else { - "|" - }; let alternation_str = options .iter() .map(|option| { if option.precedence() < expr.precedence() && !option.is_single_codepoint() { - if config.is_output_colorized { - format!( - "\u{1b}[1;32m{}\u{1b}[0m{}\u{1b}[1;32m)\u{1b}[0m", - left_parenthesis, option - ) + if config.is_capturing_group_enabled() { + Component::CapturedParenthesizedExpression(option.to_string()) + .to_repr(config.is_output_colorized) } else { - format!("{}{})", left_parenthesis, option) + Component::UncapturedParenthesizedExpression(option.to_string()) + .to_repr(config.is_output_colorized) } } else { format!("{}", option) } }) - .join(pipe); + .join(&Component::Pipe.to_repr(config.is_output_colorized)); write!(f, "{}", alternation_str) } @@ -138,31 +127,22 @@ fn 
format_character_class( char_class_strs.push((*c).to_string()); } } else { - let frmt = if config.is_output_colorized { - format!( - "{}\u{1b}[1;36m-\u{1b}[0m{}", - subset.first().unwrap(), - subset.last().unwrap() - ) - } else { - format!("{}-{}", subset.first().unwrap(), subset.last().unwrap()) - }; - - char_class_strs.push(frmt); + char_class_strs.push(format!( + "{}{}{}", + subset.first().unwrap(), + Component::Hyphen.to_repr(config.is_output_colorized), + subset.last().unwrap() + )); } } - let joined_classes = char_class_strs.join(""); - - if config.is_output_colorized { - write!( - f, - "\u{1b}[1;36m[\u{1b}[0m{}\u{1b}[1;36m]\u{1b}[0m", - joined_classes, - ) - } else { - write!(f, "[{}]", joined_classes) - } + write!( + f, + "{}{}{}", + Component::LeftBracket.to_repr(config.is_output_colorized), + char_class_strs.join(""), + Component::RightBracket.to_repr(config.is_output_colorized) + ) } fn format_concatenation( @@ -172,22 +152,16 @@ fn format_concatenation( expr2: &Expression, config: &RegExpConfig, ) -> Result { - let left_parenthesis = if config.is_capturing_group_enabled() { - "(" - } else { - "(?:" - }; let expr_strs = vec![expr1, expr2] .iter() .map(|&it| { if it.precedence() < expr.precedence() && !it.is_single_codepoint() { - if config.is_output_colorized { - format!( - "\u{1b}[1;32m{}\u{1b}[0m{}\u{1b}[1;32m)\u{1b}[0m", - left_parenthesis, it - ) + if config.is_capturing_group_enabled() { + Component::CapturedParenthesizedExpression(it.to_string()) + .to_repr(config.is_output_colorized) } else { - format!("{}{})", left_parenthesis, it) + Component::UncapturedParenthesizedExpression(it.to_string()) + .to_repr(config.is_output_colorized) } } else { format!("{}", it) @@ -243,24 +217,30 @@ fn format_repetition( quantifier: &Quantifier, config: &RegExpConfig, ) -> Result { - let left_parenthesis = if config.is_capturing_group_enabled() { - "(" - } else { - "(?:" - }; if expr1.precedence() < expr.precedence() && !expr1.is_single_codepoint() { - if 
config.is_output_colorized { + if config.is_capturing_group_enabled() { write!( f, - "\u{1b}[1;32m{}\u{1b}[0m{}\u{1b}[1;32m)\u{1b}[0m\u{1b}[1;35m{}\u{1b}[0m", - left_parenthesis, expr1, quantifier + "{}{}", + Component::CapturedParenthesizedExpression(expr1.to_string()) + .to_repr(config.is_output_colorized), + Component::Quantifier(quantifier.clone()).to_repr(config.is_output_colorized) ) } else { - write!(f, "{}{}){}", left_parenthesis, expr1, quantifier) + write!( + f, + "{}{}", + Component::UncapturedParenthesizedExpression(expr1.to_string()) + .to_repr(config.is_output_colorized), + Component::Quantifier(quantifier.clone()).to_repr(config.is_output_colorized) + ) } - } else if config.is_output_colorized { - write!(f, "{}\u{1b}[1;35m{}\u{1b}[0m", expr1, quantifier) } else { - write!(f, "{}{}", expr1, quantifier) + write!( + f, + "{}{}", + expr1, + Component::Quantifier(quantifier.clone()).to_repr(config.is_output_colorized) + ) } } diff --git a/src/char/grapheme.rs b/src/char/grapheme.rs index c971531..faebaad 100644 --- a/src/char/grapheme.rs +++ b/src/char/grapheme.rs @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -use crate::regexp::RegExpConfig; +use crate::regexp::{Component, RegExpConfig}; use itertools::Itertools; use std::fmt::{Display, Formatter, Result}; @@ -22,6 +22,8 @@ const CHARS_TO_ESCAPE: [&str; 14] = [ "(", ")", "[", "]", "{", "}", "+", "*", "-", ".", "?", "|", "^", "$", ]; +const CHAR_CLASSES: [&str; 6] = ["\\d", "\\s", "\\w", "\\D", "\\S", "\\W"]; + #[derive(Clone, Debug, Hash, Ord, PartialOrd, Eq, PartialEq)] pub struct Grapheme { pub(crate) chars: Vec<String>, @@ -162,65 +164,58 @@ impl Display for Grapheme { || (self.chars.len() == 1 && self.chars[0].matches('\\').count() == 1); let is_range = self.min < self.max; let is_repetition = self.min > 1; - let value = if self.repetitions.is_empty() { + let mut value = if self.repetitions.is_empty() { self.value() } else { self.repetitions.iter().map(|it| it.to_string()).join("") }; - let left_parenthesis = if self.config.is_capturing_group_enabled() { - "(" - } else { - "(?:" - }; - let char_classes = vec!["\\d", "\\s", "\\w", "\\D", "\\S", "\\W"]; - let colored_value = if self.config.is_output_colorized && char_classes.contains(&&*value) { - format!("\u{1b}[103;30m{}\u{1b}[0m", value) - } else { - value - }; + value = Component::CharClass(value.clone()) + .to_repr(self.config.is_output_colorized && CHAR_CLASSES.contains(&&*value)); if !is_range && is_repetition && is_single_char { - if self.config.is_output_colorized { - write!(f, "{}\u{1b}[104;37m{{{}}}\u{1b}[0m", colored_value, self.min) - } else { - write!(f, "{}{{{}}}", colored_value, self.min) - } + write!( + f, + "{}{}", + value, + Component::Repetition(self.min).to_repr(self.config.is_output_colorized) + ) } else if !is_range && is_repetition && !is_single_char { - if self.config.is_output_colorized { - write!( - f, - "\u{1b}[1;32m{}\u{1b}[0m{}\u{1b}[1;32m)\u{1b}[0m\u{1b}[104;37m{{{}}}\u{1b}[0m", - left_parenthesis, colored_value, self.min - ) - } else { - write!(f, "{}{}){{{}}}", left_parenthesis, colored_value, self.min) - } + write!( + f, + "{}{}", + if
self.config.is_capturing_group_enabled() { + Component::CapturedParenthesizedExpression(value) + .to_repr(self.config.is_output_colorized) + } else { + Component::UncapturedParenthesizedExpression(value) + .to_repr(self.config.is_output_colorized) + }, + Component::Repetition(self.min).to_repr(self.config.is_output_colorized) + ) } else if is_range && is_single_char { - if self.config.is_output_colorized { - write!( - f, - "{}\u{1b}[104;37m{{{},{}}}\u{1b}[0m", - colored_value, self.min, self.max - ) - } else { - write!(f, "{}{{{},{}}}", colored_value, self.min, self.max) - } + write!( + f, + "{}{}", + value, + Component::RepetitionRange(self.min, self.max) + .to_repr(self.config.is_output_colorized) + ) } else if is_range && !is_single_char { - if self.config.is_output_colorized { - write!( - f, - "\u{1b}[1;32m{}\u{1b}[0m{}\u{1b}[1;32m)\u{1b}[0m\u{1b}[104;37m{{{},{}}}\u{1b}[0m", - left_parenthesis, colored_value, self.min, self.max - ) - } else { - write!( - f, - "{}{}){{{},{}}}", - left_parenthesis, colored_value, self.min, self.max - ) - } + write!( + f, + "{}{}", + if self.config.is_capturing_group_enabled() { + Component::CapturedParenthesizedExpression(value) + .to_repr(self.config.is_output_colorized) + } else { + Component::UncapturedParenthesizedExpression(value) + .to_repr(self.config.is_output_colorized) + }, + Component::RepetitionRange(self.min, self.max) + .to_repr(self.config.is_output_colorized) + ) } else { - write!(f, "{}", colored_value) + write!(f, "{}", value) } } } diff --git a/src/regexp/component.rs b/src/regexp/component.rs new file mode 100644 index 0000000..fe4d1c2 --- /dev/null +++ b/src/regexp/component.rs @@ -0,0 +1,170 @@ +/* + * Copyright © 2019-2020 Peter M. Stahl pemistahl@gmail.com + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +use crate::ast::Quantifier; +use std::fmt::{Display, Formatter, Result}; + +pub enum Component { + Asterisk, + CapturedParenthesizedExpression(String), + Caret, + CharClass(String), + DollarSign, + Hyphen, + IgnoreCaseFlag, + IgnoreCaseAndVerboseModeFlag, + LeftBracket, + Pipe, + Quantifier(Quantifier), + QuestionMark, + Repetition(u32), + RepetitionRange(u32, u32), + RightBracket, + UncapturedParenthesizedExpression(String), + VerboseModeFlag, +} + +impl Component { + pub fn to_repr(&self, is_output_colorized: bool) -> String { + match is_output_colorized { + true => self.to_colored_string(false), + false => self.to_string(), + } + } + + pub fn to_colored_string(&self, is_escaped: bool) -> String { + match self { + Component::Asterisk => Self::purple_bold(&self.to_string(), is_escaped), + Component::CapturedParenthesizedExpression(expr) => { + format!( + "{}{}{}", + Self::green_bold("(", is_escaped), + expr, + Self::green_bold(")", is_escaped) + ) + } + Component::Caret => Self::yellow_bold(&self.to_string(), is_escaped), + Component::CharClass(value) => Self::black_on_bright_yellow(value, is_escaped), + Component::DollarSign => Self::yellow_bold(&self.to_string(), is_escaped), + Component::Hyphen => Self::cyan_bold(&self.to_string(), is_escaped), + Component::IgnoreCaseFlag => { + Self::bright_yellow_on_black(&self.to_string(), is_escaped) + } + Component::IgnoreCaseAndVerboseModeFlag => { + Self::bright_yellow_on_black(&self.to_string(), is_escaped) + } + Component::LeftBracket => Self::cyan_bold(&self.to_string(), is_escaped), + 
Component::Pipe => Self::red_bold(&self.to_string(), is_escaped), + Component::Quantifier(_) => Self::purple_bold(&self.to_string(), is_escaped), + Component::QuestionMark => Self::purple_bold(&self.to_string(), is_escaped), + Component::Repetition(_) => Self::white_on_bright_blue(&self.to_string(), is_escaped), + Component::RepetitionRange(_, _) => { + Self::white_on_bright_blue(&self.to_string(), is_escaped) + } + Component::RightBracket => Self::cyan_bold(&self.to_string(), is_escaped), + Component::UncapturedParenthesizedExpression(expr) => { + format!( + "{}{}{}", + Self::green_bold("(?:", is_escaped), + expr, + Self::green_bold(")", is_escaped) + ) + } + Component::VerboseModeFlag => { + Self::bright_yellow_on_black(&self.to_string(), is_escaped) + } + } + } + + fn black_on_bright_yellow(value: &str, is_escaped: bool) -> String { + Self::color_code("103;30", value, is_escaped) + } + + fn bright_yellow_on_black(value: &str, is_escaped: bool) -> String { + Self::color_code("40;93", value, is_escaped) + } + + fn cyan_bold(value: &str, is_escaped: bool) -> String { + Self::color_code("1;36", value, is_escaped) + } + + fn green_bold(value: &str, is_escaped: bool) -> String { + Self::color_code("1;32", value, is_escaped) + } + + fn purple_bold(value: &str, is_escaped: bool) -> String { + Self::color_code("1;35", value, is_escaped) + } + + fn red_bold(value: &str, is_escaped: bool) -> String { + Self::color_code("1;31", value, is_escaped) + } + + fn white_on_bright_blue(value: &str, is_escaped: bool) -> String { + Self::color_code("104;37", value, is_escaped) + } + + fn yellow_bold(value: &str, is_escaped: bool) -> String { + Self::color_code("1;33", value, is_escaped) + } + + fn color_code(code: &str, value: &str, is_escaped: bool) -> String { + if is_escaped { + format!("\u{1b}\\[{}m\\{}\u{1b}\\[0m", code, value) + } else { + format!("\u{1b}[{}m{}\u{1b}[0m", code, value) + } + } +} + +impl Display for Component { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { 
+ write!( + f, + "{}", + match self { + Component::Asterisk => "*".to_string(), + Component::CapturedParenthesizedExpression(expr) => + format!("({})", expr.to_string()), + Component::Caret => "^".to_string(), + Component::CharClass(value) => value.clone(), + Component::DollarSign => "$".to_string(), + Component::Hyphen => "-".to_string(), + Component::IgnoreCaseFlag => "(?i)".to_string(), + Component::IgnoreCaseAndVerboseModeFlag => "(?ix)".to_string(), + Component::LeftBracket => "[".to_string(), + Component::Pipe => "|".to_string(), + Component::Quantifier(quantifier) => quantifier.to_string(), + Component::QuestionMark => "?".to_string(), + Component::Repetition(num) => + if *num == 0 { + "{\\d+\\}".to_string() + } else { + format!("{{{}}}", num) + }, + Component::RepetitionRange(min, max) => + if *min == 0 && *max == 0 { + "{\\d+,\\d+\\}".to_string() + } else { + format!("{{{},{}}}", min, max) + }, + Component::RightBracket => "]".to_string(), + Component::UncapturedParenthesizedExpression(expr) => format!("(?:{})", expr), + Component::VerboseModeFlag => "(?x)".to_string(), + } + ) + } +} diff --git a/src/regexp/mod.rs b/src/regexp/mod.rs index c62cc15..327065e 100644 --- a/src/regexp/mod.rs +++ b/src/regexp/mod.rs @@ -15,6 +15,7 @@ */ mod builder; +mod component; mod config; mod feature; @@ -22,6 +23,7 @@ mod feature; mod regexp; pub use builder::RegExpBuilder; +pub use component::Component; pub use config::RegExpConfig; pub use feature::Feature; pub use regexp::RegExp; diff --git a/src/regexp/regexp.rs b/src/regexp/regexp.rs index 06a566f..d7cb277 100644 --- a/src/regexp/regexp.rs +++ b/src/regexp/regexp.rs @@ -18,6 +18,7 @@ use crate::ast::Expression; use crate::char::GraphemeCluster; use crate::fsm::DFA; use crate::regexp::config::RegExpConfig; +use crate::regexp::Component; use itertools::Itertools; use lazy_static::lazy_static; use regex::Regex; @@ -86,47 +87,36 @@ impl RegExp { impl Display for RegExp { fn fmt(&self, f: &mut Formatter<'_>) -> Result { 
let ignore_case_flag = if self.config.is_case_insensitive_matching() { - "(?i)" + Component::IgnoreCaseFlag.to_repr(self.config.is_output_colorized) } else { - "" + String::new() }; - let verbose_mode_flag = if self.config.is_case_insensitive_matching() { - "(?ix)" - } else { - "(?x)" - }; - let left_parenthesis = if self.config.is_capturing_group_enabled() { - "(" - } else { - "(?:" - }; - + let caret = Component::Caret.to_repr(self.config.is_output_colorized); + let dollar_sign = Component::DollarSign.to_repr(self.config.is_output_colorized); let mut regexp = match self.ast { Expression::Alternation(_, _) => { - if self.config.is_output_colorized { - format!( - "\u{1b}[40;93m{}\u{1b}[0m\u{1b}[1;33m^\u{1b}[0m\u{1b}[1;32m{}\u{1b}[0m{}\u{1b}[1;32m)\u{1b}[0m\u{1b}[1;33m$\u{1b}[0m", - ignore_case_flag, left_parenthesis, self.ast.to_string() - ) - } else { - format!( - "{}^{}{})$", - ignore_case_flag, - left_parenthesis, - self.ast.to_string() - ) - } + format!( + "{}{}{}{}", + ignore_case_flag, + caret, + if self.config.is_capturing_group_enabled() { + Component::CapturedParenthesizedExpression(self.ast.to_string()) + .to_repr(self.config.is_output_colorized) + } else { + Component::UncapturedParenthesizedExpression(self.ast.to_string()) + .to_repr(self.config.is_output_colorized) + }, + dollar_sign + ) } _ => { - if self.config.is_output_colorized { - format!( - "\u{1b}[40;93m{}\u{1b}[0m\u{1b}[1;33m^\u{1b}[0m{}\u{1b}[1;33m$\u{1b}[0m", - ignore_case_flag, - self.ast.to_string() - ) - } else { - format!("{}^{}$", ignore_case_flag, self.ast.to_string()) - } + format!( + "{}{}{}{}", + ignore_case_flag, + caret, + self.ast.to_string(), + dollar_sign + ) } }; @@ -135,76 +125,72 @@ impl Display for RegExp { } if self.config.is_verbose_mode_enabled { - let colored_flag = if self.config.is_output_colorized { - format!("\u{1b}[40;93m{}\u{1b}[0m", verbose_mode_flag) - } else { - verbose_mode_flag.to_string() - }; - write!(f, "{}", apply_verbose_mode(regexp, colored_flag)) + 
write!(f, "{}", apply_verbose_mode(regexp, &self.config)) } else { write!(f, "{}", regexp) } } } -fn apply_verbose_mode(regexp: String, verbose_mode_flag: String) -> String { +fn apply_verbose_mode(regexp: String, config: &RegExpConfig) -> String { lazy_static! { + static ref ASTERISK: String = Component::Asterisk.to_colored_string(true); + static ref LEFT_BRACKET: String = Component::LeftBracket.to_colored_string(true); + static ref QUESTION_MARK: String = Component::QuestionMark.to_colored_string(true); + static ref REPETITION: String = Component::Repetition(0).to_colored_string(true); + static ref REPETITION_RANGE: String = + Component::RepetitionRange(0, 0).to_colored_string(true); + static ref RIGHT_BRACKET: String = Component::RightBracket.to_colored_string(true); + static ref INDENTATION_REVERSAL_REGEX_ONE: Regex = Regex::new(&format!( + "\n\\s+(?P<component>{}|{}|{}|{}|{})", + *ASTERISK, *QUESTION_MARK, *REPETITION, *REPETITION_RANGE, *RIGHT_BRACKET + )) + .unwrap(); + static ref INDENTATION_REVERSAL_REGEX_TWO: Regex = Regex::new(&format!( + "(?P<component>{}|{})\n\\s+", + *LEFT_BRACKET, *RIGHT_BRACKET + )) + .unwrap(); + static ref INDENTATION_REVERSAL_REGEX_THREE: Regex = + Regex::new(r"(?P<component1>\[[^\]]+\])\n\s+(?P<component2>[^\)\s]+)").unwrap(); + static ref COLOR_MODE_REGEX: Regex = + Regex::new(r"\u{1b}\[\d+;\d+m[^\u{1b}]+\u{1b}\[0m|[^\u{1b}]+").unwrap(); static ref VERBOSE_MODE_REGEX: Regex = Regex::new( r#"(?x) - ( - (?: - \u{1b}\[ - (?: - 1;31m # red bold - | - 1;35m # purple bold - | - 1;33m # yellow bold - | - 1;32m # green bold - | - 1;36m # cyan bold - | - 104;37m # white on bright blue - | - 40;93m # bright yellow on black - | - 103;30m # black on bright yellow - | - 0m # color reset - ) - )* - (?: - \(\?i\) - | - \( (?: \?: )? - | - \[.*\] - | - \) - (?: - \? - | - \{ \d+ (?: ,\d+ )? \} - ) - | - (?: - (?: \\[\^$()|DdSsWw\\\ ] )+ - (?: \\* [^\^$|()\\] )* - )+ - | - (?: - (?: \\* [^\^$()|\\] )+ - (?: \\[\^$()|DdSsWw\\\ ] )* - )+ - ) - ) + \(\?i\) + | + \[[^\]]+\] + | + \( (?: \?: )?
+ | + \) (?: \? | \{ \d+ (?: ,\d+ )? \} )? + | + [\^|$] + | + (?: + (?: \\[\^$()|DdSsWw\\\ ] )+ + (?: \\* [^\^$|()\\] )* + )+ + | + (?: + (?: \\* [^\^$()|\\] )+ + (?: \\[\^$()|DdSsWw\\\ ] )* + )+ "# ) .unwrap(); } - let regexp_with_static_replacements = regexp + let verbose_mode_flag = if config.is_case_insensitive_matching() { + Component::IgnoreCaseAndVerboseModeFlag.to_repr(config.is_output_colorized) + } else { + Component::VerboseModeFlag.to_repr(config.is_output_colorized) + }; + + let mut verbose_regexp = vec![verbose_mode_flag]; + let mut nesting_level = 0; + + let regexp_with_replacements = regexp .replace("(?i)", "") .replace("#", "\\#") .replace(" ", "\\s") @@ -218,40 +204,59 @@ fn apply_verbose_mode(regexp: String, verbose_mode_flag: String) -> String { .replace("\u{2028}", "\\s") .replace(" ", "\\ "); - let regexp_with_dynamic_replacements = VERBOSE_MODE_REGEX - .replace_all(&regexp_with_static_replacements, "\n$1\n") - .to_string() - .replace("]\n\n", "]") - .replace("\n^", "^") - .replace("\n$\n", "\n$") - .replace("\n)$", "\n)\n$") - .replace("^$", "^\n$") - .replace(")\n\u{1b}[0m\u{1b}[1;35m?", ")\u{1b}[0m\u{1b}[1;35m?") - .replace(")\n\u{1b}[0m\u{1b}[1;33m\n$", ")\n\u{1b}[0m\u{1b}[1;33m$") - .replace(")\n\u{1b}[0m\u{1b}[104;37m{", ")\u{1b}[0m\u{1b}[104;37m{") - .replace( - "\u{1b}[1;33m^\n\u{1b}[0m\u{1b}[1;33m\n$\u{1b}[0m", - "\u{1b}[1;33m^\n\u{1b}[0m\u{1b}[1;33m$\u{1b}[0m", - ); + if config.is_output_colorized { + for regexp_match in COLOR_MODE_REGEX.find_iter(&regexp_with_replacements) { + let element = regexp_match.as_str(); + if element.is_empty() { + continue; + } - let mut verbose_regexp = vec![verbose_mode_flag]; - let mut nesting_level = 0; + let is_colored_element = element.starts_with("\u{1b}["); + if is_colored_element && (element.contains('$') || element.contains(')')) { + nesting_level -= 1; + } - for line in regexp_with_dynamic_replacements.lines() { - if line.is_empty() { - continue; - } - if line == "$" || line.ends_with("$\u{1b}[0m") ||
line.starts_with(')') { - nesting_level -= 1; + let indentation = " ".repeat(nesting_level); + verbose_regexp.push(format!("{}{}", indentation, element)); + + if is_colored_element && (element.contains('^') || element.contains('(')) { + nesting_level += 1; + } } - let indentation = " ".repeat(nesting_level); - verbose_regexp.push(format!("{}{}", indentation, line)); + let joined_regexp = verbose_regexp.join("\n"); + let mut joined_regexp_with_replacements = INDENTATION_REVERSAL_REGEX_ONE + .replace_all(&joined_regexp, "$component") + .to_string(); - if line.ends_with('^') || line.ends_with("(?:") || line.ends_with('(') { - nesting_level += 1; + joined_regexp_with_replacements = INDENTATION_REVERSAL_REGEX_TWO + .replace_all(&joined_regexp_with_replacements, "$component") + .to_string(); + + joined_regexp_with_replacements + } else { + for regexp_match in VERBOSE_MODE_REGEX.find_iter(&regexp_with_replacements) { + let element = regexp_match.as_str(); + if element.is_empty() { + continue; + } + if element == "$" || element.starts_with(')') { + nesting_level -= 1; + } + let indentation = " ".repeat(nesting_level); + verbose_regexp.push(format!("{}{}", indentation, element)); + + if element == "^" || element.starts_with('(') { + nesting_level += 1; + } } - } - verbose_regexp.join("\n") + let joined_regexp = verbose_regexp.join("\n"); + + let joined_regexp_with_replacements = INDENTATION_REVERSAL_REGEX_THREE + .replace_all(&joined_regexp, "$component1$component2") + .to_string(); + + joined_regexp_with_replacements + } } diff --git a/tests/lib_integration_tests.rs b/tests/lib_integration_tests.rs index 175aa99..6706876 100644 --- a/tests/lib_integration_tests.rs +++ b/tests/lib_integration_tests.rs @@ -530,6 +530,19 @@ mod no_conversion { [ac] ) $"# + )), + case(vec!["My ♥♥♥ is yours.", "My 💩💩 is yours."], indoc!( + r#" + (?x) + ^ + My\ + (?: + 💩{2} + | + ♥{3} + ) + \ is\ yours\. + $"# )) )] fn succeeds_with_verbose_mode_option(test_cases: Vec<&str>, expected_output: &str) {