From ffd1b4ec6340264a53ec4545c033dc2eda31bcfa Mon Sep 17 00:00:00 2001
From: Daniel Burgener <daniel.burgener@microsoft.com>
Date: Fri, 17 May 2024 10:02:34 -0400
Subject: [PATCH] Detect deep recursion when expanding macros and error out
 (#896)

* Detect deep recursion when expanding macros and error out

* Add a config knob for max recursion depth

* Just unwrap on unreachable block

* Add documentation comment to macro_recursion_limit definition
---
 lalrpop/src/api/mod.rs                     |  6 ++++
 lalrpop/src/normalize/macro_expand/mod.rs  | 22 +++++++++++---
 lalrpop/src/normalize/macro_expand/test.rs | 35 ++++++++++++++++++++--
 lalrpop/src/normalize/mod.rs               |  2 +-
 lalrpop/src/normalize/tyinfer/test.rs      |  8 ++---
 lalrpop/src/session.rs                     |  6 ++++
 6 files changed, 67 insertions(+), 12 deletions(-)
diff --git a/lalrpop/src/api/mod.rs b/lalrpop/src/api/mod.rs
index 40c795637..684915038 100644
--- a/lalrpop/src/api/mod.rs
+++ b/lalrpop/src/api/mod.rs
@@ -158,6 +158,12 @@ impl Configuration {
         self
     }
 
+    /// Set the max macro recursion depth.  As lalrpop is resolving a macro, it may discover new macros uses in the macro definition to resolve.  Typically deep recursion indicates a recursive macro use that is non-resolvable.  The default resolution depth is 200.
+    pub fn set_macro_recursion_limit(&mut self, val: u16) -> &mut Configuration {
+        self.session.macro_recursion_limit = val;
+        self
+    }
+
     /// Sets the features used during compilation, disables the use of cargo features.
     /// (Default: Loaded from `CARGO_FEATURE_{}` environment variables).
     pub fn set_features<I>(&mut self, iterable: I) -> &mut Configuration
diff --git a/lalrpop/src/normalize/macro_expand/mod.rs b/lalrpop/src/normalize/macro_expand/mod.rs
index d8090d8d0..d9d7dad1a 100644
--- a/lalrpop/src/normalize/macro_expand/mod.rs
+++ b/lalrpop/src/normalize/macro_expand/mod.rs
@@ -15,7 +15,7 @@ use string_cache::DefaultAtom as Atom;
 #[cfg(test)]
 mod test;
 
-pub fn expand_macros(input: Grammar) -> NormResult<Grammar> {
+pub fn expand_macros(input: Grammar, recursion_limit: u16) -> NormResult<Grammar> {
     let input = resolve::resolve(input)?;
 
     let items = input.items;
@@ -32,7 +32,7 @@ pub fn expand_macros(input: Grammar) -> NormResult<Grammar> {
         .collect();
 
     let mut expander = MacroExpander::new(macro_defs);
-    expander.expand(&mut items)?;
+    expander.expand(&mut items, recursion_limit)?;
 
     Ok(Grammar { items, ..input })
 }
@@ -52,8 +52,10 @@ impl MacroExpander {
         }
     }
 
-    fn expand(&mut self, items: &mut Vec<GrammarItem>) -> NormResult<()> {
-        let mut counter = 0;
+    fn expand(&mut self, items: &mut Vec<GrammarItem>, recursion_limit: u16) -> NormResult<()> {
+        let mut counter = 0; // Number of items
+        let mut loop_counter = 0;
+
         loop {
             // Find any macro uses in items added since last round and
             // replace them in place with the expanded version:
@@ -61,12 +63,24 @@ impl MacroExpander {
                 self.replace_item(item);
             }
             counter = items.len();
+            loop_counter += 1;
 
             // No more expansion to do.
             if self.expansion_stack.is_empty() {
                 return Ok(());
             }
 
+            if loop_counter > recursion_limit {
+                // Too much recursion
+                // We know unwrap() is safe, because we just checked is_empty()
+                let sym = self.expansion_stack.pop().unwrap();
+                return_err!(
+                            sym.span,
+                            "Exceeded recursion cap ({}) while expanding this macro.  This typically is a symptom of infinite recursion during macro resolution.  If you believe the recursion will complete eventually, you can increase this limit using Configuration::set_macro_recursion_limit().",
+                            recursion_limit
+                        );
+            }
+
             // Drain expansion stack:
             while let Some(sym) = self.expansion_stack.pop() {
                 match sym.kind {
diff --git a/lalrpop/src/normalize/macro_expand/test.rs b/lalrpop/src/normalize/macro_expand/test.rs
index 72371f0d8..adc54a5ed 100644
--- a/lalrpop/src/normalize/macro_expand/test.rs
+++ b/lalrpop/src/normalize/macro_expand/test.rs
@@ -17,7 +17,7 @@ grammar;
     )
     .unwrap();
 
-    let actual = expand_macros(grammar).unwrap();
+    let actual = expand_macros(grammar, 20).unwrap();
 
     let expected = parser::parse_grammar(
         r##"
@@ -74,7 +74,7 @@ grammar;
     )
     .unwrap();
 
-    let actual = expand_macros(grammar).unwrap();
+    let actual = expand_macros(grammar, 20).unwrap();
 
     let expected = parser::parse_grammar(
         r#"
@@ -103,7 +103,7 @@ fn test_lookahead() {
     )
     .unwrap();
 
-    let actual = expand_macros(grammar).unwrap();
+    let actual = expand_macros(grammar, 20).unwrap();
 
     let expected = parser::parse_grammar(
         r#"
@@ -116,3 +116,32 @@ fn test_lookahead() {
 
     compare(actual, expected);
 }
+
+#[test]
+fn test_excessive_recursion() {
+    let grammar = parser::parse_grammar(
+        r#"
+        grammar;
+        A<I> = { "x" I "y" I "z", A<("." I)> }
+        pub P = A<()>;
+        "#,
+    )
+    .unwrap();
+
+    assert!(expand_macros(grammar, 20).is_err());
+
+    let grammar2 = parser::parse_grammar(
+        r#"
+         grammar;
+         A<I> = { "a" B<("." I)> };
+         B<I> = { "b" C<("," I)> };
+         C<I> = { "c" I };
+         pub D = A<"d"> B<"d"> C<"d">;
+         "#,
+    )
+    .unwrap();
+
+    assert!(expand_macros(grammar2.clone(), 2).is_err());
+
+    assert!(expand_macros(grammar2, 3).is_ok());
+}
diff --git a/lalrpop/src/normalize/mod.rs b/lalrpop/src/normalize/mod.rs
index 3d5ca7e70..6c3f5f361 100644
--- a/lalrpop/src/normalize/mod.rs
+++ b/lalrpop/src/normalize/mod.rs
@@ -60,7 +60,7 @@ fn lower_helper(session: &Session, grammar: pt::Grammar, validate: bool) -> Norm
     let grammar = profile!(
         session,
         "Macro expansion",
-        macro_expand::expand_macros(grammar)?
+        macro_expand::expand_macros(grammar, session.macro_recursion_limit)?
     );
     let grammar = profile!(session, "Token check", token_check::validate(grammar)?);
     let types = profile!(session, "Infer types", tyinfer::infer_types(&grammar)?);
diff --git a/lalrpop/src/normalize/tyinfer/test.rs b/lalrpop/src/normalize/tyinfer/test.rs
index 5eff79b3e..c63eb00e7 100644
--- a/lalrpop/src/normalize/tyinfer/test.rs
+++ b/lalrpop/src/normalize/tyinfer/test.rs
@@ -13,7 +13,7 @@ fn type_repr(s: &str) -> TypeRepr {
 
 fn compare(g1: &str, expected: Vec<(&'static str, &'static str)>) {
     let grammar = parser::parse_grammar(g1).unwrap();
-    let grammar = expand_macros(grammar).unwrap();
+    let grammar = expand_macros(grammar, 20).unwrap();
     let grammar = token_check::validate(grammar).unwrap();
     let types = infer_types(&grammar).unwrap();
 
@@ -56,7 +56,7 @@ grammar;
     )
     .unwrap();
 
-    let actual = expand_macros(grammar).unwrap();
+    let actual = expand_macros(grammar, 20).unwrap();
     assert!(infer_types(&actual).is_err());
 }
 
@@ -74,7 +74,7 @@ grammar;
     )
     .unwrap();
 
-    let actual = expand_macros(grammar).unwrap();
+    let actual = expand_macros(grammar, 20).unwrap();
     assert!(infer_types(&actual).is_err());
 }
 
@@ -202,7 +202,7 @@ grammar;
     )
     .unwrap();
 
-    let actual = expand_macros(grammar).unwrap();
+    let actual = expand_macros(grammar, 20).unwrap();
     assert!(infer_types(&actual).is_err());
 }
 
diff --git a/lalrpop/src/session.rs b/lalrpop/src/session.rs
index b51b19d72..9b0f6e9f9 100644
--- a/lalrpop/src/session.rs
+++ b/lalrpop/src/session.rs
@@ -58,6 +58,10 @@ pub struct Session {
     /// this value if we so choose.
     pub max_errors: usize,
 
+    /// Limit of depth to discover macros needing resolution.  Ensures that compilation terminates
+    /// in a finite number of steps.
+    pub macro_recursion_limit: u16,
+
     // Styles to use when formatting error reports
     /// Applied to the heading in a message.
     pub heading: Style,
@@ -104,6 +108,7 @@ impl Session {
             emit_report: false,
             color_config: ColorConfig::default(),
             max_errors: 1,
+            macro_recursion_limit: 200,
             heading: style::FG_WHITE.with(style::BOLD),
             ambig_symbols: style::FG_WHITE,
             observed_symbols: style::FG_BRIGHT_GREEN,
@@ -131,6 +136,7 @@ impl Session {
             emit_report: false,
             color_config: ColorConfig::IfTty,
             max_errors: 1,
+            macro_recursion_limit: 200,
             heading: Style::new(),
             ambig_symbols: Style::new(),
             observed_symbols: Style::new(),