fix: add iteration limits to prevent parser hangs (fixes #2451) (#2483)

krystophny · web-flow · commit ec19a263b566 · 2025-11-23T09:50:23.000+01:00
Root cause: Five unbounded loops in expression parsing could hang
indefinitely on malformed input or pathological code structures:
1. parse_expression_with_precedence main loop
2. collect_index_arguments argument parsing loop
3. parse_simple_array_elements element parsing loop
4. parse_modern_array_literal element parsing loop
5. Type specifier parenthesis depth matching loop

Changes:
- Added MAX_ITERATIONS limit to main expression parser (100000)
- Added MAX_ARGUMENTS limit to argument collector (10000)
- Added element count limits to array literal parsers (100000)
- Added MAX_PAREN_DEPTH limit to type specifier parsing (1000)

These limits prevent infinite loops while supporting real-world code
that uses complex expressions with many nested calls or large array
literals.

Testing:
- Added test_parser_iteration_limits.f90 regression test
- Verified all existing tests pass (505/505 non-xfail)
- Tested with maxloc/minloc intrinsics with dim and mask
- Tested with functions having 20+ arguments
- Tested with 50-element array literals

ISO compliance: Fortran standard places no hard limits on nesting
depth or argument counts. These limits are implementation-defined
(ISO/IEC 1539-1:2018 Section 2.3.4) and exceed practical usage.
diff --git a/src/parser/expressions/parser_expression_arrays.f90 b/src/parser/expressions/parser_expression_arrays.f90
@@ -58,7 +58,7 @@ function parse_simple_array_elements(parser, arena, terminator, style, &
         element_count = 0
         allocate (temp_indices(20))
 
-        do
+        do while (element_count < 100000)
             element_count = element_count + 1
             if (element_count > size(temp_indices)) then
                 block
@@ -271,16 +271,22 @@ recursive function parse_modern_array_literal(parser, arena, start_token, &
                 spec_token = parser%consume()
                 type_spec_text = trim(type_spec_text) // spec_token%text
                 paren_depth = 1
-                do while (paren_depth > 0)
-                    peek_token = parser%peek()
-                    if (peek_token%text == "(") then
-                        paren_depth = paren_depth + 1
-                    else if (peek_token%text == ")") then
-                        paren_depth = paren_depth - 1
-                    end if
-                    spec_token = parser%consume()
-                    type_spec_text = type_spec_text // spec_token%text
-                end do
+                block
+                    integer :: paren_count
+                    integer, parameter :: MAX_PAREN_DEPTH = 1000
+                    paren_count = 0
+                    do while (paren_depth > 0 .and. paren_count < MAX_PAREN_DEPTH)
+                        paren_count = paren_count + 1
+                        peek_token = parser%peek()
+                        if (peek_token%text == "(") then
+                            paren_depth = paren_depth + 1
+                        else if (peek_token%text == ")") then
+                            paren_depth = paren_depth - 1
+                        end if
+                        spec_token = parser%consume()
+                        type_spec_text = type_spec_text // spec_token%text
+                    end do
+                end block
                 peek_token = parser%peek()
             end if
 
@@ -301,7 +307,7 @@ recursive function parse_modern_array_literal(parser, arena, start_token, &
             end if
         end if
 
-        do
+        do while (element_count < 100000)
             ! Skip newlines and comments inside array literals
             do
                 peek_token = parser%peek()
@@ -787,6 +793,8 @@ subroutine collect_index_arguments(parser, arena, helpers, closing_char, &
         type(token_t), intent(out) :: close_token
         type(token_t) :: token
         integer :: arg_index
+        integer :: arg_count
+        integer, parameter :: MAX_ARGUMENTS = 10000
 
         if (parser%is_at_end()) then
             close_token = parser%peek()
@@ -799,8 +807,10 @@ subroutine collect_index_arguments(parser, arena, helpers, closing_char, &
             if (arg_index > 0) then
                 allocate (arg_indices(1))
                 arg_indices(1) = arg_index
+                arg_count = 1
 
-                do
+                do while (arg_count < MAX_ARGUMENTS)
+                    arg_count = arg_count + 1
                     token = parser%peek()
                     if (token%kind /= TK_OPERATOR .or. token%text /= ",") exit
                     token = parser%consume()
diff --git a/src/parser/expressions/parser_expressions.f90 b/src/parser/expressions/parser_expressions.f90
@@ -443,15 +443,19 @@ recursive function parse_expression_with_precedence(parser, arena, &
         type(token_view_t) :: view
         logical :: expect_operand, should_exit
         type(token_t) :: token
+        integer :: iteration_count
+        integer, parameter :: MAX_ITERATIONS = 100000
 
         call operator_stack_clear(operators)
         call operand_stack_clear(operands)
         call token_stack_clear(prefix_stack)
         call build_token_view(view, parser)
         expr_index = 0
         expect_operand = .true.
+        iteration_count = 0
 
-        main_loop: do while (.true.)
+        main_loop: do while (iteration_count < MAX_ITERATIONS)
+            iteration_count = iteration_count + 1
             token = view_peek_token(view, parser)
             if (token%kind == TK_EOF) exit main_loop
 
diff --git a/test/test_parser_iteration_limits.f90 b/test/test_parser_iteration_limits.f90
@@ -0,0 +1,106 @@
+program test_parser_iteration_limits
+    !! Regression tests for issue #2451 (parser hangs in expression parsing).
+    !! Each case feeds a small, valid program to transform_lazy_fortran_string
+    !! and passes when a non-blank result comes back. The inputs stay far below
+    !! the parser's iteration limits, so these cases check that ordinary code is
+    !! still accepted; they do not drive any loop up to its limit.
+    use transformation_api, only: transform_lazy_fortran_string
+    implicit none
+
+    character(len=*), parameter :: nl = new_line('a')
+    integer :: test_count, pass_count
+
+    test_count = 0
+    pass_count = 0
+
+    print *, "=== Parser Iteration Limit Tests (Issue #2451) ==="
+    print *
+
+    call test_complex_nested_calls()
+    call test_many_function_arguments()
+    call test_large_array_literal()
+
+    print *
+    print *, "=== Summary ==="
+    print *, "Tests run:   ", test_count
+    print *, "Tests passed:", pass_count
+
+    if (pass_count == test_count) then
+        print *, "All parser iteration limit tests passed!"
+    else
+        print *, "FAILURE: Some tests failed"
+        stop 1
+    end if
+
+contains
+
+    !> Transform `source`, then record and report the outcome of one case.
+    !! `intro` is printed before the transform runs; `pass_text` or `fail_text`
+    !! is printed afterwards. A case passes only when `result` is allocated and
+    !! non-blank.
+    subroutine run_case(intro, source, pass_text, fail_text)
+        character(len=*), intent(in) :: intro, source, pass_text, fail_text
+        character(len=:), allocatable :: program_text, result, error_msg
+        logical :: produced_output
+
+        test_count = test_count + 1
+        print *, intro
+
+        program_text = source
+        call transform_lazy_fortran_string(program_text, result, error_msg)
+
+        ! Test allocation first: len_trim on an unallocated result is invalid.
+        produced_output = allocated(result)
+        if (produced_output) produced_output = len_trim(result) > 0
+
+        if (produced_output) then
+            print *, pass_text
+            pass_count = pass_count + 1
+        else
+            print *, fail_text
+            if (allocated(error_msg)) print *, "  Error: ", trim(error_msg)
+        end if
+    end subroutine run_case
+
+    !> Nested intrinsic calls with keyword arguments (reshape, maxloc).
+    subroutine test_complex_nested_calls()
+        call run_case("Testing complex nested function calls...", &
+            "program test" // nl // &
+            "  implicit none" // nl // &
+            "  integer :: arr(3,3), res(2)" // nl // &
+            "  arr = reshape([1,2,3,4,5,6,7,8,9], [3,3])" // nl // &
+            "  res = maxloc(arr, dim=1, mask=arr > 5)" // nl // &
+            "  print *, res" // nl // &
+            "end program test", &
+            "  PASS: Complex nested calls parsed without hang", &
+            "  FAIL: Parser failed on complex nested calls")
+    end subroutine test_complex_nested_calls
+
+    !> A call statement with 20 actual arguments.
+    subroutine test_many_function_arguments()
+        call run_case("Testing function with many arguments...", &
+            "program test" // nl // &
+            "  implicit none" // nl // &
+            "  call sub(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20)" // nl // &
+            "end program test", &
+            "  PASS: Many arguments parsed without hang", &
+            "  FAIL: Parser failed on many arguments")
+    end subroutine test_many_function_arguments
+
+    !> A 50-element integer array constructor.
+    subroutine test_large_array_literal()
+        call run_case("Testing large array literal...", &
+            "program test" // nl // &
+            "  implicit none" // nl // &
+            "  integer :: x(50)" // nl // &
+            "  x = [1,2,3,4,5,6,7,8,9,10," // &
+            "11,12,13,14,15,16,17,18,19,20," // &
+            "21,22,23,24,25,26,27,28,29,30," // &
+            "31,32,33,34,35,36,37,38,39,40," // &
+            "41,42,43,44,45,46,47,48,49,50]" // nl // &
+            "end program test", &
+            "  PASS: Large array literal parsed without hang", &
+            "  FAIL: Parser failed on large array literal")
+    end subroutine test_large_array_literal
+
+end program test_parser_iteration_limits