From 2f1d41687762519e7776685246087a9cf390c1f7 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Fri, 12 Sep 2025 11:13:47 +1200 Subject: [PATCH] [FileFormats.LP] fix handling of required new lines --- src/FileFormats/LP/read.jl | 86 ++++++++++++++++++++++++++------------ test/FileFormats/LP/LP.jl | 80 ++++++++++++++++++++++++++++++++++- 2 files changed, 139 insertions(+), 27 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 950d6561b6..f778df8cee 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -29,6 +29,13 @@ struct _ReadCache{T} end end +function _read_newline_or_eof(state) + if (p = peek(state, _Token)) !== nothing + _ = read(state, _Token, _TOKEN_NEWLINE) + end + return +end + """ Base.read!(io::IO, model::FileFormats.LP.Model) @@ -53,10 +60,9 @@ function Base.read!(io::IO, model::Model{T}) where {T} if token.kind == _TOKEN_KEYWORD _ = read(state, _Token) keyword = Symbol(token.value) - continue + _read_newline_or_eof(state) elseif token.kind == _TOKEN_NEWLINE - _ = read(state, _Token) - continue + _ = read(state, _Token, _TOKEN_NEWLINE) elseif keyword == :MINIMIZE MOI.set(cache.model, MOI.ObjectiveSense(), MOI.MIN_SENSE) _parse_objective(state, cache) @@ -347,6 +353,15 @@ end _is_number(c::Char) = isdigit(c) || c in ('.', 'e', 'E', '+', '-') +# We want an efficient way to check if `test.value` is a case-insensitive +# version of `target`. This is run for every identifier, so it needs to be fast. 
+function _compare_case_insenstive(test::_Token, target::String) + if test.kind != _TOKEN_IDENTIFIER || length(test.value) != length(target) + return false + end + return all(lowercase(a) == b for (a, b) in zip(test.value, target)) +end + function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1) @assert n >= 1 while length(state.peek_tokens) < n @@ -355,6 +370,23 @@ function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1) return nothing end push!(state.peek_tokens, token) + if _compare_case_insenstive(token, "subject") + t = _peek_inner(state) + if _compare_case_insenstive(t, "to") + state.peek_tokens[end] = + _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos) + else + push!(state.peek_tokens, t) + end + elseif _compare_case_insenstive(token, "such") + t = _peek_inner(state) + if _compare_case_insenstive(t, "that") + state.peek_tokens[end] = + _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos) + else + push!(state.peek_tokens, t) + end + end end return state.peek_tokens[n] end @@ -369,7 +401,8 @@ function _peek_inner(state::_LexerState) elseif isspace(c) # Whitespace _ = read(state, Char) elseif c == '\\' # Comment: backslash until newline - while (c = read(state, Char)) !== nothing && c != '\n' + while (c = peek(state, Char)) !== nothing && c != '\n' + _ = read(state, Char) end elseif isdigit(c) || (c == '-' && isdigit(peek(state, Char))) # Number buf = IOBuffer() @@ -385,21 +418,7 @@ function _peek_inner(state::_LexerState) _ = read(state, Char) end val = String(take!(buf)) - l_val = lowercase(val) - if l_val == "subject" - t = peek(state, _Token) - if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "to" - _ = read(state, _Token) # Skip "to" - return _Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos) - end - elseif l_val == "such" - t = peek(state, _Token) - if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "that" - _ = read(state, _Token) # Skip "such" - return _Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos) - end - end - if (kw = get(_KEYWORDS, 
l_val, nothing)) !== nothing + if (kw = get(_KEYWORDS, lowercase(val), nothing)) !== nothing return _Token(_TOKEN_KEYWORD, string(kw), pos) end return _Token(_TOKEN_IDENTIFIER, val, pos) @@ -579,7 +598,12 @@ function _parse_quad_expression( ) end end - _skip_newlines(state) + while _next_token_is(state, _TOKEN_NEWLINE) + if _next_token_is(state, _TOKEN_KEYWORD, 2) + break + end + _ = read(state, _Token, _TOKEN_NEWLINE) + end if _next_token_is(state, _TOKEN_DIVISION) _ = read(state, _Token) # / # Must be /2 @@ -691,6 +715,9 @@ function _parse_expression(state::_LexerState, cache::_ReadCache{T}) where {T} p = read(state, _Token) _add_to_expression!(f, _parse_term(state, cache, -one(T))) elseif p.kind == _TOKEN_NEWLINE + if _next_token_is(state, _TOKEN_KEYWORD, 2) + break + end _ = read(state, _Token) else break @@ -782,6 +809,7 @@ function _parse_objective(state::_LexerState, cache::_ReadCache) end f = _parse_expression(state, cache) MOI.set(cache.model, MOI.ObjectiveFunction{typeof(f)}(), f) + _read_newline_or_eof(state) return end @@ -828,6 +856,7 @@ function _parse_bound(state, cache) x = _parse_variable(state, cache) set = _parse_set_suffix(state, cache) _add_bound(cache, x, set) + _read_newline_or_eof(state) return end # `a op x` or `a op x op b` @@ -842,6 +871,7 @@ function _parse_bound(state, cache) rhs_set = _parse_set_suffix(state, cache) _add_bound(cache, x, rhs_set) end + _read_newline_or_eof(state) return end @@ -852,10 +882,11 @@ function _is_sos_constraint(state) end # SOS_CONSTRAINT := -# [NAME] S1:: (IDENTIFIER:NUMBER)+ \n -# | [NAME] S2:: (IDENTIFIER:NUMBER)+ \n +# [NAME] S1:: (IDENTIFIER:NUMBER)+ +# | [NAME] S2:: (IDENTIFIER:NUMBER)+ # -# The newline character is required. +# New lines are not supported within the line. 
+# Terminating new lines are handled in _parse_constraint function _parse_sos_constraint( state::_LexerState, cache::_ReadCache{T}, @@ -904,6 +935,8 @@ end # INDICATOR_CONSTRAINT := # IDENTIFIER "=" "0" "->" EXPRESSION SET_SUFFIX # | IDENTIFIER "=" "1" "->" EXPRESSION SET_SUFFIX +# +# Terminating new lines are handled in _parse_constraint function _parse_indicator_constraint( state::_LexerState, cache::_ReadCache{T}, @@ -929,9 +962,9 @@ function _parse_indicator_constraint( end # CONSTRAINT := -# [NAME] EXPRESSION SET_SUFFIX -# | [NAME] SOS_CONSTRAINT -# | [NAME] INDICATOR_CONSTRAINT +# [NAME] EXPRESSION SET_SUFFIX \n +# | [NAME] SOS_CONSTRAINT \n +# | [NAME] INDICATOR_CONSTRAINT \n function _parse_constraint(state::_LexerState, cache::_ReadCache) name = _parse_optional_name(state, cache) # Check if this is an SOS constraint @@ -947,5 +980,6 @@ function _parse_constraint(state::_LexerState, cache::_ReadCache) if name !== nothing MOI.set(cache.model, MOI.ConstraintName(), c, name) end + _read_newline_or_eof(state) return end diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index cb3beae1c1..1dc33db5f9 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -589,7 +589,7 @@ function test_read_objective_sense() ) for (sense, result) in cases model = LP.Model() - io = IOBuffer("$sense x") + io = IOBuffer("$sense\nx") seekstart(io) read!(io, model) @test MOI.get(model, MOI.ObjectiveSense()) == result @@ -1476,6 +1476,84 @@ function test_parse_set_sufffix() return end +function test_new_line_edge_cases() + target = "minimize\nobj: 1 x + 1 y\nsubject to\nc: 1 x <= 1\nBounds\nx >= 0\ny >= 0\nEnd\n" + for input in [ + # Good + "minimize\nobj: x + y\nsubject to\nc: x <= 1\nend", + # Blank lines at the start + "\n\nminimize\nobj: x + y\nsubject to\nc: x <= 1\nend", + # Blank lines after minimize + "minimize\n\n\nobj: x + y\nsubject to\nc: x <= 1\nend", + # Blank lines between obj: and expression + "minimize\nobj:\n\nx + y\nsubject to\nc: x 
<= 1\nend", + # Blank lines throughout objective expression + "minimize\nobj:\nx \n+ \ny\nsubject to\nc: x <= 1\nend", + # Blank lines after objective expression + "minimize\nobj: x + y\n\nsubject to\nc: x <= 1\nend", + # Blank lines after subject to + "minimize\nobj: x + y\nsubject to\n\n\nc: x <= 1\nend", + # Blank lines in constraint + "minimize\nobj: x + y\nsubject to\nc:\nx\n<=\n1\nend", + # Blank lines around end + "minimize\nobj: x + y\nsubject to\nc: x <= 1\n\nend\n\n\n", + # Comment before newline + "minimize\\comment\nobj: x + y\nsubject to\nc: x <= 1\nend", + "minimize\nobj: x + y\\comment\nsubject to\nc: x <= 1\nend", + "minimize\nobj: x + y\nsubject to\\comment\nc: x <= 1\nend", + "minimize\nobj: x + y\nsubject to\nc: x <= 1\\comment\nend", + "minimize\nobj: x + y\nsubject to\nc: x <= 1\nend\\comment", + ] + io = IOBuffer(input) + seekstart(io) + model = LP.Model() + MOI.read!(io, model) + @test sprint(MOI.write, model) == target + end + return +end + +function test_new_line_edge_cases_sos() + target = "minimize\nobj: 1 x + 1 y\nsubject to\nBounds\nx >= 0\ny >= 0\nSOS\nc: S1:: x:1.0 y:2.0\nEnd\n" + for input in [ + "minimize\nobj: x + y\nsubject to\nc: S1:: x:1 y:2\nend", + "minimize\nobj: x + y\nsubject to\nc: S1:: x:1 y:2\n\nend", + "minimize\nobj: x + y\nsubject to\n\nc: S1:: x:1 y:2\n\nend", + "minimize\nobj: x + y\nsubject to\n\nc: S1:: x:1 y:2\\comment\nend", + ] + io = IOBuffer(input) + seekstart(io) + model = LP.Model() + MOI.read!(io, model) + @test sprint(MOI.write, model) == target + end + return +end + +function test_new_line_edge_case_fails() + for input in [ + # No newline between objective sense and objective + "minimize x", + "maximize x", + "maximize c: x", + # No new line between objective and subject to + "maximize\nobj: x subject to", + # No new line between subject to and constraint + "maximize\nobj: x\nsubject to c: x >= 0", + # No new line between multiple constraints + "maximize\nobj: x\nsubject to\nc: x >= 0 x <= 1", + # New 
lines in bounds section + "maximize\nobj: x\nsubject to\nbounds x >= 0\bx <= 1", + "maximize\nobj: x\nsubject to\nbounds\nx >= 0 x <= 1", + ] + io = IOBuffer(input) + seekstart(io) + model = LP.Model() + @test_throws LP.ParseError MOI.read!(io, model) + end + return +end + end # module TestLP.runtests()