From 3d89c033e68e93ffeee81cb4b8c214cdc97354a1 Mon Sep 17 00:00:00 2001
From: Oscar Dowson
Date: Mon, 15 Sep 2025 11:46:16 +1200
Subject: [PATCH 1/4] [FileFormats.LP] allow newline in term and improve
 keyword identification

---
 src/FileFormats/LP/read.jl | 123 +++++++++++++++++++++++++++----------
 test/FileFormats/LP/LP.jl  |  59 ++++++++++++++++++
 2 files changed, 149 insertions(+), 33 deletions(-)

diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl
index 0291d9e87c..6326c69264 100644
--- a/src/FileFormats/LP/read.jl
+++ b/src/FileFormats/LP/read.jl
@@ -280,13 +280,15 @@ A struct that is used to manage state when lexing. It stores:
   error message to the user on a parse error
 * `peek_char`: the next `Char` in the `io`
 * `peek_tokens`: the list of upcoming tokens that we have already peeked
+* `current_token`: the most recent token that we have `read`
 """
 mutable struct _LexerState{O<:IO}
     io::O
     line::Int
     peek_char::Union{Nothing,Char}
     peek_tokens::Vector{_Token}
-    _LexerState(io::IO) = new{typeof(io)}(io, 1, nothing, _Token[])
+    current_token::Union{Nothing,_Token}
+    _LexerState(io::IO) = new{typeof(io)}(io, 1, nothing, _Token[], nothing)
 end
 
 """
@@ -351,6 +353,7 @@ function Base.read(state::_LexerState, ::Type{_Token})
         )
     end
     popfirst!(state.peek_tokens)
+    state.current_token = token
     return token
 end
 
@@ -371,6 +374,16 @@ end
 
 _is_number(c::Char) = isdigit(c) || c in ('.', 'e', 'E', '+', '-')
 
+_nothing_or_newline(::Nothing) = true
+_nothing_or_newline(t::_Token) = t.kind == _TOKEN_NEWLINE
+
+function _prior_token(state::_LexerState)
+    if length(state.peek_tokens) <= 1
+        return state.current_token
+    end
+    return state.peek_tokens[end-1]
+end
+
 function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
     @assert n >= 1
     while length(state.peek_tokens) < n
@@ -379,22 +392,47 @@ function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
             return nothing
         end
         push!(state.peek_tokens, token)
-        if _compare_case_insenstive(token, "subject")
+        if token.kind != _TOKEN_IDENTIFIER
+            continue
+        end
+        # Here we have a _TOKEN_IDENTIFIER. If it is preceded by a
+        # _TOKEN_NEWLINE, it may be a _TOKEN_KEYWORD.
+        if !_nothing_or_newline(_prior_token(state))
+            continue # It can't be a keyword
+        end
+        # It might be a _TOKEN_KEYWORD.
+        kw = _case_insenstive_identifier_to_keyword(token.value)
+        if kw !== nothing
+            # The token matches a single word keyword. All keywords are followed
+            # by a new line, or an EOF.
             t = _peek_inner(state)
-            if _compare_case_insenstive(t, "to")
-                state.peek_tokens[end] =
-                    _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos)
-            else
-                push!(state.peek_tokens, t)
+            if _nothing_or_newline(t)
+                state.peek_tokens[end] = _Token(_TOKEN_KEYWORD, kw, token.pos)
             end
-        elseif _compare_case_insenstive(token, "such")
-            t = _peek_inner(state)
-            if _compare_case_insenstive(t, "that")
-                state.peek_tokens[end] =
-                    _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos)
-            else
+            if t !== nothing
                 push!(state.peek_tokens, t)
             end
+            continue
+        end
+        for (a, b) in ["subject" => "to", "such" => "that"]
+            if _compare_case_insenstive(token, a)
+                # This _might_ be `subject to`, or it might just be a variable
+                # named `subject`, like `obj:\n subject\n`.
+                t = _peek_inner(state)
+                if t !== nothing
+                    t2 = _peek_inner(state)
+                    if _compare_case_insenstive(t, b) && _nothing_or_newline(t2)
+                        state.peek_tokens[end] =
+                            _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos)
+                    else
+                        push!(state.peek_tokens, t)
+                    end
+                    if t2 !== nothing
+                        push!(state.peek_tokens, t2)
+                    end
+                end
+                continue
+            end
         end
     end
     return state.peek_tokens[n]
@@ -426,11 +464,7 @@ function _peek_inner(state::_LexerState)
             write(buf, c)
             _ = read(state, Char)
         end
-        val = String(take!(buf))
-        if (kw = _case_insenstive_identifier_to_keyword(val)) !== nothing
-            return _Token(_TOKEN_KEYWORD, kw, pos)
-        end
-        return _Token(_TOKEN_IDENTIFIER, val, pos)
+        return _Token(_TOKEN_IDENTIFIER, String(take!(buf)), pos)
     elseif (op = get(_OPERATORS, c, nothing)) !== nothing
         _ = read(state, Char) # Skip c
         if c == '-' && peek(state, Char) == '>'
@@ -473,6 +507,19 @@ function _skip_newlines(state::_LexerState)
     return
 end
 
+function _next_non_newline(state::_LexerState)
+    n = 1
+    while true
+        t = peek(state, _Token, n)
+        if t === nothing
+            return nothing
+        elseif t.kind != _TOKEN_NEWLINE
+            return t
+        end
+        n += 1
+    end
+end
+
 # IDENTIFIER := "string"
 #
 # There _are_ rules to what an identifier can be. We handle these when lexing.
@@ -605,14 +652,10 @@ function _parse_quad_expression(
             )
         end
     end
-    while _next_token_is(state, _TOKEN_NEWLINE)
-        if _next_token_is(state, _TOKEN_KEYWORD, 2)
-            break
-        end
-        _ = read(state, _Token, _TOKEN_NEWLINE)
-    end
-    if _next_token_is(state, _TOKEN_DIVISION)
-        _ = read(state, _Token) # /
+    t = _next_non_newline(state)
+    if t.kind == _TOKEN_DIVISION
+        _skip_newlines(state)
+        _ = read(state, _Token, _TOKEN_DIVISION) # /
         # Must be /2
         n = read(state, _Token, _TOKEN_NUMBER)
         if n.value != "2"
@@ -634,10 +677,11 @@ end
 end
 
 # TERM :=
-#   "+" TERM
+#   [\n*] TERM
+#   | "+" TERM
 #   | "-" TERM
-#   | NUMBER
 #   | IDENTIFIER
+#   | NUMBER
 #   | NUMBER IDENTIFIER
 #   | NUMBER "*" IDENTIFIER
 #   | QUADRATIC_EXPRESSION
@@ -670,12 +714,25 @@ function _parse_term(
         _ = read(state, _Token, _TOKEN_MULTIPLICATION)
         x = _parse_variable(state, cache)
         return MOI.ScalarAffineTerm(coef, x)
-    elseif _next_token_is(state, _TOKEN_NEWLINE) ||
-           _next_token_is(state, _TOKEN_ADDITION) ||
-           _next_token_is(state, _TOKEN_SUBTRACTION)
-        # NUMBER
-        return coef
+    elseif _next_token_is(state, _TOKEN_NEWLINE)
+        # This could either be NUMBER \nEND-OF-TERM, or it could be a term
+        # split by a new line, like `2\nx`.
+        t = _next_non_newline(state)
+        if t.kind == _TOKEN_MULTIPLICATION
+            # NUMBER \n * [\n] IDENTIFIER
+            _skip_newlines(state)
+            _ = read(state, _Token, _TOKEN_MULTIPLICATION)
+            _skip_newlines(state)
+            x = _parse_variable(state, cache)
+            return MOI.ScalarAffineTerm(coef, x)
+        elseif t.kind == _TOKEN_IDENTIFIER
+            # NUMBER \n IDENTIFIER
+            x = _parse_variable(state, cache)
+            return MOI.ScalarAffineTerm(coef, x)
+        end
+        # NUMBER
+        return coef
     elseif _next_token_is(state, _TOKEN_OPEN_BRACKET)
         # QUADRATIC_EXPRESSION
         return _parse_quad_expression(state, cache, prefix)
diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl
index 1dc33db5f9..a0b9a0b619 100644
--- a/test/FileFormats/LP/LP.jl
+++ b/test/FileFormats/LP/LP.jl
@@ -1554,6 +1554,65 @@ function test_new_line_edge_case_fails()
     return
 end
 
+function test_parse_keyword_edge_cases_identifier_is_keyword()
+    for name in ["max", "min", "st", "such", "bounds", "obj", "free"]
+        io = IOBuffer("""
+        maximize
+        obj: $name
+        subject to
+        $name <= 1
+        bounds
+        $name free
+        end
+        """)
+        seekstart(io)
+        model = LP.Model()
+        MOI.read!(io, model)
+        x = only(MOI.get(model, MOI.ListOfVariableIndices()))
+        @test MOI.get(model, MOI.VariableName(), x) == name
+    end
+    return
+end
+
+function test_parse_keyword_subject_to_errors()
+    for line in ["subject", "subject too", "subject to a:"]
+        io = IOBuffer("""
+        maximize
+        obj: x
+        $line
+        x <= 1
+        bounds
+        x free
+        end
+        """)
+        seekstart(io)
+        model = LP.Model()
+        @test_throws LP.ParseError MOI.read!(io, model)
+    end
+    return
+end
+
+function test_parse_keyword_subject_to_errors()
+    for obj in ["2 x", "\n2 x", "2\nx", "2*\nx", "2\n*x", "2\n\n*\n\n\nx\n"]
+        io = IOBuffer("""
+        maximize
+        obj: $obj
+        subject to
+        bounds
+        x free
+        end
+        """)
+        seekstart(io)
+        model = LP.Model()
+        MOI.read!(io, model)
+        x = MOI.get(model, MOI.VariableIndex, "x")
+        f = 2.0 * x
+        g = MOI.get(model, MOI.ObjectiveFunction{typeof(f)}())
+        @test isapprox(f, g)
+    end
+    return
+end
+
 end # module
 
 TestLP.runtests()
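Note on PATCH 1: the single-word keyword rule it implements can be sketched in
isolation. The following is a simplified, hypothetical illustration only; the
token representation, the `KEYWORDS` set, and the `is_keyword_at` helper are
stand-ins, not the API used in read.jl:

    # An identifier is only promoted to a keyword when it starts a line (it is
    # preceded by a newline or by the start of the file) and also ends one (it
    # is followed by a newline or by the end of the file).
    const KEYWORDS = Set(["maximize", "minimize", "st", "bounds", "free", "end"])

    # `tokens` is a vector of strings in which "\n" stands for a newline token.
    function is_keyword_at(tokens::Vector{String}, i::Int)
        lowercase(tokens[i]) in KEYWORDS || return false
        starts_line = i == 1 || tokens[i-1] == "\n"
        ends_line = i == length(tokens) || tokens[i+1] == "\n"
        return starts_line && ends_line
    end

    toks = ["obj:", "bounds", "\n", "bounds", "\n", "x", "free", "\n"]
    @assert !is_keyword_at(toks, 2)  # mid-line: a variable named `bounds`
    @assert is_keyword_at(toks, 4)   # on its own line: the BOUNDS section

Under this rule an LP file may use `bounds`, `st`, `free`, and so on as
variable names, which is what the new
`test_parse_keyword_edge_cases_identifier_is_keyword` above exercises.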
From 002fd04d497799a819f99b6dab1f0bdb2c825535 Mon Sep 17 00:00:00 2001
From: Oscar Dowson
Date: Mon, 15 Sep 2025 12:08:11 +1200
Subject: [PATCH 2/4] Update

---
 test/FileFormats/LP/LP.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl
index a0b9a0b619..dc1db045a5 100644
--- a/test/FileFormats/LP/LP.jl
+++ b/test/FileFormats/LP/LP.jl
@@ -1592,7 +1592,7 @@ function test_parse_keyword_subject_to_errors()
     return
 end
 
-function test_parse_keyword_subject_to_errors()
+function test_parse_newline_in_objective_expression()
     for obj in ["2 x", "\n2 x", "2\nx", "2*\nx", "2\n*x", "2\n\n*\n\n\nx\n"]
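Before the keyword rework in PATCH 3 below, it is worth seeing the intended
behavior end-to-end. A usage sketch, assuming only that MathOptInterface is
installed; it mirrors `test_parse_keyword_edge_cases_identifier_is_keyword`
above with `name = "such"`:

    import MathOptInterface as MOI
    const LP = MOI.FileFormats.LP

    # `such` is only a keyword when it starts a line and is followed by
    # `that`; everywhere else it is an ordinary variable name.
    io = IOBuffer("""
    maximize
    obj: such
    subject to
    such <= 1
    bounds
    such free
    end
    """)
    seekstart(io)
    model = LP.Model()
    MOI.read!(io, model)
    x = only(MOI.get(model, MOI.ListOfVariableIndices()))
    @assert MOI.get(model, MOI.VariableName(), x) == "such"

PATCH 3 reworks exactly this lookahead: `subject` or `such` is only promoted
to the CONSTRAINTS keyword when it is followed by `to` or `that` and then by a
newline or EOF.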
From 0cbd79ee81ba085c4f6dedacf9e37ce44523718b Mon Sep 17 00:00:00 2001
From: Oscar Dowson
Date: Mon, 15 Sep 2025 12:18:43 +1200
Subject: [PATCH 3/4] Update

---
 src/FileFormats/LP/read.jl | 53 +++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl
index 6326c69264..96b852974d 100644
--- a/src/FileFormats/LP/read.jl
+++ b/src/FileFormats/LP/read.jl
@@ -395,13 +395,13 @@ function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
         if token.kind != _TOKEN_IDENTIFIER
             continue
         end
-        # Here we have a _TOKEN_IDENTIFIER. If it is preceded by a
-        # _TOKEN_NEWLINE, it may be a _TOKEN_KEYWORD.
+        # Here we have a _TOKEN_IDENTIFIER. But if it is not preceded by a
+        # _TOKEN_NEWLINE, it cannot be a _TOKEN_KEYWORD.
         if !_nothing_or_newline(_prior_token(state))
-            continue # It can't be a keyword
+            continue
         end
         # It might be a _TOKEN_KEYWORD.
-        kw = _case_insenstive_identifier_to_keyword(token.value)
+        (kw = _case_insenstive_identifier_to_keyword(token.value))
         if kw !== nothing
             # The token matches a single word keyword. All keywords are followed
             # by a new line, or an EOF.
@@ -414,25 +414,36 @@ function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
             end
             continue
         end
-        for (a, b) in ["subject" => "to", "such" => "that"]
-            if _compare_case_insenstive(token, a)
-                # This _might_ be `subject to`, or it might just be a variable
-                # named `subject`, like `obj:\n subject\n`.
-                t = _peek_inner(state)
-                if t !== nothing
-                    t2 = _peek_inner(state)
-                    if _compare_case_insenstive(t, b) && _nothing_or_newline(t2)
-                        state.peek_tokens[end] =
-                            _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos)
-                    else
-                        push!(state.peek_tokens, t)
-                    end
-                    if t2 !== nothing
-                        push!(state.peek_tokens, t2)
-                    end
-                end
+        # There are two keywords that contain whitespace: `subject to` and
+        # `such that`.
+        for (a, b) in ("subject" => "to", "such" => "that")
+            if !_compare_case_insenstive(token, a)
                 continue
             end
+            # This _might_ be `subject to`, or it might just be a variable
+            # named `subject`, like `obj:\n subject\n`.
+            token_b = _peek_inner(state)
+            if token_b === nothing
+                # The next token is EOF. Nothing to do here.
+                break
+            elseif !_compare_case_insenstive(token_b, b)
+                # The second token doesn't match. Store `token_b` and break.
+                push!(state.peek_tokens, token_b)
+                break
+            end
+            # We have something that matches (a, b), but a _TOKEN_KEYWORD needs
+            # to be followed by a new line.
+            token_nl = _peek_inner(state)
+            if _nothing_or_newline(token_nl)
+                state.peek_tokens[end] =
+                    _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos)
+            else
+                push!(state.peek_tokens, token_b)
+            end
+            if token_nl !== nothing
+                push!(state.peek_tokens, token_nl)
+            end
+            break
         end
     end
     return state.peek_tokens[n]

From 0d1ba918cd0bba411a5f53141d178f6977d39bf5 Mon Sep 17 00:00:00 2001
From: Oscar Dowson
Date: Mon, 15 Sep 2025 13:13:19 +1200
Subject: [PATCH 4/4] Update

---
 src/FileFormats/LP/read.jl |  7 +++++--
 test/FileFormats/LP/LP.jl  | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl
index 96b852974d..f3101ca8d8 100644
--- a/src/FileFormats/LP/read.jl
+++ b/src/FileFormats/LP/read.jl
@@ -664,7 +664,7 @@ function _parse_quad_expression(
         end
     end
     t = _next_non_newline(state)
-    if t.kind == _TOKEN_DIVISION
+    if t !== nothing && t.kind == _TOKEN_DIVISION
         _skip_newlines(state)
         _ = read(state, _Token, _TOKEN_DIVISION) # /
@@ -729,7 +729,10 @@ function _parse_term(
         # This could either be NUMBER \nEND-OF-TERM, or it could be a term
         # split by a new line, like `2\nx`.
t = _next_non_newline(state) - if t.kind == _TOKEN_MULTIPLICATION + if t === nothing + # NUMBER + return coef + elseif t.kind == _TOKEN_MULTIPLICATION # NUMBER \n * [\n] IDENTIFIER _skip_newlines(state) _ = read(state, _Token, _TOKEN_MULTIPLICATION) diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index dc1db045a5..3d1a788958 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -1613,6 +1613,40 @@ function test_parse_newline_in_objective_expression() return end +function test_parse_subject_eof() + io = IOBuffer("maximize\nobj:\nsubject") + seekstart(io) + model = LP.Model() + MOI.read!(io, model) + x = MOI.get(model, MOI.VariableIndex, "subject") + @test x isa MOI.VariableIndex + return +end + +function test_parse_expr_eof() + io = IOBuffer("maximize\nobj: x + 2\n") + seekstart(io) + model = LP.Model() + MOI.read!(io, model) + x = MOI.get(model, MOI.VariableIndex, "x") + f = 1.0 * x + 2.0 + g = MOI.get(model, MOI.ObjectiveFunction{typeof(f)}()) + @test isapprox(f, g) + return +end + +function test_parse_quadratic_expr_eof() + io = IOBuffer("maximize\nobj: [x * x]\n") + seekstart(io) + model = LP.Model() + MOI.read!(io, model) + x = MOI.get(model, MOI.VariableIndex, "x") + f = 1.0 * x * x + g = MOI.get(model, MOI.ObjectiveFunction{typeof(f)}()) + @test isapprox(f, g) + return +end + end # module TestLP.runtests()
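Finally, a usage sketch of the newline-in-term behavior that this series
enables, mirroring `test_parse_newline_in_objective_expression` above. It
assumes only that MathOptInterface is installed; everything else is taken
from the tests:

    import MathOptInterface as MOI
    const LP = MOI.FileFormats.LP

    # `2\nx` is a single term split across a newline: the objective is 2x.
    io = IOBuffer("maximize\nobj: 2\nx\nsubject to\nbounds\nx free\nend\n")
    seekstart(io)
    model = LP.Model()
    MOI.read!(io, model)
    x = MOI.get(model, MOI.VariableIndex, "x")
    f = 2.0 * x
    g = MOI.get(model, MOI.ObjectiveFunction{typeof(f)}())
    @assert isapprox(f, g)

Previously a newline always ended a term, so `2\nx` did not parse as the
single term `2 x`.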