From 2f1d41687762519e7776685246087a9cf390c1f7 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Fri, 12 Sep 2025 11:13:47 +1200 Subject: [PATCH] [FileFormats.LP] fix handling of required new lines --- src/FileFormats/LP/read.jl | 86 ++++++++++++++++++++++++++------------ test/FileFormats/LP/LP.jl | 80 ++++++++++++++++++++++++++++++++++- 2 files changed, 139 insertions(+), 27 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 950d6561b6..f778df8cee 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -29,6 +29,13 @@ struct _ReadCache{T} end end +function _read_newline_or_eof(state) + if (p = peek(state, _Token)) !== nothing + _ = read(state, _Token, _TOKEN_NEWLINE) + end + return +end + """ Base.read!(io::IO, model::FileFormats.LP.Model) @@ -53,10 +60,9 @@ function Base.read!(io::IO, model::Model{T}) where {T} if token.kind == _TOKEN_KEYWORD _ = read(state, _Token) keyword = Symbol(token.value) - continue + _read_newline_or_eof(state) elseif token.kind == _TOKEN_NEWLINE - _ = read(state, _Token) - continue + _ = read(state, _Token, _TOKEN_NEWLINE) elseif keyword == :MINIMIZE MOI.set(cache.model, MOI.ObjectiveSense(), MOI.MIN_SENSE) _parse_objective(state, cache) @@ -347,6 +353,15 @@ end _is_number(c::Char) = isdigit(c) || c in ('.', 'e', 'E', '+', '-') +# We want an efficient way to check if `test.value` is a case-insensitive +# version of `target`. This is run for every identifier, so it needs to be fast. 
+function _compare_case_insenstive(test::_Token, target::String) + if test.kind != _TOKEN_IDENTIFIER || length(test.value) != length(target) + return false + end + return all(lowercase(a) == b for (a, b) in zip(test.value, target)) +end + function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1) @assert n >= 1 while length(state.peek_tokens) < n @@ -355,6 +370,23 @@ function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1) return nothing end push!(state.peek_tokens, token) + if _compare_case_insenstive(token, "subject") + t = _peek_inner(state) + if _compare_case_insenstive(t, "to") + state.peek_tokens[end] = + _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos) + else + push!(state.peek_tokens, t) + end + elseif _compare_case_insenstive(token, "such") + t = _peek_inner(state) + if _compare_case_insenstive(t, "that") + state.peek_tokens[end] = + _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos) + else + push!(state.peek_tokens, t) + end + end end return state.peek_tokens[n] end @@ -369,7 +401,8 @@ function _peek_inner(state::_LexerState) elseif isspace(c) # Whitespace _ = read(state, Char) elseif c == '\\' # Comment: backslash until newline - while (c = read(state, Char)) !== nothing && c != '\n' + while (c = peek(state, Char)) !== nothing && c != '\n' + _ = read(state, Char) end elseif isdigit(c) || (c == '-' && isdigit(peek(state, Char))) # Number buf = IOBuffer() @@ -385,21 +418,7 @@ function _peek_inner(state::_LexerState) _ = read(state, Char) end val = String(take!(buf)) - l_val = lowercase(val) - if l_val == "subject" - t = peek(state, _Token) - if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "to" - _ = read(state, _Token) # Skip "to" - return _Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos) - end - elseif l_val == "such" - t = peek(state, _Token) - if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "that" - _ = read(state, _Token) # Skip "such" - return _Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos) - end - end - if (kw = get(_KEYWORDS, 
l_val, nothing)) !== nothing + if (kw = get(_KEYWORDS, lowercase(val), nothing)) !== nothing return _Token(_TOKEN_KEYWORD, string(kw), pos) end return _Token(_TOKEN_IDENTIFIER, val, pos) @@ -579,7 +598,12 @@ function _parse_quad_expression( ) end end - _skip_newlines(state) + while _next_token_is(state, _TOKEN_NEWLINE) + if _next_token_is(state, _TOKEN_KEYWORD, 2) + break + end + _ = read(state, _Token, _TOKEN_NEWLINE) + end if _next_token_is(state, _TOKEN_DIVISION) _ = read(state, _Token) # / # Must be /2 @@ -691,6 +715,9 @@ function _parse_expression(state::_LexerState, cache::_ReadCache{T}) where {T} p = read(state, _Token) _add_to_expression!(f, _parse_term(state, cache, -one(T))) elseif p.kind == _TOKEN_NEWLINE + if _next_token_is(state, _TOKEN_KEYWORD, 2) + break + end _ = read(state, _Token) else break @@ -782,6 +809,7 @@ function _parse_objective(state::_LexerState, cache::_ReadCache) end f = _parse_expression(state, cache) MOI.set(cache.model, MOI.ObjectiveFunction{typeof(f)}(), f) + _read_newline_or_eof(state) return end @@ -828,6 +856,7 @@ function _parse_bound(state, cache) x = _parse_variable(state, cache) set = _parse_set_suffix(state, cache) _add_bound(cache, x, set) + _read_newline_or_eof(state) return end # `a op x` or `a op x op b` @@ -842,6 +871,7 @@ function _parse_bound(state, cache) rhs_set = _parse_set_suffix(state, cache) _add_bound(cache, x, rhs_set) end + _read_newline_or_eof(state) return end @@ -852,10 +882,11 @@ function _is_sos_constraint(state) end # SOS_CONSTRAINT := -# [NAME] S1:: (IDENTIFIER:NUMBER)+ \n -# | [NAME] S2:: (IDENTIFIER:NUMBER)+ \n +# [NAME] S1:: (IDENTIFIER:NUMBER)+ +# | [NAME] S2:: (IDENTIFIER:NUMBER)+ # -# The newline character is required. +# New lines are not supported within the line. 
+# Terminating new lines are handled in _parse_constraint function _parse_sos_constraint( state::_LexerState, cache::_ReadCache{T}, @@ -904,6 +935,8 @@ end # INDICATOR_CONSTRAINT := # IDENTIFIER "=" "0" "->" EXPRESSION SET_SUFFIX # | IDENTIFIER "=" "1" "->" EXPRESSION SET_SUFFIX +# +# Terminating new lines are handled in _parse_constraint function _parse_indicator_constraint( state::_LexerState, cache::_ReadCache{T}, @@ -929,9 +962,9 @@ function _parse_indicator_constraint( end # CONSTRAINT := -# [NAME] EXPRESSION SET_SUFFIX -# | [NAME] SOS_CONSTRAINT -# | [NAME] INDICATOR_CONSTRAINT +# [NAME] EXPRESSION SET_SUFFIX \n +# | [NAME] SOS_CONSTRAINT \n +# | [NAME] INDICATOR_CONSTRAINT \n function _parse_constraint(state::_LexerState, cache::_ReadCache) name = _parse_optional_name(state, cache) # Check if this is an SOS constraint @@ -947,5 +980,6 @@ function _parse_constraint(state::_LexerState, cache::_ReadCache) if name !== nothing MOI.set(cache.model, MOI.ConstraintName(), c, name) end + _read_newline_or_eof(state) return end diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index cb3beae1c1..1dc33db5f9 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -589,7 +589,7 @@ function test_read_objective_sense() ) for (sense, result) in cases model = LP.Model() - io = IOBuffer("$sense x") + io = IOBuffer("$sense\nx") seekstart(io) read!(io, model) @test MOI.get(model, MOI.ObjectiveSense()) == result @@ -1476,6 +1476,84 @@ function test_parse_set_sufffix() return end +function test_new_line_edge_cases() + target = "minimize\nobj: 1 x + 1 y\nsubject to\nc: 1 x <= 1\nBounds\nx >= 0\ny >= 0\nEnd\n" + for input in [ + # Good + "minimize\nobj: x + y\nsubject to\nc: x <= 1\nend", + # Blank lines at the start + "\n\nminimize\nobj: x + y\nsubject to\nc: x <= 1\nend", + # Blank lines after minimize + "minimize\n\n\nobj: x + y\nsubject to\nc: x <= 1\nend", + # Blank lines between obj: and expression + "minimize\nobj:\n\nx + y\nsubject to\nc: x 
<= 1\nend", + # Blank lines throughout objective expression + "minimize\nobj:\nx \n+ \ny\nsubject to\nc: x <= 1\nend", + # Blank lines after objective expression + "minimize\nobj: x + y\n\nsubject to\nc: x <= 1\nend", + # Blank lines after subject to + "minimize\nobj: x + y\nsubject to\n\n\nc: x <= 1\nend", + # Blank lines in constraint + "minimize\nobj: x + y\nsubject to\nc:\nx\n<=\n1\nend", + # Blank lines around end + "minimize\nobj: x + y\nsubject to\nc: x <= 1\n\nend\n\n\n", + # Comment before newline + "minimize\\comment\nobj: x + y\nsubject to\nc: x <= 1\nend", + "minimize\nobj: x + y\\comment\nsubject to\nc: x <= 1\nend", + "minimize\nobj: x + y\nsubject to\\comment\nc: x <= 1\nend", + "minimize\nobj: x + y\nsubject to\nc: x <= 1\\comment\nend", + "minimize\nobj: x + y\nsubject to\nc: x <= 1\nend\\comment", + ] + io = IOBuffer(input) + seekstart(io) + model = LP.Model() + MOI.read!(io, model) + @test sprint(MOI.write, model) == target + end + return +end + +function test_new_line_edge_cases_sos() + target = "minimize\nobj: 1 x + 1 y\nsubject to\nBounds\nx >= 0\ny >= 0\nSOS\nc: S1:: x:1.0 y:2.0\nEnd\n" + for input in [ + "minimize\nobj: x + y\nsubject to\nc: S1:: x:1 y:2\nend", + "minimize\nobj: x + y\nsubject to\nc: S1:: x:1 y:2\n\nend", + "minimize\nobj: x + y\nsubject to\n\nc: S1:: x:1 y:2\n\nend", + "minimize\nobj: x + y\nsubject to\n\nc: S1:: x:1 y:2\\comment\nend", + ] + io = IOBuffer(input) + seekstart(io) + model = LP.Model() + MOI.read!(io, model) + @test sprint(MOI.write, model) == target + end + return +end + +function test_new_line_edge_case_fails() + for input in [ + # No newline between objective sense and objective + "minimize x", + "maximize x", + "maximize c: x", + # No new line between objective and subject to + "maximize\nobj: x subject to", + # No new line between subject to and constraint + "maximize\nobj: x\nsubject to c: x >= 0", + # No new line between multiple constraints + "maximize\nobj: x\nsubject to\nc: x >= 0 x <= 1", + # New 
lines in bounds section + "maximize\nobj: x\nsubject to\nbounds x >= 0\bx <= 1", + "maximize\nobj: x\nsubject to\nbounds\nx >= 0 x <= 1", + ] + io = IOBuffer(input) + seekstart(io) + model = LP.Model() + @test_throws LP.ParseError MOI.read!(io, model) + end + return +end + end # module TestLP.runtests()