diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl
index 0291d9e87c..f3101ca8d8 100644
--- a/src/FileFormats/LP/read.jl
+++ b/src/FileFormats/LP/read.jl
@@ -280,13 +280,15 @@ A struct that is used to manage state when lexing. It stores:
   error message to the user on a parse error
 * `peek_char`: the next `Char` in the `io`
 * `peek_tokens`: the list of upcoming tokens that we have already peeked
+* `current_token`: the most recent token that we have `read`
 """
 mutable struct _LexerState{O<:IO}
     io::O
     line::Int
     peek_char::Union{Nothing,Char}
     peek_tokens::Vector{_Token}
-    _LexerState(io::IO) = new{typeof(io)}(io, 1, nothing, _Token[])
+    current_token::Union{Nothing,_Token}
+    _LexerState(io::IO) = new{typeof(io)}(io, 1, nothing, _Token[], nothing)
 end
 
 """
@@ -351,6 +353,7 @@ function Base.read(state::_LexerState, ::Type{_Token})
         )
     end
     popfirst!(state.peek_tokens)
+    state.current_token = token
     return token
 end
 
@@ -371,6 +374,16 @@
 
 _is_number(c::Char) = isdigit(c) || c in ('.', 'e', 'E', '+', '-')
 
+_nothing_or_newline(::Nothing) = true
+_nothing_or_newline(t::_Token) = t.kind == _TOKEN_NEWLINE
+
+function _prior_token(state::_LexerState)
+    if length(state.peek_tokens) <= 1
+        return state.current_token
+    end
+    return state.peek_tokens[end-1]
+end
+
 function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
     @assert n >= 1
     while length(state.peek_tokens) < n
@@ -379,22 +392,58 @@ function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
             return nothing
         end
         push!(state.peek_tokens, token)
-        if _compare_case_insenstive(token, "subject")
+        if token.kind != _TOKEN_IDENTIFIER
+            continue
+        end
+        # Here we have a _TOKEN_IDENTIFIER. But if it is not preceded by a
+        # _TOKEN_NEWLINE, it cannot be a _TOKEN_KEYWORD.
+        if !_nothing_or_newline(_prior_token(state))
+            continue
+        end
+        # It might be a _TOKEN_KEYWORD.
+        kw = _case_insenstive_identifier_to_keyword(token.value)
+        if kw !== nothing
+            # The token matches a single-word keyword. All keywords are
+            # followed by a new line, or an EOF.
             t = _peek_inner(state)
-            if _compare_case_insenstive(t, "to")
-                state.peek_tokens[end] =
-                    _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos)
-            else
+            if _nothing_or_newline(t)
+                state.peek_tokens[end] = _Token(_TOKEN_KEYWORD, kw, token.pos)
+            end
+            if t !== nothing
                 push!(state.peek_tokens, t)
             end
-        elseif _compare_case_insenstive(token, "such")
-            t = _peek_inner(state)
-            if _compare_case_insenstive(t, "that")
+            continue
+        end
+        # There are two keywords that contain whitespace: `subject to` and
+        # `such that`.
+        for (a, b) in ("subject" => "to", "such" => "that")
+            if !_compare_case_insenstive(token, a)
+                continue
+            end
+            # This _might_ be `subject to`, or it might just be a variable
+            # named `subject`, like `obj:\n subject\n`.
+            token_b = _peek_inner(state)
+            if token_b === nothing
+                # The next token is EOF. Nothing to do here.
+                break
+            elseif !_compare_case_insenstive(token_b, b)
+                # The second token doesn't match. Store `token_b` and break.
+                push!(state.peek_tokens, token_b)
+                break
+            end
+            # We have something that matches (a, b), but a _TOKEN_KEYWORD needs
+            # to be followed by a new line.
+            token_nl = _peek_inner(state)
+            if _nothing_or_newline(token_nl)
                 state.peek_tokens[end] =
                     _Token(_TOKEN_KEYWORD, "CONSTRAINTS", token.pos)
             else
-                push!(state.peek_tokens, t)
+                push!(state.peek_tokens, token_b)
+            end
+            if token_nl !== nothing
+                push!(state.peek_tokens, token_nl)
             end
+            break
         end
     end
     return state.peek_tokens[n]
@@ -426,11 +475,7 @@ function _peek_inner(state::_LexerState)
             write(buf, c)
             _ = read(state, Char)
         end
-        val = String(take!(buf))
-        if (kw = _case_insenstive_identifier_to_keyword(val)) !== nothing
-            return _Token(_TOKEN_KEYWORD, kw, pos)
-        end
-        return _Token(_TOKEN_IDENTIFIER, val, pos)
+        return _Token(_TOKEN_IDENTIFIER, String(take!(buf)), pos)
     elseif (op = get(_OPERATORS, c, nothing)) !== nothing
         _ = read(state, Char) # Skip c
         if c == '-' && peek(state, Char) == '>'
@@ -473,6 +518,19 @@ function _skip_newlines(state::_LexerState)
     return
 end
 
+function _next_non_newline(state::_LexerState)
+    n = 1
+    while true
+        t = peek(state, _Token, n)
+        if t === nothing
+            return nothing
+        elseif t.kind != _TOKEN_NEWLINE
+            return t
+        end
+        n += 1
+    end
+end
+
 # IDENTIFIER := "string"
 #
 # There _are_ rules to what an identifier can be. We handle these when lexing.
@@ -605,14 +663,10 @@ function _parse_quad_expression(
             )
         end
     end
-    while _next_token_is(state, _TOKEN_NEWLINE)
-        if _next_token_is(state, _TOKEN_KEYWORD, 2)
-            break
-        end
-        _ = read(state, _Token, _TOKEN_NEWLINE)
-    end
-    if _next_token_is(state, _TOKEN_DIVISION)
-        _ = read(state, _Token) # /
+    t = _next_non_newline(state)
+    if t !== nothing && t.kind == _TOKEN_DIVISION
+        _skip_newlines(state)
+        _ = read(state, _Token, _TOKEN_DIVISION) # /
         # Must be /2
         n = read(state, _Token, _TOKEN_NUMBER)
         if n.value != "2"
@@ -634,10 +688,11 @@
 end
 
 # TERM :=
-#   "+" TERM
+#   [\n*] TERM
+#   | "+" TERM
 #   | "-" TERM
-#   | NUMBER
 #   | IDENTIFIER
+#   | NUMBER
 #   | NUMBER IDENTIFIER
 #   | NUMBER "*" IDENTIFIER
 #   | QUADRATIC_EXPRESSION
@@ -670,12 +725,28 @@ function _parse_term(
             _ = read(state, _Token, _TOKEN_MULTIPLICATION)
             x = _parse_variable(state, cache)
            return MOI.ScalarAffineTerm(coef, x)
-        elseif _next_token_is(state, _TOKEN_NEWLINE) ||
-               _next_token_is(state, _TOKEN_ADDITION) ||
-               _next_token_is(state, _TOKEN_SUBTRACTION)
-            # NUMBER
-            return coef
+        elseif _next_token_is(state, _TOKEN_NEWLINE)
+            # This could either be NUMBER \nEND-OF-TERM, or it could be a term
+            # split by a new line, like `2\nx`.
+            t = _next_non_newline(state)
+            if t === nothing
+                # NUMBER
+                return coef
+            elseif t.kind == _TOKEN_MULTIPLICATION
+                # NUMBER \n * [\n] IDENTIFIER
+                _skip_newlines(state)
+                _ = read(state, _Token, _TOKEN_MULTIPLICATION)
+                _skip_newlines(state)
+                x = _parse_variable(state, cache)
+                return MOI.ScalarAffineTerm(coef, x)
+            elseif t.kind == _TOKEN_IDENTIFIER
+                # NUMBER \n IDENTIFIER
+                x = _parse_variable(state, cache)
+                return MOI.ScalarAffineTerm(coef, x)
+            end
         end
+        # NUMBER
+        return coef
     elseif _next_token_is(state, _TOKEN_OPEN_BRACKET)
         # QUADRATIC_EXPRESSION
         return _parse_quad_expression(state, cache, prefix)
diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl
index 1dc33db5f9..3d1a788958 100644
--- a/test/FileFormats/LP/LP.jl
+++ b/test/FileFormats/LP/LP.jl
@@ -1554,6 +1554,99 @@ function test_new_line_edge_case_fails()
     return
 end
 
+function test_parse_keyword_edge_cases_identifier_is_keyword()
+    for name in ["max", "min", "st", "such", "bounds", "obj", "free"]
+        io = IOBuffer("""
+        maximize
+        obj: $name
+        subject to
+        $name <= 1
+        bounds
+        $name free
+        end
+        """)
+        seekstart(io)
+        model = LP.Model()
+        MOI.read!(io, model)
+        x = only(MOI.get(model, MOI.ListOfVariableIndices()))
+        @test MOI.get(model, MOI.VariableName(), x) == name
+    end
+    return
+end
+
+function test_parse_keyword_subject_to_errors()
+    for line in ["subject", "subject too", "subject to a:"]
+        io = IOBuffer("""
+        maximize
+        obj: x
+        $line
+        x <= 1
+        bounds
+        x free
+        end
+        """)
+        seekstart(io)
+        model = LP.Model()
+        @test_throws LP.ParseError MOI.read!(io, model)
+    end
+    return
+end
+
+function test_parse_newline_in_objective_expression()
+    for obj in ["2 x", "\n2 x", "2\nx", "2*\nx", "2\n*x", "2\n\n*\n\n\nx\n"]
+        io = IOBuffer("""
+        maximize
+        obj: $obj
+        subject to
+        bounds
+        x free
+        end
+        """)
+        seekstart(io)
+        model = LP.Model()
+        MOI.read!(io, model)
+        x = MOI.get(model, MOI.VariableIndex, "x")
+        f = 2.0 * x
+        g = MOI.get(model, MOI.ObjectiveFunction{typeof(f)}())
+        @test isapprox(f, g)
+    end
+    return
+end
+
+function test_parse_subject_eof()
+    io = IOBuffer("maximize\nobj:\nsubject")
+    seekstart(io)
+    model = LP.Model()
+    MOI.read!(io, model)
+    x = MOI.get(model, MOI.VariableIndex, "subject")
+    @test x isa MOI.VariableIndex
+    return
+end
+
+function test_parse_expr_eof()
+    io = IOBuffer("maximize\nobj: x + 2\n")
+    seekstart(io)
+    model = LP.Model()
+    MOI.read!(io, model)
+    x = MOI.get(model, MOI.VariableIndex, "x")
+    f = 1.0 * x + 2.0
+    g = MOI.get(model, MOI.ObjectiveFunction{typeof(f)}())
+    @test isapprox(f, g)
+    return
+end
+
+function test_parse_quadratic_expr_eof()
+    io = IOBuffer("maximize\nobj: [x * x]\n")
+    seekstart(io)
+    model = LP.Model()
+    MOI.read!(io, model)
+    x = MOI.get(model, MOI.VariableIndex, "x")
+    f = 1.0 * x * x
+    g = MOI.get(model, MOI.ObjectiveFunction{typeof(f)}())
+    @test isapprox(f, g)
+    return
+end
+
 end  # module
 
 TestLP.runtests()
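The snippet below is not part of the patch; it is a minimal sketch, mirroring the new tests above, of the two behaviors the patch enables: a keyword-like name used as a plain identifier, and a term split across new lines. It assumes MathOptInterface is installed, and uses `LP` as shorthand for `MOI.FileFormats.LP`.

```julia
import MathOptInterface as MOI
const LP = MOI.FileFormats.LP

# Case 1: `such` is only treated as (the start of) a keyword when it begins
# a line and is followed by `that` and a new line, so it can double as a
# variable name (mirrors test_parse_keyword_edge_cases_identifier_is_keyword).
io = IOBuffer("""
maximize
obj: such
subject to
such <= 1
bounds
such free
end
""")
seekstart(io)
model = LP.Model()
MOI.read!(io, model)
x = only(MOI.get(model, MOI.ListOfVariableIndices()))
@assert MOI.get(model, MOI.VariableName(), x) == "such"

# Case 2: the objective term `2 x` is split across a new line, which the
# parser now joins into a single term 2.0 * x (mirrors
# test_parse_newline_in_objective_expression).
io = IOBuffer("""
maximize
obj: 2
x
subject to
bounds
x free
end
""")
seekstart(io)
model = LP.Model()
MOI.read!(io, model)
x = MOI.get(model, MOI.VariableIndex, "x")
f = 2.0 * x
@assert isapprox(f, MOI.get(model, MOI.ObjectiveFunction{typeof(f)}()))
```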