From 215d019467ba701a6408811e8e1dd8c5b605e83b Mon Sep 17 00:00:00 2001
From: Oscar Dowson
Date: Fri, 12 Sep 2025 08:40:20 +1200
Subject: [PATCH 1/4] [FileFormats.LP] some renaming in the LP reader

---
 src/FileFormats/LP/read.jl | 322 ++++++++++++++++++++-----------------
 test/FileFormats/LP/LP.jl  |  80 ++++-----
 2 files changed, 220 insertions(+), 182 deletions(-)

diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl
index ceeed12e4a..79661f5151 100644
--- a/src/FileFormats/LP/read.jl
+++ b/src/FileFormats/LP/read.jl
@@ -4,11 +4,23 @@
 # Use of this source code is governed by an MIT-style license that can be found
 # in the LICENSE.md file or at https://opensource.org/licenses/MIT.
 
-struct Cache{T}
+"""
+    _ReadCache(model::Model{T}) where {T}
+
+This struct stores a few things to help reading the file:
+
+ * `variable_name_to_index`: this maps variable names to their MOI index
+ * `variable_with_default_bound`: by default, variables have a lower bound of
+   `0`. When we read through the `Bounds` section, we remove from this set any
+   variable that explicitly sets the lower bound, or that has an upper bound
+   that is negative. At the end of `read!` we iterate through the remaining
+   variables and add a lower bound of `0`.
+"""
+struct _ReadCache{T}
     model::Model{T}
     variable_name_to_index::Dict{String,MOI.VariableIndex}
     variable_with_default_bound::Set{MOI.VariableIndex}
-    function Cache(model::Model{T}) where {T}
+    function _ReadCache(model::Model{T}) where {T}
         return new{T}(
             model,
             Dict{String,MOI.VariableIndex}(),
@@ -26,6 +38,7 @@
 This reader attempts to follow the CPLEX LP format, because others like the
 lpsolve version are very...flexible...in how they accept input.
 
 Read more about the format here:
+
  * http://lpsolve.sourceforge.net
  * https://web.mit.edu/lpsolve/doc/CPLEX-format.htm
 """
@@ -33,16 +46,16 @@ function Base.read!(io::IO, model::Model{T}) where {T}
     if !MOI.is_empty(model)
         error("Cannot read in file because model is not empty.")
     end
-    state = LexerState(io)
-    cache = Cache(model)
+    state = _LexerState(io)
+    cache = _ReadCache(model)
     keyword = :UNKNOWN
-    while (token = peek(state, Token)) !== nothing
+    while (token = peek(state, _Token)) !== nothing
         if token.kind == _TOKEN_KEYWORD
-            _ = read(state, Token)
+            _ = read(state, _Token)
             keyword = Symbol(token.value)
             continue
         elseif token.kind == _TOKEN_NEWLINE
-            _ = read(state, Token)
+            _ = read(state, _Token)
             continue
         elseif keyword == :MINIMIZE
             MOI.set(cache.model, MOI.ObjectiveSense(), MOI.MIN_SENSE)
@@ -65,13 +78,13 @@
         elseif keyword == :SOS
             _parse_constraint(state, cache)
         elseif keyword == :END
-            _throw_unexpected_token(
+            _throw_parse_error(
                 state,
                 token,
                 "No file contents are allowed after `end`.",
             )
         else
-            _throw_unexpected_token(
+            _throw_parse_error(
                 state,
                 token,
                 "Parsing this section is not supported by the current reader.",
@@ -134,6 +147,12 @@ const _KEYWORDS = Dict(
     "end" => :END,
 )
 
+"""
+    _TokenKind
+
+This enum is the list of tokens that we might encounter when lexing the file.
+Hopefully they're all self-explanatory.
+"""
 @enum(
     _TokenKind,
     _TOKEN_KEYWORD,
@@ -155,6 +174,12 @@
     _TOKEN_UNKNOWN,
 )
 
+"""
+    const _KIND_TO_MSG::Dict{_TokenKind,String}
+
+This dictionary maps `_TokenKind` to a string that is used when printing error
+messages. The string must complete the sentence "We expected this token to be ".
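+
+For example, `_KIND_TO_MSG[_TOKEN_NUMBER]` is `"a number"`, which completes
+the sentence as "We expected this token to be a number".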
+""" const _KIND_TO_MSG = Dict{_TokenKind,String}( _TOKEN_KEYWORD => "a keyword", _TOKEN_IDENTIFIER => "a variable name", @@ -198,78 +223,78 @@ const _OPERATORS = Dict{Char,_TokenKind}( ) """ - struct Token + struct _Token kind::_TokenKind value::Union{Nothing,String} + pos::Int end This struct is used to represent each token from the lexer. The `value` is the unprocessed value. + +`pos` is the position of the `io::IO` in the lexer that begins this token. We +use the `pos` to provide nice error messages. """ -struct Token +struct _Token kind::_TokenKind value::Union{Nothing,String} pos::Int end """ - mutable struct LexerState + mutable struct _LexerState io::IO + line::Int peek_char::Union{Nothing,Char} - peek_tokens::Vector{Token} + peek_tokens::Vector{_Token} end -A struct that is used to manage state when lexing. - -It stores: +A struct that is used to manage state when lexing. It stores: * `io`: the IO object that we are streaming + * `line`: counts the number of `\n` characters, so that we can provide a nice + error message to the user on a parse error * `peek_char`: the next `Char` in the `io` - * `peek_tokens`: the list of upcoming tokens that we have already peeked. + * `peek_tokens`: the list of upcoming tokens that we have already peeked """ -mutable struct LexerState +mutable struct _LexerState io::IO line::Int peek_char::Union{Nothing,Char} - peek_tokens::Vector{Token} - LexerState(io::IO) = new(io, 1, nothing, Token[]) + peek_tokens::Vector{_Token} + _LexerState(io::IO) = new(io, 1, nothing, _Token[]) end """ - struct UnexpectedToken <: Exception - token::Token + struct ParseError <: Exception + line::Int + msg::String end -This error is thrown when we encounter an unexpected token when parsing the LP -file. No other information is available. +This error is thrown when we encounter an error parsing the LP file. """ -struct UnexpectedToken <: Exception - token::Token +struct ParseError <: Exception line::Int msg::String end -function _throw_unexpected_token(state::LexerState, token::Token, msg::String) +function _throw_parse_error(state::_LexerState, token::_Token, msg::String) offset = min(40, token.pos) seek(state.io, token.pos - offset) line = String(read(state.io, 2 * offset)) i = something(findprev('\n', line, offset-1), 0) j = something(findnext('\n', line, offset), length(line) + 1) help = string(line[(i+1):(j-1)], "\n", " "^(offset - i + - 1), "^\n", msg) - return throw(UnexpectedToken(token, state.line, help)) + return throw(ParseError(state.line, help)) end -function Base.showerror(io::IO, err::UnexpectedToken) - return print( - io, - "Error parsing LP file. 
-        err.msg,
-    )
+function Base.showerror(io::IO, err::ParseError)
+    return print(io, "Error parsing LP file on line $(err.line):\n", err.msg)
 end
 
-function _expect(state::LexerState, token::Token, kind::_TokenKind)
+function _expect(state::_LexerState, token::_Token, kind::_TokenKind)
     if token.kind != kind
-        _throw_unexpected_token(
+        _throw_parse_error(
             state,
             token,
             string("We expected this token to be ", _KIND_TO_MSG[kind]),
@@ -278,25 +303,25 @@
     return token
 end
 
-function Base.peek(state::LexerState, ::Type{Char})
+function Base.peek(state::_LexerState, ::Type{Char})
     if state.peek_char === nothing && !eof(state.io)
         state.peek_char = read(state.io, Char)
     end
     return state.peek_char
 end
 
-function Base.read(state::LexerState, ::Type{Char})
+function Base.read(state::_LexerState, ::Type{Char})
     c = peek(state, Char)
     state.peek_char = nothing
     return c
 end
 
-function Base.read(state::LexerState, ::Type{Token})
-    token = peek(state, Token, 1)
+function Base.read(state::_LexerState, ::Type{_Token})
+    token = peek(state, _Token, 1)
     if isempty(state.peek_tokens)
-        _throw_unexpected_token(
+        _throw_parse_error(
             state,
-            Token(_TOKEN_UNKNOWN, "EOF", position(state.io)),
+            _Token(_TOKEN_UNKNOWN, "EOF", position(state.io)),
             "Unexpected end to the file. We weren't finished yet.",
         )
     end
@@ -304,8 +329,8 @@
     return token
 end
 
-function Base.read(state::LexerState, ::Type{Token}, kind::_TokenKind)
-    token = read(state, Token)
+function Base.read(state::_LexerState, ::Type{_Token}, kind::_TokenKind)
+    token = read(state, _Token)
     return _expect(state, token, kind)
 end
 
@@ -321,7 +346,7 @@ end
 
 _is_number(c::Char) = isdigit(c) || c in ('.', 'e', 'E', '+', '-')
 
-function Base.peek(state::LexerState, ::Type{Token}, n::Int = 1)
+function Base.peek(state::_LexerState, ::Type{_Token}, n::Int = 1)
     @assert n >= 1
     while length(state.peek_tokens) < n
         token = _peek_inner(state)
@@ -333,13 +358,13 @@
     return state.peek_tokens[n]
 end
 
-function _peek_inner(state::LexerState)
+function _peek_inner(state::_LexerState)
     while (c = peek(state, Char)) !== nothing
         pos = position(state.io)
         if c == '\n'
             state.line += 1
             _ = read(state, Char)
-            return Token(_TOKEN_NEWLINE, nothing, pos)
+            return _Token(_TOKEN_NEWLINE, nothing, pos)
         elseif isspace(c) # Whitespace
             _ = read(state, Char)
         elseif c == '\\' # Comment: backslash until newline
@@ -351,7 +376,7 @@
                 write(buf, c)
                 _ = read(state, Char)
             end
-            return Token(_TOKEN_NUMBER, String(take!(buf)), pos)
+            return _Token(_TOKEN_NUMBER, String(take!(buf)), pos)
         elseif _is_starting_identifier(c) # Identifier / keyword
             buf = IOBuffer()
             while (c = peek(state, Char)) !== nothing && _is_identifier(c)
                 write(buf, c)
                 _ = read(state, Char)
             end
@@ -361,38 +386,38 @@
             val = String(take!(buf))
             l_val = lowercase(val)
             if l_val == "subject"
-                t = peek(state, Token)
+                t = peek(state, _Token)
                 if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "to"
-                    _ = read(state, Token) # Skip "to"
-                    return Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos)
+                    _ = read(state, _Token) # Skip "to"
+                    return _Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos)
                 end
             elseif l_val == "such"
-                t = peek(state, Token)
+                t = peek(state, _Token)
                 if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "that"
-                    _ = read(state, Token) # Skip "such"
-                    return Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos)
"CONSTRAINTS", pos) + _ = read(state, _Token) # Skip "such" + return _Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos) end end if (kw = get(_KEYWORDS, l_val, nothing)) !== nothing - return Token(_TOKEN_KEYWORD, string(kw), pos) + return _Token(_TOKEN_KEYWORD, string(kw), pos) end - return Token(_TOKEN_IDENTIFIER, val, pos) + return _Token(_TOKEN_IDENTIFIER, val, pos) elseif (op = get(_OPERATORS, c, nothing)) !== nothing _ = read(state, Char) # Skip c if c == '-' && peek(state, Char) == '>' _ = read(state, Char) - return Token(_TOKEN_IMPLIES, nothing, pos) + return _Token(_TOKEN_IMPLIES, nothing, pos) elseif c == '=' && peek(state, Char) in ('<', '>') c = read(state, Char) # Allow =< and => as <= and >= - return Token(_OPERATORS[c], nothing, pos) + return _Token(_OPERATORS[c], nothing, pos) elseif c in ('<', '>', '=') && peek(state, Char) == '=' _ = read(state, Char) # Allow <=, >=, and == end - return Token(op, nothing, pos) + return _Token(op, nothing, pos) else - _throw_unexpected_token( + _throw_parse_error( state, - Token(_TOKEN_UNKNOWN, "$c", pos), + _Token(_TOKEN_UNKNOWN, "$c", pos), "This character is not supported an LP file.", ) end @@ -401,20 +426,20 @@ function _peek_inner(state::LexerState) end """ - _next_token_is(state::LexerState, kind::_TokenKind, n::Int = 1) + _next_token_is(state::_LexerState, kind::_TokenKind, n::Int = 1) A helper function to check if the token in `n` steps is of kind `kind`. """ -function _next_token_is(state::LexerState, kind::_TokenKind, n::Int = 1) - if (t = peek(state, Token, n)) !== nothing +function _next_token_is(state::_LexerState, kind::_TokenKind, n::Int = 1) + if (t = peek(state, _Token, n)) !== nothing return t.kind == kind end return false end -function _skip_newlines(state::LexerState) +function _skip_newlines(state::_LexerState) while _next_token_is(state, _TOKEN_NEWLINE) - _ = read(state, Token) + _ = read(state, _Token, _TOKEN_NEWLINE) end return end @@ -423,16 +448,24 @@ end # # There _are_ rules to what an identifier can be. We handle these when lexing. # Anything that makes it here is deemed acceptable. 
-function _parse_variable(state::LexerState, cache::Cache)::MOI.VariableIndex
+function _parse_variable(
+    state::_LexerState,
+    cache::_ReadCache,
+)::MOI.VariableIndex
     _skip_newlines(state)
-    token = read(state, Token, _TOKEN_IDENTIFIER)
+    token = read(state, _Token, _TOKEN_IDENTIFIER)
     x = get(cache.variable_name_to_index, token.value, nothing)
     if x !== nothing
         return x
     end
     x = MOI.add_variable(cache.model)
-    if length(token.value) > get_options(cache.model).maximum_length
-        error("Name exceeds maximum length: $(token.value)")
+    len = get_options(cache.model).maximum_length
+    if length(token.value) > len
+        _throw_parse_error(
+            state,
+            token,
+            "Name ($(token.value)) exceeds maximum length ($len)",
+        )
     end
     MOI.set(cache.model, MOI.VariableName(), x, token.value)
     cache.variable_name_to_index[token.value] = x
@@ -446,9 +479,9 @@ end
 #   | "inf"
 #   | "infinity"
 #   | :(parse(T, x))
-function _parse_number(state::LexerState, cache::Cache{T})::T where {T}
+function _parse_number(state::_LexerState, cache::_ReadCache{T})::T where {T}
     _skip_newlines(state)
-    token = read(state, Token)
+    token = read(state, _Token)
     if token.kind == _TOKEN_ADDITION
         return _parse_number(state, cache)
     elseif token.kind == _TOKEN_SUBTRACTION
         return -_parse_number(state, cache)
     elseif token.kind == _TOKEN_IDENTIFIER
         v = lowercase(token.value)
         if v == "inf" || v == "infinity"
             return typemax(T)
         else
-            _throw_unexpected_token(
-                state,
-                token,
-                "We expected this to be a number.",
-            )
+            _throw_parse_error(state, token, "We expected this to be a number.")
         end
     end
     _expect(state, token, _TOKEN_NUMBER)
     ret = tryparse(T, token.value)
     if ret === nothing
-        _throw_unexpected_token(
-            state,
-            token,
-            "We expected this to be a number.",
-        )
+        _throw_parse_error(state, token, "We expected this to be a number.")
     end
     return ret
 end
@@ -483,16 +508,16 @@ end
 # QUAD_TERM :=
 #     "+" QUAD_TERM
 #   | "-" QUAD_TERM
 #   | [NUMBER] [*] IDENTIFIER "^" "2"
 #   | [NUMBER] [*] IDENTIFIER "*" IDENTIFIER
 function _parse_quad_term(
-    state::LexerState,
-    cache::Cache{T},
+    state::_LexerState,
+    cache::_ReadCache{T},
     prefix::T,
 ) where {T}
     _skip_newlines(state)
     if _next_token_is(state, _TOKEN_ADDITION)
-        _ = read(state, Token)
+        _ = read(state, _Token)
         return _parse_quad_term(state, cache, prefix)
     elseif _next_token_is(state, _TOKEN_SUBTRACTION)
-        _ = read(state, Token)
+        _ = read(state, _Token)
         return _parse_quad_term(state, cache, -prefix)
     end
     coef = prefix
     if _next_token_is(state, _TOKEN_NUMBER)
         coef *= _parse_number(state, cache)
     end
     if _next_token_is(state, _TOKEN_MULTIPLICATION)
         _skip_newlines(state)
-        _ = read(state, Token) # Skip optional multiplication
+        _ = read(state, _Token) # Skip optional multiplication
     end
     x1 = _parse_variable(state, cache)
     _skip_newlines(state)
     if _next_token_is(state, _TOKEN_EXPONENT)
-        _ = read(state, Token) # ^
+        _ = read(state, _Token) # ^
         _skip_newlines(state)
-        n = read(state, Token, _TOKEN_NUMBER)
+        n = read(state, _Token, _TOKEN_NUMBER)
         if n.value != "2"
-            _throw_unexpected_token(state, n, "Only `^ 2` is supported.")
+            _throw_parse_error(state, n, "Only `^ 2` is supported.")
         end
         return MOI.ScalarQuadraticTerm(T(2) * coef, x1, x1)
     end
-    token = read(state, Token, _TOKEN_MULTIPLICATION)
+    token = read(state, _Token, _TOKEN_MULTIPLICATION)
     x2 = _parse_variable(state, cache)
     if x1 == x2
         coef *= T(2)
     end
@@ -526,27 +551,27 @@ end
 # QUAD_EXPRESSION :=
 #     "[" QUAD_TERM (("+" | "-") QUAD_TERM)* "]"
 #   | "[" QUAD_TERM (("+" | "-") QUAD_TERM)* "]/2"
 function _parse_quad_expression(
-    state::LexerState,
-    cache::Cache{T},
+    state::_LexerState,
+    cache::_ReadCache{T},
     prefix::T,
 ) where {T}
-    token = read(state, Token, _TOKEN_OPEN_BRACKET)
+    token = read(state, _Token, _TOKEN_OPEN_BRACKET)
     f = zero(MOI.ScalarQuadraticFunction{T})
     push!(f.quadratic_terms, _parse_quad_term(state, cache, prefix))
-    while (p = peek(state, Token)) !== nothing
+    while (p = peek(state, _Token)) !== nothing
         if p.kind == _TOKEN_ADDITION
-            p = read(state, Token)
+            p = read(state, _Token)
             push!(f.quadratic_terms, _parse_quad_term(state, cache, prefix))
         elseif p.kind == _TOKEN_SUBTRACTION
-            p = read(state, Token)
+            p = read(state, _Token)
             push!(f.quadratic_terms, _parse_quad_term(state, cache, -prefix))
         elseif p.kind == _TOKEN_NEWLINE
-            _ = read(state, Token)
+            _ = read(state, _Token)
         elseif p.kind == _TOKEN_CLOSE_BRACKET
-            _ = read(state, Token)
+            _ = read(state, _Token)
             break
         else
-            _throw_unexpected_token(
+            _throw_parse_error(
                 state,
                 p,
                 "We expected this to be a ] to end the quadratic expression.",
             )
         end
     end
     _skip_newlines(state)
     if _next_token_is(state, _TOKEN_DIVISION)
-        _ = read(state, Token) # /
+        _ = read(state, _Token) # /
         # Must be /2
-        n = read(state, Token, _TOKEN_NUMBER)
+        n = read(state, _Token, _TOKEN_NUMBER)
         if n.value != "2"
-            _throw_unexpected_token(
+            _throw_parse_error(
                 state,
                 n,
                 "The only supported value here is `] / 2`.",
@@ -585,18 +610,18 @@ end
 #   | NUMBER "*" IDENTIFIER
 #   | QUADRATIC_EXPRESSION
 function _parse_term(
-    state::LexerState,
-    cache::Cache{T},
+    state::_LexerState,
+    cache::_ReadCache{T},
     prefix::T = one(T),
 ) where {T}
     _skip_newlines(state)
     if _next_token_is(state, _TOKEN_ADDITION)
         # "+" TERM
-        _ = read(state, Token, _TOKEN_ADDITION)
+        _ = read(state, _Token, _TOKEN_ADDITION)
         return _parse_term(state, cache, prefix)
     elseif _next_token_is(state, _TOKEN_SUBTRACTION)
         # "-" TERM
-        _ = read(state, Token, _TOKEN_SUBTRACTION)
+        _ = read(state, _Token, _TOKEN_SUBTRACTION)
         return _parse_term(state, cache, -prefix)
     elseif _next_token_is(state, _TOKEN_IDENTIFIER)
         # IDENTIFIER
@@ -610,7 +635,7 @@
         return MOI.ScalarAffineTerm(coef, x)
     elseif _next_token_is(state, _TOKEN_MULTIPLICATION)
         # NUMBER * IDENTIFIER
-        _ = read(state, Token, _TOKEN_MULTIPLICATION)
+        _ = read(state, _Token, _TOKEN_MULTIPLICATION)
         x = _parse_variable(state, cache)
         return MOI.ScalarAffineTerm(coef, x)
     elseif _next_token_is(state, _TOKEN_NEWLINE) ||
@@ -623,8 +648,8 @@
         # QUADRATIC_EXPRESSION
         return _parse_quad_expression(state, cache, prefix)
     end
-    token = peek(state, Token)
-    return _throw_unexpected_token(
+    token = peek(state, _Token)
+    return _throw_parse_error(
         state,
         token,
         "Got $(_KIND_TO_MSG[token.kind]), but we expected this to be a new term in the expression.",
@@ -654,18 +679,18 @@ end
 
 # EXPRESSION :=
 #     TERM (("+" | "-") TERM)*
-function _parse_expression(state::LexerState, cache::Cache{T}) where {T}
+function _parse_expression(state::_LexerState, cache::_ReadCache{T}) where {T}
     f = zero(MOI.ScalarQuadraticFunction{T})
     _add_to_expression!(f, _parse_term(state, cache))
-    while (p = peek(state, Token)) !== nothing
+    while (p = peek(state, _Token)) !== nothing
         if p.kind == _TOKEN_ADDITION
-            p = read(state, Token)
+            p = read(state, _Token)
             _add_to_expression!(f, _parse_term(state, cache))
         elseif p.kind == _TOKEN_SUBTRACTION
-            p = read(state, Token)
+            p = read(state, _Token)
             _add_to_expression!(f, _parse_term(state, cache, -one(T)))
         elseif p.kind == _TOKEN_NEWLINE
-            _ = read(state, Token)
+            _ = read(state, _Token)
         else
             break
         end
     end
@@ -686,7 +711,7 @@ end
 # `=`. These are normalized when lexing.
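 #
 # For example, `<= 2.5`, `=< 2.5`, `>= -1`, `= 0`, and the keyword `free` are
 # all suffixes that this function handles (a hypothetical list, derived from
 # the cases in `test_parse_set_sufffix`).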
 function _parse_set_suffix(state, cache)
     _skip_newlines(state)
-    p = read(state, Token)
+    p = read(state, _Token)
     if p.kind == _TOKEN_IDENTIFIER && lowercase(p.value) == "free"
         return nothing
     end
@@ -701,7 +726,7 @@
         rhs = _parse_number(state, cache)
         return MOI.EqualTo(rhs)
     else
-        _throw_unexpected_token(
+        _throw_parse_error(
             state,
             p,
             "We expected this to be an inequality like `>=`, `<=`, or `==`.",
         )
     end
 end
@@ -719,7 +744,7 @@ function _parse_set_prefix(state, cache)
     lhs = _parse_number(state, cache)
     _skip_newlines(state)
-    p = read(state, Token)
+    p = read(state, _Token)
     if p.kind == _TOKEN_GREATER_THAN
         return MOI.LessThan(lhs)
     elseif p.kind == _TOKEN_LESS_THAN
@@ -727,7 +752,7 @@
         return MOI.GreaterThan(lhs)
     elseif p.kind == _TOKEN_EQUAL_TO
         return MOI.EqualTo(lhs)
     else
-        _throw_unexpected_token(
+        _throw_parse_error(
             state,
             p,
             "We expected this to be an inequality like `>=`, `<=`, or `==`.",
         )
     end
 end
@@ -736,19 +761,19 @@
 
 # NAME := [IDENTIFIER :]
-function _parse_optional_name(state::LexerState, cache::Cache)
+function _parse_optional_name(state::_LexerState, cache::_ReadCache)
     _skip_newlines(state)
     if _next_token_is(state, _TOKEN_IDENTIFIER, 1) &&
        _next_token_is(state, _TOKEN_COLON, 2)
-        name = read(state, Token)
-        _ = read(state, Token) # Skip :
+        name = read(state, _Token)
+        _ = read(state, _Token) # Skip :
         return name.value
     end
     return nothing
 end
 
 # OBJECTIVE := [NAME] [EXPRESSION]
-function _parse_objective(state::LexerState, cache::Cache)
+function _parse_objective(state::_LexerState, cache::_ReadCache)
     _ = _parse_optional_name(state, cache)
     _skip_newlines(state)
     if _next_token_is(state, _TOKEN_KEYWORD)
@@ -759,7 +784,11 @@
     return
 end
 
-function _add_bound(cache::Cache, x::MOI.VariableIndex, set::MOI.GreaterThan)
+function _add_bound(
+    cache::_ReadCache,
+    x::MOI.VariableIndex,
+    set::MOI.GreaterThan,
+)
     delete!(cache.variable_with_default_bound, x)
     if isfinite(set.lower)
         MOI.add_constraint(cache.model, x, set)
@@ -767,7 +796,7 @@
     return
 end
 
-function _add_bound(cache::Cache, x::MOI.VariableIndex, set::MOI.LessThan)
+function _add_bound(cache::_ReadCache, x::MOI.VariableIndex, set::MOI.LessThan)
     if set.upper < 0
         delete!(cache.variable_with_default_bound, x)
     end
@@ -777,22 +806,22 @@
     return
 end
 
-function _add_bound(cache::Cache, x::MOI.VariableIndex, set::MOI.EqualTo)
+function _add_bound(cache::_ReadCache, x::MOI.VariableIndex, set::MOI.EqualTo)
     delete!(cache.variable_with_default_bound, x)
     MOI.add_constraint(cache.model, x, set)
     return
 end
 
 # x free
-function _add_bound(cache::Cache, x::MOI.VariableIndex, ::Nothing)
+function _add_bound(cache::_ReadCache, x::MOI.VariableIndex, ::Nothing)
     delete!(cache.variable_with_default_bound, x)
     return
 end
 
 # BOUND :=
-#     IDENTIFIER SET_SUFFIX
-#   | SET_PREFIX IDENTIFIER
-#   | SET_PREFIX IDENTIFIER SET_SUFFIX
+#     IDENTIFIER SET_SUFFIX \n
+#   | SET_PREFIX IDENTIFIER \n
+#   | SET_PREFIX IDENTIFIER SET_SUFFIX \n
 function _parse_bound(state, cache)
     if _next_token_is(state, _TOKEN_IDENTIFIER) # `x free` or `x op b`
         x = _parse_variable(state, cache)
@@ -826,29 +855,32 @@ end
 
 # SOS_CONSTRAINT :=
 #     [NAME] S1:: (IDENTIFIER:NUMBER)+ \n
 #   | [NAME] S2:: (IDENTIFIER:NUMBER)+ \n
 #
 # The newline character is required.
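 #
 # For example, a hypothetical constraint that this grammar accepts (assuming
 # variables x, y, and z) is:
 #
 #   sos1: S1:: x:1 y:2 z:3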
-function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T}
-    t = read(state, Token, _TOKEN_IDENTIFIER) # Si
+function _parse_sos_constraint(
+    state::_LexerState,
+    cache::_ReadCache{T},
+) where {T}
+    t = read(state, _Token, _TOKEN_IDENTIFIER) # Si
     if !(t.value == "S1" || t.value == "S2")
-        _throw_unexpected_token(
+        _throw_parse_error(
             state,
             t,
             "This must be either `S1` for SOS-I or `S2` for SOS-II.",
         )
     end
-    _ = read(state, Token, _TOKEN_COLON)
-    _ = read(state, Token, _TOKEN_COLON)
+    _ = read(state, _Token, _TOKEN_COLON)
+    _ = read(state, _Token, _TOKEN_COLON)
     f, w = MOI.VectorOfVariables(MOI.VariableIndex[]), T[]
     while true
         if _next_token_is(state, _TOKEN_NEWLINE)
-            t = peek(state, Token)
-            _throw_unexpected_token(
+            t = peek(state, _Token)
+            _throw_parse_error(
                 state,
                 t,
                 "SOS constraints cannot be spread across lines.",
             )
         end
         push!(f.variables, _parse_variable(state, cache))
-        _ = read(state, Token, _TOKEN_COLON)
+        _ = read(state, _Token, _TOKEN_COLON)
         push!(w, _parse_number(state, cache))
         if _next_token_is(state, _TOKEN_NEWLINE)
             break
         end
     end
@@ -872,20 +904,20 @@ end
 
 # INDICATOR_CONSTRAINT :=
 #     IDENTIFIER "=" "0" "->" EXPRESSION SET_SUFFIX
 #   | IDENTIFIER "=" "1" "->" EXPRESSION SET_SUFFIX
 function _parse_indicator_constraint(
-    state::LexerState,
-    cache::Cache{T},
+    state::_LexerState,
+    cache::_ReadCache{T},
 ) where {T}
     z = _parse_variable(state, cache)
-    _ = read(state, Token, _TOKEN_EQUAL_TO)
-    t = read(state, Token, _TOKEN_NUMBER)
+    _ = read(state, _Token, _TOKEN_EQUAL_TO)
+    t = read(state, _Token, _TOKEN_NUMBER)
     indicator = if t.value == "0"
         MOI.ACTIVATE_ON_ZERO
     elseif t.value == "1"
         MOI.ACTIVATE_ON_ONE
     else
-        _throw_unexpected_token(state, t, "This must be either `= 0` or `= 1`.")
+        _throw_parse_error(state, t, "This must be either `= 0` or `= 1`.")
     end
-    _ = read(state, Token, _TOKEN_IMPLIES)
+    _ = read(state, _Token, _TOKEN_IMPLIES)
     f = _parse_expression(state, cache)
     set = _parse_set_suffix(state, cache)
     return MOI.add_constraint(
@@ -899,7 +931,7 @@ end
 
 # CONSTRAINT :=
 #     [NAME] EXPRESSION SET_SUFFIX
 #   | [NAME] SOS_CONSTRAINT
 #   | [NAME] INDICATOR_CONSTRAINT
-function _parse_constraint(state::LexerState, cache::Cache)
+function _parse_constraint(state::_LexerState, cache::_ReadCache)
     name = _parse_optional_name(state, cache)
     # Check if this is an SOS constraint
     c = if _is_sos_constraint(state)
diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl
index 85f6523449..cb3beae1c1 100644
--- a/test/FileFormats/LP/LP.jl
+++ b/test/FileFormats/LP/LP.jl
@@ -446,7 +446,7 @@ function test_read_invalid()
     for filename in filter(f -> startswith(f, "invalid_"), readdir(models))
         model = LP.Model()
         @test_throws(
-            LP.UnexpectedToken,
+            LP.ParseError,
             MOI.read_from_file(model, joinpath(models, filename)),
         )
     end
@@ -459,7 +459,7 @@ function test_read_unexpected_line()
     print(io, line)
     seekstart(io)
     model = LP.Model()
-    @test_throws LP.UnexpectedToken read!(io, model)
+    @test_throws LP.ParseError read!(io, model)
     return
 end
@@ -611,10 +611,16 @@ end
 function test_read_maximum_length_error()
     filename = joinpath(@__DIR__, "models", "model2.lp")
     model = LP.Model(; maximum_length = 1)
-    @test_throws(
-        ErrorException("Name exceeds maximum length: V4"),
-        MOI.read_from_file(model, filename),
-    )
+    contents = try
+        MOI.read_from_file(model, filename)
+    catch err
+        sprint(showerror, err)
+    end
+    @test contents == """
+    Error parsing LP file on line 2:
+    obj: - 2 - 1 V4 + 1 V5 + 3 + 2 - 0.5
+                 ^
+    Name (V4) exceeds maximum length (1)"""
     return
 end
@@ -1097,7 +1103,7 @@ function test_invalid_token_in_sos()
         sprint(showerror, err)
     end
     @test contents == """
-    Error parsing LP file. Got an unexpected token on line 5:
+    Error parsing LP file on line 5:
     c11: S1:: x 1.0 y 2.0
               ^
     We expected this token to be the symbol `:`"""
     return
 end
@@ -1114,7 +1120,7 @@ function test_unable_to_parse_bound()
     end
     """)
     model = LP.Model()
-    @test_throws LP.UnexpectedToken read!(io, model)
+    @test_throws LP.ParseError read!(io, model)
     return
 end
@@ -1229,7 +1235,7 @@ function test_subject_to_name()
     seekstart(io)
     model = MOI.FileFormats.LP.Model()
     if err
-        @test_throws LP.UnexpectedToken read!(io, model)
+        @test_throws LP.ParseError read!(io, model)
     else
         read!(io, model)
         out = IOBuffer()
@@ -1243,7 +1249,7 @@ end
 
 function test_parse_variable()
-    cache = LP.Cache(LP.Model{Float64}())
+    cache = LP._ReadCache(LP.Model{Float64}())
     for input in [
         "x",
         "X",
         "x_2",
         "x.3_",
         "val!id",
         "x#1",
         "x's",
         "x(y)",
     ]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
+        state = LP._LexerState(io)
         x = LP._parse_variable(state, cache)
         @test cache.variable_name_to_index[input] == x
     end
     for input in ["2", "2x", ".x"]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
-        @test_throws LP.UnexpectedToken LP._parse_variable(state, cache)
+        state = LP._LexerState(io)
+        @test_throws LP.ParseError LP._parse_variable(state, cache)
     end
     return
 end
 
 function test_parse_number()
-    cache = LP.Cache(LP.Model{Float64}())
+    cache = LP._ReadCache(LP.Model{Float64}())
     for (input, result) in [
         "1" => 1.0,
         "02" => 2.0,
@@ -1295,20 +1301,20 @@
     ]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
+        state = LP._LexerState(io)
         @test LP._parse_number(state, cache) == result
     end
     for input in ["x", "abc", "ten", "1.1.1", "1eE1"]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
-        @test_throws LP.UnexpectedToken LP._parse_number(state, cache)
+        state = LP._LexerState(io)
+        @test_throws LP.ParseError LP._parse_number(state, cache)
     end
     return
 end
 
 function test_parse_quad_term()
-    cache = LP.Cache(LP.Model{Float64}())
+    cache = LP._ReadCache(LP.Model{Float64}())
     # Diagonal
     for (input, coef) in [
         "x * x" => 2.0,
@@ -1326,7 +1332,7 @@
     ]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
+        state = LP._LexerState(io)
         term = LP._parse_quad_term(state, cache, 1.0)
         x = cache.variable_name_to_index["x"]
         @test term == MOI.ScalarQuadraticTerm(coef, x, x)
     end
     # Off-diagonal
     for (input, coef) in [
@@ -1349,7 +1355,7 @@
     ]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
+        state = LP._LexerState(io)
         term = LP._parse_quad_term(state, cache, 1.0)
         x = cache.variable_name_to_index["x"]
         y = cache.variable_name_to_index["y"]
@@ -1361,14 +1367,14 @@
         @test term == MOI.ScalarQuadraticTerm(coef, x, y)
     end
     for input in ["x^", "x^x", "x^0", "x^1", "x^3", "x * 2 * x"]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
-        @test_throws LP.UnexpectedToken LP._parse_quad_term(state, cache, -1.0)
+        state = LP._LexerState(io)
+        @test_throws LP.ParseError LP._parse_quad_term(state, cache, -1.0)
     end
     return
 end
 
 function test_parse_term()
-    cache = LP.Cache(LP.Model{Float64}())
+    cache = LP._ReadCache(LP.Model{Float64}())
     for (input, coef) in [
         "x" => 1.0,
         "+ x" => 1.0,
@@ -1383,7 +1389,7 @@
     ]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
+        state = LP._LexerState(io)
         term = LP._parse_term(state, cache, 1.0)
         x = cache.variable_name_to_index["x"]
         @test term == MOI.ScalarAffineTerm(coef, x)
     end
@@ -1394,20 +1400,20 @@ function test_parse_term()
     for input in ["subject to", ">= 1"]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
-        @test_throws LP.UnexpectedToken LP._parse_term(state, cache, 1.0)
+        state = LP._LexerState(io)
+        @test_throws LP.ParseError LP._parse_term(state, cache, 1.0)
     end
     return
 end
 
 function test_parse_quad_expression()
-    cache = LP.Cache(LP.Model{Float64}())
+    cache = LP._ReadCache(LP.Model{Float64}())
     for input in ["x^2", "[ x^2 ]/", "[ x^2 ]/3"]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
+        state = LP._LexerState(io)
         @test_throws(
-            LP.UnexpectedToken,
+            LP.ParseError,
             LP._parse_quad_expression(state, cache, 1.0),
         )
     end
@@ -1415,7 +1421,7 @@ end
 
 function test_parse_set_prefix()
-    cache = LP.Cache(LP.Model{Float64}())
+    cache = LP._ReadCache(LP.Model{Float64}())
     for (input, set) in [
         "1.0 <=" => MOI.GreaterThan(1.0),
         "1.0 <" => MOI.GreaterThan(1.0),
@@ -1429,20 +1435,20 @@
     ]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
+        state = LP._LexerState(io)
         @test LP._parse_set_prefix(state, cache) == set
     end
     for input in ["1 ->"]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
-        @test_throws LP.UnexpectedToken LP._parse_set_prefix(state, cache)
+        state = LP._LexerState(io)
+        @test_throws LP.ParseError LP._parse_set_prefix(state, cache)
     end
     return
 end
 
 function test_parse_set_sufffix()
-    cache = LP.Cache(LP.Model{Float64}())
+    cache = LP._ReadCache(LP.Model{Float64}())
     for (input, set) in [
         "free" => nothing,
         "Free" => nothing,
@@ -1458,14 +1464,14 @@
     ]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
+        state = LP._LexerState(io)
         @test LP._parse_set_suffix(state, cache) == set
     end
     for input in ["-> 1"]
         io = IOBuffer(input)
         seekstart(io)
-        state = LP.LexerState(io)
-        @test_throws LP.UnexpectedToken LP._parse_set_suffix(state, cache)
+        state = LP._LexerState(io)
+        @test_throws LP.ParseError LP._parse_set_suffix(state, cache)
     end
     return
 end

From da47a17f30ec068edbbeb7e73ab1633e6cddcd78 Mon Sep 17 00:00:00 2001
From: Oscar Dowson
Date: Fri, 12 Sep 2025 08:57:15 +1200
Subject: [PATCH 2/4] Update

---
 src/FileFormats/LP/read.jl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl
index 79661f5151..6fbc866b8f 100644
--- a/src/FileFormats/LP/read.jl
+++ b/src/FileFormats/LP/read.jl
@@ -279,12 +279,13 @@ function _throw_parse_error(state::_LexerState, token::_Token, msg::String)
-    offset = min(40, token.pos)
+    offset = min(20, token.pos)
     seek(state.io, token.pos - offset)
     line = String(read(state.io, 2 * offset))
     i = something(findprev('\n', line, offset-1), 0)
     j = something(findnext('\n', line, offset), length(line) + 1)
-    help = string(line[(i+1):(j-1)], "\n", " "^(offset - i - 1), "^\n", msg)
+    extract = replace(line[(i+1):(j-1)], '\r' => '')
+    help = string(extract, "\n", " "^(offset - i - 1), "^\n", msg)
     return throw(ParseError(state.line, help))
 end

From da82d60d5cdd11229f345a2858fd855b50121f97 Mon Sep 17 00:00:00 2001
From: Oscar Dowson
Date: Fri, 12 Sep 2025 09:05:41 +1200
Subject: [PATCH 3/4] Update

---
 src/FileFormats/LP/read.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl
index 6fbc866b8f..e593ce27c1 100644
--- a/src/FileFormats/LP/read.jl
+++ b/src/FileFormats/LP/read.jl
@@ -284,7 +284,7 @@ function _throw_parse_error(state::_LexerState, token::_Token, msg::String)
     line = String(read(state.io, 2 * offset))
     i = something(findprev('\n', line, offset-1), 0)
     j = something(findnext('\n', line, offset), length(line) + 1)
-    extract = replace(line[(i+1):(j-1)], '\r' => '')
+    extract = replace(line[(i+1):(j-1)], "\r" => "")
     help = string(extract, "\n", " "^(offset - i - 1), "^\n", msg)
     return throw(ParseError(state.line, help))
 end

From b6b35da3234c69fbe20ef460f15ed7f58d5bef2a Mon Sep 17 00:00:00 2001
From: Oscar Dowson
Date: Fri, 12 Sep 2025 09:13:17 +1200
Subject: [PATCH 4/4] Update

---
 src/FileFormats/LP/read.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl
index e593ce27c1..950d6561b6 100644
--- a/src/FileFormats/LP/read.jl
+++ b/src/FileFormats/LP/read.jl
@@ -279,7 +279,7 @@ struct ParseError <: Exception
 end
 
 function _throw_parse_error(state::_LexerState, token::_Token, msg::String)
-    offset = min(20, token.pos)
+    offset = min(40, token.pos)
     seek(state.io, token.pos - offset)
     line = String(read(state.io, 2 * offset))
     i = something(findprev('\n', line, offset-1), 0)
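
Taken together, the four patches replace the old `UnexpectedToken` exception
with a `ParseError` that carries the line number and a caret-annotated extract
of the offending line. A rough usage sketch of the new behaviour (a
hypothetical snippet; it assumes the `LP` module is `MOI.FileFormats.LP` and
uses only the `LP.Model` and `read!` entry points exercised in the tests
above):

    import MathOptInterface as MOI
    const LP = MOI.FileFormats.LP

    model = LP.Model()
    io = IOBuffer("minimize\nobj: 2 x + [ y^3 ]\nend\n")
    try
        read!(io, model)
    catch err
        # `err` is an `LP.ParseError`. `showerror` prints
        # "Error parsing LP file on line ...", the offending line, a caret
        # under the bad token, and an explanation; here the `y^3` term
        # triggers "Only `^ 2` is supported."
        print(sprint(showerror, err))
    end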