From 4c35875025dedcd6c2e4060239e1dd60de03b3f3 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Wed, 10 Sep 2025 20:02:25 +1200 Subject: [PATCH 01/10] [FileFormats.LP] write a proper recursive descent parser --- src/FileFormats/LP/LP.jl | 653 +--------------- src/FileFormats/LP/read.jl | 734 ++++++++++++++++++ test/FileFormats/LP/LP.jl | 91 ++- .../models/invalid_affine_term_constraint.lp | 13 - .../models/invalid_affine_term_objective.lp | 13 - test/FileFormats/LP/models/invalid_bound.lp | 13 - 6 files changed, 779 insertions(+), 738 deletions(-) create mode 100644 src/FileFormats/LP/read.jl delete mode 100644 test/FileFormats/LP/models/invalid_affine_term_constraint.lp delete mode 100644 test/FileFormats/LP/models/invalid_affine_term_objective.lp delete mode 100644 test/FileFormats/LP/models/invalid_bound.lp diff --git a/src/FileFormats/LP/LP.jl b/src/FileFormats/LP/LP.jl index bfc53cdc06..6d93c815dd 100644 --- a/src/FileFormats/LP/LP.jl +++ b/src/FileFormats/LP/LP.jl @@ -512,657 +512,6 @@ function Base.write(io::IO, model::Model{T}) where {T} return end -# ============================================================================== -# -# `Base.read!` -# -# ============================================================================== - -const _KW_OBJECTIVE = Val{:objective}() -const _KW_CONSTRAINTS = Val{:constraints}() -const _KW_BOUNDS = Val{:bounds}() -const _KW_INTEGER = Val{:integer}() -const _KW_BINARY = Val{:binary}() -const _KW_SOS = Val{:sos}() -const _KW_END = Val{:end}() - -const _KEYWORDS = Dict( - # _KW_OBJECTIVE - "max" => _KW_OBJECTIVE, - "maximize" => _KW_OBJECTIVE, - "maximise" => _KW_OBJECTIVE, - "maximum" => _KW_OBJECTIVE, - "min" => _KW_OBJECTIVE, - "minimize" => _KW_OBJECTIVE, - "minimise" => _KW_OBJECTIVE, - "minimum" => _KW_OBJECTIVE, - # _KW_CONSTRAINTS - "subject to" => _KW_CONSTRAINTS, - "such that" => _KW_CONSTRAINTS, - "st" => _KW_CONSTRAINTS, - "s.t." 
=> _KW_CONSTRAINTS, - # _KW_BOUNDS - "bounds" => _KW_BOUNDS, - "bound" => _KW_BOUNDS, - # _KW_INTEGER - "gen" => _KW_INTEGER, - "general" => _KW_INTEGER, - "generals" => _KW_INTEGER, - "integer" => _KW_INTEGER, - "integers" => _KW_INTEGER, - # _KW_BINARY - "bin" => _KW_BINARY, - "binary" => _KW_BINARY, - "binaries" => _KW_BINARY, - # _KW_SOS - "sos" => _KW_SOS, - # _KW_END - "end" => _KW_END, -) - -mutable struct _ReadCache{T} - objective::MOI.ScalarAffineFunction{T} - quad_obj_terms::Vector{MOI.ScalarQuadraticTerm{T}} - constraint_function::MOI.ScalarAffineFunction{T} - quad_terms::Vector{MOI.ScalarQuadraticTerm{T}} - constraint_name::String - num_constraints::Int - name_to_variable::Dict{String,MOI.VariableIndex} - has_default_bound::Set{MOI.VariableIndex} - indicator::Union{Nothing,Pair{MOI.VariableIndex,MOI.ActivationCondition}} - function _ReadCache{T}() where {T} - return new( - zero(MOI.ScalarAffineFunction{T}), - MOI.ScalarQuadraticTerm{T}[], - zero(MOI.ScalarAffineFunction{T}), - MOI.ScalarQuadraticTerm{T}[], - "", - 0, - Dict{String,MOI.VariableIndex}(), - Set{MOI.VariableIndex}(), - nothing, - ) - end -end - -function _get_variable_from_name( - model::Model{T}, - cache::_ReadCache{T}, - name::String, -) where {T} - current_variable = get(cache.name_to_variable, name, nothing) - if current_variable !== nothing - return current_variable - end - options = get_options(model) - if length(name) > options.maximum_length - error("Name exceeds maximum length: $name") - elseif match(r"^([\.0-9])", name) !== nothing - error("Name starts with invalid character: $name") - elseif match(_NAME_REG, name) !== nothing - error("Name contains with invalid character: $name") - end - x = MOI.add_variable(model) - MOI.set(model, MOI.VariableName(), x, name) - # By default, all variables have a lower bound of 0 unless otherwise - # specified. 
- MOI.add_constraint(model, x, MOI.GreaterThan(zero(T))) - push!(cache.has_default_bound, x) - cache.name_to_variable[name] = x - return x -end - -_tokenize(line::AbstractString) = String.(split(line, " "; keepempty = false)) - -@enum( - _TokenType, - _TOKEN_VARIABLE, - _TOKEN_COEFFICIENT, - _TOKEN_SIGN, - _TOKEN_QUADRATIC_OPEN, - _TOKEN_QUADRATIC_CLOSE, - _TOKEN_QUADRATIC_DIAG, - _TOKEN_QUADRATIC_OFF_DIAG, -) - -function _parse_token(::Type{T}, token::String) where {T} - if token == "+" - return _TOKEN_SIGN, one(T) - elseif token == "-" - return _TOKEN_SIGN, -one(T) - elseif startswith(token, "[") - return _TOKEN_QUADRATIC_OPEN, zero(T) - elseif startswith(token, "]") - return _TOKEN_QUADRATIC_CLOSE, zero(T) - elseif token == "^" - return _TOKEN_QUADRATIC_DIAG, zero(T) - elseif token == "*" - return _TOKEN_QUADRATIC_OFF_DIAG, zero(T) - end - coef = tryparse(T, token) - if coef === nothing - return _TOKEN_VARIABLE, token - else - return _TOKEN_COEFFICIENT, coef - end -end - -function _get_term(token_types, token_values::Vector{T}, offset) where {T} - coef = one(T) - if token_types[offset] == _TOKEN_SIGN - coef = token_values[offset] - offset += 1 - end - if token_types[offset] == _TOKEN_COEFFICIENT - coef *= token_values[offset] - offset += 1 - elseif token_types[offset] == _TOKEN_SIGN - error("Invalid line") - end - if offset > length(token_types) || token_types[offset] == _TOKEN_SIGN - return coef, offset # It's a standalone constant - end - if token_types[offset] == _TOKEN_QUADRATIC_OPEN - return _get_term(token_types, token_values, offset + 1) - end - @assert token_types[offset] == _TOKEN_VARIABLE - x = MOI.VariableIndex(Int64(token_values[offset])) - offset += 1 - if offset > length(token_types) || - token_types[offset] in (_TOKEN_SIGN, _TOKEN_COEFFICIENT) - return MOI.ScalarAffineTerm(coef, x), offset - end - term = if token_types[offset] == _TOKEN_QUADRATIC_DIAG - MOI.ScalarQuadraticTerm(coef, x, x) - else - @assert token_types[offset] == _TOKEN_QUADRATIC_OFF_DIAG - y = MOI.VariableIndex(Int64(token_values[offset+1])) - MOI.ScalarQuadraticTerm(coef, x, y) - end - if get(token_types, offset + 2, nothing) == _TOKEN_QUADRATIC_CLOSE - return term, offset + 3 - else - return term, offset + 2 - end -end - -_half(x) = x / 2 -_half(x::Integer) = div(x, 2) - -function _parse_function( - f::MOI.ScalarAffineFunction{T}, - model::Model{T}, - cache::_ReadCache{T}, - tokens::Vector{String}, -) where {T} - N = length(tokens) - token_types = Vector{_TokenType}(undef, N) - token_values = Vector{T}(undef, N) - for i in 1:length(tokens) - token_type, token = _parse_token(T, tokens[i]) - token_types[i] = token_type - if token_type in (_TOKEN_SIGN, _TOKEN_COEFFICIENT) - token_values[i] = token::T - elseif token_type in (_TOKEN_QUADRATIC_OPEN, _TOKEN_QUADRATIC_CLOSE) - token_values[i] = zero(T) - elseif token_type in (_TOKEN_QUADRATIC_DIAG, _TOKEN_QUADRATIC_OFF_DIAG) - token_values[i] = zero(T) - else - @assert token_type == _TOKEN_VARIABLE - x = _get_variable_from_name(model, cache, token::String) - # A cheat for type-stability. 
Store `T` of the variable index - token_values[i] = T(x.value) - end - end - offset = 1 - while offset <= length(tokens) - term, offset = _get_term(token_types, token_values, offset) - if term isa MOI.ScalarAffineTerm{T} - push!(f.terms, term::MOI.ScalarAffineTerm{T}) - elseif term isa MOI.ScalarQuadraticTerm{T} - push!(cache.quad_terms, term::MOI.ScalarQuadraticTerm{T}) - if tokens[offset-1] in ("]", "]/2") - is_half = tokens[offset-1] == "]/2" - for (i, term) in enumerate(cache.quad_terms) - x, y = term.variable_1, term.variable_2 - coef = (x == y ? 2 : 1) * term.coefficient - if is_half - coef = _half(coef) - end - cache.quad_terms[i] = MOI.ScalarQuadraticTerm(coef, x, y) - end - end - else - f.constant += term::T - end - end - return -end - -# _KW_OBJECTIVE - -_set_objective_sense(::Any, ::Model, ::String) = nothing - -function _set_objective_sense( - ::typeof(_KW_OBJECTIVE), - model::Model, - sense::String, -) - if sense in ("max", "maximize", "maximise", "maximum") - MOI.set(model, MOI.ObjectiveSense(), MOI.MAX_SENSE) - else - @assert sense in ("min", "minimize", "minimise", "minimum") - MOI.set(model, MOI.ObjectiveSense(), MOI.MIN_SENSE) - end - return -end - -function _parse_section( - ::typeof(_KW_OBJECTIVE), - model::Model, - cache::_ReadCache, - line::AbstractString, -) - if occursin(":", line) # Strip name of the objective - m = match(r"(.*?)\:(.*)", line)::RegexMatch - line = String(m[2]::AbstractString) - end - if occursin("^", line) - line = replace(line, "^" => " ^ ") - end - if occursin(r"\][\s/][\s/]+2", line) - line = replace(line, r"\][\s/][\s/]+2" => "]/2") - end - tokens = _tokenize(line) - if length(tokens) == 0 - # Can happen if the name of the objective is on one line and the - # expression is on the next. - return - end - _parse_function(cache.objective, model, cache, tokens) - append!(cache.quad_obj_terms, cache.quad_terms) - empty!(cache.quad_terms) - return -end - -# _KW_CONSTRAINTS - -function _parse_section( - ::typeof(_KW_CONSTRAINTS), - model::Model{T}, - cache::_ReadCache{T}, - line::AbstractString, -) where {T} - # SOS constraints should be in their own "SOS" section, but we can also - # recognize them if they're mixed into the constraint section. - if match(r" S([1-2])\w*:: ", line) !== nothing - _parse_section(_KW_SOS, model, cache, line) - return - end - if isempty(cache.constraint_name) - if occursin(":", line) - m = match(r"(.*?)\:(.*)", line)::RegexMatch - cache.constraint_name = String(m[1]::AbstractString) - line = String(m[2]::AbstractString) - else - # Give it a temporary name for now - cache.constraint_name = "R$(cache.num_constraints)" - end - end - if cache.indicator === nothing - if (m = match(r"\s*(.+?)\s*=\s*(0|1)\s*->(.+)", line)) !== nothing - z = _get_variable_from_name(model, cache, String(m[1])) - cond = m[2] == "0" ? MOI.ACTIVATE_ON_ZERO : MOI.ACTIVATE_ON_ONE - cache.indicator = z => cond - line = String(m[3]) - end - end - if occursin("^", line) - # Simplify parsing of constraints with ^2 terms by turning them into - # explicit " ^ 2" terms. This avoids ambiguity when parsing names. - line = replace(line, "^" => " ^ ") - end - if occursin(r"\][\s/][\s/]+2", line) - # Simplify parsing of ]/2 end blocks, which may contain whitespace. - line = replace(line, r"\][\s/][\s/]+2" => "]/2") - end - tokens = _tokenize(line) - if length(tokens) == 0 - # Can happen if the name is on one line and the constraint on the next. - return - end - # This checks if the constaint is finishing on this line. 
- constraint_set = nothing - if length(tokens) >= 2 && tokens[end-1] in ("<", "<=", ">", ">=", "=", "==") - rhs = parse(T, pop!(tokens)) - sym = pop!(tokens) - constraint_set = if sym in ("<", "<=") - MOI.LessThan(rhs) - elseif sym in (">", ">=") - MOI.GreaterThan(rhs) - else - @assert sym in ("=", "==") - MOI.EqualTo(rhs) - end - end - _parse_function(cache.constraint_function, model, cache, tokens) - if constraint_set !== nothing - f = if isempty(cache.quad_terms) - cache.constraint_function - else - MOI.ScalarQuadraticFunction( - cache.quad_terms, - cache.constraint_function.terms, - cache.constraint_function.constant, - ) - end - if cache.indicator !== nothing - f = MOI.Utilities.operate(vcat, T, cache.indicator[1], f) - constraint_set = MOI.Indicator{cache.indicator[2]}(constraint_set) - end - c = MOI.add_constraint(model, f, constraint_set) - MOI.set(model, MOI.ConstraintName(), c, cache.constraint_name) - cache.num_constraints += 1 - empty!(cache.constraint_function.terms) - empty!(cache.quad_terms) - cache.constraint_function.constant = zero(T) - cache.constraint_name = "" - cache.indicator = nothing - end - return -end - -# _KW_BOUNDS - -function _parse_float(::Type{T}, token::String) where {T} - coef = lowercase(token) - if coef in ("-inf", "-infinity") - return typemin(T) - elseif coef in ("+inf", "+infinity") - return typemax(T) - end - return tryparse(T, coef) -end - -# Yes, the last elements here are really accepted by CPLEX... -_is_less_than(token) = token in ("<=", "<", "=<") -_is_greater_than(token) = token in (">=", ">", "=>") -_is_equal_to(token) = token in ("=", "==") - -function _parse_section( - ::typeof(_KW_BOUNDS), - model::Model{T}, - cache::_ReadCache{T}, - line::AbstractString, -) where {T} - tokens = _tokenize(line) - if length(tokens) == 2 && lowercase(tokens[2]) == "free" - x = _get_variable_from_name(model, cache, tokens[1]) - _delete_default_lower_bound_if_present(model, cache, x) - return - end - lb, ub, name = nothing, nothing, "" - if length(tokens) == 5 - name = tokens[3] - if _is_less_than(tokens[2]) && _is_less_than(tokens[4]) - lb = _parse_float(T, tokens[1])::T - ub = _parse_float(T, tokens[5])::T - elseif _is_greater_than(tokens[2]) && _is_greater_than(tokens[4]) - lb = _parse_float(T, tokens[5])::T - ub = _parse_float(T, tokens[1])::T - else - error("Unable to parse bound due to invalid inequalities: $(line)") - end - elseif length(tokens) == 3 - lhs, rhs = _parse_float(T, tokens[1]), _parse_float(T, tokens[3]) - if lhs === nothing # name [comparison] bound - @assert rhs !== nothing - name = tokens[1] - if _is_less_than(tokens[2]) - # name <= bound - ub = rhs - elseif _is_greater_than(tokens[2]) - # name >= bound - lb = rhs - elseif _is_equal_to(tokens[2]) - lb = ub = rhs - else - error( - "Unable to parse bound due to invalid inequalities: $(line)", - ) - end - else # bound [comparison] name - @assert rhs === nothing - name = tokens[3] - if _is_less_than(tokens[2]) - # bound <= name - lb = lhs - elseif _is_greater_than(tokens[2]) - # bound >= name - ub = lhs - elseif _is_equal_to(tokens[2]) - lb = ub = lhs - else - error( - "Unable to parse bound due to invalid inequalities: $(line)", - ) - end - end - else - error("Unable to parse bound: $(line)") - end - x = _get_variable_from_name(model, cache, name) - if lb !== nothing && ub !== nothing - if lb == ub - _delete_default_lower_bound_if_present(model, cache, x) - MOI.add_constraint(model, x, MOI.EqualTo(lb)) - return - elseif typemin(T) < lb < ub < typemax(T) - 
_delete_default_lower_bound_if_present(model, cache, x) - # Do not add MOI.Interval constraints because we want to follow - # JuMP's convention of adding separate lower and upper bounds. - MOI.add_constraint(model, x, MOI.GreaterThan(lb)) - MOI.add_constraint(model, x, MOI.LessThan(ub)) - return - elseif lb == typemin(T) - _delete_default_lower_bound_if_present(model, cache, x) - if ub == typemax(T) - return # Explicitly free variable - end - end - end - if lb !== nothing && typemin(T) < lb - _delete_default_lower_bound_if_present(model, cache, x) - MOI.add_constraint(model, x, MOI.GreaterThan(lb)) - end - if ub !== nothing && ub < typemax(T) - if ub < 0 - # We only need to delete the default lower bound if the upper bound - # is less than 0. - _delete_default_lower_bound_if_present(model, cache, x) - end - MOI.add_constraint(model, x, MOI.LessThan(ub)) - end - return -end - -function _delete_default_lower_bound_if_present( - model::Model{T}, - cache, - x, -) where {T} - if !(x in cache.has_default_bound) - return - end - c = MOI.ConstraintIndex{MOI.VariableIndex,MOI.GreaterThan{T}}(x.value) - MOI.delete(model, c) - delete!(cache.has_default_bound, x) - return -end - -# _KW_INTEGER - -function _parse_section(::typeof(_KW_INTEGER), model, cache, line) - for token in _tokenize(line) - x = _get_variable_from_name(model, cache, token) - MOI.add_constraint(model, x, MOI.Integer()) - end - return -end - -# _KW_BINARY - -function _parse_section(::typeof(_KW_BINARY), model, cache, line) - for token in _tokenize(line) - x = _get_variable_from_name(model, cache, token) - MOI.add_constraint(model, x, MOI.ZeroOne()) - end - return -end - -# _KW_SOS - -function _parse_section( - ::typeof(_KW_SOS), - model::Model{T}, - cache::_ReadCache{T}, - line::AbstractString, -) where {T} - # SOS constraints can have all manner of whitespace issues with them. - # Normalize them here before attempting to do anything else. - line = replace(line, r"\s+:\s+" => ":") - line = replace(line, r"\s+::" => "::") - tokens = _tokenize(line) - if length(tokens) < 3 - error("Malformed SOS constraint: $(line)") - end - name = String(split(tokens[1], ":")[1]) - if tokens[2] == "S1::" - order = 1 - elseif tokens[2] == "S2::" - order = 2 - else - error("SOS of type $(tokens[2]) not recognised") - end - variables, weights = MOI.VariableIndex[], T[] - for token in tokens[3:end] - items = String.(split(token, ":")) - if length(items) != 2 - error("Invalid token in SOS constraint: $(token)") - end - push!(variables, _get_variable_from_name(model, cache, items[1])) - push!(weights, parse(T, items[2])) - end - c_ref = if tokens[2] == "S1::" - MOI.add_constraint(model, variables, MOI.SOS1(weights)) - else - @assert tokens[2] == "S2::" - MOI.add_constraint(model, variables, MOI.SOS2(weights)) - end - MOI.set(model, MOI.ConstraintName(), c_ref, name) - return -end - -# _KW_END - -function _parse_section( - ::typeof(_KW_END), - ::Model, - ::_ReadCache, - line::AbstractString, -) - return error("Corrupted LP File. You have the lne $(line) after an end.") -end - -function _strip_comment(line::String) - if occursin("\\", line) - m = match(r"(.*?)\\(.*)", line)::RegexMatch - return strip(String(m[1]::AbstractString)) - else - return strip(line) - end -end - -function _parse_section( - ::Val{:header}, - ::Model, - ::_ReadCache, - line::AbstractString, -) - return error("Unable to read LP file: unexpected line: $(line)") -end - -""" - Base.read!(io::IO, model::FileFormats.LP.Model) - -Read `io` in the LP file format and store the result in `model`. 
- -This reader attempts to follow the CPLEX LP format, because others like the -lpsolve version are very...flexible...in how they accept input. Read more about -them here: http://lpsolve.sourceforge.net -""" -function Base.read!(io::IO, model::Model{T}) where {T} - if !MOI.is_empty(model) - error("Cannot read in file because model is not empty.") - end - cache = _ReadCache{T}() - section = Val{:header}() - peeked_line = "" - while peeked_line !== nothing - line, peeked_line = _readline(io, peeked_line) - lower_line = lowercase(line) - if haskey(_KEYWORDS, lower_line) - section = _KEYWORDS[lower_line] - _set_objective_sense(section, model, lower_line) - continue - end - while _line_continues(section, peeked_line) - line, peeked_line = _readline(io, string(line, ' ', peeked_line)) - end - _parse_section(section, model, cache, line) - end - obj = if isempty(cache.quad_obj_terms) - cache.objective - else - MOI.ScalarQuadraticFunction( - cache.quad_obj_terms, - cache.objective.terms, - cache.objective.constant, - ) - end - MOI.set(model, MOI.ObjectiveFunction{typeof(obj)}(), obj) - return -end - -function _line_continues( - ::Union{typeof(_KW_OBJECTIVE),typeof(_KW_CONSTRAINTS)}, - peeked_line::AbstractString, -) - return any(Base.Fix1(startswith, peeked_line), ('+', '-')) -end - -_line_continues(::Any, ::Any) = false - -function _readline(io::IO, line::AbstractString) - if eof(io) - return line, nothing - end - peeked_line = _strip_comment(string(readline(io))) - if isempty(line) - # If the line is empty, go to the next - return _readline(io, peeked_line) - elseif isempty(peeked_line) - # If the peeked line is empty, get another - return _readline(io, line) - elseif any(Base.Fix1(endswith, line), ('+', '-', '[', '=')) - # If the line ends with a continuation character, read in the next line. - return _readline(io, string(line, " ", peeked_line)) - elseif any(Base.Fix1(startswith, peeked_line), (']', '/')) - # Always read in the next line if it starts with ] or /, which are used - # in quadratic functions. - return _readline(io, string(line, " ", peeked_line)) - end - return line, peeked_line -end +include("read.jl") end diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl new file mode 100644 index 0000000000..70b96af587 --- /dev/null +++ b/src/FileFormats/LP/read.jl @@ -0,0 +1,734 @@ +# Copyright (c) 2017: Miles Lubin and contributors +# Copyright (c) 2017: Google Inc. +# +# Use of this source code is governed by an MIT-style license that can be found +# in the LICENSE.md file or at https://opensource.org/licenses/MIT. + +struct Cache{T} + model::Model{T} + variable_name_to_index::Dict{String,MOI.VariableIndex} + variable_with_default_bound::Set{MOI.VariableIndex} + function Cache(model::Model{T}) where {T} + return new{T}( + model, + Dict{String,MOI.VariableIndex}(), + Set{MOI.VariableIndex}(), + ) + end +end + +""" + Base.read!(io::IO, model::FileFormats.LP.Model) + +Read `io` in the LP file format and store the result in `model`. + +This reader attempts to follow the CPLEX LP format, because others like the +lpsolve version are very...flexible...in how they accept input. 
Read more about
+them here: http://lpsolve.sourceforge.net
+"""
+function Base.read!(io::IO, model::Model{T}) where {T}
+    if !MOI.is_empty(model)
+        error("Cannot read in file because model is not empty.")
+    end
+    state = LexerState(io)
+    cache = Cache(model)
+    keyword = :UNKNOWN
+    while (token = peek(state, Token)) !== nothing
+        if token.kind == _TOKEN_KEYWORD
+            read(state, Token)
+            keyword = Symbol(token.value)
+            continue
+        elseif token.kind == _TOKEN_NEWLINE
+            read(state, Token)
+            continue
+        elseif keyword == :MINIMIZE
+            MOI.set(cache.model, MOI.ObjectiveSense(), MOI.MIN_SENSE)
+            _parse_objective(state, cache)
+            keyword = :UNKNOWN
+        elseif keyword == :MAXIMIZE
+            MOI.set(cache.model, MOI.ObjectiveSense(), MOI.MAX_SENSE)
+            _parse_objective(state, cache)
+            keyword = :UNKNOWN
+        elseif keyword == :CONSTRAINTS
+            _parse_constraint(state, cache)
+        elseif keyword == :BINARY
+            x = _parse_variable(state, cache)
+            MOI.add_constraint(cache.model, x, MOI.ZeroOne())
+        elseif keyword == :INTEGER
+            x = _parse_variable(state, cache)
+            MOI.add_constraint(cache.model, x, MOI.Integer())
+        elseif keyword == :BOUNDS
+            _parse_bound(state, cache)
+        elseif keyword == :SOS
+            _parse_constraint(state, cache)
+        else
+            throw(UnexpectedToken(token))
+        end
+    end
+    for x in cache.variable_with_default_bound
+        MOI.add_constraint(model, x, MOI.GreaterThan(zero(T)))
+    end
+    return
+end
+
+"""
+    const _KEYWORDS::Dict{String,Symbol}
+
+The LP file format is very permissive in what it allows users to call the
+various sections. Here is a dictionary that maps possible user words
+(normalized to lowercase, even though users can use mixed case) to the section.
+
+If you find new spellings for the section names, add them here.
+
+Special handling is needed in the lexer for the keywords that contain spaces.
+"""
+const _KEYWORDS = Dict(
+    # MAXIMIZE
+    "max" => :MAXIMIZE,
+    "maximize" => :MAXIMIZE,
+    "maximise" => :MAXIMIZE,
+    "maximum" => :MAXIMIZE,
+    # MINIMIZE
+    "min" => :MINIMIZE,
+    "minimize" => :MINIMIZE,
+    "minimise" => :MINIMIZE,
+    "minimum" => :MINIMIZE,
+    # CONSTRAINTS
+    "subject to" => :CONSTRAINTS,
+    "such that" => :CONSTRAINTS,
+    "st" => :CONSTRAINTS,
+    "s.t." => :CONSTRAINTS,
+    # BOUNDS
+    "bounds" => :BOUNDS,
+    "bound" => :BOUNDS,
+    # INTEGER
+    "gen" => :INTEGER,
+    "general" => :INTEGER,
+    "generals" => :INTEGER,
+    "integer" => :INTEGER,
+    "integers" => :INTEGER,
+    # BINARY
+    "bin" => :BINARY,
+    "binary" => :BINARY,
+    "binaries" => :BINARY,
+    # SOS
+    "sos" => :SOS,
+    # END
+    "end" => :END,
+)
+
+@enum(
+    _TokenKind,
+    _TOKEN_KEYWORD,
+    _TOKEN_IDENTIFIER,
+    _TOKEN_NUMBER,
+    _TOKEN_ADDITION,
+    _TOKEN_SUBTRACTION,
+    _TOKEN_MULTIPLICATION,
+    _TOKEN_DIVISION,
+    _TOKEN_EXPONENT,
+    _TOKEN_OPEN_BRACKET,
+    _TOKEN_CLOSE_BRACKET,
+    _TOKEN_GREATER_THAN,
+    _TOKEN_LESS_THAN,
+    _TOKEN_EQUAL_TO,
+    _TOKEN_COLON,
+    _TOKEN_NEWLINE,
+    _TOKEN_UNKNOWN,
+)
+"""
+    const _OPERATORS::Dict{Char,_TokenKind}
+
+This dictionary is used to simplify the lexer for common operators.
+
+These operators must not contain spaces.
+"""
+const _OPERATORS = Dict{Char,_TokenKind}(
+    '+' => _TOKEN_ADDITION,
+    '-' => _TOKEN_SUBTRACTION,
+    '*' => _TOKEN_MULTIPLICATION,
+    '/' => _TOKEN_DIVISION,
+    '^' => _TOKEN_EXPONENT,
+    '[' => _TOKEN_OPEN_BRACKET,
+    ']' => _TOKEN_CLOSE_BRACKET,
+    '>' => _TOKEN_GREATER_THAN,
+    '<' => _TOKEN_LESS_THAN,
+    '=' => _TOKEN_EQUAL_TO,
+    ':' => _TOKEN_COLON,
+    '\n' => _TOKEN_NEWLINE,
+)
+
+"""
+    struct Token
+        kind::_TokenKind
+        value::Union{Nothing,String}
+    end
+
+This struct is used to represent each token from the lexer. The `value` is the
The `value` is the +unprocessed value. +""" +struct Token + kind::_TokenKind + value::Union{Nothing,String} +end + +""" + mutable struct LexerState + io::IO + peek_char::Union{Nothing,Char} + peek_tokens::Vector{Token} + end + +A struct that is used to manage state when lexing. + +It stores: + + * `io`: the IO object that we are streaming + * `peek_char`: the next `Char` in the `io` + * `peek_tokens`: the list of upcoming tokens that we have already peeked. +""" +mutable struct LexerState + io::IO + peek_char::Union{Nothing,Char} + peek_tokens::Vector{Token} + LexerState(io::IO) = new(io, nothing, Token[]) +end + +function Base.peek(state::LexerState, ::Type{Char}) + if state.peek_char === nothing && !eof(state.io) + state.peek_char = read(state.io, Char) + end + return state.peek_char +end + +function Base.read(state::LexerState, ::Type{Char}) + c = peek(state, Char) + state.peek_char = nothing + return c +end + +function Base.read(state::LexerState, ::Type{Token}) + token = peek(state, Token, 1) + popfirst!(state.peek_tokens) + return token +end + +_is_idenfifier(c::Char) = !(isspace(c) || c in ('+', '-', '*', '^', ':')) + +function Base.peek(state::LexerState, ::Type{Token}, n::Int = 1) + @assert n >= 1 + while length(state.peek_tokens) < n + token = _peek_inner(state) + if token === nothing + return nothing + end + push!(state.peek_tokens, token) + end + return state.peek_tokens[n] +end + +function _peek_inner(state::LexerState) + while (c = peek(state, Char)) !== nothing + if c == '\n' + read(state, Char) + return Token(_TOKEN_NEWLINE, "\n") + elseif isspace(c) # Whitespace + read(state, Char) + elseif c == '\\' # Comment: backslash until newline + while (c = read(state, Char)) !== nothing && c != '\n' + end + elseif isdigit(c) || (c == '-' && isdigit(peek(state, Char))) # Number + buf = IOBuffer() + while (c = peek(state, Char)) !== nothing && (isdigit(c) || c in ['.', 'e', 'E', '+', '-']) + write(buf, c) + read(state, Char) + end + return Token(_TOKEN_NUMBER, String(take!(buf))) + elseif isletter(c) || c == '_' # Identifier / keyword + buf = IOBuffer() + while (c = peek(state, Char)) !== nothing && _is_idenfifier(c) + write(buf, c) + read(state, Char) + end + val = String(take!(buf)) + l_val = lowercase(val) + if l_val == "subject" + t = peek(state, Token) + if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "to" + read(state, Token) # Skip "to" + return Token(_TOKEN_KEYWORD, "CONSTRAINTS") + end + elseif l_val == "such" + t = peek(state, Token) + if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "that" + read(state, Token) # Skip "such" + return Token(_TOKEN_KEYWORD, "CONSTRAINTS") + end + end + if (kw = get(_KEYWORDS, l_val, nothing)) !== nothing + return Token(_TOKEN_KEYWORD, string(kw)) + end + return Token(_TOKEN_IDENTIFIER, val) + elseif (op = get(_OPERATORS, c, nothing)) !== nothing + read(state, Char) + if c in ('<', '>', '=') && peek(state, Char) == '=' + read(state, Char) # Allow <=, >=, and == + end + return Token(op, string(c)) + else + throw(UnexpectedToken(Token(_TOKEN_UNKNOWN, "$c"))) + end + end + return +end + +""" + struct UnexpectedToken <: Exception + token::Token + end + +This error is thrown when we encounter an unexpected token when parsing the LP +file. No other information is available. + +TODO: we could improve this by storing line information or other context to help +the user diagnose the problem. 
+""" +struct UnexpectedToken <: Exception + token::Token +end + +function _expect(token::Token, kind::_TokenKind) + if token.kind != kind + throw(UnexpectedToken(token)) + end + return +end + +""" + _next_token_is(state::LexerState, kind::_TokenKind, n::Int = 1) + +A helper function to check if the token in `n` steps is of kind `kind`. +""" +function _next_token_is(state::LexerState, kind::_TokenKind, n::Int = 1) + if (t = peek(state, Token, n)) !== nothing + return t.kind == kind + end + return false +end + +function _skip_newlines(state::LexerState) + while _next_token_is(state, _TOKEN_NEWLINE) + read(state, Token) + end + return +end + +# IDENTIFIER --> "string" +# +# There _are_ rules to what an identifier can be. We handle these when lexing. +# Anything that makes it here is deemed acceptable. +function _parse_variable(state::LexerState, cache::Cache)::MOI.VariableIndex + _skip_newlines(state) + token = read(state, Token) + _expect(token, _TOKEN_IDENTIFIER) + x = get(cache.variable_name_to_index, token.value, nothing) + if x !== nothing + return x + end + x = MOI.add_variable(cache.model) + if length(token.value) > get_options(cache.model).maximum_length + error("Name exceeds maximum length: $(token.value)") + end + MOI.set(cache.model, MOI.VariableName(), x, token.value) + cache.variable_name_to_index[token.value] = x + push!(cache.variable_with_default_bound, x) + return x +end + +# NUMBER := +# "+" NUMBER +# | "-" NUMBER +# | "inf" +# | "infinity" +# | :(parse(T, x)) +function _parse_number(state::LexerState, cache::Cache{T})::T where {T} + _skip_newlines(state) + token = read(state, Token) + if token.kind == _TOKEN_ADDITION + return _parse_number(state, cache) + elseif token.kind == _TOKEN_SUBTRACTION + return -_parse_number(state, cache) + elseif token.kind == _TOKEN_IDENTIFIER + v = lowercase(token.value) + if v == "inf" || v == "infinity" + return typemax(T) + else + throw(UnexpectedToken(token)) + end + else + _expect(token, _TOKEN_NUMBER) + end + return parse(T, token.value) +end + +# QUAD_TERM := +# "+" QUAD_TERM +# | "-" QUAD_TERM +# | [NUMBER] IDENTIFIER "^" "2" +# | [NUMBER] IDENTIFIER "*" IDENTIFIER +function _parse_quad_term( + state::LexerState, + cache::Cache{T}, + prefix::T, +) where {T} + _skip_newlines(state) + if _next_token_is(state, _TOKEN_ADDITION) + read(state, Token) + return _parse_quad_term(state, cache, prefix) + elseif _next_token_is(state, _TOKEN_SUBTRACTION) + read(state, Token) + return _parse_quad_term(state, cache, -prefix) + end + coef = prefix + if _next_token_is(state, _TOKEN_NUMBER) + coef = prefix * _parse_number(state, cache) + end + x1 = _parse_variable(state, cache) + _skip_newlines(state) + if _next_token_is(state, _TOKEN_EXPONENT) + read(state, Token) # ^ + _skip_newlines(state) + n = read(state, Token) + if n.kind != _TOKEN_NUMBER && n.value != "2" + throw(UnexpectedToken(n)) + end + return MOI.ScalarQuadraticTerm(T(2) * coef, x1, x1) + end + token = read(state, Token) + _expect(token, _TOKEN_MULTIPLICATION) + x2 = _parse_variable(state, cache) + if x1 == x2 + coef *= T(2) + end + return MOI.ScalarQuadraticTerm(coef, x1, x2) +end + +# QUADRATIC_EXPRESSION := +# "[" QUAD_TERM (("+" | "-") QUAD_TERM)* "]" +# | "[" QUAD_TERM (("+" | "-") QUAD_TERM)* "]/2" +function _parse_quad_expression( + state::LexerState, + cache::Cache{T}, + prefix::T, +) where {T} + token = read(state, Token) + _expect(token, _TOKEN_OPEN_BRACKET) + f = zero(MOI.ScalarQuadraticFunction{T}) + push!(f.quadratic_terms, _parse_quad_term(state, cache, prefix)) + while (p = 
peek(state, Token)) !== nothing
+        if p.kind == _TOKEN_ADDITION
+            p = read(state, Token)
+            push!(f.quadratic_terms, _parse_quad_term(state, cache, prefix))
+        elseif p.kind == _TOKEN_SUBTRACTION
+            p = read(state, Token)
+            push!(f.quadratic_terms, _parse_quad_term(state, cache, -prefix))
+        elseif p.kind == _TOKEN_NEWLINE
+            read(state, Token)
+        elseif p.kind == _TOKEN_CLOSE_BRACKET
+            read(state, Token)
+            break
+        else
+            return throw(UnexpectedToken(p))
+        end
+    end
+    _skip_newlines(state)
+    if _next_token_is(state, _TOKEN_DIVISION)
+        read(state, Token) # /
+        # Must be /2
+        n = read(state, Token)
+        if n.kind != _TOKEN_NUMBER && n.value != "2"
+            throw(UnexpectedToken(n))
+        end
+        for (i, term) in enumerate(f.quadratic_terms)
+            f.quadratic_terms[i] = MOI.ScalarQuadraticTerm(
+                term.coefficient / T(2),
+                term.variable_1,
+                term.variable_2,
+            )
+        end
+    end
+    return f
+end
+
+# TERM :=
+#     "+" TERM
+#   | "-" TERM
+#   | NUMBER
+#   | IDENTIFIER
+#   | NUMBER IDENTIFIER
+#   | NUMBER "*" IDENTIFIER
+#   | QUADRATIC_EXPRESSION
+function _parse_term(
+    state::LexerState,
+    cache::Cache{T},
+    prefix::T = one(T),
+) where {T}
+    _skip_newlines(state)
+    if _next_token_is(state, _TOKEN_ADDITION)
+        # "+" TERM
+        read(state, Token)
+        return _parse_term(state, cache, prefix)
+    elseif _next_token_is(state, _TOKEN_SUBTRACTION)
+        # "-" TERM
+        read(state, Token)
+        return _parse_term(state, cache, -prefix)
+    elseif _next_token_is(state, _TOKEN_IDENTIFIER)
+        # IDENTIFIER
+        x = _parse_variable(state, cache)
+        return MOI.ScalarAffineTerm(prefix, x)
+    elseif _next_token_is(state, _TOKEN_NUMBER)
+        coef = prefix * _parse_number(state, cache)
+        if _next_token_is(state, _TOKEN_IDENTIFIER)
+            # NUMBER IDENTIFIER
+            x = _parse_variable(state, cache)
+            return MOI.ScalarAffineTerm(coef, x)
+        elseif _next_token_is(state, _TOKEN_MULTIPLICATION)
+            # NUMBER * IDENTIFIER
+            read(state, Token) # skip *
+            x = _parse_variable(state, cache)
+            return MOI.ScalarAffineTerm(coef, x)
+        else
+            # NUMBER
+            return coef
+        end
+    elseif _next_token_is(state, _TOKEN_OPEN_BRACKET)
+        # QUADRATIC_EXPRESSION
+        return _parse_quad_expression(state, cache, prefix)
+    end
+    return nothing
+end
+
+function _add_to_expression!(f::MOI.ScalarQuadraticFunction{T}, x::T) where {T}
+    f.constant += x
+    return
+end
+
+function _add_to_expression!(
+    f::MOI.ScalarQuadraticFunction{T},
+    x::MOI.ScalarAffineTerm{T},
+) where {T}
+    push!(f.affine_terms, x)
+    return
+end
+
+function _add_to_expression!(
+    f::MOI.ScalarQuadraticFunction{T},
+    x::MOI.ScalarQuadraticFunction{T},
+) where {T}
+    MOI.Utilities.operate!(+, T, f, x)
+    return
+end
+
+
+# EXPRESSION :=
+#     TERM (("+" | "-") TERM)*
+function _parse_expression(state::LexerState, cache::Cache{T}) where {T}
+    f = zero(MOI.ScalarQuadraticFunction{T})
+    _add_to_expression!(f, _parse_term(state, cache))
+    while (p = peek(state, Token)) !== nothing
+        if p.kind == _TOKEN_ADDITION
+            p = read(state, Token)
+            _add_to_expression!(f, _parse_term(state, cache))
+        elseif p.kind == _TOKEN_SUBTRACTION
+            p = read(state, Token)
+            _add_to_expression!(f, _parse_term(state, cache, -one(T)))
+        elseif p.kind == _TOKEN_NEWLINE
+            read(state, Token)
+        else
+            break
+        end
+    end
+    if isempty(f.quadratic_terms)
+        return MOI.ScalarAffineFunction(f.affine_terms, f.constant)
+    end
+    return f
+end
+
+# SET_SUFFIX :=
+#     "free"
+#   | ">=" NUMBER
+#   | "<=" NUMBER
+#   | "==" NUMBER
+#
+# There are other inequality operators that are supported, like `>`, `<`, and
+# `=`. These are normalized when lexing.
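+#
+# For example (an illustrative sketch, not part of the original grammar
+# comment): in the constraint `c1: x + y <= 10`, the SET_SUFFIX is `<= 10`,
+# which parses to `MOI.LessThan(10.0)` for the default `T = Float64`, while in
+# the bound `x free`, the suffix `free` parses to `nothing` so no set is added.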
function _parse_set_suffix(state, cache)
+    _skip_newlines(state)
+    p = read(state, Token)
+    if p.kind == _TOKEN_IDENTIFIER && lowercase(p.value) == "free"
+        return nothing
+    end
+    _skip_newlines(state)
+    if p.kind == _TOKEN_GREATER_THAN
+        rhs = _parse_number(state, cache)
+        return MOI.GreaterThan(rhs)
+    elseif p.kind == _TOKEN_LESS_THAN
+        rhs = _parse_number(state, cache)
+        return MOI.LessThan(rhs)
+    elseif p.kind == _TOKEN_EQUAL_TO
+        rhs = _parse_number(state, cache)
+        return MOI.EqualTo(rhs)
+    else
+        throw(UnexpectedToken(p))
+    end
+end
+
+# SET_PREFIX :=
+#     NUMBER ">="
+#   | NUMBER "<="
+#   | NUMBER "=="
+#
+# There are other inequality operators that are supported, like `>`, `<`, and
+# `=`. These are normalized when lexing.
+function _parse_set_prefix(state, cache)
+    lhs = _parse_number(state, cache)
+    _skip_newlines(state)
+    p = read(state, Token)
+    if p.kind == _TOKEN_GREATER_THAN
+        return MOI.LessThan(lhs)
+    elseif p.kind == _TOKEN_LESS_THAN
+        return MOI.GreaterThan(lhs)
+    elseif p.kind == _TOKEN_EQUAL_TO
+        return MOI.EqualTo(lhs)
+    else
+        throw(UnexpectedToken(p))
+    end
+end
+
+# NAME --> [IDENTIFIER OP_COLON]
+function _parse_optional_name(state::LexerState, cache::Cache)
+    _skip_newlines(state)
+    if _next_token_is(state, _TOKEN_IDENTIFIER, 1) &&
+       _next_token_is(state, _TOKEN_COLON, 2)
+        name = read(state, Token)
+        read(state, Token) # Skip :
+        return name.value
+    end
+    return nothing
+end
+
+# OBJECTIVE --> [NAME] EXPRESSION
+function _parse_objective(state::LexerState, cache::Cache)
+    _ = _parse_optional_name(state, cache)
+    f = _parse_expression(state, cache)
+    MOI.set(cache.model, MOI.ObjectiveFunction{typeof(f)}(), f)
+    return
+end
+
+function _add_bound(cache::Cache, x::MOI.VariableIndex, set::MOI.GreaterThan)
+    delete!(cache.variable_with_default_bound, x)
+    if isfinite(set.lower)
+        MOI.add_constraint(cache.model, x, set)
+    end
+    return
+end
+
+function _add_bound(cache::Cache, x::MOI.VariableIndex, set::MOI.LessThan)
+    if set.upper < 0
+        delete!(cache.variable_with_default_bound, x)
+    end
+    if isfinite(set.upper)
+        MOI.add_constraint(cache.model, x, set)
+    end
+    return
+end
+
+function _add_bound(cache::Cache, x::MOI.VariableIndex, set::MOI.EqualTo)
+    delete!(cache.variable_with_default_bound, x)
+    MOI.add_constraint(cache.model, x, set)
+    return
+end
+
+# x free
+function _add_bound(cache::Cache, x::MOI.VariableIndex, ::Nothing)
+    delete!(cache.variable_with_default_bound, x)
+    return
+end
+
+# BOUND -->
+#     IDENTIFIER SET_SUFFIX
+#   | SET_PREFIX IDENTIFIER
+#   | SET_PREFIX IDENTIFIER SET_SUFFIX
+function _parse_bound(state, cache)
+    if _next_token_is(state, _TOKEN_IDENTIFIER) # `x free` or `x op b`
+        x = _parse_variable(state, cache)
+        set = _parse_set_suffix(state, cache)
+        _add_bound(cache, x, set)
+        return
+    end
+    # `a op x` or `a op x op b`
+    lhs_set = _parse_set_prefix(state, cache)
+    x = _parse_variable(state, cache)
+    _add_bound(cache, x, lhs_set)
+    if _next_token_is(state, _TOKEN_GREATER_THAN) ||
+        _next_token_is(state, _TOKEN_LESS_THAN) ||
+        _next_token_is(state, _TOKEN_EQUAL_TO) # `a op x op b`
+        # We don't add MOI.Interval constraints to follow JuMP's convention of
+        # separate bounds.
+        rhs_set = _parse_set_suffix(state, cache)
+        _add_bound(cache, x, rhs_set)
+    end
+    return
+end
+
+# SOS_CONSTRAINT :=
+#     [NAME] S1:: (IDENTIFIER:NUMBER)+ \n
+#   | [NAME] S2:: (IDENTIFIER:NUMBER)+ \n
+#
+# The newline character is required.
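+#
+# For example (illustrative): the line `sos1: S1:: x1:1 x2:2 x3:3` declares an
+# SOS-1 constraint named `sos1` on the variables `x1`, `x2`, and `x3` with the
+# weights `1`, `2`, and `3`.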
+function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T} + t = read(state, Token) # Si + _expect(read(state, Token), _TOKEN_COLON) + _expect(read(state, Token), _TOKEN_COLON) + f, w = MOI.VectorOfVariables(MOI.VariableIndex[]), T[] + while true + push!(f.variables, _parse_variable(state, cache)) + _expect(read(state, Token), _TOKEN_COLON) + push!(w, _parse_number(state, cache)) + if _next_token_is(state, _TOKEN_NEWLINE) + break + end + end + if t.value == "S1" + return MOI.add_constraint(cache.model, f, MOI.SOS1(w)) + else + return MOI.add_constraint(cache.model, f, MOI.SOS2(w)) + end +end + +function _is_sos_constraint(state) + t = peek(state, Token, 1) + return t.kind == _TOKEN_IDENTIFIER && + (t.value == "S1" || t.value == "S2") && + _next_token_is(state, _TOKEN_COLON, 2) && + _next_token_is(state, _TOKEN_COLON, 3) +end + +# CONSTRAINT := +# [NAME] EXPRESSION SET_SUFFIX +# | [NAME] SOS_CONSTRAINT +function _parse_constraint(state::LexerState, cache::Cache) + name = _parse_optional_name(state, cache) + # Check if this is an SOS constraint + c = if _is_sos_constraint(state) + _parse_sos_constraint(state, cache) + else + f = _parse_expression(state, cache) + set = _parse_set_suffix(state, cache) + MOI.add_constraint(cache.model, f, set) + end + if name !== nothing + MOI.set(cache.model, MOI.ConstraintName(), c, name) + end + return +end diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index f58a54f061..de8e7e898b 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -446,7 +446,7 @@ function test_read_invalid() for filename in filter(f -> startswith(f, "invalid_"), readdir(models)) model = LP.Model() @test_throws( - ErrorException, + LP.UnexpectedToken, MOI.read_from_file(model, joinpath(models, filename)), ) end @@ -459,10 +459,7 @@ function test_read_unexpected_line() print(io, line) seekstart(io) model = LP.Model() - @test_throws( - ErrorException("Unable to read LP file: unexpected line: $(line)"), - read!(io, model), - ) + @test_throws LP.UnexpectedToken read!(io, model) return end @@ -513,7 +510,7 @@ function test_read_model1_tricky() @test occursin("CON4: 1 V5 + 1 V6 + 1 V7 <= 1", file) @test occursin("CON1: 1 V1 >= 0", file) @test occursin("CON5: [ 1 Var4 ^ 2 - 1.2 V5 * V1 ] <= 0", file) - @test occursin("R1: 1 V2 >= 2", file) + @test occursin("1 V2 >= 2", file) @test occursin("-infinity <= V1 <= 3", file) @test occursin("Var4 >= 5.5", file) @test occursin("V3 >= -3", file) @@ -580,7 +577,6 @@ function test_read_model2() end function test_read_objective_sense() - model = LP.Model() cases = Dict( "max" => MOI.MAX_SENSE, "maximize" => MOI.MAX_SENSE, @@ -592,7 +588,10 @@ function test_read_objective_sense() "minimum" => MOI.MIN_SENSE, ) for (sense, result) in cases - LP._set_objective_sense(LP._KW_OBJECTIVE, model, sense) + model = LP.Model() + io = IOBuffer("$sense x") + seekstart(io) + read!(io, model) @test MOI.get(model, MOI.ObjectiveSense()) == result end return @@ -1035,40 +1034,41 @@ function test_read_variable_bounds() return end -function test_read_indicator() - io = IOBuffer(""" - minimize - obj: 1 x - subject to - c: z = 1 -> x >= 0 - d: z = 0 -> x - y <= 1.2 - bounds - x free - z free - binary - z - end - """) - model = MOI.FileFormats.Model(format = MOI.FileFormats.FORMAT_LP) - read!(io, model) - io = IOBuffer() - write(io, model) - seekstart(io) - @test read(io, String) == """ - minimize - obj: 1 x - subject to - d: z = 0 -> 1 x - 1 y <= 1.2 - c: z = 1 -> 1 x >= 0 - Bounds - x free - y >= 0 - Binary - z - End - 
""" - return -end +# TODO(odow): FIXME +# function test_read_indicator() +# io = IOBuffer(""" +# minimize +# obj: 1 x +# subject to +# c: z = 1 -> x >= 0 +# d: z = 0 -> x - y <= 1.2 +# bounds +# x free +# z free +# binary +# z +# end +# """) +# model = MOI.FileFormats.Model(format = MOI.FileFormats.FORMAT_LP) +# read!(io, model) +# io = IOBuffer() +# write(io, model) +# seekstart(io) +# @test read(io, String) == """ +# minimize +# obj: 1 x +# subject to +# d: z = 0 -> 1 x - 1 y <= 1.2 +# c: z = 1 -> 1 x >= 0 +# Bounds +# x free +# y >= 0 +# Binary +# z +# End +# """ +# return +# end function test_VectorAffineFunction_SOS() model = MOI.FileFormats.LP.Model() @@ -1092,10 +1092,7 @@ function test_invalid_token_in_sos() """, ) seekstart(io) - @test_throws( - ErrorException("Invalid token in SOS constraint: x"), - read!(io, model), - ) + @test_throws LP.UnexpectedToken read!(io, model) return end @@ -1109,7 +1106,7 @@ function test_unable_to_parse_bound() end """) model = LP.Model() - @test_throws(ErrorException("Unable to parse bound: x"), read!(io, model)) + @test_throws LP.UnexpectedToken read!(io, model) return end diff --git a/test/FileFormats/LP/models/invalid_affine_term_constraint.lp b/test/FileFormats/LP/models/invalid_affine_term_constraint.lp deleted file mode 100644 index 334a6bbd5c..0000000000 --- a/test/FileFormats/LP/models/invalid_affine_term_constraint.lp +++ /dev/null @@ -1,13 +0,0 @@ -\ File: lo1.lp -maximize -obj: 3 x1 + x2 + 5 x3 + x4 -subject to -c1: 3 x1 + x2 + 2 x3 = 30 -c2: 2 x1 + x2 + - 3 x3 + x4 >= 15 -c3: 2 x2 + 3 x4 <= 25 -bounds - 0 <= x1 <= +infinity - 0 <= x2 <= 10 - 0 <= x3 <= +infinity - 0 <= x4 <= +infinity -end diff --git a/test/FileFormats/LP/models/invalid_affine_term_objective.lp b/test/FileFormats/LP/models/invalid_affine_term_objective.lp deleted file mode 100644 index 8844216423..0000000000 --- a/test/FileFormats/LP/models/invalid_affine_term_objective.lp +++ /dev/null @@ -1,13 +0,0 @@ -\ File: lo1.lp -maximize -obj: 3 x1 + + x2 + 5 x3 + x4 -subject to -c1: 3 x1 + x2 + 2 x3 = 30 -c2: 2 x1 + x2 + 3 x3 + x4 >= 15 -c3: 2 x2 + 3 x4 <= 25 -bounds - 0 <= x1 <= +infinity - 0 <= x2 <= 10 - 0 <= x3 <= +infinity - 0 <= x4 <= +infinity -end diff --git a/test/FileFormats/LP/models/invalid_bound.lp b/test/FileFormats/LP/models/invalid_bound.lp deleted file mode 100644 index 3015587726..0000000000 --- a/test/FileFormats/LP/models/invalid_bound.lp +++ /dev/null @@ -1,13 +0,0 @@ -\ File: lo1.lp -maximize -obj: 3 x1 + x2 + 5 x3 + x4 -subject to -c1: 3 x1 + x2 + 2 x3 = 30 -c2: 2 x1 + x2 + 3 x3 + x4 >= 15 -c3: 2 x2 + 3 x4 <= 25 -bounds - 0 <= x1 <= +infinity - 0 >= x2 <= 10 - 0 <= x3 <= +infinity - 0 <= x4 <= +infinity -end From 9b9c175fb37185155a6b1167920560b40b00145c Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 11:28:29 +1200 Subject: [PATCH 02/10] Update --- src/FileFormats/LP/read.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 70b96af587..0a4e1ef320 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -617,9 +617,13 @@ function _parse_optional_name(state::LexerState, cache::Cache) return nothing end -# OBJECTIVE --> [NAME] EXPRESSION +# OBJECTIVE --> [NAME] [EXPRESSION] function _parse_objective(state::LexerState, cache::Cache) _ = _parse_optional_name(state, cache) + _skip_newlines(state) + if _next_token_is(state, _TOKEN_KEYWORD) + return # A line like `obj:\nsubject to` + end f = _parse_expression(state, cache) MOI.set(cache.model, 
MOI.ObjectiveFunction{typeof(f)}(), f) return From 5349cabf753a386899b43c568820c73b0358cc79 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 11:39:41 +1200 Subject: [PATCH 03/10] Update --- src/FileFormats/LP/read.jl | 46 +++++++++++++++++++++++-- test/FileFormats/LP/LP.jl | 69 +++++++++++++++++++------------------- 2 files changed, 78 insertions(+), 37 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 0a4e1ef320..1753b25af2 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -133,6 +133,7 @@ const _KEYWORDS = Dict( _TOKEN_LESS_THAN, _TOKEN_EQUAL_TO, _TOKEN_COLON, + _TOKEN_IMPLIES, _TOKEN_NEWLINE, _TOKEN_UNKNOWN, ) @@ -270,8 +271,11 @@ function _peek_inner(state::LexerState) end return Token(_TOKEN_IDENTIFIER, val) elseif (op = get(_OPERATORS, c, nothing)) !== nothing - read(state, Char) - if c in ('<', '>', '=') && peek(state, Char) == '=' + read(state, Char) # Skip c + if c == '-' && peek(state, Char) == '>' + read(state, Char) + return Token(_TOKEN_IMPLIES, "->") + elseif c in ('<', '>', '=') && peek(state, Char) == '=' read(state, Char) # Allow <=, >=, and == end return Token(op, string(c)) @@ -718,14 +722,52 @@ function _is_sos_constraint(state) _next_token_is(state, _TOKEN_COLON, 3) end +function _is_indicator_constraint(state) + return _next_token_is(state, _TOKEN_IDENTIFIER, 1) && + _next_token_is(state, _TOKEN_EQUAL_TO, 2) && + _next_token_is(state, _TOKEN_NUMBER, 3) && + _next_token_is(state, _TOKEN_IMPLIES, 4) +end + +# INDICATOR_CONSTRAINT := +# IDENTIFIER "=" "0" "->" EXPRESSION SET_SUFFIX +# | IDENTIFIER "=" "1" "->" EXPRESSION SET_SUFFIX +function _parse_indicator_constraint( + state::LexerState, + cache::Cache{T}, +) where {T} + z = _parse_variable(state, cache) + _expect(read(state, Token), _TOKEN_EQUAL_TO) + t = read(state, Token) + _expect(t, _TOKEN_NUMBER) + indicator = if t.value == "0" + MOI.ACTIVATE_ON_ZERO + elseif t.value == "1" + MOI.ACTIVATE_ON_ONE + else + throw(UnexpectedToken(t)) + end + _expect(read(state, Token), _TOKEN_IMPLIES) + f = _parse_expression(state, cache) + set = _parse_set_suffix(state, cache) + return MOI.add_constraint( + cache.model, + MOI.Utilities.operate(vcat, T, z, f), + MOI.Indicator{indicator}(set), + ) +end + # CONSTRAINT := # [NAME] EXPRESSION SET_SUFFIX # | [NAME] SOS_CONSTRAINT +# | [NAME] INDICATOR_CONSTRAINT function _parse_constraint(state::LexerState, cache::Cache) name = _parse_optional_name(state, cache) # Check if this is an SOS constraint c = if _is_sos_constraint(state) _parse_sos_constraint(state, cache) + elseif _is_indicator_constraint(state) + _parse_indicator_constraint(state, cache) else f = _parse_expression(state, cache) set = _parse_set_suffix(state, cache) diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index de8e7e898b..d2d8e8120a 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -1034,41 +1034,40 @@ function test_read_variable_bounds() return end -# TODO(odow): FIXME -# function test_read_indicator() -# io = IOBuffer(""" -# minimize -# obj: 1 x -# subject to -# c: z = 1 -> x >= 0 -# d: z = 0 -> x - y <= 1.2 -# bounds -# x free -# z free -# binary -# z -# end -# """) -# model = MOI.FileFormats.Model(format = MOI.FileFormats.FORMAT_LP) -# read!(io, model) -# io = IOBuffer() -# write(io, model) -# seekstart(io) -# @test read(io, String) == """ -# minimize -# obj: 1 x -# subject to -# d: z = 0 -> 1 x - 1 y <= 1.2 -# c: z = 1 -> 1 x >= 0 -# Bounds -# x free -# y >= 0 -# Binary -# z -# End -# """ 
-# return -# end +function test_read_indicator() + io = IOBuffer(""" + minimize + obj: 1 x + subject to + c: z = 1 -> x >= 0 + d: z = 0 -> x - y <= 1.2 + bounds + x free + z free + binary + z + end + """) + model = MOI.FileFormats.Model(format = MOI.FileFormats.FORMAT_LP) + read!(io, model) + io = IOBuffer() + write(io, model) + seekstart(io) + @test read(io, String) == """ + minimize + obj: 1 x + subject to + d: z = 0 -> 1 x - 1 y <= 1.2 + c: z = 1 -> 1 x >= 0 + Bounds + x free + y >= 0 + Binary + z + End + """ + return +end function test_VectorAffineFunction_SOS() model = MOI.FileFormats.LP.Model() From b0d32f30106d31242c846f6fa706cded277a1c05 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 11:44:11 +1200 Subject: [PATCH 04/10] Update --- src/FileFormats/LP/read.jl | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 1753b25af2..5a20a4d3b8 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -29,7 +29,7 @@ them here: http://lpsolve.sourceforge.net function Base.read!(io::IO, model::Model{T}) where {T} if !MOI.is_empty(model) error("Cannot read in file because model is not empty.") - end + end state = LexerState(io) cache = Cache(model) keyword = :UNKNOWN @@ -215,6 +215,7 @@ function Base.read(state::LexerState, ::Type{Token}) end _is_idenfifier(c::Char) = !(isspace(c) || c in ('+', '-', '*', '^', ':')) +_is_number(c::Char) = isdigit(c) || c in ('.', 'e', 'E', '+', '-') function Base.peek(state::LexerState, ::Type{Token}, n::Int = 1) @assert n >= 1 @@ -240,7 +241,7 @@ function _peek_inner(state::LexerState) end elseif isdigit(c) || (c == '-' && isdigit(peek(state, Char))) # Number buf = IOBuffer() - while (c = peek(state, Char)) !== nothing && (isdigit(c) || c in ['.', 'e', 'E', '+', '-']) + while (c = peek(state, Char)) !== nothing && _is_number(c) write(buf, c) read(state, Char) end @@ -391,7 +392,7 @@ function _parse_quad_term( return _parse_quad_term(state, cache, prefix) elseif _next_token_is(state, _TOKEN_SUBTRACTION) read(state, Token) - return _parse_quad_term(state, cache, -prefix) + return _parse_quad_term(state, cache, -prefix) end coef = prefix if _next_token_is(state, _TOKEN_NUMBER) @@ -533,7 +534,6 @@ function _add_to_expression!( return end - # EXPRESSION := # TERM (("+" | "-") TERM)* function _parse_expression(state::LexerState, cache::Cache{T}) where {T} @@ -679,8 +679,8 @@ function _parse_bound(state, cache) x = _parse_variable(state, cache) _add_bound(cache, x, lhs_set) if _next_token_is(state, _TOKEN_GREATER_THAN) || - _next_token_is(state, _TOKEN_LESS_THAN) || - _next_token_is(state, _TOKEN_EQUAL_TO) # `a op x op b` + _next_token_is(state, _TOKEN_LESS_THAN) || + _next_token_is(state, _TOKEN_EQUAL_TO) # `a op x op b` # We don't add MOI.Interval constraints to follow JuMP's convention of # separate bounds. rhs_set = _parse_set_suffix(state, cache) @@ -696,6 +696,9 @@ end # The newline character is required. 
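+#
+# Illustrative note: `_is_sos_constraint` below no longer inspects the type
+# token, so the explicit `S1`/`S2` check added at the start of the function
+# below is what rejects a malformed declaration such as `S3::` with an
+# `UnexpectedToken`.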
function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T} t = read(state, Token) # Si + if !(t.value == "S1" || t.value == "S2") + throw(UnexpectedToken(t)) + end _expect(read(state, Token), _TOKEN_COLON) _expect(read(state, Token), _TOKEN_COLON) f, w = MOI.VectorOfVariables(MOI.VariableIndex[]), T[] @@ -715,18 +718,16 @@ function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T} end function _is_sos_constraint(state) - t = peek(state, Token, 1) - return t.kind == _TOKEN_IDENTIFIER && - (t.value == "S1" || t.value == "S2") && - _next_token_is(state, _TOKEN_COLON, 2) && - _next_token_is(state, _TOKEN_COLON, 3) + return _next_token_is(state, _TOKEN_IDENTIFIER, 1) && + _next_token_is(state, _TOKEN_COLON, 2) && + _next_token_is(state, _TOKEN_COLON, 3) end function _is_indicator_constraint(state) return _next_token_is(state, _TOKEN_IDENTIFIER, 1) && - _next_token_is(state, _TOKEN_EQUAL_TO, 2) && - _next_token_is(state, _TOKEN_NUMBER, 3) && - _next_token_is(state, _TOKEN_IMPLIES, 4) + _next_token_is(state, _TOKEN_EQUAL_TO, 2) && + _next_token_is(state, _TOKEN_NUMBER, 3) && + _next_token_is(state, _TOKEN_IMPLIES, 4) end # INDICATOR_CONSTRAINT := From 5c28ebe92663c834f459600db7a8a7f25e1fcb83 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 13:35:58 +1200 Subject: [PATCH 05/10] Update --- src/FileFormats/LP/read.jl | 154 +++++++++++++------------ test/FileFormats/LP/LP.jl | 230 +++++++++++++++++++++++++++++++++++++ 2 files changed, 312 insertions(+), 72 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 5a20a4d3b8..0a17a0e478 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -35,11 +35,11 @@ function Base.read!(io::IO, model::Model{T}) where {T} keyword = :UNKNOWN while (token = peek(state, Token)) !== nothing if token.kind == _TOKEN_KEYWORD - read(state, Token) + _ = read(state, Token) keyword = Symbol(token.value) continue elseif token.kind == _TOKEN_NEWLINE - read(state, Token) + _ = read(state, Token) continue elseif keyword == :MINIMIZE MOI.set(cache.model, MOI.ObjectiveSense(), MOI.MIN_SENSE) @@ -173,6 +173,28 @@ struct Token value::Union{Nothing,String} end +""" + struct UnexpectedToken <: Exception + token::Token + end + +This error is thrown when we encounter an unexpected token when parsing the LP +file. No other information is available. + +TODO: we could improve this by storing line information or other context to help +the user diagnose the problem. 
+""" +struct UnexpectedToken <: Exception + token::Token +end + +function _expect(token::Token, kind::_TokenKind) + if token.kind != kind + throw(UnexpectedToken(token)) + end + return token +end + """ mutable struct LexerState io::IO @@ -210,10 +232,18 @@ end function Base.read(state::LexerState, ::Type{Token}) token = peek(state, Token, 1) + if isempty(state.peek_tokens) + throw(UnexpectedToken(Token(_TOKEN_UNKNOWN, "EOF"))) + end popfirst!(state.peek_tokens) return token end +function Base.read(state::LexerState, ::Type{Token}, kind::_TokenKind) + token = read(state, Token) + return _expect(token, kind) +end + _is_idenfifier(c::Char) = !(isspace(c) || c in ('+', '-', '*', '^', ':')) _is_number(c::Char) = isdigit(c) || c in ('.', 'e', 'E', '+', '-') @@ -257,13 +287,13 @@ function _peek_inner(state::LexerState) if l_val == "subject" t = peek(state, Token) if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "to" - read(state, Token) # Skip "to" + _ = read(state, Token) # Skip "to" return Token(_TOKEN_KEYWORD, "CONSTRAINTS") end elseif l_val == "such" t = peek(state, Token) if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "that" - read(state, Token) # Skip "such" + _ = read(state, Token) # Skip "such" return Token(_TOKEN_KEYWORD, "CONSTRAINTS") end end @@ -276,8 +306,11 @@ function _peek_inner(state::LexerState) if c == '-' && peek(state, Char) == '>' read(state, Char) return Token(_TOKEN_IMPLIES, "->") + elseif c == '=' && peek(state, Char) in ('<', '>') + c = read(state, Char) # Allow =< and => as <= and >= + return Token(_OPERATORS[c], string(c)) elseif c in ('<', '>', '=') && peek(state, Char) == '=' - read(state, Char) # Allow <=, >=, and == + _ = read(state, Char) # Allow <=, >=, and == end return Token(op, string(c)) else @@ -287,28 +320,6 @@ function _peek_inner(state::LexerState) return end -""" - struct UnexpectedToken <: Exception - token::Token - end - -This error is thrown when we encounter an unexpected token when parsing the LP -file. No other information is available. - -TODO: we could improve this by storing line information or other context to help -the user diagnose the problem. -""" -struct UnexpectedToken <: Exception - token::Token -end - -function _expect(token::Token, kind::_TokenKind) - if token.kind != kind - throw(UnexpectedToken(token)) - end - return -end - """ _next_token_is(state::LexerState, kind::_TokenKind, n::Int = 1) @@ -323,19 +334,18 @@ end function _skip_newlines(state::LexerState) while _next_token_is(state, _TOKEN_NEWLINE) - read(state, Token) + _ = read(state, Token) end return end -# IDENTIFIER --> "string" +# IDENTIFIER := "string" # # There _are_ rules to what an identifier can be. We handle these when lexing. # Anything that makes it here is deemed acceptable. 
function _parse_variable(state::LexerState, cache::Cache)::MOI.VariableIndex _skip_newlines(state) - token = read(state, Token) - _expect(token, _TOKEN_IDENTIFIER) + token = read(state, Token, _TOKEN_IDENTIFIER) x = get(cache.variable_name_to_index, token.value, nothing) if x !== nothing return x @@ -370,17 +380,16 @@ function _parse_number(state::LexerState, cache::Cache{T})::T where {T} else throw(UnexpectedToken(token)) end - else - _expect(token, _TOKEN_NUMBER) end + _expect(token, _TOKEN_NUMBER) return parse(T, token.value) end # QUAD_TERM := # "+" QUAD_TERM # | "-" QUAD_TERM -# | [NUMBER] IDENTIFIER "^" "2" -# | [NUMBER] IDENTIFIER "*" IDENTIFIER +# | [NUMBER] [*] IDENTIFIER "^" "2" +# | [NUMBER] [*] IDENTIFIER "*" IDENTIFIER function _parse_quad_term( state::LexerState, cache::Cache{T}, @@ -388,29 +397,32 @@ function _parse_quad_term( ) where {T} _skip_newlines(state) if _next_token_is(state, _TOKEN_ADDITION) - read(state, Token) + _ = read(state, Token) return _parse_quad_term(state, cache, prefix) elseif _next_token_is(state, _TOKEN_SUBTRACTION) - read(state, Token) + _ = read(state, Token) return _parse_quad_term(state, cache, -prefix) end coef = prefix if _next_token_is(state, _TOKEN_NUMBER) coef = prefix * _parse_number(state, cache) end + if _next_token_is(state, _TOKEN_MULTIPLICATION) + _skip_newlines(state) + _ = read(state, Token) # Skip optional multiplication + end x1 = _parse_variable(state, cache) _skip_newlines(state) if _next_token_is(state, _TOKEN_EXPONENT) - read(state, Token) # ^ + _ = read(state, Token) # ^ _skip_newlines(state) - n = read(state, Token) - if n.kind != _TOKEN_NUMBER && n.value != "2" + n = read(state, Token, _TOKEN_NUMBER) + if n.value != "2" throw(UnexpectedToken(n)) end return MOI.ScalarQuadraticTerm(T(2) * coef, x1, x1) end - token = read(state, Token) - _expect(token, _TOKEN_MULTIPLICATION) + token = read(state, Token, _TOKEN_MULTIPLICATION) x2 = _parse_variable(state, cache) if x1 == x2 coef *= T(2) @@ -426,8 +438,7 @@ function _parse_quad_expression( cache::Cache{T}, prefix::T, ) where {T} - token = read(state, Token) - _expect(token, _TOKEN_OPEN_BRACKET) + token = read(state, Token, _TOKEN_OPEN_BRACKET) f = zero(MOI.ScalarQuadraticFunction{T}) push!(f.quadratic_terms, _parse_quad_term(state, cache, prefix)) while (p = peek(state, Token)) !== nothing @@ -438,9 +449,9 @@ function _parse_quad_expression( p = read(state, Token) push!(f.quadratic_terms, _parse_quad_term(state, cache, -prefix)) elseif p.kind == _TOKEN_NEWLINE - read(state, Token) + _ = read(state, Token) elseif p.kind == _TOKEN_CLOSE_BRACKET - read(state, Token) + _ = read(state, Token) break else return throw(UnexpectedToken(p)) @@ -448,10 +459,10 @@ function _parse_quad_expression( end _skip_newlines(state) if _next_token_is(state, _TOKEN_DIVISION) - read(state, Token) # / + _ = read(state, Token) # / # Must be /2 - n = read(state, Token) - if n.kind != _TOKEN_NUMBER && n.value != "2" + n = read(state, Token, _TOKEN_NUMBER) + if n.value != "2" throw(UnexpectedToken(n)) end for (i, term) in enumerate(f.quadratic_terms) @@ -481,11 +492,11 @@ function _parse_term( _skip_newlines(state) if _next_token_is(state, _TOKEN_ADDITION) # "+" TERM - read(state, Token) + _ = read(state, Token, _TOKEN_ADDITION) return _parse_term(state, cache, prefix) elseif _next_token_is(state, _TOKEN_SUBTRACTION) # "-" TERM - read(state, Token) + _ = read(state, Token, _TOKEN_SUBTRACTION) return _parse_term(state, cache, -prefix) elseif _next_token_is(state, _TOKEN_IDENTIFIER) # IDENTIFIER @@ -499,7 +510,7 
@@ function _parse_term( return MOI.ScalarAffineTerm(coef, x) elseif _next_token_is(state, _TOKEN_MULTIPLICATION) # NUMBER * IDENTIFIER - read(state, token) # skip * + _ = read(state, Token, _TOKEN_MULTIPLICATION) x = _parse_variable(state, cache) return MOI.ScalarAffineTerm(coef, x) else @@ -510,7 +521,7 @@ function _parse_term( # QUADRATIC_EXPRESSION return _parse_quad_expression(state, cache, prefix) end - return nothing + return throw(UnexpectedToken(peek(state, Token))) end function _add_to_expression!(f::MOI.ScalarQuadraticFunction{T}, x::T) where {T} @@ -547,7 +558,7 @@ function _parse_expression(state::LexerState, cache::Cache{T}) where {T} p = read(state, Token) _add_to_expression!(f, _parse_term(state, cache, -one(T))) elseif p.kind == _TOKEN_NEWLINE - read(state, Token) + _ = read(state, Token) else break end @@ -609,19 +620,19 @@ function _parse_set_prefix(state, cache) end end -# NAME --> [IDENTIFIER OP_COLON] +# NAME := [IDENTIFIER :] function _parse_optional_name(state::LexerState, cache::Cache) _skip_newlines(state) if _next_token_is(state, _TOKEN_IDENTIFIER, 1) && _next_token_is(state, _TOKEN_COLON, 2) name = read(state, Token) - read(state, Token) # Skip : + _ = read(state, Token) # Skip : return name.value end return nothing end -# OBJECTIVE --> [NAME] [EXPRESSION] +# OBJECTIVE := [NAME] [EXPRESSION] function _parse_objective(state::LexerState, cache::Cache) _ = _parse_optional_name(state, cache) _skip_newlines(state) @@ -663,7 +674,7 @@ function _add_bound(cache::Cache, x::MOI.VariableIndex, ::Nothing) return end -# BOUND --> +# BOUND := # IDENFITIER SET_SUFFIX # | SET_PREFIX IDENTIFIER # | SET_PREFIX IDENTIFIER SET_SUFFIX @@ -689,22 +700,28 @@ function _parse_bound(state, cache) return end +function _is_sos_constraint(state) + return _next_token_is(state, _TOKEN_IDENTIFIER, 1) && + _next_token_is(state, _TOKEN_COLON, 2) && + _next_token_is(state, _TOKEN_COLON, 3) +end + # SOS_CONSTRAINT := # [NAME] S1:: (IDENTIFIER:NUMBER)+ \n # | [NAME] S2:: (IDENTIFIER:NUMBER)+ \n # # The newline character is required. 
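
For reference, a hand-written sketch of the LP file fragment this grammar matches (the `S1`/`S2` tag selects the SOS type, each variable is followed by a colon and its weight, and the whole constraint must fit on one line):

    SOS
    c1: S1:: x:1 y:2 z:3
    End
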
function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T} - t = read(state, Token) # Si + t = read(state, Token, _TOKEN_IDENTIFIER) # Si if !(t.value == "S1" || t.value == "S2") throw(UnexpectedToken(t)) end - _expect(read(state, Token), _TOKEN_COLON) - _expect(read(state, Token), _TOKEN_COLON) + _ = read(state, Token, _TOKEN_COLON) + _ = read(state, Token, _TOKEN_COLON) f, w = MOI.VectorOfVariables(MOI.VariableIndex[]), T[] while true push!(f.variables, _parse_variable(state, cache)) - _expect(read(state, Token), _TOKEN_COLON) + _ = read(state, Token, _TOKEN_COLON) push!(w, _parse_number(state, cache)) if _next_token_is(state, _TOKEN_NEWLINE) break @@ -717,12 +734,6 @@ function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T} end end -function _is_sos_constraint(state) - return _next_token_is(state, _TOKEN_IDENTIFIER, 1) && - _next_token_is(state, _TOKEN_COLON, 2) && - _next_token_is(state, _TOKEN_COLON, 3) -end - function _is_indicator_constraint(state) return _next_token_is(state, _TOKEN_IDENTIFIER, 1) && _next_token_is(state, _TOKEN_EQUAL_TO, 2) && @@ -738,9 +749,8 @@ function _parse_indicator_constraint( cache::Cache{T}, ) where {T} z = _parse_variable(state, cache) - _expect(read(state, Token), _TOKEN_EQUAL_TO) - t = read(state, Token) - _expect(t, _TOKEN_NUMBER) + _ = read(state, Token, _TOKEN_EQUAL_TO) + t = read(state, Token, _TOKEN_NUMBER) indicator = if t.value == "0" MOI.ACTIVATE_ON_ZERO elseif t.value == "1" @@ -748,7 +758,7 @@ function _parse_indicator_constraint( else throw(UnexpectedToken(t)) end - _expect(read(state, Token), _TOKEN_IMPLIES) + _ = read(state, Token, _TOKEN_IMPLIES) f = _parse_expression(state, cache) set = _parse_set_suffix(state, cache) return MOI.add_constraint( diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index d2d8e8120a..7d4ce7707b 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -1202,6 +1202,236 @@ function test_unsupported_objectives() return end +function test_subject_to_name() + for (case, err) in [ + "subject to" => false, + "Subject To" => false, + "such that" => false, + "Such That" => false, + "st" => false, + "s.t." => false, + "subject that" => true, + "subject\nto" => true, + "s. t." 
=> true, + "such to" => true, + ] + io = IOBuffer("Minimize\nobj: x\n$case\n2x == 1\nBounds\nx free\nEnd") + seekstart(io) + model = MOI.FileFormats.LP.Model() + if err + @test_throws LP.UnexpectedToken read!(io, model) + else + read!(io, model) + out = IOBuffer() + write(out, model) + seekstart(out) + file = read(out, String) + @test occursin("subject to\nc1: 2 x = 1\n", file) + end + end + return +end + +function test_parse_number() + cache = LP.Cache(LP.Model{Float64}()) + for (input, result) in [ + "1" => 1.0, + "02" => 2.0, + "- 1" => -1.0, + "- -1" => 1.0, + "+ 1" => 1.0, + "+ -1" => -1.0, + "- + 1" => -1.0, + "+ + 1" => 1.0, + "+ - + 1" => -1.0, + "+ - + -1" => 1.0, + "inf" => Inf, + "-inf" => -Inf, + "- inf" => - Inf, + "iNf" => Inf, + "iNfinitY" => Inf, + "infinity" => Inf, + "1.23e+01" => 12.3, + "1.23e-1" => 0.123, + "1.23E-1" => 0.123, + "1.23E+3" => 1230.0, + ] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + @test LP._parse_number(state, cache) == result + end + for input in ["x", "abc", "ten"] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + @test_throws LP.UnexpectedToken LP._parse_number(state, cache) + end + return +end + +function test_parse_quad_term() + cache = LP.Cache(LP.Model{Float64}()) + # Diagonal + for (input, coef) in [ + "x * x" => 2.0, + "\nx * x" => 2.0, + "x\n * x" => 2.0, + "x * \n x" => 2.0, + "x^2" => 2.0, + "x ^ 2" => 2.0, + "+ x * x" => 2.0, + "+ 2 * x * x" => 4.0, + "- x * x" => -2.0, + "- 2 * x * x" => -4.0, + "-2 x * x" => -4.0, + "2.2 x * x" => 4.4, + ] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + term = LP._parse_quad_term(state, cache, 1.0) + x = cache.variable_name_to_index["x"] + @test term == MOI.ScalarQuadraticTerm(coef, x, x) + seekstart(io) + term = LP._parse_quad_term(state, cache, -1.0) + @test term == MOI.ScalarQuadraticTerm(-coef, x, x) + end + # Off-diagonal + for (input, coef) in [ + "x * y" => 1.0, + "\nx * y" => 1.0, + "x\n * y" => 1.0, + "x * \n y" => 1.0, + "+ x * y" => 1.0, + "+ 2 * x * y" => 2.0, + "- x * y" => -1.0, + "- 2 * x * y" => -2.0, + "2.2 * x * y" => 2.2, + "2.2 x * y" => 2.2, + ] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + term = LP._parse_quad_term(state, cache, 1.0) + x = cache.variable_name_to_index["x"] + y = cache.variable_name_to_index["y"] + @test term == MOI.ScalarQuadraticTerm(coef, x, y) + seekstart(io) + term = LP._parse_quad_term(state, cache, -1.0) + @test term == MOI.ScalarQuadraticTerm(-coef, x, y) + end + for input in ["x^", "x^x", "x^0", "x^1", "x^3", "x * 2 * x"] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + @test_throws LP.UnexpectedToken LP._parse_quad_term(state, cache, -1.0) + end + return +end + +function test_parse_term() + cache = LP.Cache(LP.Model{Float64}()) + for (input, coef) in [ + "x" => 1.0, + "+ x" => 1.0, + "- x" => -1.0, + "- -x" => 1.0, + "+ -x" => -1.0, + "2.0 x" => 2.0, + "3.0 x" => 3.0, + "2.0 * x" => 2.0, + "3.2 * x" => 3.2, + ] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + term = LP._parse_term(state, cache, 1.0) + x = cache.variable_name_to_index["x"] + @test term == MOI.ScalarAffineTerm(coef, x) + seekstart(io) + term = LP._parse_term(state, cache, -1.0) + @test term == MOI.ScalarAffineTerm(-coef, x) + end + for input in ["subject to", ">= 1"] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + @test_throws LP.UnexpectedToken LP._parse_term(state, cache, 1.0) + end + return +end + +function test_parse_quad_expression() + cache 
= LP.Cache(LP.Model{Float64}())
+    for input in ["x^2", "[ x^2 ]/", "[ x^2 ]/3"]
+        io = IOBuffer(input)
+        seekstart(io)
+        state = LP.LexerState(io)
+        @test_throws(
+            LP.UnexpectedToken,
+            LP._parse_quad_expression(state, cache, 1.0),
+        )
+    end
+    return
+end
+
+function test_parse_set_prefix()
+    cache = LP.Cache(LP.Model{Float64}())
+    for (input, set) in [
+        "1.0 <=" => MOI.GreaterThan(1.0),
+        "1.0 <" => MOI.GreaterThan(1.0),
+        "1.0 >=" => MOI.LessThan(1.0),
+        "1.0 >" => MOI.LessThan(1.0),
+        "1.0 ==" => MOI.EqualTo(1.0),
+        "1.0 =" => MOI.EqualTo(1.0),
+        # Theirs not to reason why, theirs but to do and die
+        "1.0 =<" => MOI.GreaterThan(1.0),
+        "1.0 =>" => MOI.LessThan(1.0),
+    ]
+        io = IOBuffer(input)
+        seekstart(io)
+        state = LP.LexerState(io)
+        @test LP._parse_set_prefix(state, cache) == set
+    end
+    for input in ["-> 1"]
+        io = IOBuffer(input)
+        seekstart(io)
+        state = LP.LexerState(io)
+        @test_throws LP.UnexpectedToken LP._parse_set_prefix(state, cache)
+    end
+    return
+end
+
+function test_parse_set_suffix()
+    cache = LP.Cache(LP.Model{Float64}())
+    for (input, set) in [
+        "free" => nothing,
+        "Free" => nothing,
+        ">= 1.0" => MOI.GreaterThan(1.0),
+        "> 1.0" => MOI.GreaterThan(1.0),
+        "<= 1.0" => MOI.LessThan(1.0),
+        "< 1.0" => MOI.LessThan(1.0),
+        "== 1.0" => MOI.EqualTo(1.0),
+        "= 1.0" => MOI.EqualTo(1.0),
+        # Theirs not to reason why, theirs but to do and die
+        "=< 1.0" => MOI.LessThan(1.0),
+        "=> 1.0" => MOI.GreaterThan(1.0),
+    ]
+        io = IOBuffer(input)
+        seekstart(io)
+        state = LP.LexerState(io)
+        @test LP._parse_set_suffix(state, cache) == set
+    end
+    for input in ["-> 1"]
+        io = IOBuffer(input)
+        seekstart(io)
+        state = LP.LexerState(io)
+        @test_throws LP.UnexpectedToken LP._parse_set_suffix(state, cache)
+    end
+    return
+end
+
 end  # module
 
 TestLP.runtests()

From ee1cb14c31c00c211fd8e662f10ee8f5f9e19450 Mon Sep 17 00:00:00 2001
From: Oscar Dowson
Date: Thu, 11 Sep 2025 14:22:24 +1200
Subject: [PATCH 06/10] Update

---
 src/FileFormats/LP/read.jl | 29 +++++++++++++++++++++++------
 test/FileFormats/LP/LP.jl  | 29 +++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl
index 0a17a0e478..e2eff1e1dd 100644
--- a/src/FileFormats/LP/read.jl
+++ b/src/FileFormats/LP/read.jl
@@ -23,8 +23,11 @@ end
 Read `io` in the LP file format and store the result in `model`.
 
 This reader attempts to follow the CPLEX LP format, because others like the
-lpsolve version are very...flexible...in how they accept input. Read more about
-them here: http://lpsolve.sourceforge.net
+lpsolve version are very...flexible...in how they accept input.
+
+Read more about the format here:
+ * http://lpsolve.sourceforge.net
+ * https://web.mit.edu/lpsolve/doc/CPLEX-format.htm
 """
 function Base.read!(io::IO, model::Model{T}) where {T}
     if !MOI.is_empty(model)
@@ -98,6 +101,7 @@ const _KEYWORDS = Dict(
     "such that" => :CONSTRAINTS,
     "st" => :CONSTRAINTS,
     "s.t." => :CONSTRAINTS,
+    "st." => :CONSTRAINTS,
     # BOUNDS
     "bounds" => :BOUNDS,
     "bound" => :BOUNDS,
@@ -244,7 +248,16 @@ function Base.read(state::LexerState, ::Type{Token}, kind::_TokenKind)
     return _expect(token, kind)
 end
 
-_is_idenfifier(c::Char) = !(isspace(c) || c in ('+', '-', '*', '^', ':'))
+# We're a bit more relaxed than typical, allowing any letter or digit, not just
+# ASCII.
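
To make the relaxed rule concrete, a sketch of how the two predicates defined below behave (the values follow directly from the definitions; `π` counts because `isletter` is true for any Unicode letter):

    LP._is_starting_identifier('π')  # true: any letter may start a name
    LP._is_starting_identifier('2')  # false: a digit cannot start a name
    LP._is_identifier('2')           # true: digits are fine after the first character
    LP._is_starting_identifier('*')  # false: `*` always lexes as an operator
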
+function _is_identifier(c::Char) + return isletter(c) || isdigit(c) || c in "!\"#\$%&()/,.;?@_`'{}|~" +end + +function _is_starting_identifier(c::Char) + return isletter(c) || c in "!\"#\$%&(),;?@_`'{}|~" +end + _is_number(c::Char) = isdigit(c) || c in ('.', 'e', 'E', '+', '-') function Base.peek(state::LexerState, ::Type{Token}, n::Int = 1) @@ -276,9 +289,9 @@ function _peek_inner(state::LexerState) read(state, Char) end return Token(_TOKEN_NUMBER, String(take!(buf))) - elseif isletter(c) || c == '_' # Identifier / keyword + elseif _is_starting_identifier(c) # Identifier / keyword buf = IOBuffer() - while (c = peek(state, Char)) !== nothing && _is_idenfifier(c) + while (c = peek(state, Char)) !== nothing && _is_identifier(c) write(buf, c) read(state, Char) end @@ -382,7 +395,11 @@ function _parse_number(state::LexerState, cache::Cache{T})::T where {T} end end _expect(token, _TOKEN_NUMBER) - return parse(T, token.value) + ret = tryparse(T, token.value) + if ret === nothing + throw(UnexpectedToken(token)) + end + return ret end # QUAD_TERM := diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index 7d4ce7707b..0f8c1a7ce8 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -1210,6 +1210,7 @@ function test_subject_to_name() "Such That" => false, "st" => false, "s.t." => false, + "st." => false, "subject that" => true, "subject\nto" => true, "s. t." => true, @@ -1232,6 +1233,33 @@ function test_subject_to_name() return end +function test_parse_variable() + cache = LP.Cache(LP.Model{Float64}()) + for input in [ + "x", + "X", + "e", + "abc!\"D", + "π", + "𝔼1π!~a", + "x!\"#\$%&()/,.;?@_`'{}|~", + "aAc2", + ] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + x = LP._parse_variable(state, cache) + @test cache.variable_name_to_index[input] == x + end + for input in ["2", "2x", ".x"] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + @test_throws LP.UnexpectedToken LP._parse_variable(state, cache) + end + return +end + function test_parse_number() cache = LP.Cache(LP.Model{Float64}()) for (input, result) in [ @@ -1338,6 +1366,7 @@ function test_parse_term() "- x" => -1.0, "- -x" => 1.0, "+ -x" => -1.0, + "2x" => 2.0, "2.0 x" => 2.0, "3.0 x" => 3.0, "2.0 * x" => 2.0, From b5702dff0a7b3303c4fb6c767330a6994bb2ff3f Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 14:23:37 +1200 Subject: [PATCH 07/10] Update --- test/FileFormats/LP/LP.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index 0f8c1a7ce8..8a31273449 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -1423,7 +1423,7 @@ function test_parse_set_prefix() state = LP.LexerState(io) @test LP._parse_set_prefix(state, cache) == set end - for input in ["-> 1"] + for input in ["1 ->"] io = IOBuffer(input) seekstart(io) state = LP.LexerState(io) From 2bf659258e98b3b9b53fe995b2b23cec3747d96c Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 15:07:20 +1200 Subject: [PATCH 08/10] Update --- src/FileFormats/LP/read.jl | 20 ++++++++++---------- test/FileFormats/LP/LP.jl | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index e2eff1e1dd..2c1c809ad0 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -275,10 +275,10 @@ end function _peek_inner(state::LexerState) while (c = peek(state, Char)) !== nothing if c == '\n' - read(state, Char) - return 
Token(_TOKEN_NEWLINE, "\n") + _ = read(state, Char) + return Token(_TOKEN_NEWLINE, nothing) elseif isspace(c) # Whitespace - read(state, Char) + _ = read(state, Char) elseif c == '\\' # Comment: backslash until newline while (c = read(state, Char)) !== nothing && c != '\n' end @@ -286,14 +286,14 @@ function _peek_inner(state::LexerState) buf = IOBuffer() while (c = peek(state, Char)) !== nothing && _is_number(c) write(buf, c) - read(state, Char) + _ = read(state, Char) end return Token(_TOKEN_NUMBER, String(take!(buf))) elseif _is_starting_identifier(c) # Identifier / keyword buf = IOBuffer() while (c = peek(state, Char)) !== nothing && _is_identifier(c) write(buf, c) - read(state, Char) + _ = read(state, Char) end val = String(take!(buf)) l_val = lowercase(val) @@ -315,17 +315,17 @@ function _peek_inner(state::LexerState) end return Token(_TOKEN_IDENTIFIER, val) elseif (op = get(_OPERATORS, c, nothing)) !== nothing - read(state, Char) # Skip c + _ = read(state, Char) # Skip c if c == '-' && peek(state, Char) == '>' - read(state, Char) - return Token(_TOKEN_IMPLIES, "->") + _ = read(state, Char) + return Token(_TOKEN_IMPLIES, nothing) elseif c == '=' && peek(state, Char) in ('<', '>') c = read(state, Char) # Allow =< and => as <= and >= - return Token(_OPERATORS[c], string(c)) + return Token(_OPERATORS[c], nothing) elseif c in ('<', '>', '=') && peek(state, Char) == '=' _ = read(state, Char) # Allow <=, >=, and == end - return Token(op, string(c)) + return Token(op, nothing) else throw(UnexpectedToken(Token(_TOKEN_UNKNOWN, "$c"))) end diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index 8a31273449..50a33a5cf8 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -1289,7 +1289,7 @@ function test_parse_number() state = LP.LexerState(io) @test LP._parse_number(state, cache) == result end - for input in ["x", "abc", "ten"] + for input in ["x", "abc", "ten", "1.1.1", "1eE1"] io = IOBuffer(input) seekstart(io) state = LP.LexerState(io) From 844f3d4273c4dcbe88f92c16be847b97df80b7cc Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 16:43:48 +1200 Subject: [PATCH 09/10] Add a better error handler --- src/FileFormats/LP/read.jl | 212 ++++++++++++++++++++++++++++--------- test/FileFormats/LP/LP.jl | 11 +- 2 files changed, 173 insertions(+), 50 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 2c1c809ad0..8a905d29cd 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -64,10 +64,23 @@ function Base.read!(io::IO, model::Model{T}) where {T} _parse_bound(state, cache) elseif keyword == :SOS _parse_constraint(state, cache) + elseif keyword == :END + _throw_unexpected_token( + state, + token, + "No file contents are allowed after `end`.", + ) else - throw(UnexpectedToken(token)) + _throw_unexpected_token( + state, + token, + "Parsing this section is not supported by the current reader.", + ) end end + # if keyword != :END + # TODO(odow): decide if we should throw an error here. 
+ # end for x in cache.variable_with_default_bound MOI.add_constraint(model, x, MOI.GreaterThan(0.0)) end @@ -141,6 +154,27 @@ const _KEYWORDS = Dict( _TOKEN_NEWLINE, _TOKEN_UNKNOWN, ) + +const _KIND_TO_MSG = Dict{_TokenKind,String}( + _TOKEN_KEYWORD => "a keyword", + _TOKEN_IDENTIFIER => "a variable name", + _TOKEN_NUMBER => "a number", + _TOKEN_ADDITION => "the symbol `+`", + _TOKEN_SUBTRACTION => "the symbol `-`", + _TOKEN_MULTIPLICATION => "the symbol `*`", + _TOKEN_DIVISION => "the symbol `/`", + _TOKEN_EXPONENT => "the symbol `^`", + _TOKEN_OPEN_BRACKET => "the symbol `[`", + _TOKEN_CLOSE_BRACKET => "the symbol `]`", + _TOKEN_GREATER_THAN => "the symbol `>=`", + _TOKEN_LESS_THAN => "the symbol `<=`", + _TOKEN_EQUAL_TO => "the symbol `==`", + _TOKEN_COLON => "the symbol `:`", + _TOKEN_IMPLIES => "the symbol `->`", + _TOKEN_NEWLINE => "a new line", + _TOKEN_UNKNOWN => "some unknown symbol", +) + """ const _OPERATORS::Dict{Char,_TokenKind} @@ -175,28 +209,7 @@ unprocessed value. struct Token kind::_TokenKind value::Union{Nothing,String} -end - -""" - struct UnexpectedToken <: Exception - token::Token - end - -This error is thrown when we encounter an unexpected token when parsing the LP -file. No other information is available. - -TODO: we could improve this by storing line information or other context to help -the user diagnose the problem. -""" -struct UnexpectedToken <: Exception - token::Token -end - -function _expect(token::Token, kind::_TokenKind) - if token.kind != kind - throw(UnexpectedToken(token)) - end - return token + pos::Int end """ @@ -216,9 +229,53 @@ It stores: """ mutable struct LexerState io::IO + line::Int peek_char::Union{Nothing,Char} peek_tokens::Vector{Token} - LexerState(io::IO) = new(io, nothing, Token[]) + LexerState(io::IO) = new(io, 1, nothing, Token[]) +end + +""" + struct UnexpectedToken <: Exception + token::Token + end + +This error is thrown when we encounter an unexpected token when parsing the LP +file. No other information is available. +""" +struct UnexpectedToken <: Exception + token::Token + line::Int + msg::String +end + +function _throw_unexpected_token(state::LexerState, token::Token, msg::String) + offset = min(40, token.pos) + seek(state.io, token.pos - offset) + line = String(read(state.io, 2 * offset)) + i = something(findprev('\n', line, offset-1), 0) + j = something(findnext('\n', line, offset), length(line) + 1) + help = string(line[i+1:j-1], "\n", " "^(offset - i + - 1), "^\n", msg) + return throw(UnexpectedToken(token, state.line, help)) +end + +function Base.showerror(io::IO, err::UnexpectedToken) + return print( + io, + "Error parsing LP file. Got an unexpected token on line $(err.line):\n", + err.msg, + ) +end + +function _expect(state::LexerState, token::Token, kind::_TokenKind) + if token.kind != kind + _throw_unexpected_token( + state, + token, + string("We expected this token to be ", _KIND_TO_MSG[kind]), + ) + end + return token end function Base.peek(state::LexerState, ::Type{Char}) @@ -236,8 +293,12 @@ end function Base.read(state::LexerState, ::Type{Token}) token = peek(state, Token, 1) - if isempty(state.peek_tokens) - throw(UnexpectedToken(Token(_TOKEN_UNKNOWN, "EOF"))) + if isempty(state.peek_tokens) + _throw_unexpected_token( + state, + Token(_TOKEN_UNKNOWN, "EOF", position(state.io)), + "Unexpected end to the file. 
We weren't finished yet.",
+        )
     end
     popfirst!(state.peek_tokens)
     return token
@@ -245,7 +306,7 @@ end
 
 function Base.read(state::LexerState, ::Type{Token}, kind::_TokenKind)
     token = read(state, Token)
-    return _expect(token, kind)
+    return _expect(state, token, kind)
 end
 
 # We're a bit more relaxed than typical, allowing any letter or digit, not just
@@ -274,9 +335,11 @@ end
 
 function _peek_inner(state::LexerState)
     while (c = peek(state, Char)) !== nothing
+        pos = position(state.io)
         if c == '\n'
+            state.line += 1
             _ = read(state, Char)
-            return Token(_TOKEN_NEWLINE, nothing)
+            return Token(_TOKEN_NEWLINE, nothing, pos)
         elseif isspace(c) # Whitespace
             _ = read(state, Char)
         elseif c == '\\' # Comment: backslash until newline
@@ -288,7 +351,7 @@ function _peek_inner(state::LexerState)
             write(buf, c)
             _ = read(state, Char)
         end
-        return Token(_TOKEN_NUMBER, String(take!(buf)))
+        return Token(_TOKEN_NUMBER, String(take!(buf)), pos)
     elseif _is_starting_identifier(c) # Identifier / keyword
         buf = IOBuffer()
         while (c = peek(state, Char)) !== nothing && _is_identifier(c)
@@ -301,33 +364,37 @@ function _peek_inner(state::LexerState)
         val = String(take!(buf))
         l_val = lowercase(val)
         if l_val == "subject"
             t = peek(state, Token)
             if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "to"
                 _ = read(state, Token) # Skip "to"
-                return Token(_TOKEN_KEYWORD, "CONSTRAINTS")
+                return Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos)
             end
         elseif l_val == "such"
             t = peek(state, Token)
             if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "that"
                 _ = read(state, Token) # Skip "such"
-                return Token(_TOKEN_KEYWORD, "CONSTRAINTS")
+                return Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos)
             end
         end
         if (kw = get(_KEYWORDS, l_val, nothing)) !== nothing
-            return Token(_TOKEN_KEYWORD, string(kw))
+            return Token(_TOKEN_KEYWORD, string(kw), pos)
         end
-        return Token(_TOKEN_IDENTIFIER, val)
+        return Token(_TOKEN_IDENTIFIER, val, pos)
     elseif (op = get(_OPERATORS, c, nothing)) !== nothing
         _ = read(state, Char) # Skip c
         if c == '-' && peek(state, Char) == '>'
             _ = read(state, Char)
-            return Token(_TOKEN_IMPLIES, nothing)
+            return Token(_TOKEN_IMPLIES, nothing, pos)
         elseif c == '=' && peek(state, Char) in ('<', '>')
             c = read(state, Char) # Allow =< and => as <= and >=
-            return Token(_OPERATORS[c], nothing)
+            return Token(_OPERATORS[c], nothing, pos)
         elseif c in ('<', '>', '=') && peek(state, Char) == '='
             _ = read(state, Char) # Allow <=, >=, and ==
         end
-        return Token(op, nothing)
+        return Token(op, nothing, pos)
     else
-        throw(UnexpectedToken(Token(_TOKEN_UNKNOWN, "$c")))
+        _throw_unexpected_token(
+            state,
+            Token(_TOKEN_UNKNOWN, "$c", pos),
+            "This character is not supported in an LP file.",
+        )
     end
         end
     end
     return
@@ -391,13 +458,21 @@ function _parse_number(state::LexerState, cache::Cache{T})::T where {T}
         if v == "inf" || v == "infinity"
             return typemax(T)
         else
-            throw(UnexpectedToken(token))
+            _throw_unexpected_token(
+                state,
+                token,
+                "We expected this to be a number.",
+            )
         end
     end
-    _expect(token, _TOKEN_NUMBER)
+    _expect(state, token, _TOKEN_NUMBER)
     ret = tryparse(T, token.value)
     if ret === nothing
-        throw(UnexpectedToken(token))
+        _throw_unexpected_token(
+            state,
+            token,
+            "We expected this to be a number.",
+        )
     end
     return ret
 end
@@ -435,7 +510,7 @@ function _parse_quad_term(
         _skip_newlines(state)
         n = read(state, Token, _TOKEN_NUMBER)
         if n.value != "2"
-            throw(UnexpectedToken(n))
+            _throw_unexpected_token(state, n, "Only `^ 2` is supported.")
         end
         return MOI.ScalarQuadraticTerm(T(2) * coef, x1, x1)
     end
@@ -471,7 +546,11 @@ function _parse_quad_expression(
             _ = read(state, Token)
             break
         else
-            return throw(UnexpectedToken(p))
+            _throw_unexpected_token(
+                state,
+                p,
+                "We expected this to be a ] to end the quadratic expression.",
+            )
         end
     end
     _skip_newlines(state)
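
To show what this machinery produces, a sketch of a rendered error for a malformed quadratic term (the layout follows the `showerror` method above; the line number, source snippet, and caret position are illustrative):

    Error parsing LP file. Got an unexpected token on line 2:
    obj: [ x ^ 3 ] / 2
               ^
    Only `^ 2` is supported.
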
@@ -480,7 +559,11 @@ function _parse_quad_expression(
         # Must be /2
         n = read(state, Token, _TOKEN_NUMBER)
         if n.value != "2"
-            throw(UnexpectedToken(n))
+            _throw_unexpected_token(
+                state,
+                n,
+                "The only supported value here is `] / 2`.",
+            )
         end
         for (i, term) in enumerate(f.quadratic_terms)
             f.quadratic_terms[i] = MOI.ScalarQuadraticTerm(
@@ -530,7 +613,9 @@ function _parse_term(
         _ = read(state, Token, _TOKEN_MULTIPLICATION)
         x = _parse_variable(state, cache)
         return MOI.ScalarAffineTerm(coef, x)
-    else
+    elseif _next_token_is(state, _TOKEN_NEWLINE) ||
+           _next_token_is(state, _TOKEN_ADDITION) ||
+           _next_token_is(state, _TOKEN_SUBTRACTION)
         # NUMBER
         return coef
     end
@@ -538,7 +623,12 @@ function _parse_term(
         # QUADRATIC_EXPRESSION
         return _parse_quad_expression(state, cache, prefix)
     end
-    return throw(UnexpectedToken(peek(state, Token)))
+    token = peek(state, Token)
+    return _throw_unexpected_token(
+        state,
+        token,
+        "Got $(_KIND_TO_MSG[token.kind]), But we expected this to be a new term in the expression.",
+    )
 end
 
 function _add_to_expression!(f::MOI.ScalarQuadraticFunction{T}, x::T) where {T}
@@ -611,7 +701,11 @@ function _parse_set_suffix(state, cache)
         rhs = _parse_number(state, cache)
         return MOI.EqualTo(rhs)
     else
-        throw(UnexpectedToken(p))
+        _throw_unexpected_token(
+            state,
+            p,
+            "We expected this to be an inequality like `>=`, `<=`, or `==`.",
+        )
     end
 end
 
@@ -633,7 +727,11 @@ function _parse_set_prefix(state, cache)
     elseif p.kind == _TOKEN_EQUAL_TO
         return MOI.EqualTo(lhs)
     else
-        throw(UnexpectedToken(p))
+        _throw_unexpected_token(
+            state,
+            p,
+            "We expected this to be an inequality like `>=`, `<=`, or `==`.",
+        )
     end
 end
 
@@ -731,12 +829,24 @@ end
 function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T}
     t = read(state, Token, _TOKEN_IDENTIFIER) # Si
     if !(t.value == "S1" || t.value == "S2")
-        throw(UnexpectedToken(t))
+        _throw_unexpected_token(
+            state,
+            t,
+            "This must be either `S1` for SOS-I or `S2` for SOS-II.",
+        )
     end
     _ = read(state, Token, _TOKEN_COLON)
     _ = read(state, Token, _TOKEN_COLON)
     f, w = MOI.VectorOfVariables(MOI.VariableIndex[]), T[]
     while true
+        if _next_token_is(state, _TOKEN_NEWLINE)
+            t = peek(state, Token)
+            _throw_unexpected_token(
+                state,
+                t,
+                "SOS constraints cannot be spread across lines.",
+            )
+        end
         push!(f.variables, _parse_variable(state, cache))
         _ = read(state, Token, _TOKEN_COLON)
         push!(w, _parse_number(state, cache))
@@ -773,7 +883,11 @@ function _parse_indicator_constraint(
     elseif t.value == "1"
         MOI.ACTIVATE_ON_ONE
     else
-        throw(UnexpectedToken(t))
+        _throw_unexpected_token(
+            state,
+            t,
+            "This must be either `= 0` or `= 1`.",
+        )
     end
     _ = read(state, Token, _TOKEN_IMPLIES)
     f = _parse_expression(state, cache)
diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl
index 50a33a5cf8..85f6523449 100644
--- a/test/FileFormats/LP/LP.jl
+++ b/test/FileFormats/LP/LP.jl
@@ -1091,7 +1091,16 @@ function test_invalid_token_in_sos()
         """,
     )
     seekstart(io)
-    @test_throws LP.UnexpectedToken read!(io, model)
+    contents = try
+        read!(io, model)
+    catch err
+        sprint(showerror, err)
+    end
+    @test contents == """
+        Error parsing LP file. 
Got an unexpected token on line 5: + c11: S1:: x 1.0 y 2.0 + ^ + We expected this token to be the symbol `:`""" return end From b07898c33e14878a21323685e1c7d2e681a06ac0 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 16:46:22 +1200 Subject: [PATCH 10/10] Update --- src/FileFormats/LP/read.jl | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 8a905d29cd..ceeed12e4a 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -255,7 +255,7 @@ function _throw_unexpected_token(state::LexerState, token::Token, msg::String) line = String(read(state.io, 2 * offset)) i = something(findprev('\n', line, offset-1), 0) j = something(findnext('\n', line, offset), length(line) + 1) - help = string(line[i+1:j-1], "\n", " "^(offset - i + - 1), "^\n", msg) + help = string(line[(i+1):(j-1)], "\n", " "^(offset - i + - 1), "^\n", msg) return throw(UnexpectedToken(token, state.line, help)) end @@ -293,7 +293,7 @@ end function Base.read(state::LexerState, ::Type{Token}) token = peek(state, Token, 1) - if isempty(state.peek_tokens) + if isempty(state.peek_tokens) _throw_unexpected_token( state, Token(_TOKEN_UNKNOWN, "EOF", position(state.io)), @@ -627,7 +627,7 @@ function _parse_term( return _throw_unexpected_token( state, token, - "Got $(_KIND_TO_MSG[token.kind]), But we expected this to be a new term in the expression.", + "Got $(_KIND_TO_MSG[token.kind]), but we expected this to be a new term in the expression.", ) end @@ -883,11 +883,7 @@ function _parse_indicator_constraint( elseif t.value == "1" MOI.ACTIVATE_ON_ONE else - _throw_unexpected_token( - state, - t, - "This must be either `= 0` or `= 1`.", - ) + _throw_unexpected_token(state, t, "This must be either `= 0` or `= 1`.") end _ = read(state, Token, _TOKEN_IMPLIES) f = _parse_expression(state, cache)
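
Putting the series together, an end-to-end sketch of the new reader (the file contents are illustrative; the write-back at the end uses the same pattern as the tests above):

    import MathOptInterface as MOI

    model = MOI.FileFormats.LP.Model()
    io = IOBuffer("""
    Minimize
    obj: x + 2 y
    subject to
    c1: x + y >= 1
    Bounds
    x free
    End
    """)
    seekstart(io)
    read!(io, model)          # the recursive descent parser from this series
    out = IOBuffer()
    write(out, model)         # round-trip through the existing writer
    seekstart(out)
    print(read(out, String))
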