From 4c35875025dedcd6c2e4060239e1dd60de03b3f3 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Wed, 10 Sep 2025 20:02:25 +1200 Subject: [PATCH 01/10] [FileFormats.LP] write a proper recursive descent parser --- src/FileFormats/LP/LP.jl | 653 +--------------- src/FileFormats/LP/read.jl | 734 ++++++++++++++++++ test/FileFormats/LP/LP.jl | 91 ++- .../models/invalid_affine_term_constraint.lp | 13 - .../models/invalid_affine_term_objective.lp | 13 - test/FileFormats/LP/models/invalid_bound.lp | 13 - 6 files changed, 779 insertions(+), 738 deletions(-) create mode 100644 src/FileFormats/LP/read.jl delete mode 100644 test/FileFormats/LP/models/invalid_affine_term_constraint.lp delete mode 100644 test/FileFormats/LP/models/invalid_affine_term_objective.lp delete mode 100644 test/FileFormats/LP/models/invalid_bound.lp diff --git a/src/FileFormats/LP/LP.jl b/src/FileFormats/LP/LP.jl index bfc53cdc06..6d93c815dd 100644 --- a/src/FileFormats/LP/LP.jl +++ b/src/FileFormats/LP/LP.jl @@ -512,657 +512,6 @@ function Base.write(io::IO, model::Model{T}) where {T} return end -# ============================================================================== -# -# `Base.read!` -# -# ============================================================================== - -const _KW_OBJECTIVE = Val{:objective}() -const _KW_CONSTRAINTS = Val{:constraints}() -const _KW_BOUNDS = Val{:bounds}() -const _KW_INTEGER = Val{:integer}() -const _KW_BINARY = Val{:binary}() -const _KW_SOS = Val{:sos}() -const _KW_END = Val{:end}() - -const _KEYWORDS = Dict( - # _KW_OBJECTIVE - "max" => _KW_OBJECTIVE, - "maximize" => _KW_OBJECTIVE, - "maximise" => _KW_OBJECTIVE, - "maximum" => _KW_OBJECTIVE, - "min" => _KW_OBJECTIVE, - "minimize" => _KW_OBJECTIVE, - "minimise" => _KW_OBJECTIVE, - "minimum" => _KW_OBJECTIVE, - # _KW_CONSTRAINTS - "subject to" => _KW_CONSTRAINTS, - "such that" => _KW_CONSTRAINTS, - "st" => _KW_CONSTRAINTS, - "s.t." 
=> _KW_CONSTRAINTS, - # _KW_BOUNDS - "bounds" => _KW_BOUNDS, - "bound" => _KW_BOUNDS, - # _KW_INTEGER - "gen" => _KW_INTEGER, - "general" => _KW_INTEGER, - "generals" => _KW_INTEGER, - "integer" => _KW_INTEGER, - "integers" => _KW_INTEGER, - # _KW_BINARY - "bin" => _KW_BINARY, - "binary" => _KW_BINARY, - "binaries" => _KW_BINARY, - # _KW_SOS - "sos" => _KW_SOS, - # _KW_END - "end" => _KW_END, -) - -mutable struct _ReadCache{T} - objective::MOI.ScalarAffineFunction{T} - quad_obj_terms::Vector{MOI.ScalarQuadraticTerm{T}} - constraint_function::MOI.ScalarAffineFunction{T} - quad_terms::Vector{MOI.ScalarQuadraticTerm{T}} - constraint_name::String - num_constraints::Int - name_to_variable::Dict{String,MOI.VariableIndex} - has_default_bound::Set{MOI.VariableIndex} - indicator::Union{Nothing,Pair{MOI.VariableIndex,MOI.ActivationCondition}} - function _ReadCache{T}() where {T} - return new( - zero(MOI.ScalarAffineFunction{T}), - MOI.ScalarQuadraticTerm{T}[], - zero(MOI.ScalarAffineFunction{T}), - MOI.ScalarQuadraticTerm{T}[], - "", - 0, - Dict{String,MOI.VariableIndex}(), - Set{MOI.VariableIndex}(), - nothing, - ) - end -end - -function _get_variable_from_name( - model::Model{T}, - cache::_ReadCache{T}, - name::String, -) where {T} - current_variable = get(cache.name_to_variable, name, nothing) - if current_variable !== nothing - return current_variable - end - options = get_options(model) - if length(name) > options.maximum_length - error("Name exceeds maximum length: $name") - elseif match(r"^([\.0-9])", name) !== nothing - error("Name starts with invalid character: $name") - elseif match(_NAME_REG, name) !== nothing - error("Name contains with invalid character: $name") - end - x = MOI.add_variable(model) - MOI.set(model, MOI.VariableName(), x, name) - # By default, all variables have a lower bound of 0 unless otherwise - # specified. 
- MOI.add_constraint(model, x, MOI.GreaterThan(zero(T))) - push!(cache.has_default_bound, x) - cache.name_to_variable[name] = x - return x -end - -_tokenize(line::AbstractString) = String.(split(line, " "; keepempty = false)) - -@enum( - _TokenType, - _TOKEN_VARIABLE, - _TOKEN_COEFFICIENT, - _TOKEN_SIGN, - _TOKEN_QUADRATIC_OPEN, - _TOKEN_QUADRATIC_CLOSE, - _TOKEN_QUADRATIC_DIAG, - _TOKEN_QUADRATIC_OFF_DIAG, -) - -function _parse_token(::Type{T}, token::String) where {T} - if token == "+" - return _TOKEN_SIGN, one(T) - elseif token == "-" - return _TOKEN_SIGN, -one(T) - elseif startswith(token, "[") - return _TOKEN_QUADRATIC_OPEN, zero(T) - elseif startswith(token, "]") - return _TOKEN_QUADRATIC_CLOSE, zero(T) - elseif token == "^" - return _TOKEN_QUADRATIC_DIAG, zero(T) - elseif token == "*" - return _TOKEN_QUADRATIC_OFF_DIAG, zero(T) - end - coef = tryparse(T, token) - if coef === nothing - return _TOKEN_VARIABLE, token - else - return _TOKEN_COEFFICIENT, coef - end -end - -function _get_term(token_types, token_values::Vector{T}, offset) where {T} - coef = one(T) - if token_types[offset] == _TOKEN_SIGN - coef = token_values[offset] - offset += 1 - end - if token_types[offset] == _TOKEN_COEFFICIENT - coef *= token_values[offset] - offset += 1 - elseif token_types[offset] == _TOKEN_SIGN - error("Invalid line") - end - if offset > length(token_types) || token_types[offset] == _TOKEN_SIGN - return coef, offset # It's a standalone constant - end - if token_types[offset] == _TOKEN_QUADRATIC_OPEN - return _get_term(token_types, token_values, offset + 1) - end - @assert token_types[offset] == _TOKEN_VARIABLE - x = MOI.VariableIndex(Int64(token_values[offset])) - offset += 1 - if offset > length(token_types) || - token_types[offset] in (_TOKEN_SIGN, _TOKEN_COEFFICIENT) - return MOI.ScalarAffineTerm(coef, x), offset - end - term = if token_types[offset] == _TOKEN_QUADRATIC_DIAG - MOI.ScalarQuadraticTerm(coef, x, x) - else - @assert token_types[offset] == _TOKEN_QUADRATIC_OFF_DIAG - y = MOI.VariableIndex(Int64(token_values[offset+1])) - MOI.ScalarQuadraticTerm(coef, x, y) - end - if get(token_types, offset + 2, nothing) == _TOKEN_QUADRATIC_CLOSE - return term, offset + 3 - else - return term, offset + 2 - end -end - -_half(x) = x / 2 -_half(x::Integer) = div(x, 2) - -function _parse_function( - f::MOI.ScalarAffineFunction{T}, - model::Model{T}, - cache::_ReadCache{T}, - tokens::Vector{String}, -) where {T} - N = length(tokens) - token_types = Vector{_TokenType}(undef, N) - token_values = Vector{T}(undef, N) - for i in 1:length(tokens) - token_type, token = _parse_token(T, tokens[i]) - token_types[i] = token_type - if token_type in (_TOKEN_SIGN, _TOKEN_COEFFICIENT) - token_values[i] = token::T - elseif token_type in (_TOKEN_QUADRATIC_OPEN, _TOKEN_QUADRATIC_CLOSE) - token_values[i] = zero(T) - elseif token_type in (_TOKEN_QUADRATIC_DIAG, _TOKEN_QUADRATIC_OFF_DIAG) - token_values[i] = zero(T) - else - @assert token_type == _TOKEN_VARIABLE - x = _get_variable_from_name(model, cache, token::String) - # A cheat for type-stability. 
Store `T` of the variable index - token_values[i] = T(x.value) - end - end - offset = 1 - while offset <= length(tokens) - term, offset = _get_term(token_types, token_values, offset) - if term isa MOI.ScalarAffineTerm{T} - push!(f.terms, term::MOI.ScalarAffineTerm{T}) - elseif term isa MOI.ScalarQuadraticTerm{T} - push!(cache.quad_terms, term::MOI.ScalarQuadraticTerm{T}) - if tokens[offset-1] in ("]", "]/2") - is_half = tokens[offset-1] == "]/2" - for (i, term) in enumerate(cache.quad_terms) - x, y = term.variable_1, term.variable_2 - coef = (x == y ? 2 : 1) * term.coefficient - if is_half - coef = _half(coef) - end - cache.quad_terms[i] = MOI.ScalarQuadraticTerm(coef, x, y) - end - end - else - f.constant += term::T - end - end - return -end - -# _KW_OBJECTIVE - -_set_objective_sense(::Any, ::Model, ::String) = nothing - -function _set_objective_sense( - ::typeof(_KW_OBJECTIVE), - model::Model, - sense::String, -) - if sense in ("max", "maximize", "maximise", "maximum") - MOI.set(model, MOI.ObjectiveSense(), MOI.MAX_SENSE) - else - @assert sense in ("min", "minimize", "minimise", "minimum") - MOI.set(model, MOI.ObjectiveSense(), MOI.MIN_SENSE) - end - return -end - -function _parse_section( - ::typeof(_KW_OBJECTIVE), - model::Model, - cache::_ReadCache, - line::AbstractString, -) - if occursin(":", line) # Strip name of the objective - m = match(r"(.*?)\:(.*)", line)::RegexMatch - line = String(m[2]::AbstractString) - end - if occursin("^", line) - line = replace(line, "^" => " ^ ") - end - if occursin(r"\][\s/][\s/]+2", line) - line = replace(line, r"\][\s/][\s/]+2" => "]/2") - end - tokens = _tokenize(line) - if length(tokens) == 0 - # Can happen if the name of the objective is on one line and the - # expression is on the next. - return - end - _parse_function(cache.objective, model, cache, tokens) - append!(cache.quad_obj_terms, cache.quad_terms) - empty!(cache.quad_terms) - return -end - -# _KW_CONSTRAINTS - -function _parse_section( - ::typeof(_KW_CONSTRAINTS), - model::Model{T}, - cache::_ReadCache{T}, - line::AbstractString, -) where {T} - # SOS constraints should be in their own "SOS" section, but we can also - # recognize them if they're mixed into the constraint section. - if match(r" S([1-2])\w*:: ", line) !== nothing - _parse_section(_KW_SOS, model, cache, line) - return - end - if isempty(cache.constraint_name) - if occursin(":", line) - m = match(r"(.*?)\:(.*)", line)::RegexMatch - cache.constraint_name = String(m[1]::AbstractString) - line = String(m[2]::AbstractString) - else - # Give it a temporary name for now - cache.constraint_name = "R$(cache.num_constraints)" - end - end - if cache.indicator === nothing - if (m = match(r"\s*(.+?)\s*=\s*(0|1)\s*->(.+)", line)) !== nothing - z = _get_variable_from_name(model, cache, String(m[1])) - cond = m[2] == "0" ? MOI.ACTIVATE_ON_ZERO : MOI.ACTIVATE_ON_ONE - cache.indicator = z => cond - line = String(m[3]) - end - end - if occursin("^", line) - # Simplify parsing of constraints with ^2 terms by turning them into - # explicit " ^ 2" terms. This avoids ambiguity when parsing names. - line = replace(line, "^" => " ^ ") - end - if occursin(r"\][\s/][\s/]+2", line) - # Simplify parsing of ]/2 end blocks, which may contain whitespace. - line = replace(line, r"\][\s/][\s/]+2" => "]/2") - end - tokens = _tokenize(line) - if length(tokens) == 0 - # Can happen if the name is on one line and the constraint on the next. - return - end - # This checks if the constaint is finishing on this line. 
- constraint_set = nothing - if length(tokens) >= 2 && tokens[end-1] in ("<", "<=", ">", ">=", "=", "==") - rhs = parse(T, pop!(tokens)) - sym = pop!(tokens) - constraint_set = if sym in ("<", "<=") - MOI.LessThan(rhs) - elseif sym in (">", ">=") - MOI.GreaterThan(rhs) - else - @assert sym in ("=", "==") - MOI.EqualTo(rhs) - end - end - _parse_function(cache.constraint_function, model, cache, tokens) - if constraint_set !== nothing - f = if isempty(cache.quad_terms) - cache.constraint_function - else - MOI.ScalarQuadraticFunction( - cache.quad_terms, - cache.constraint_function.terms, - cache.constraint_function.constant, - ) - end - if cache.indicator !== nothing - f = MOI.Utilities.operate(vcat, T, cache.indicator[1], f) - constraint_set = MOI.Indicator{cache.indicator[2]}(constraint_set) - end - c = MOI.add_constraint(model, f, constraint_set) - MOI.set(model, MOI.ConstraintName(), c, cache.constraint_name) - cache.num_constraints += 1 - empty!(cache.constraint_function.terms) - empty!(cache.quad_terms) - cache.constraint_function.constant = zero(T) - cache.constraint_name = "" - cache.indicator = nothing - end - return -end - -# _KW_BOUNDS - -function _parse_float(::Type{T}, token::String) where {T} - coef = lowercase(token) - if coef in ("-inf", "-infinity") - return typemin(T) - elseif coef in ("+inf", "+infinity") - return typemax(T) - end - return tryparse(T, coef) -end - -# Yes, the last elements here are really accepted by CPLEX... -_is_less_than(token) = token in ("<=", "<", "=<") -_is_greater_than(token) = token in (">=", ">", "=>") -_is_equal_to(token) = token in ("=", "==") - -function _parse_section( - ::typeof(_KW_BOUNDS), - model::Model{T}, - cache::_ReadCache{T}, - line::AbstractString, -) where {T} - tokens = _tokenize(line) - if length(tokens) == 2 && lowercase(tokens[2]) == "free" - x = _get_variable_from_name(model, cache, tokens[1]) - _delete_default_lower_bound_if_present(model, cache, x) - return - end - lb, ub, name = nothing, nothing, "" - if length(tokens) == 5 - name = tokens[3] - if _is_less_than(tokens[2]) && _is_less_than(tokens[4]) - lb = _parse_float(T, tokens[1])::T - ub = _parse_float(T, tokens[5])::T - elseif _is_greater_than(tokens[2]) && _is_greater_than(tokens[4]) - lb = _parse_float(T, tokens[5])::T - ub = _parse_float(T, tokens[1])::T - else - error("Unable to parse bound due to invalid inequalities: $(line)") - end - elseif length(tokens) == 3 - lhs, rhs = _parse_float(T, tokens[1]), _parse_float(T, tokens[3]) - if lhs === nothing # name [comparison] bound - @assert rhs !== nothing - name = tokens[1] - if _is_less_than(tokens[2]) - # name <= bound - ub = rhs - elseif _is_greater_than(tokens[2]) - # name >= bound - lb = rhs - elseif _is_equal_to(tokens[2]) - lb = ub = rhs - else - error( - "Unable to parse bound due to invalid inequalities: $(line)", - ) - end - else # bound [comparison] name - @assert rhs === nothing - name = tokens[3] - if _is_less_than(tokens[2]) - # bound <= name - lb = lhs - elseif _is_greater_than(tokens[2]) - # bound >= name - ub = lhs - elseif _is_equal_to(tokens[2]) - lb = ub = lhs - else - error( - "Unable to parse bound due to invalid inequalities: $(line)", - ) - end - end - else - error("Unable to parse bound: $(line)") - end - x = _get_variable_from_name(model, cache, name) - if lb !== nothing && ub !== nothing - if lb == ub - _delete_default_lower_bound_if_present(model, cache, x) - MOI.add_constraint(model, x, MOI.EqualTo(lb)) - return - elseif typemin(T) < lb < ub < typemax(T) - 
_delete_default_lower_bound_if_present(model, cache, x) - # Do not add MOI.Interval constraints because we want to follow - # JuMP's convention of adding separate lower and upper bounds. - MOI.add_constraint(model, x, MOI.GreaterThan(lb)) - MOI.add_constraint(model, x, MOI.LessThan(ub)) - return - elseif lb == typemin(T) - _delete_default_lower_bound_if_present(model, cache, x) - if ub == typemax(T) - return # Explicitly free variable - end - end - end - if lb !== nothing && typemin(T) < lb - _delete_default_lower_bound_if_present(model, cache, x) - MOI.add_constraint(model, x, MOI.GreaterThan(lb)) - end - if ub !== nothing && ub < typemax(T) - if ub < 0 - # We only need to delete the default lower bound if the upper bound - # is less than 0. - _delete_default_lower_bound_if_present(model, cache, x) - end - MOI.add_constraint(model, x, MOI.LessThan(ub)) - end - return -end - -function _delete_default_lower_bound_if_present( - model::Model{T}, - cache, - x, -) where {T} - if !(x in cache.has_default_bound) - return - end - c = MOI.ConstraintIndex{MOI.VariableIndex,MOI.GreaterThan{T}}(x.value) - MOI.delete(model, c) - delete!(cache.has_default_bound, x) - return -end - -# _KW_INTEGER - -function _parse_section(::typeof(_KW_INTEGER), model, cache, line) - for token in _tokenize(line) - x = _get_variable_from_name(model, cache, token) - MOI.add_constraint(model, x, MOI.Integer()) - end - return -end - -# _KW_BINARY - -function _parse_section(::typeof(_KW_BINARY), model, cache, line) - for token in _tokenize(line) - x = _get_variable_from_name(model, cache, token) - MOI.add_constraint(model, x, MOI.ZeroOne()) - end - return -end - -# _KW_SOS - -function _parse_section( - ::typeof(_KW_SOS), - model::Model{T}, - cache::_ReadCache{T}, - line::AbstractString, -) where {T} - # SOS constraints can have all manner of whitespace issues with them. - # Normalize them here before attempting to do anything else. - line = replace(line, r"\s+:\s+" => ":") - line = replace(line, r"\s+::" => "::") - tokens = _tokenize(line) - if length(tokens) < 3 - error("Malformed SOS constraint: $(line)") - end - name = String(split(tokens[1], ":")[1]) - if tokens[2] == "S1::" - order = 1 - elseif tokens[2] == "S2::" - order = 2 - else - error("SOS of type $(tokens[2]) not recognised") - end - variables, weights = MOI.VariableIndex[], T[] - for token in tokens[3:end] - items = String.(split(token, ":")) - if length(items) != 2 - error("Invalid token in SOS constraint: $(token)") - end - push!(variables, _get_variable_from_name(model, cache, items[1])) - push!(weights, parse(T, items[2])) - end - c_ref = if tokens[2] == "S1::" - MOI.add_constraint(model, variables, MOI.SOS1(weights)) - else - @assert tokens[2] == "S2::" - MOI.add_constraint(model, variables, MOI.SOS2(weights)) - end - MOI.set(model, MOI.ConstraintName(), c_ref, name) - return -end - -# _KW_END - -function _parse_section( - ::typeof(_KW_END), - ::Model, - ::_ReadCache, - line::AbstractString, -) - return error("Corrupted LP File. You have the lne $(line) after an end.") -end - -function _strip_comment(line::String) - if occursin("\\", line) - m = match(r"(.*?)\\(.*)", line)::RegexMatch - return strip(String(m[1]::AbstractString)) - else - return strip(line) - end -end - -function _parse_section( - ::Val{:header}, - ::Model, - ::_ReadCache, - line::AbstractString, -) - return error("Unable to read LP file: unexpected line: $(line)") -end - -""" - Base.read!(io::IO, model::FileFormats.LP.Model) - -Read `io` in the LP file format and store the result in `model`. 
- -This reader attempts to follow the CPLEX LP format, because others like the -lpsolve version are very...flexible...in how they accept input. Read more about -them here: http://lpsolve.sourceforge.net -""" -function Base.read!(io::IO, model::Model{T}) where {T} - if !MOI.is_empty(model) - error("Cannot read in file because model is not empty.") - end - cache = _ReadCache{T}() - section = Val{:header}() - peeked_line = "" - while peeked_line !== nothing - line, peeked_line = _readline(io, peeked_line) - lower_line = lowercase(line) - if haskey(_KEYWORDS, lower_line) - section = _KEYWORDS[lower_line] - _set_objective_sense(section, model, lower_line) - continue - end - while _line_continues(section, peeked_line) - line, peeked_line = _readline(io, string(line, ' ', peeked_line)) - end - _parse_section(section, model, cache, line) - end - obj = if isempty(cache.quad_obj_terms) - cache.objective - else - MOI.ScalarQuadraticFunction( - cache.quad_obj_terms, - cache.objective.terms, - cache.objective.constant, - ) - end - MOI.set(model, MOI.ObjectiveFunction{typeof(obj)}(), obj) - return -end - -function _line_continues( - ::Union{typeof(_KW_OBJECTIVE),typeof(_KW_CONSTRAINTS)}, - peeked_line::AbstractString, -) - return any(Base.Fix1(startswith, peeked_line), ('+', '-')) -end - -_line_continues(::Any, ::Any) = false - -function _readline(io::IO, line::AbstractString) - if eof(io) - return line, nothing - end - peeked_line = _strip_comment(string(readline(io))) - if isempty(line) - # If the line is empty, go to the next - return _readline(io, peeked_line) - elseif isempty(peeked_line) - # If the peeked line is empty, get another - return _readline(io, line) - elseif any(Base.Fix1(endswith, line), ('+', '-', '[', '=')) - # If the line ends with a continuation character, read in the next line. - return _readline(io, string(line, " ", peeked_line)) - elseif any(Base.Fix1(startswith, peeked_line), (']', '/')) - # Always read in the next line if it starts with ] or /, which are used - # in quadratic functions. - return _readline(io, string(line, " ", peeked_line)) - end - return line, peeked_line -end +include("read.jl") end diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl new file mode 100644 index 0000000000..70b96af587 --- /dev/null +++ b/src/FileFormats/LP/read.jl @@ -0,0 +1,734 @@ +# Copyright (c) 2017: Miles Lubin and contributors +# Copyright (c) 2017: Google Inc. +# +# Use of this source code is governed by an MIT-style license that can be found +# in the LICENSE.md file or at https://opensource.org/licenses/MIT. + +struct Cache{T} + model::Model{T} + variable_name_to_index::Dict{String,MOI.VariableIndex} + variable_with_default_bound::Set{MOI.VariableIndex} + function Cache(model::Model{T}) where {T} + return new{T}( + model, + Dict{String,MOI.VariableIndex}(), + Set{MOI.VariableIndex}(), + ) + end +end + +""" + Base.read!(io::IO, model::FileFormats.LP.Model) + +Read `io` in the LP file format and store the result in `model`. + +This reader attempts to follow the CPLEX LP format, because others like the +lpsolve version are very...flexible...in how they accept input. 
Read more about
+them here: http://lpsolve.sourceforge.net
+"""
+function Base.read!(io::IO, model::Model{T}) where {T}
+    if !MOI.is_empty(model)
+        error("Cannot read in file because model is not empty.")
+    end
+    state = LexerState(io)
+    cache = Cache(model)
+    keyword = :UNKNOWN
+    while (token = peek(state, Token)) !== nothing
+        if token.kind == _TOKEN_KEYWORD
+            read(state, Token)
+            keyword = Symbol(token.value)
+            continue
+        elseif token.kind == _TOKEN_NEWLINE
+            read(state, Token)
+            continue
+        elseif keyword == :MINIMIZE
+            MOI.set(cache.model, MOI.ObjectiveSense(), MOI.MIN_SENSE)
+            _parse_objective(state, cache)
+            keyword = :UNKNOWN
+        elseif keyword == :MAXIMIZE
+            MOI.set(cache.model, MOI.ObjectiveSense(), MOI.MAX_SENSE)
+            _parse_objective(state, cache)
+            keyword = :UNKNOWN
+        elseif keyword == :CONSTRAINTS
+            _parse_constraint(state, cache)
+        elseif keyword == :BINARY
+            x = _parse_variable(state, cache)
+            MOI.add_constraint(cache.model, x, MOI.ZeroOne())
+        elseif keyword == :INTEGER
+            x = _parse_variable(state, cache)
+            MOI.add_constraint(cache.model, x, MOI.Integer())
+        elseif keyword == :BOUNDS
+            _parse_bound(state, cache)
+        elseif keyword == :SOS
+            _parse_constraint(state, cache)
+        else
+            throw(UnexpectedToken(token))
+        end
+    end
+    for x in cache.variable_with_default_bound
+        MOI.add_constraint(model, x, MOI.GreaterThan(zero(T)))
+    end
+    return
+end
+
+"""
+    const _KEYWORDS::Dict{String,Symbol}
+
+The LP file format is very permissive in what it allows users to call the
+various sections. Here is a dictionary that maps possible user words
+(normalized to lowercase, even though users can use mixed case) to the section.
+
+If you find new spellings for the section names, add them here.
+
+Special handling is needed in the lexer for the keywords that contain spaces.
+"""
+const _KEYWORDS = Dict(
+    # MAXIMIZE
+    "max" => :MAXIMIZE,
+    "maximize" => :MAXIMIZE,
+    "maximise" => :MAXIMIZE,
+    "maximum" => :MAXIMIZE,
+    # MINIMIZE
+    "min" => :MINIMIZE,
+    "minimize" => :MINIMIZE,
+    "minimise" => :MINIMIZE,
+    "minimum" => :MINIMIZE,
+    # CONSTRAINTS
+    "subject to" => :CONSTRAINTS,
+    "such that" => :CONSTRAINTS,
+    "st" => :CONSTRAINTS,
+    "s.t." => :CONSTRAINTS,
+    # BOUNDS
+    "bounds" => :BOUNDS,
+    "bound" => :BOUNDS,
+    # INTEGER
+    "gen" => :INTEGER,
+    "general" => :INTEGER,
+    "generals" => :INTEGER,
+    "integer" => :INTEGER,
+    "integers" => :INTEGER,
+    # BINARY
+    "bin" => :BINARY,
+    "binary" => :BINARY,
+    "binaries" => :BINARY,
+    # SOS
+    "sos" => :SOS,
+    # END
+    "end" => :END,
+)
+
+@enum(
+    _TokenKind,
+    _TOKEN_KEYWORD,
+    _TOKEN_IDENTIFIER,
+    _TOKEN_NUMBER,
+    _TOKEN_ADDITION,
+    _TOKEN_SUBTRACTION,
+    _TOKEN_MULTIPLICATION,
+    _TOKEN_DIVISION,
+    _TOKEN_EXPONENT,
+    _TOKEN_OPEN_BRACKET,
+    _TOKEN_CLOSE_BRACKET,
+    _TOKEN_GREATER_THAN,
+    _TOKEN_LESS_THAN,
+    _TOKEN_EQUAL_TO,
+    _TOKEN_COLON,
+    _TOKEN_NEWLINE,
+    _TOKEN_UNKNOWN,
+)
+"""
+    const _OPERATORS::Dict{Char,_TokenKind}
+
+This dictionary is used to simplify the lexer for common operators.
+
+These operators must not contain spaces.
+"""
+const _OPERATORS = Dict{Char,_TokenKind}(
+    '+' => _TOKEN_ADDITION,
+    '-' => _TOKEN_SUBTRACTION,
+    '*' => _TOKEN_MULTIPLICATION,
+    '/' => _TOKEN_DIVISION,
+    '^' => _TOKEN_EXPONENT,
+    '[' => _TOKEN_OPEN_BRACKET,
+    ']' => _TOKEN_CLOSE_BRACKET,
+    '>' => _TOKEN_GREATER_THAN,
+    '<' => _TOKEN_LESS_THAN,
+    '=' => _TOKEN_EQUAL_TO,
+    ':' => _TOKEN_COLON,
+    '\n' => _TOKEN_NEWLINE,
+)
+
+"""
+    struct Token
+        kind::_TokenKind
+        value::Union{Nothing,String}
+    end
+
+This struct is used to represent each token from the lexer. The `value` is the
The `value` is the +unprocessed value. +""" +struct Token + kind::_TokenKind + value::Union{Nothing,String} +end + +""" + mutable struct LexerState + io::IO + peek_char::Union{Nothing,Char} + peek_tokens::Vector{Token} + end + +A struct that is used to manage state when lexing. + +It stores: + + * `io`: the IO object that we are streaming + * `peek_char`: the next `Char` in the `io` + * `peek_tokens`: the list of upcoming tokens that we have already peeked. +""" +mutable struct LexerState + io::IO + peek_char::Union{Nothing,Char} + peek_tokens::Vector{Token} + LexerState(io::IO) = new(io, nothing, Token[]) +end + +function Base.peek(state::LexerState, ::Type{Char}) + if state.peek_char === nothing && !eof(state.io) + state.peek_char = read(state.io, Char) + end + return state.peek_char +end + +function Base.read(state::LexerState, ::Type{Char}) + c = peek(state, Char) + state.peek_char = nothing + return c +end + +function Base.read(state::LexerState, ::Type{Token}) + token = peek(state, Token, 1) + popfirst!(state.peek_tokens) + return token +end + +_is_idenfifier(c::Char) = !(isspace(c) || c in ('+', '-', '*', '^', ':')) + +function Base.peek(state::LexerState, ::Type{Token}, n::Int = 1) + @assert n >= 1 + while length(state.peek_tokens) < n + token = _peek_inner(state) + if token === nothing + return nothing + end + push!(state.peek_tokens, token) + end + return state.peek_tokens[n] +end + +function _peek_inner(state::LexerState) + while (c = peek(state, Char)) !== nothing + if c == '\n' + read(state, Char) + return Token(_TOKEN_NEWLINE, "\n") + elseif isspace(c) # Whitespace + read(state, Char) + elseif c == '\\' # Comment: backslash until newline + while (c = read(state, Char)) !== nothing && c != '\n' + end + elseif isdigit(c) || (c == '-' && isdigit(peek(state, Char))) # Number + buf = IOBuffer() + while (c = peek(state, Char)) !== nothing && (isdigit(c) || c in ['.', 'e', 'E', '+', '-']) + write(buf, c) + read(state, Char) + end + return Token(_TOKEN_NUMBER, String(take!(buf))) + elseif isletter(c) || c == '_' # Identifier / keyword + buf = IOBuffer() + while (c = peek(state, Char)) !== nothing && _is_idenfifier(c) + write(buf, c) + read(state, Char) + end + val = String(take!(buf)) + l_val = lowercase(val) + if l_val == "subject" + t = peek(state, Token) + if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "to" + read(state, Token) # Skip "to" + return Token(_TOKEN_KEYWORD, "CONSTRAINTS") + end + elseif l_val == "such" + t = peek(state, Token) + if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "that" + read(state, Token) # Skip "such" + return Token(_TOKEN_KEYWORD, "CONSTRAINTS") + end + end + if (kw = get(_KEYWORDS, l_val, nothing)) !== nothing + return Token(_TOKEN_KEYWORD, string(kw)) + end + return Token(_TOKEN_IDENTIFIER, val) + elseif (op = get(_OPERATORS, c, nothing)) !== nothing + read(state, Char) + if c in ('<', '>', '=') && peek(state, Char) == '=' + read(state, Char) # Allow <=, >=, and == + end + return Token(op, string(c)) + else + throw(UnexpectedToken(Token(_TOKEN_UNKNOWN, "$c"))) + end + end + return +end + +""" + struct UnexpectedToken <: Exception + token::Token + end + +This error is thrown when we encounter an unexpected token when parsing the LP +file. No other information is available. + +TODO: we could improve this by storing line information or other context to help +the user diagnose the problem. 
+""" +struct UnexpectedToken <: Exception + token::Token +end + +function _expect(token::Token, kind::_TokenKind) + if token.kind != kind + throw(UnexpectedToken(token)) + end + return +end + +""" + _next_token_is(state::LexerState, kind::_TokenKind, n::Int = 1) + +A helper function to check if the token in `n` steps is of kind `kind`. +""" +function _next_token_is(state::LexerState, kind::_TokenKind, n::Int = 1) + if (t = peek(state, Token, n)) !== nothing + return t.kind == kind + end + return false +end + +function _skip_newlines(state::LexerState) + while _next_token_is(state, _TOKEN_NEWLINE) + read(state, Token) + end + return +end + +# IDENTIFIER --> "string" +# +# There _are_ rules to what an identifier can be. We handle these when lexing. +# Anything that makes it here is deemed acceptable. +function _parse_variable(state::LexerState, cache::Cache)::MOI.VariableIndex + _skip_newlines(state) + token = read(state, Token) + _expect(token, _TOKEN_IDENTIFIER) + x = get(cache.variable_name_to_index, token.value, nothing) + if x !== nothing + return x + end + x = MOI.add_variable(cache.model) + if length(token.value) > get_options(cache.model).maximum_length + error("Name exceeds maximum length: $(token.value)") + end + MOI.set(cache.model, MOI.VariableName(), x, token.value) + cache.variable_name_to_index[token.value] = x + push!(cache.variable_with_default_bound, x) + return x +end + +# NUMBER := +# "+" NUMBER +# | "-" NUMBER +# | "inf" +# | "infinity" +# | :(parse(T, x)) +function _parse_number(state::LexerState, cache::Cache{T})::T where {T} + _skip_newlines(state) + token = read(state, Token) + if token.kind == _TOKEN_ADDITION + return _parse_number(state, cache) + elseif token.kind == _TOKEN_SUBTRACTION + return -_parse_number(state, cache) + elseif token.kind == _TOKEN_IDENTIFIER + v = lowercase(token.value) + if v == "inf" || v == "infinity" + return typemax(T) + else + throw(UnexpectedToken(token)) + end + else + _expect(token, _TOKEN_NUMBER) + end + return parse(T, token.value) +end + +# QUAD_TERM := +# "+" QUAD_TERM +# | "-" QUAD_TERM +# | [NUMBER] IDENTIFIER "^" "2" +# | [NUMBER] IDENTIFIER "*" IDENTIFIER +function _parse_quad_term( + state::LexerState, + cache::Cache{T}, + prefix::T, +) where {T} + _skip_newlines(state) + if _next_token_is(state, _TOKEN_ADDITION) + read(state, Token) + return _parse_quad_term(state, cache, prefix) + elseif _next_token_is(state, _TOKEN_SUBTRACTION) + read(state, Token) + return _parse_quad_term(state, cache, -prefix) + end + coef = prefix + if _next_token_is(state, _TOKEN_NUMBER) + coef = prefix * _parse_number(state, cache) + end + x1 = _parse_variable(state, cache) + _skip_newlines(state) + if _next_token_is(state, _TOKEN_EXPONENT) + read(state, Token) # ^ + _skip_newlines(state) + n = read(state, Token) + if n.kind != _TOKEN_NUMBER && n.value != "2" + throw(UnexpectedToken(n)) + end + return MOI.ScalarQuadraticTerm(T(2) * coef, x1, x1) + end + token = read(state, Token) + _expect(token, _TOKEN_MULTIPLICATION) + x2 = _parse_variable(state, cache) + if x1 == x2 + coef *= T(2) + end + return MOI.ScalarQuadraticTerm(coef, x1, x2) +end + +# QUADRATIC_EXPRESSION := +# "[" QUAD_TERM (("+" | "-") QUAD_TERM)* "]" +# | "[" QUAD_TERM (("+" | "-") QUAD_TERM)* "]/2" +function _parse_quad_expression( + state::LexerState, + cache::Cache{T}, + prefix::T, +) where {T} + token = read(state, Token) + _expect(token, _TOKEN_OPEN_BRACKET) + f = zero(MOI.ScalarQuadraticFunction{T}) + push!(f.quadratic_terms, _parse_quad_term(state, cache, prefix)) + while (p = 
peek(state, Token)) !== nothing
+        if p.kind == _TOKEN_ADDITION
+            p = read(state, Token)
+            push!(f.quadratic_terms, _parse_quad_term(state, cache, prefix))
+        elseif p.kind == _TOKEN_SUBTRACTION
+            p = read(state, Token)
+            push!(f.quadratic_terms, _parse_quad_term(state, cache, -prefix))
+        elseif p.kind == _TOKEN_NEWLINE
+            read(state, Token)
+        elseif p.kind == _TOKEN_CLOSE_BRACKET
+            read(state, Token)
+            break
+        else
+            return throw(UnexpectedToken(p))
+        end
+    end
+    _skip_newlines(state)
+    if _next_token_is(state, _TOKEN_DIVISION)
+        read(state, Token) # /
+        # Must be /2
+        n = read(state, Token)
+        if n.kind != _TOKEN_NUMBER && n.value != "2"
+            throw(UnexpectedToken(n))
+        end
+        for (i, term) in enumerate(f.quadratic_terms)
+            f.quadratic_terms[i] = MOI.ScalarQuadraticTerm(
+                term.coefficient / T(2),
+                term.variable_1,
+                term.variable_2,
+            )
+        end
+    end
+    return f
+end
+
+# TERM :=
+#     "+" TERM
+#   | "-" TERM
+#   | NUMBER
+#   | IDENTIFIER
+#   | NUMBER IDENTIFIER
+#   | NUMBER "*" IDENTIFIER
+#   | QUADRATIC_EXPRESSION
+function _parse_term(
+    state::LexerState,
+    cache::Cache{T},
+    prefix::T = one(T),
+) where {T}
+    _skip_newlines(state)
+    if _next_token_is(state, _TOKEN_ADDITION)
+        # "+" TERM
+        read(state, Token)
+        return _parse_term(state, cache, prefix)
+    elseif _next_token_is(state, _TOKEN_SUBTRACTION)
+        # "-" TERM
+        read(state, Token)
+        return _parse_term(state, cache, -prefix)
+    elseif _next_token_is(state, _TOKEN_IDENTIFIER)
+        # IDENTIFIER
+        x = _parse_variable(state, cache)
+        return MOI.ScalarAffineTerm(prefix, x)
+    elseif _next_token_is(state, _TOKEN_NUMBER)
+        coef = prefix * _parse_number(state, cache)
+        if _next_token_is(state, _TOKEN_IDENTIFIER)
+            # NUMBER IDENTIFIER
+            x = _parse_variable(state, cache)
+            return MOI.ScalarAffineTerm(coef, x)
+        elseif _next_token_is(state, _TOKEN_MULTIPLICATION)
+            # NUMBER * IDENTIFIER
+            read(state, Token) # skip *
+            x = _parse_variable(state, cache)
+            return MOI.ScalarAffineTerm(coef, x)
+        else
+            # NUMBER
+            return coef
+        end
+    elseif _next_token_is(state, _TOKEN_OPEN_BRACKET)
+        # QUADRATIC_EXPRESSION
+        return _parse_quad_expression(state, cache, prefix)
+    end
+    return nothing
+end
+
+function _add_to_expression!(f::MOI.ScalarQuadraticFunction{T}, x::T) where {T}
+    f.constant += x
+    return
+end
+
+function _add_to_expression!(
+    f::MOI.ScalarQuadraticFunction{T},
+    x::MOI.ScalarAffineTerm{T},
+) where {T}
+    push!(f.affine_terms, x)
+    return
+end
+
+function _add_to_expression!(
+    f::MOI.ScalarQuadraticFunction{T},
+    x::MOI.ScalarQuadraticFunction{T},
+) where {T}
+    MOI.Utilities.operate!(+, T, f, x)
+    return
+end
+
+
+# EXPRESSION :=
+#     TERM (("+" | "-") TERM)*
+function _parse_expression(state::LexerState, cache::Cache{T}) where {T}
+    f = zero(MOI.ScalarQuadraticFunction{T})
+    _add_to_expression!(f, _parse_term(state, cache))
+    while (p = peek(state, Token)) !== nothing
+        if p.kind == _TOKEN_ADDITION
+            p = read(state, Token)
+            _add_to_expression!(f, _parse_term(state, cache))
+        elseif p.kind == _TOKEN_SUBTRACTION
+            p = read(state, Token)
+            _add_to_expression!(f, _parse_term(state, cache, -one(T)))
+        elseif p.kind == _TOKEN_NEWLINE
+            read(state, Token)
+        else
+            break
+        end
+    end
+    if isempty(f.quadratic_terms)
+        return MOI.ScalarAffineFunction(f.affine_terms, f.constant)
+    end
+    return f
+end
+
+# SET_SUFFIX :=
+#     "free"
+#   | ">=" NUMBER
+#   | "<=" NUMBER
+#   | "==" NUMBER
+#
+# There are other inequality operators that are supported, like `>`, `<`, and
+# `=`. These are normalized when lexing.
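+#
+# For example (an illustrative sketch, not part of the original grammar
+# comment): in the constraint `c1: x + y <= 10`, the SET_SUFFIX is `<= 10`,
+# which parses to `MOI.LessThan(10.0)` for the default `T = Float64`, while in
+# the bound `x free`, the suffix `free` parses to `nothing` so no set is added.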
function _parse_set_suffix(state, cache)
+    _skip_newlines(state)
+    p = read(state, Token)
+    if p.kind == _TOKEN_IDENTIFIER && lowercase(p.value) == "free"
+        return nothing
+    end
+    _skip_newlines(state)
+    if p.kind == _TOKEN_GREATER_THAN
+        rhs = _parse_number(state, cache)
+        return MOI.GreaterThan(rhs)
+    elseif p.kind == _TOKEN_LESS_THAN
+        rhs = _parse_number(state, cache)
+        return MOI.LessThan(rhs)
+    elseif p.kind == _TOKEN_EQUAL_TO
+        rhs = _parse_number(state, cache)
+        return MOI.EqualTo(rhs)
+    else
+        throw(UnexpectedToken(p))
+    end
+end
+
+# SET_PREFIX :=
+#     NUMBER ">="
+#   | NUMBER "<="
+#   | NUMBER "=="
+#
+# There are other inequality operators that are supported, like `>`, `<`, and
+# `=`. These are normalized when lexing.
+function _parse_set_prefix(state, cache)
+    lhs = _parse_number(state, cache)
+    _skip_newlines(state)
+    p = read(state, Token)
+    if p.kind == _TOKEN_GREATER_THAN
+        return MOI.LessThan(lhs)
+    elseif p.kind == _TOKEN_LESS_THAN
+        return MOI.GreaterThan(lhs)
+    elseif p.kind == _TOKEN_EQUAL_TO
+        return MOI.EqualTo(lhs)
+    else
+        throw(UnexpectedToken(p))
+    end
+end
+
+# NAME --> [IDENTIFIER OP_COLON]
+function _parse_optional_name(state::LexerState, cache::Cache)
+    _skip_newlines(state)
+    if _next_token_is(state, _TOKEN_IDENTIFIER, 1) &&
+       _next_token_is(state, _TOKEN_COLON, 2)
+        name = read(state, Token)
+        read(state, Token) # Skip :
+        return name.value
+    end
+    return nothing
+end
+
+# OBJECTIVE --> [NAME] EXPRESSION
+function _parse_objective(state::LexerState, cache::Cache)
+    _ = _parse_optional_name(state, cache)
+    f = _parse_expression(state, cache)
+    MOI.set(cache.model, MOI.ObjectiveFunction{typeof(f)}(), f)
+    return
+end
+
+function _add_bound(cache::Cache, x::MOI.VariableIndex, set::MOI.GreaterThan)
+    delete!(cache.variable_with_default_bound, x)
+    if isfinite(set.lower)
+        MOI.add_constraint(cache.model, x, set)
+    end
+    return
+end
+
+function _add_bound(cache::Cache, x::MOI.VariableIndex, set::MOI.LessThan)
+    if set.upper < 0
+        delete!(cache.variable_with_default_bound, x)
+    end
+    if isfinite(set.upper)
+        MOI.add_constraint(cache.model, x, set)
+    end
+    return
+end
+
+function _add_bound(cache::Cache, x::MOI.VariableIndex, set::MOI.EqualTo)
+    delete!(cache.variable_with_default_bound, x)
+    MOI.add_constraint(cache.model, x, set)
+    return
+end
+
+# x free
+function _add_bound(cache::Cache, x::MOI.VariableIndex, ::Nothing)
+    delete!(cache.variable_with_default_bound, x)
+    return
+end
+
+# BOUND -->
+#     IDENTIFIER SET_SUFFIX
+#   | SET_PREFIX IDENTIFIER
+#   | SET_PREFIX IDENTIFIER SET_SUFFIX
+function _parse_bound(state, cache)
+    if _next_token_is(state, _TOKEN_IDENTIFIER) # `x free` or `x op b`
+        x = _parse_variable(state, cache)
+        set = _parse_set_suffix(state, cache)
+        _add_bound(cache, x, set)
+        return
+    end
+    # `a op x` or `a op x op b`
+    lhs_set = _parse_set_prefix(state, cache)
+    x = _parse_variable(state, cache)
+    _add_bound(cache, x, lhs_set)
+    if _next_token_is(state, _TOKEN_GREATER_THAN) ||
+        _next_token_is(state, _TOKEN_LESS_THAN) ||
+        _next_token_is(state, _TOKEN_EQUAL_TO) # `a op x op b`
+        # We don't add MOI.Interval constraints to follow JuMP's convention of
+        # separate bounds.
+        rhs_set = _parse_set_suffix(state, cache)
+        _add_bound(cache, x, rhs_set)
+    end
+    return
+end
+
+# SOS_CONSTRAINT :=
+#     [NAME] S1:: (IDENTIFIER:NUMBER)+ \n
+#   | [NAME] S2:: (IDENTIFIER:NUMBER)+ \n
+#
+# The newline character is required.
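+#
+# For example (illustrative): the line `sos1: S1:: x1:1 x2:2 x3:3` declares an
+# SOS-1 constraint named `sos1` on the variables `x1`, `x2`, and `x3` with the
+# weights `1`, `2`, and `3`.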
+function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T} + t = read(state, Token) # Si + _expect(read(state, Token), _TOKEN_COLON) + _expect(read(state, Token), _TOKEN_COLON) + f, w = MOI.VectorOfVariables(MOI.VariableIndex[]), T[] + while true + push!(f.variables, _parse_variable(state, cache)) + _expect(read(state, Token), _TOKEN_COLON) + push!(w, _parse_number(state, cache)) + if _next_token_is(state, _TOKEN_NEWLINE) + break + end + end + if t.value == "S1" + return MOI.add_constraint(cache.model, f, MOI.SOS1(w)) + else + return MOI.add_constraint(cache.model, f, MOI.SOS2(w)) + end +end + +function _is_sos_constraint(state) + t = peek(state, Token, 1) + return t.kind == _TOKEN_IDENTIFIER && + (t.value == "S1" || t.value == "S2") && + _next_token_is(state, _TOKEN_COLON, 2) && + _next_token_is(state, _TOKEN_COLON, 3) +end + +# CONSTRAINT := +# [NAME] EXPRESSION SET_SUFFIX +# | [NAME] SOS_CONSTRAINT +function _parse_constraint(state::LexerState, cache::Cache) + name = _parse_optional_name(state, cache) + # Check if this is an SOS constraint + c = if _is_sos_constraint(state) + _parse_sos_constraint(state, cache) + else + f = _parse_expression(state, cache) + set = _parse_set_suffix(state, cache) + MOI.add_constraint(cache.model, f, set) + end + if name !== nothing + MOI.set(cache.model, MOI.ConstraintName(), c, name) + end + return +end diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index f58a54f061..de8e7e898b 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -446,7 +446,7 @@ function test_read_invalid() for filename in filter(f -> startswith(f, "invalid_"), readdir(models)) model = LP.Model() @test_throws( - ErrorException, + LP.UnexpectedToken, MOI.read_from_file(model, joinpath(models, filename)), ) end @@ -459,10 +459,7 @@ function test_read_unexpected_line() print(io, line) seekstart(io) model = LP.Model() - @test_throws( - ErrorException("Unable to read LP file: unexpected line: $(line)"), - read!(io, model), - ) + @test_throws LP.UnexpectedToken read!(io, model) return end @@ -513,7 +510,7 @@ function test_read_model1_tricky() @test occursin("CON4: 1 V5 + 1 V6 + 1 V7 <= 1", file) @test occursin("CON1: 1 V1 >= 0", file) @test occursin("CON5: [ 1 Var4 ^ 2 - 1.2 V5 * V1 ] <= 0", file) - @test occursin("R1: 1 V2 >= 2", file) + @test occursin("1 V2 >= 2", file) @test occursin("-infinity <= V1 <= 3", file) @test occursin("Var4 >= 5.5", file) @test occursin("V3 >= -3", file) @@ -580,7 +577,6 @@ function test_read_model2() end function test_read_objective_sense() - model = LP.Model() cases = Dict( "max" => MOI.MAX_SENSE, "maximize" => MOI.MAX_SENSE, @@ -592,7 +588,10 @@ function test_read_objective_sense() "minimum" => MOI.MIN_SENSE, ) for (sense, result) in cases - LP._set_objective_sense(LP._KW_OBJECTIVE, model, sense) + model = LP.Model() + io = IOBuffer("$sense x") + seekstart(io) + read!(io, model) @test MOI.get(model, MOI.ObjectiveSense()) == result end return @@ -1035,40 +1034,41 @@ function test_read_variable_bounds() return end -function test_read_indicator() - io = IOBuffer(""" - minimize - obj: 1 x - subject to - c: z = 1 -> x >= 0 - d: z = 0 -> x - y <= 1.2 - bounds - x free - z free - binary - z - end - """) - model = MOI.FileFormats.Model(format = MOI.FileFormats.FORMAT_LP) - read!(io, model) - io = IOBuffer() - write(io, model) - seekstart(io) - @test read(io, String) == """ - minimize - obj: 1 x - subject to - d: z = 0 -> 1 x - 1 y <= 1.2 - c: z = 1 -> 1 x >= 0 - Bounds - x free - y >= 0 - Binary - z - End - 
""" - return -end +# TODO(odow): FIXME +# function test_read_indicator() +# io = IOBuffer(""" +# minimize +# obj: 1 x +# subject to +# c: z = 1 -> x >= 0 +# d: z = 0 -> x - y <= 1.2 +# bounds +# x free +# z free +# binary +# z +# end +# """) +# model = MOI.FileFormats.Model(format = MOI.FileFormats.FORMAT_LP) +# read!(io, model) +# io = IOBuffer() +# write(io, model) +# seekstart(io) +# @test read(io, String) == """ +# minimize +# obj: 1 x +# subject to +# d: z = 0 -> 1 x - 1 y <= 1.2 +# c: z = 1 -> 1 x >= 0 +# Bounds +# x free +# y >= 0 +# Binary +# z +# End +# """ +# return +# end function test_VectorAffineFunction_SOS() model = MOI.FileFormats.LP.Model() @@ -1092,10 +1092,7 @@ function test_invalid_token_in_sos() """, ) seekstart(io) - @test_throws( - ErrorException("Invalid token in SOS constraint: x"), - read!(io, model), - ) + @test_throws LP.UnexpectedToken read!(io, model) return end @@ -1109,7 +1106,7 @@ function test_unable_to_parse_bound() end """) model = LP.Model() - @test_throws(ErrorException("Unable to parse bound: x"), read!(io, model)) + @test_throws LP.UnexpectedToken read!(io, model) return end diff --git a/test/FileFormats/LP/models/invalid_affine_term_constraint.lp b/test/FileFormats/LP/models/invalid_affine_term_constraint.lp deleted file mode 100644 index 334a6bbd5c..0000000000 --- a/test/FileFormats/LP/models/invalid_affine_term_constraint.lp +++ /dev/null @@ -1,13 +0,0 @@ -\ File: lo1.lp -maximize -obj: 3 x1 + x2 + 5 x3 + x4 -subject to -c1: 3 x1 + x2 + 2 x3 = 30 -c2: 2 x1 + x2 + - 3 x3 + x4 >= 15 -c3: 2 x2 + 3 x4 <= 25 -bounds - 0 <= x1 <= +infinity - 0 <= x2 <= 10 - 0 <= x3 <= +infinity - 0 <= x4 <= +infinity -end diff --git a/test/FileFormats/LP/models/invalid_affine_term_objective.lp b/test/FileFormats/LP/models/invalid_affine_term_objective.lp deleted file mode 100644 index 8844216423..0000000000 --- a/test/FileFormats/LP/models/invalid_affine_term_objective.lp +++ /dev/null @@ -1,13 +0,0 @@ -\ File: lo1.lp -maximize -obj: 3 x1 + + x2 + 5 x3 + x4 -subject to -c1: 3 x1 + x2 + 2 x3 = 30 -c2: 2 x1 + x2 + 3 x3 + x4 >= 15 -c3: 2 x2 + 3 x4 <= 25 -bounds - 0 <= x1 <= +infinity - 0 <= x2 <= 10 - 0 <= x3 <= +infinity - 0 <= x4 <= +infinity -end diff --git a/test/FileFormats/LP/models/invalid_bound.lp b/test/FileFormats/LP/models/invalid_bound.lp deleted file mode 100644 index 3015587726..0000000000 --- a/test/FileFormats/LP/models/invalid_bound.lp +++ /dev/null @@ -1,13 +0,0 @@ -\ File: lo1.lp -maximize -obj: 3 x1 + x2 + 5 x3 + x4 -subject to -c1: 3 x1 + x2 + 2 x3 = 30 -c2: 2 x1 + x2 + 3 x3 + x4 >= 15 -c3: 2 x2 + 3 x4 <= 25 -bounds - 0 <= x1 <= +infinity - 0 >= x2 <= 10 - 0 <= x3 <= +infinity - 0 <= x4 <= +infinity -end From 9b9c175fb37185155a6b1167920560b40b00145c Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 11:28:29 +1200 Subject: [PATCH 02/10] Update --- src/FileFormats/LP/read.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 70b96af587..0a4e1ef320 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -617,9 +617,13 @@ function _parse_optional_name(state::LexerState, cache::Cache) return nothing end -# OBJECTIVE --> [NAME] EXPRESSION +# OBJECTIVE --> [NAME] [EXPRESSION] function _parse_objective(state::LexerState, cache::Cache) _ = _parse_optional_name(state, cache) + _skip_newlines(state) + if _next_token_is(state, _TOKEN_KEYWORD) + return # A line like `obj:\nsubject to` + end f = _parse_expression(state, cache) MOI.set(cache.model, 
MOI.ObjectiveFunction{typeof(f)}(), f) return From 5349cabf753a386899b43c568820c73b0358cc79 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 11:39:41 +1200 Subject: [PATCH 03/10] Update --- src/FileFormats/LP/read.jl | 46 +++++++++++++++++++++++-- test/FileFormats/LP/LP.jl | 69 +++++++++++++++++++------------------- 2 files changed, 78 insertions(+), 37 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 0a4e1ef320..1753b25af2 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -133,6 +133,7 @@ const _KEYWORDS = Dict( _TOKEN_LESS_THAN, _TOKEN_EQUAL_TO, _TOKEN_COLON, + _TOKEN_IMPLIES, _TOKEN_NEWLINE, _TOKEN_UNKNOWN, ) @@ -270,8 +271,11 @@ function _peek_inner(state::LexerState) end return Token(_TOKEN_IDENTIFIER, val) elseif (op = get(_OPERATORS, c, nothing)) !== nothing - read(state, Char) - if c in ('<', '>', '=') && peek(state, Char) == '=' + read(state, Char) # Skip c + if c == '-' && peek(state, Char) == '>' + read(state, Char) + return Token(_TOKEN_IMPLIES, "->") + elseif c in ('<', '>', '=') && peek(state, Char) == '=' read(state, Char) # Allow <=, >=, and == end return Token(op, string(c)) @@ -718,14 +722,52 @@ function _is_sos_constraint(state) _next_token_is(state, _TOKEN_COLON, 3) end +function _is_indicator_constraint(state) + return _next_token_is(state, _TOKEN_IDENTIFIER, 1) && + _next_token_is(state, _TOKEN_EQUAL_TO, 2) && + _next_token_is(state, _TOKEN_NUMBER, 3) && + _next_token_is(state, _TOKEN_IMPLIES, 4) +end + +# INDICATOR_CONSTRAINT := +# IDENTIFIER "=" "0" "->" EXPRESSION SET_SUFFIX +# | IDENTIFIER "=" "1" "->" EXPRESSION SET_SUFFIX +function _parse_indicator_constraint( + state::LexerState, + cache::Cache{T}, +) where {T} + z = _parse_variable(state, cache) + _expect(read(state, Token), _TOKEN_EQUAL_TO) + t = read(state, Token) + _expect(t, _TOKEN_NUMBER) + indicator = if t.value == "0" + MOI.ACTIVATE_ON_ZERO + elseif t.value == "1" + MOI.ACTIVATE_ON_ONE + else + throw(UnexpectedToken(t)) + end + _expect(read(state, Token), _TOKEN_IMPLIES) + f = _parse_expression(state, cache) + set = _parse_set_suffix(state, cache) + return MOI.add_constraint( + cache.model, + MOI.Utilities.operate(vcat, T, z, f), + MOI.Indicator{indicator}(set), + ) +end + # CONSTRAINT := # [NAME] EXPRESSION SET_SUFFIX # | [NAME] SOS_CONSTRAINT +# | [NAME] INDICATOR_CONSTRAINT function _parse_constraint(state::LexerState, cache::Cache) name = _parse_optional_name(state, cache) # Check if this is an SOS constraint c = if _is_sos_constraint(state) _parse_sos_constraint(state, cache) + elseif _is_indicator_constraint(state) + _parse_indicator_constraint(state, cache) else f = _parse_expression(state, cache) set = _parse_set_suffix(state, cache) diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index de8e7e898b..d2d8e8120a 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -1034,41 +1034,40 @@ function test_read_variable_bounds() return end -# TODO(odow): FIXME -# function test_read_indicator() -# io = IOBuffer(""" -# minimize -# obj: 1 x -# subject to -# c: z = 1 -> x >= 0 -# d: z = 0 -> x - y <= 1.2 -# bounds -# x free -# z free -# binary -# z -# end -# """) -# model = MOI.FileFormats.Model(format = MOI.FileFormats.FORMAT_LP) -# read!(io, model) -# io = IOBuffer() -# write(io, model) -# seekstart(io) -# @test read(io, String) == """ -# minimize -# obj: 1 x -# subject to -# d: z = 0 -> 1 x - 1 y <= 1.2 -# c: z = 1 -> 1 x >= 0 -# Bounds -# x free -# y >= 0 -# Binary -# z -# End -# """ 
-# return -# end +function test_read_indicator() + io = IOBuffer(""" + minimize + obj: 1 x + subject to + c: z = 1 -> x >= 0 + d: z = 0 -> x - y <= 1.2 + bounds + x free + z free + binary + z + end + """) + model = MOI.FileFormats.Model(format = MOI.FileFormats.FORMAT_LP) + read!(io, model) + io = IOBuffer() + write(io, model) + seekstart(io) + @test read(io, String) == """ + minimize + obj: 1 x + subject to + d: z = 0 -> 1 x - 1 y <= 1.2 + c: z = 1 -> 1 x >= 0 + Bounds + x free + y >= 0 + Binary + z + End + """ + return +end function test_VectorAffineFunction_SOS() model = MOI.FileFormats.LP.Model() From b0d32f30106d31242c846f6fa706cded277a1c05 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 11:44:11 +1200 Subject: [PATCH 04/10] Update --- src/FileFormats/LP/read.jl | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 1753b25af2..5a20a4d3b8 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -29,7 +29,7 @@ them here: http://lpsolve.sourceforge.net function Base.read!(io::IO, model::Model{T}) where {T} if !MOI.is_empty(model) error("Cannot read in file because model is not empty.") - end + end state = LexerState(io) cache = Cache(model) keyword = :UNKNOWN @@ -215,6 +215,7 @@ function Base.read(state::LexerState, ::Type{Token}) end _is_idenfifier(c::Char) = !(isspace(c) || c in ('+', '-', '*', '^', ':')) +_is_number(c::Char) = isdigit(c) || c in ('.', 'e', 'E', '+', '-') function Base.peek(state::LexerState, ::Type{Token}, n::Int = 1) @assert n >= 1 @@ -240,7 +241,7 @@ function _peek_inner(state::LexerState) end elseif isdigit(c) || (c == '-' && isdigit(peek(state, Char))) # Number buf = IOBuffer() - while (c = peek(state, Char)) !== nothing && (isdigit(c) || c in ['.', 'e', 'E', '+', '-']) + while (c = peek(state, Char)) !== nothing && _is_number(c) write(buf, c) read(state, Char) end @@ -391,7 +392,7 @@ function _parse_quad_term( return _parse_quad_term(state, cache, prefix) elseif _next_token_is(state, _TOKEN_SUBTRACTION) read(state, Token) - return _parse_quad_term(state, cache, -prefix) + return _parse_quad_term(state, cache, -prefix) end coef = prefix if _next_token_is(state, _TOKEN_NUMBER) @@ -533,7 +534,6 @@ function _add_to_expression!( return end - # EXPRESSION := # TERM (("+" | "-") TERM)* function _parse_expression(state::LexerState, cache::Cache{T}) where {T} @@ -679,8 +679,8 @@ function _parse_bound(state, cache) x = _parse_variable(state, cache) _add_bound(cache, x, lhs_set) if _next_token_is(state, _TOKEN_GREATER_THAN) || - _next_token_is(state, _TOKEN_LESS_THAN) || - _next_token_is(state, _TOKEN_EQUAL_TO) # `a op x op b` + _next_token_is(state, _TOKEN_LESS_THAN) || + _next_token_is(state, _TOKEN_EQUAL_TO) # `a op x op b` # We don't add MOI.Interval constraints to follow JuMP's convention of # separate bounds. rhs_set = _parse_set_suffix(state, cache) @@ -696,6 +696,9 @@ end # The newline character is required. 
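+#
+# Illustrative note: `_is_sos_constraint` below no longer inspects the type
+# token, so the explicit `S1`/`S2` check added at the start of the function
+# below is what rejects a malformed declaration such as `S3::` with an
+# `UnexpectedToken`.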
function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T} t = read(state, Token) # Si + if !(t.value == "S1" || t.value == "S2") + throw(UnexpectedToken(t)) + end _expect(read(state, Token), _TOKEN_COLON) _expect(read(state, Token), _TOKEN_COLON) f, w = MOI.VectorOfVariables(MOI.VariableIndex[]), T[] @@ -715,18 +718,16 @@ function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T} end function _is_sos_constraint(state) - t = peek(state, Token, 1) - return t.kind == _TOKEN_IDENTIFIER && - (t.value == "S1" || t.value == "S2") && - _next_token_is(state, _TOKEN_COLON, 2) && - _next_token_is(state, _TOKEN_COLON, 3) + return _next_token_is(state, _TOKEN_IDENTIFIER, 1) && + _next_token_is(state, _TOKEN_COLON, 2) && + _next_token_is(state, _TOKEN_COLON, 3) end function _is_indicator_constraint(state) return _next_token_is(state, _TOKEN_IDENTIFIER, 1) && - _next_token_is(state, _TOKEN_EQUAL_TO, 2) && - _next_token_is(state, _TOKEN_NUMBER, 3) && - _next_token_is(state, _TOKEN_IMPLIES, 4) + _next_token_is(state, _TOKEN_EQUAL_TO, 2) && + _next_token_is(state, _TOKEN_NUMBER, 3) && + _next_token_is(state, _TOKEN_IMPLIES, 4) end # INDICATOR_CONSTRAINT := From 5c28ebe92663c834f459600db7a8a7f25e1fcb83 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 13:35:58 +1200 Subject: [PATCH 05/10] Update --- src/FileFormats/LP/read.jl | 154 +++++++++++++------------ test/FileFormats/LP/LP.jl | 230 +++++++++++++++++++++++++++++++++++++ 2 files changed, 312 insertions(+), 72 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 5a20a4d3b8..0a17a0e478 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -35,11 +35,11 @@ function Base.read!(io::IO, model::Model{T}) where {T} keyword = :UNKNOWN while (token = peek(state, Token)) !== nothing if token.kind == _TOKEN_KEYWORD - read(state, Token) + _ = read(state, Token) keyword = Symbol(token.value) continue elseif token.kind == _TOKEN_NEWLINE - read(state, Token) + _ = read(state, Token) continue elseif keyword == :MINIMIZE MOI.set(cache.model, MOI.ObjectiveSense(), MOI.MIN_SENSE) @@ -173,6 +173,28 @@ struct Token value::Union{Nothing,String} end +""" + struct UnexpectedToken <: Exception + token::Token + end + +This error is thrown when we encounter an unexpected token when parsing the LP +file. No other information is available. + +TODO: we could improve this by storing line information or other context to help +the user diagnose the problem. 
+""" +struct UnexpectedToken <: Exception + token::Token +end + +function _expect(token::Token, kind::_TokenKind) + if token.kind != kind + throw(UnexpectedToken(token)) + end + return token +end + """ mutable struct LexerState io::IO @@ -210,10 +232,18 @@ end function Base.read(state::LexerState, ::Type{Token}) token = peek(state, Token, 1) + if isempty(state.peek_tokens) + throw(UnexpectedToken(Token(_TOKEN_UNKNOWN, "EOF"))) + end popfirst!(state.peek_tokens) return token end +function Base.read(state::LexerState, ::Type{Token}, kind::_TokenKind) + token = read(state, Token) + return _expect(token, kind) +end + _is_idenfifier(c::Char) = !(isspace(c) || c in ('+', '-', '*', '^', ':')) _is_number(c::Char) = isdigit(c) || c in ('.', 'e', 'E', '+', '-') @@ -257,13 +287,13 @@ function _peek_inner(state::LexerState) if l_val == "subject" t = peek(state, Token) if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "to" - read(state, Token) # Skip "to" + _ = read(state, Token) # Skip "to" return Token(_TOKEN_KEYWORD, "CONSTRAINTS") end elseif l_val == "such" t = peek(state, Token) if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "that" - read(state, Token) # Skip "such" + _ = read(state, Token) # Skip "such" return Token(_TOKEN_KEYWORD, "CONSTRAINTS") end end @@ -276,8 +306,11 @@ function _peek_inner(state::LexerState) if c == '-' && peek(state, Char) == '>' read(state, Char) return Token(_TOKEN_IMPLIES, "->") + elseif c == '=' && peek(state, Char) in ('<', '>') + c = read(state, Char) # Allow =< and => as <= and >= + return Token(_OPERATORS[c], string(c)) elseif c in ('<', '>', '=') && peek(state, Char) == '=' - read(state, Char) # Allow <=, >=, and == + _ = read(state, Char) # Allow <=, >=, and == end return Token(op, string(c)) else @@ -287,28 +320,6 @@ function _peek_inner(state::LexerState) return end -""" - struct UnexpectedToken <: Exception - token::Token - end - -This error is thrown when we encounter an unexpected token when parsing the LP -file. No other information is available. - -TODO: we could improve this by storing line information or other context to help -the user diagnose the problem. -""" -struct UnexpectedToken <: Exception - token::Token -end - -function _expect(token::Token, kind::_TokenKind) - if token.kind != kind - throw(UnexpectedToken(token)) - end - return -end - """ _next_token_is(state::LexerState, kind::_TokenKind, n::Int = 1) @@ -323,19 +334,18 @@ end function _skip_newlines(state::LexerState) while _next_token_is(state, _TOKEN_NEWLINE) - read(state, Token) + _ = read(state, Token) end return end -# IDENTIFIER --> "string" +# IDENTIFIER := "string" # # There _are_ rules to what an identifier can be. We handle these when lexing. # Anything that makes it here is deemed acceptable. 
function _parse_variable(state::LexerState, cache::Cache)::MOI.VariableIndex _skip_newlines(state) - token = read(state, Token) - _expect(token, _TOKEN_IDENTIFIER) + token = read(state, Token, _TOKEN_IDENTIFIER) x = get(cache.variable_name_to_index, token.value, nothing) if x !== nothing return x @@ -370,17 +380,16 @@ function _parse_number(state::LexerState, cache::Cache{T})::T where {T} else throw(UnexpectedToken(token)) end - else - _expect(token, _TOKEN_NUMBER) end + _expect(token, _TOKEN_NUMBER) return parse(T, token.value) end # QUAD_TERM := # "+" QUAD_TERM # | "-" QUAD_TERM -# | [NUMBER] IDENTIFIER "^" "2" -# | [NUMBER] IDENTIFIER "*" IDENTIFIER +# | [NUMBER] [*] IDENTIFIER "^" "2" +# | [NUMBER] [*] IDENTIFIER "*" IDENTIFIER function _parse_quad_term( state::LexerState, cache::Cache{T}, @@ -388,29 +397,32 @@ function _parse_quad_term( ) where {T} _skip_newlines(state) if _next_token_is(state, _TOKEN_ADDITION) - read(state, Token) + _ = read(state, Token) return _parse_quad_term(state, cache, prefix) elseif _next_token_is(state, _TOKEN_SUBTRACTION) - read(state, Token) + _ = read(state, Token) return _parse_quad_term(state, cache, -prefix) end coef = prefix if _next_token_is(state, _TOKEN_NUMBER) coef = prefix * _parse_number(state, cache) end + if _next_token_is(state, _TOKEN_MULTIPLICATION) + _skip_newlines(state) + _ = read(state, Token) # Skip optional multiplication + end x1 = _parse_variable(state, cache) _skip_newlines(state) if _next_token_is(state, _TOKEN_EXPONENT) - read(state, Token) # ^ + _ = read(state, Token) # ^ _skip_newlines(state) - n = read(state, Token) - if n.kind != _TOKEN_NUMBER && n.value != "2" + n = read(state, Token, _TOKEN_NUMBER) + if n.value != "2" throw(UnexpectedToken(n)) end return MOI.ScalarQuadraticTerm(T(2) * coef, x1, x1) end - token = read(state, Token) - _expect(token, _TOKEN_MULTIPLICATION) + token = read(state, Token, _TOKEN_MULTIPLICATION) x2 = _parse_variable(state, cache) if x1 == x2 coef *= T(2) @@ -426,8 +438,7 @@ function _parse_quad_expression( cache::Cache{T}, prefix::T, ) where {T} - token = read(state, Token) - _expect(token, _TOKEN_OPEN_BRACKET) + token = read(state, Token, _TOKEN_OPEN_BRACKET) f = zero(MOI.ScalarQuadraticFunction{T}) push!(f.quadratic_terms, _parse_quad_term(state, cache, prefix)) while (p = peek(state, Token)) !== nothing @@ -438,9 +449,9 @@ function _parse_quad_expression( p = read(state, Token) push!(f.quadratic_terms, _parse_quad_term(state, cache, -prefix)) elseif p.kind == _TOKEN_NEWLINE - read(state, Token) + _ = read(state, Token) elseif p.kind == _TOKEN_CLOSE_BRACKET - read(state, Token) + _ = read(state, Token) break else return throw(UnexpectedToken(p)) @@ -448,10 +459,10 @@ function _parse_quad_expression( end _skip_newlines(state) if _next_token_is(state, _TOKEN_DIVISION) - read(state, Token) # / + _ = read(state, Token) # / # Must be /2 - n = read(state, Token) - if n.kind != _TOKEN_NUMBER && n.value != "2" + n = read(state, Token, _TOKEN_NUMBER) + if n.value != "2" throw(UnexpectedToken(n)) end for (i, term) in enumerate(f.quadratic_terms) @@ -481,11 +492,11 @@ function _parse_term( _skip_newlines(state) if _next_token_is(state, _TOKEN_ADDITION) # "+" TERM - read(state, Token) + _ = read(state, Token, _TOKEN_ADDITION) return _parse_term(state, cache, prefix) elseif _next_token_is(state, _TOKEN_SUBTRACTION) # "-" TERM - read(state, Token) + _ = read(state, Token, _TOKEN_SUBTRACTION) return _parse_term(state, cache, -prefix) elseif _next_token_is(state, _TOKEN_IDENTIFIER) # IDENTIFIER @@ -499,7 +510,7 
@@ function _parse_term( return MOI.ScalarAffineTerm(coef, x) elseif _next_token_is(state, _TOKEN_MULTIPLICATION) # NUMBER * IDENTIFIER - read(state, token) # skip * + _ = read(state, Token, _TOKEN_MULTIPLICATION) x = _parse_variable(state, cache) return MOI.ScalarAffineTerm(coef, x) else @@ -510,7 +521,7 @@ function _parse_term( # QUADRATIC_EXPRESSION return _parse_quad_expression(state, cache, prefix) end - return nothing + return throw(UnexpectedToken(peek(state, Token))) end function _add_to_expression!(f::MOI.ScalarQuadraticFunction{T}, x::T) where {T} @@ -547,7 +558,7 @@ function _parse_expression(state::LexerState, cache::Cache{T}) where {T} p = read(state, Token) _add_to_expression!(f, _parse_term(state, cache, -one(T))) elseif p.kind == _TOKEN_NEWLINE - read(state, Token) + _ = read(state, Token) else break end @@ -609,19 +620,19 @@ function _parse_set_prefix(state, cache) end end -# NAME --> [IDENTIFIER OP_COLON] +# NAME := [IDENTIFIER :] function _parse_optional_name(state::LexerState, cache::Cache) _skip_newlines(state) if _next_token_is(state, _TOKEN_IDENTIFIER, 1) && _next_token_is(state, _TOKEN_COLON, 2) name = read(state, Token) - read(state, Token) # Skip : + _ = read(state, Token) # Skip : return name.value end return nothing end -# OBJECTIVE --> [NAME] [EXPRESSION] +# OBJECTIVE := [NAME] [EXPRESSION] function _parse_objective(state::LexerState, cache::Cache) _ = _parse_optional_name(state, cache) _skip_newlines(state) @@ -663,7 +674,7 @@ function _add_bound(cache::Cache, x::MOI.VariableIndex, ::Nothing) return end -# BOUND --> +# BOUND := # IDENFITIER SET_SUFFIX # | SET_PREFIX IDENTIFIER # | SET_PREFIX IDENTIFIER SET_SUFFIX @@ -689,22 +700,28 @@ function _parse_bound(state, cache) return end +function _is_sos_constraint(state) + return _next_token_is(state, _TOKEN_IDENTIFIER, 1) && + _next_token_is(state, _TOKEN_COLON, 2) && + _next_token_is(state, _TOKEN_COLON, 3) +end + # SOS_CONSTRAINT := # [NAME] S1:: (IDENTIFIER:NUMBER)+ \n # | [NAME] S2:: (IDENTIFIER:NUMBER)+ \n # # The newline character is required. 
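
For reference, a hand-written sketch of the LP file fragment this grammar matches (the `S1`/`S2` tag selects the SOS type, each variable is followed by a colon and its weight, and the whole constraint must fit on one line):

    SOS
    c1: S1:: x:1 y:2 z:3
    End
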
function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T} - t = read(state, Token) # Si + t = read(state, Token, _TOKEN_IDENTIFIER) # Si if !(t.value == "S1" || t.value == "S2") throw(UnexpectedToken(t)) end - _expect(read(state, Token), _TOKEN_COLON) - _expect(read(state, Token), _TOKEN_COLON) + _ = read(state, Token, _TOKEN_COLON) + _ = read(state, Token, _TOKEN_COLON) f, w = MOI.VectorOfVariables(MOI.VariableIndex[]), T[] while true push!(f.variables, _parse_variable(state, cache)) - _expect(read(state, Token), _TOKEN_COLON) + _ = read(state, Token, _TOKEN_COLON) push!(w, _parse_number(state, cache)) if _next_token_is(state, _TOKEN_NEWLINE) break @@ -717,12 +734,6 @@ function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T} end end -function _is_sos_constraint(state) - return _next_token_is(state, _TOKEN_IDENTIFIER, 1) && - _next_token_is(state, _TOKEN_COLON, 2) && - _next_token_is(state, _TOKEN_COLON, 3) -end - function _is_indicator_constraint(state) return _next_token_is(state, _TOKEN_IDENTIFIER, 1) && _next_token_is(state, _TOKEN_EQUAL_TO, 2) && @@ -738,9 +749,8 @@ function _parse_indicator_constraint( cache::Cache{T}, ) where {T} z = _parse_variable(state, cache) - _expect(read(state, Token), _TOKEN_EQUAL_TO) - t = read(state, Token) - _expect(t, _TOKEN_NUMBER) + _ = read(state, Token, _TOKEN_EQUAL_TO) + t = read(state, Token, _TOKEN_NUMBER) indicator = if t.value == "0" MOI.ACTIVATE_ON_ZERO elseif t.value == "1" @@ -748,7 +758,7 @@ function _parse_indicator_constraint( else throw(UnexpectedToken(t)) end - _expect(read(state, Token), _TOKEN_IMPLIES) + _ = read(state, Token, _TOKEN_IMPLIES) f = _parse_expression(state, cache) set = _parse_set_suffix(state, cache) return MOI.add_constraint( diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index d2d8e8120a..7d4ce7707b 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -1202,6 +1202,236 @@ function test_unsupported_objectives() return end +function test_subject_to_name() + for (case, err) in [ + "subject to" => false, + "Subject To" => false, + "such that" => false, + "Such That" => false, + "st" => false, + "s.t." => false, + "subject that" => true, + "subject\nto" => true, + "s. t." 
=> true, + "such to" => true, + ] + io = IOBuffer("Minimize\nobj: x\n$case\n2x == 1\nBounds\nx free\nEnd") + seekstart(io) + model = MOI.FileFormats.LP.Model() + if err + @test_throws LP.UnexpectedToken read!(io, model) + else + read!(io, model) + out = IOBuffer() + write(out, model) + seekstart(out) + file = read(out, String) + @test occursin("subject to\nc1: 2 x = 1\n", file) + end + end + return +end + +function test_parse_number() + cache = LP.Cache(LP.Model{Float64}()) + for (input, result) in [ + "1" => 1.0, + "02" => 2.0, + "- 1" => -1.0, + "- -1" => 1.0, + "+ 1" => 1.0, + "+ -1" => -1.0, + "- + 1" => -1.0, + "+ + 1" => 1.0, + "+ - + 1" => -1.0, + "+ - + -1" => 1.0, + "inf" => Inf, + "-inf" => -Inf, + "- inf" => - Inf, + "iNf" => Inf, + "iNfinitY" => Inf, + "infinity" => Inf, + "1.23e+01" => 12.3, + "1.23e-1" => 0.123, + "1.23E-1" => 0.123, + "1.23E+3" => 1230.0, + ] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + @test LP._parse_number(state, cache) == result + end + for input in ["x", "abc", "ten"] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + @test_throws LP.UnexpectedToken LP._parse_number(state, cache) + end + return +end + +function test_parse_quad_term() + cache = LP.Cache(LP.Model{Float64}()) + # Diagonal + for (input, coef) in [ + "x * x" => 2.0, + "\nx * x" => 2.0, + "x\n * x" => 2.0, + "x * \n x" => 2.0, + "x^2" => 2.0, + "x ^ 2" => 2.0, + "+ x * x" => 2.0, + "+ 2 * x * x" => 4.0, + "- x * x" => -2.0, + "- 2 * x * x" => -4.0, + "-2 x * x" => -4.0, + "2.2 x * x" => 4.4, + ] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + term = LP._parse_quad_term(state, cache, 1.0) + x = cache.variable_name_to_index["x"] + @test term == MOI.ScalarQuadraticTerm(coef, x, x) + seekstart(io) + term = LP._parse_quad_term(state, cache, -1.0) + @test term == MOI.ScalarQuadraticTerm(-coef, x, x) + end + # Off-diagonal + for (input, coef) in [ + "x * y" => 1.0, + "\nx * y" => 1.0, + "x\n * y" => 1.0, + "x * \n y" => 1.0, + "+ x * y" => 1.0, + "+ 2 * x * y" => 2.0, + "- x * y" => -1.0, + "- 2 * x * y" => -2.0, + "2.2 * x * y" => 2.2, + "2.2 x * y" => 2.2, + ] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + term = LP._parse_quad_term(state, cache, 1.0) + x = cache.variable_name_to_index["x"] + y = cache.variable_name_to_index["y"] + @test term == MOI.ScalarQuadraticTerm(coef, x, y) + seekstart(io) + term = LP._parse_quad_term(state, cache, -1.0) + @test term == MOI.ScalarQuadraticTerm(-coef, x, y) + end + for input in ["x^", "x^x", "x^0", "x^1", "x^3", "x * 2 * x"] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + @test_throws LP.UnexpectedToken LP._parse_quad_term(state, cache, -1.0) + end + return +end + +function test_parse_term() + cache = LP.Cache(LP.Model{Float64}()) + for (input, coef) in [ + "x" => 1.0, + "+ x" => 1.0, + "- x" => -1.0, + "- -x" => 1.0, + "+ -x" => -1.0, + "2.0 x" => 2.0, + "3.0 x" => 3.0, + "2.0 * x" => 2.0, + "3.2 * x" => 3.2, + ] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + term = LP._parse_term(state, cache, 1.0) + x = cache.variable_name_to_index["x"] + @test term == MOI.ScalarAffineTerm(coef, x) + seekstart(io) + term = LP._parse_term(state, cache, -1.0) + @test term == MOI.ScalarAffineTerm(-coef, x) + end + for input in ["subject to", ">= 1"] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + @test_throws LP.UnexpectedToken LP._parse_term(state, cache, 1.0) + end + return +end + +function test_parse_quad_expression() + cache 
= LP.Cache(LP.Model{Float64}())
+    for input in ["x^2", "[ x^2 ]/", "[ x^2 ]/3"]
+        io = IOBuffer(input)
+        seekstart(io)
+        state = LP.LexerState(io)
+        @test_throws(
+            LP.UnexpectedToken,
+            LP._parse_quad_expression(state, cache, 1.0),
+        )
+    end
+    return
+end
+
+function test_parse_set_prefix()
+    cache = LP.Cache(LP.Model{Float64}())
+    for (input, set) in [
+        "1.0 <=" => MOI.GreaterThan(1.0),
+        "1.0 <" => MOI.GreaterThan(1.0),
+        "1.0 >=" => MOI.LessThan(1.0),
+        "1.0 >" => MOI.LessThan(1.0),
+        "1.0 ==" => MOI.EqualTo(1.0),
+        "1.0 =" => MOI.EqualTo(1.0),
+        # Theirs not to reason why, theirs but to do and die
+        "1.0 =<" => MOI.GreaterThan(1.0),
+        "1.0 =>" => MOI.LessThan(1.0),
+    ]
+        io = IOBuffer(input)
+        seekstart(io)
+        state = LP.LexerState(io)
+        @test LP._parse_set_prefix(state, cache) == set
+    end
+    for input in ["-> 1"]
+        io = IOBuffer(input)
+        seekstart(io)
+        state = LP.LexerState(io)
+        @test_throws LP.UnexpectedToken LP._parse_set_prefix(state, cache)
+    end
+    return
+end
+
+function test_parse_set_suffix()
+    cache = LP.Cache(LP.Model{Float64}())
+    for (input, set) in [
+        "free" => nothing,
+        "Free" => nothing,
+        ">= 1.0" => MOI.GreaterThan(1.0),
+        "> 1.0" => MOI.GreaterThan(1.0),
+        "<= 1.0" => MOI.LessThan(1.0),
+        "< 1.0" => MOI.LessThan(1.0),
+        "== 1.0" => MOI.EqualTo(1.0),
+        "= 1.0" => MOI.EqualTo(1.0),
+        # Theirs not to reason why, theirs but to do and die
+        "=< 1.0" => MOI.LessThan(1.0),
+        "=> 1.0" => MOI.GreaterThan(1.0),
+    ]
+        io = IOBuffer(input)
+        seekstart(io)
+        state = LP.LexerState(io)
+        @test LP._parse_set_suffix(state, cache) == set
+    end
+    for input in ["-> 1"]
+        io = IOBuffer(input)
+        seekstart(io)
+        state = LP.LexerState(io)
+        @test_throws LP.UnexpectedToken LP._parse_set_suffix(state, cache)
+    end
+    return
+end
+
 end  # module
 
 TestLP.runtests()

From ee1cb14c31c00c211fd8e662f10ee8f5f9e19450 Mon Sep 17 00:00:00 2001
From: Oscar Dowson
Date: Thu, 11 Sep 2025 14:22:24 +1200
Subject: [PATCH 06/10] Update

---
 src/FileFormats/LP/read.jl | 29 +++++++++++++++++++++++------
 test/FileFormats/LP/LP.jl  | 29 +++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl
index 0a17a0e478..e2eff1e1dd 100644
--- a/src/FileFormats/LP/read.jl
+++ b/src/FileFormats/LP/read.jl
@@ -23,8 +23,11 @@ end
 Read `io` in the LP file format and store the result in `model`.
 
 This reader attempts to follow the CPLEX LP format, because others like the
-lpsolve version are very...flexible...in how they accept input. Read more about
-them here: http://lpsolve.sourceforge.net
+lpsolve version are very...flexible...in how they accept input.
+
+Read more about the format here:
+ * http://lpsolve.sourceforge.net
+ * https://web.mit.edu/lpsolve/doc/CPLEX-format.htm
 """
 function Base.read!(io::IO, model::Model{T}) where {T}
     if !MOI.is_empty(model)
@@ -98,6 +101,7 @@ const _KEYWORDS = Dict(
     "such that" => :CONSTRAINTS,
     "st" => :CONSTRAINTS,
     "s.t." => :CONSTRAINTS,
+    "st." => :CONSTRAINTS,
     # BOUNDS
     "bounds" => :BOUNDS,
     "bound" => :BOUNDS,
@@ -244,7 +248,16 @@ function Base.read(state::LexerState, ::Type{Token}, kind::_TokenKind)
     return _expect(token, kind)
 end
 
-_is_idenfifier(c::Char) = !(isspace(c) || c in ('+', '-', '*', '^', ':'))
+# We're a bit more relaxed than typical, allowing any letter or digit, not just
+# ASCII.
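
To make the relaxed rule concrete, a sketch of how the two predicates defined below behave (the values follow directly from the definitions; `π` counts because `isletter` is true for any Unicode letter):

    LP._is_starting_identifier('π')  # true: any letter may start a name
    LP._is_starting_identifier('2')  # false: a digit cannot start a name
    LP._is_identifier('2')           # true: digits are fine after the first character
    LP._is_starting_identifier('*')  # false: `*` always lexes as an operator
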
+function _is_identifier(c::Char) + return isletter(c) || isdigit(c) || c in "!\"#\$%&()/,.;?@_`'{}|~" +end + +function _is_starting_identifier(c::Char) + return isletter(c) || c in "!\"#\$%&(),;?@_`'{}|~" +end + _is_number(c::Char) = isdigit(c) || c in ('.', 'e', 'E', '+', '-') function Base.peek(state::LexerState, ::Type{Token}, n::Int = 1) @@ -276,9 +289,9 @@ function _peek_inner(state::LexerState) read(state, Char) end return Token(_TOKEN_NUMBER, String(take!(buf))) - elseif isletter(c) || c == '_' # Identifier / keyword + elseif _is_starting_identifier(c) # Identifier / keyword buf = IOBuffer() - while (c = peek(state, Char)) !== nothing && _is_idenfifier(c) + while (c = peek(state, Char)) !== nothing && _is_identifier(c) write(buf, c) read(state, Char) end @@ -382,7 +395,11 @@ function _parse_number(state::LexerState, cache::Cache{T})::T where {T} end end _expect(token, _TOKEN_NUMBER) - return parse(T, token.value) + ret = tryparse(T, token.value) + if ret === nothing + throw(UnexpectedToken(token)) + end + return ret end # QUAD_TERM := diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index 7d4ce7707b..0f8c1a7ce8 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -1210,6 +1210,7 @@ function test_subject_to_name() "Such That" => false, "st" => false, "s.t." => false, + "st." => false, "subject that" => true, "subject\nto" => true, "s. t." => true, @@ -1232,6 +1233,33 @@ function test_subject_to_name() return end +function test_parse_variable() + cache = LP.Cache(LP.Model{Float64}()) + for input in [ + "x", + "X", + "e", + "abc!\"D", + "π", + "𝔼1π!~a", + "x!\"#\$%&()/,.;?@_`'{}|~", + "aAc2", + ] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + x = LP._parse_variable(state, cache) + @test cache.variable_name_to_index[input] == x + end + for input in ["2", "2x", ".x"] + io = IOBuffer(input) + seekstart(io) + state = LP.LexerState(io) + @test_throws LP.UnexpectedToken LP._parse_variable(state, cache) + end + return +end + function test_parse_number() cache = LP.Cache(LP.Model{Float64}()) for (input, result) in [ @@ -1338,6 +1366,7 @@ function test_parse_term() "- x" => -1.0, "- -x" => 1.0, "+ -x" => -1.0, + "2x" => 2.0, "2.0 x" => 2.0, "3.0 x" => 3.0, "2.0 * x" => 2.0, From b5702dff0a7b3303c4fb6c767330a6994bb2ff3f Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 14:23:37 +1200 Subject: [PATCH 07/10] Update --- test/FileFormats/LP/LP.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index 0f8c1a7ce8..8a31273449 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -1423,7 +1423,7 @@ function test_parse_set_prefix() state = LP.LexerState(io) @test LP._parse_set_prefix(state, cache) == set end - for input in ["-> 1"] + for input in ["1 ->"] io = IOBuffer(input) seekstart(io) state = LP.LexerState(io) From 2bf659258e98b3b9b53fe995b2b23cec3747d96c Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 15:07:20 +1200 Subject: [PATCH 08/10] Update --- src/FileFormats/LP/read.jl | 20 ++++++++++---------- test/FileFormats/LP/LP.jl | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index e2eff1e1dd..2c1c809ad0 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -275,10 +275,10 @@ end function _peek_inner(state::LexerState) while (c = peek(state, Char)) !== nothing if c == '\n' - read(state, Char) - return 
Token(_TOKEN_NEWLINE, "\n") + _ = read(state, Char) + return Token(_TOKEN_NEWLINE, nothing) elseif isspace(c) # Whitespace - read(state, Char) + _ = read(state, Char) elseif c == '\\' # Comment: backslash until newline while (c = read(state, Char)) !== nothing && c != '\n' end @@ -286,14 +286,14 @@ function _peek_inner(state::LexerState) buf = IOBuffer() while (c = peek(state, Char)) !== nothing && _is_number(c) write(buf, c) - read(state, Char) + _ = read(state, Char) end return Token(_TOKEN_NUMBER, String(take!(buf))) elseif _is_starting_identifier(c) # Identifier / keyword buf = IOBuffer() while (c = peek(state, Char)) !== nothing && _is_identifier(c) write(buf, c) - read(state, Char) + _ = read(state, Char) end val = String(take!(buf)) l_val = lowercase(val) @@ -315,17 +315,17 @@ function _peek_inner(state::LexerState) end return Token(_TOKEN_IDENTIFIER, val) elseif (op = get(_OPERATORS, c, nothing)) !== nothing - read(state, Char) # Skip c + _ = read(state, Char) # Skip c if c == '-' && peek(state, Char) == '>' - read(state, Char) - return Token(_TOKEN_IMPLIES, "->") + _ = read(state, Char) + return Token(_TOKEN_IMPLIES, nothing) elseif c == '=' && peek(state, Char) in ('<', '>') c = read(state, Char) # Allow =< and => as <= and >= - return Token(_OPERATORS[c], string(c)) + return Token(_OPERATORS[c], nothing) elseif c in ('<', '>', '=') && peek(state, Char) == '=' _ = read(state, Char) # Allow <=, >=, and == end - return Token(op, string(c)) + return Token(op, nothing) else throw(UnexpectedToken(Token(_TOKEN_UNKNOWN, "$c"))) end diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl index 8a31273449..50a33a5cf8 100644 --- a/test/FileFormats/LP/LP.jl +++ b/test/FileFormats/LP/LP.jl @@ -1289,7 +1289,7 @@ function test_parse_number() state = LP.LexerState(io) @test LP._parse_number(state, cache) == result end - for input in ["x", "abc", "ten"] + for input in ["x", "abc", "ten", "1.1.1", "1eE1"] io = IOBuffer(input) seekstart(io) state = LP.LexerState(io) From 844f3d4273c4dcbe88f92c16be847b97df80b7cc Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 16:43:48 +1200 Subject: [PATCH 09/10] Add a better error handler --- src/FileFormats/LP/read.jl | 212 ++++++++++++++++++++++++++++--------- test/FileFormats/LP/LP.jl | 11 +- 2 files changed, 173 insertions(+), 50 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 2c1c809ad0..8a905d29cd 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -64,10 +64,23 @@ function Base.read!(io::IO, model::Model{T}) where {T} _parse_bound(state, cache) elseif keyword == :SOS _parse_constraint(state, cache) + elseif keyword == :END + _throw_unexpected_token( + state, + token, + "No file contents are allowed after `end`.", + ) else - throw(UnexpectedToken(token)) + _throw_unexpected_token( + state, + token, + "Parsing this section is not supported by the current reader.", + ) end end + # if keyword != :END + # TODO(odow): decide if we should throw an error here. 
+ # end for x in cache.variable_with_default_bound MOI.add_constraint(model, x, MOI.GreaterThan(0.0)) end @@ -141,6 +154,27 @@ const _KEYWORDS = Dict( _TOKEN_NEWLINE, _TOKEN_UNKNOWN, ) + +const _KIND_TO_MSG = Dict{_TokenKind,String}( + _TOKEN_KEYWORD => "a keyword", + _TOKEN_IDENTIFIER => "a variable name", + _TOKEN_NUMBER => "a number", + _TOKEN_ADDITION => "the symbol `+`", + _TOKEN_SUBTRACTION => "the symbol `-`", + _TOKEN_MULTIPLICATION => "the symbol `*`", + _TOKEN_DIVISION => "the symbol `/`", + _TOKEN_EXPONENT => "the symbol `^`", + _TOKEN_OPEN_BRACKET => "the symbol `[`", + _TOKEN_CLOSE_BRACKET => "the symbol `]`", + _TOKEN_GREATER_THAN => "the symbol `>=`", + _TOKEN_LESS_THAN => "the symbol `<=`", + _TOKEN_EQUAL_TO => "the symbol `==`", + _TOKEN_COLON => "the symbol `:`", + _TOKEN_IMPLIES => "the symbol `->`", + _TOKEN_NEWLINE => "a new line", + _TOKEN_UNKNOWN => "some unknown symbol", +) + """ const _OPERATORS::Dict{Char,_TokenKind} @@ -175,28 +209,7 @@ unprocessed value. struct Token kind::_TokenKind value::Union{Nothing,String} -end - -""" - struct UnexpectedToken <: Exception - token::Token - end - -This error is thrown when we encounter an unexpected token when parsing the LP -file. No other information is available. - -TODO: we could improve this by storing line information or other context to help -the user diagnose the problem. -""" -struct UnexpectedToken <: Exception - token::Token -end - -function _expect(token::Token, kind::_TokenKind) - if token.kind != kind - throw(UnexpectedToken(token)) - end - return token + pos::Int end """ @@ -216,9 +229,53 @@ It stores: """ mutable struct LexerState io::IO + line::Int peek_char::Union{Nothing,Char} peek_tokens::Vector{Token} - LexerState(io::IO) = new(io, nothing, Token[]) + LexerState(io::IO) = new(io, 1, nothing, Token[]) +end + +""" + struct UnexpectedToken <: Exception + token::Token + end + +This error is thrown when we encounter an unexpected token when parsing the LP +file. No other information is available. +""" +struct UnexpectedToken <: Exception + token::Token + line::Int + msg::String +end + +function _throw_unexpected_token(state::LexerState, token::Token, msg::String) + offset = min(40, token.pos) + seek(state.io, token.pos - offset) + line = String(read(state.io, 2 * offset)) + i = something(findprev('\n', line, offset-1), 0) + j = something(findnext('\n', line, offset), length(line) + 1) + help = string(line[i+1:j-1], "\n", " "^(offset - i + - 1), "^\n", msg) + return throw(UnexpectedToken(token, state.line, help)) +end + +function Base.showerror(io::IO, err::UnexpectedToken) + return print( + io, + "Error parsing LP file. Got an unexpected token on line $(err.line):\n", + err.msg, + ) +end + +function _expect(state::LexerState, token::Token, kind::_TokenKind) + if token.kind != kind + _throw_unexpected_token( + state, + token, + string("We expected this token to be ", _KIND_TO_MSG[kind]), + ) + end + return token end function Base.peek(state::LexerState, ::Type{Char}) @@ -236,8 +293,12 @@ end function Base.read(state::LexerState, ::Type{Token}) token = peek(state, Token, 1) - if isempty(state.peek_tokens) - throw(UnexpectedToken(Token(_TOKEN_UNKNOWN, "EOF"))) + if isempty(state.peek_tokens) + _throw_unexpected_token( + state, + Token(_TOKEN_UNKNOWN, "EOF", position(state.io)), + "Unexpected end to the file. 
We weren't finished yet.",
+        )
     end
     popfirst!(state.peek_tokens)
     return token
@@ -245,7 +306,7 @@ end
 
 function Base.read(state::LexerState, ::Type{Token}, kind::_TokenKind)
     token = read(state, Token)
-    return _expect(token, kind)
+    return _expect(state, token, kind)
 end
 
 # We're a bit more relaxed than typical, allowing any letter or digit, not just
@@ -274,9 +335,11 @@ end
 
 function _peek_inner(state::LexerState)
     while (c = peek(state, Char)) !== nothing
+        pos = position(state.io)
         if c == '\n'
+            state.line += 1
             _ = read(state, Char)
-            return Token(_TOKEN_NEWLINE, nothing)
+            return Token(_TOKEN_NEWLINE, nothing, pos)
         elseif isspace(c) # Whitespace
             _ = read(state, Char)
         elseif c == '\\' # Comment: backslash until newline
@@ -288,7 +351,7 @@ function _peek_inner(state::LexerState)
             write(buf, c)
             _ = read(state, Char)
         end
-        return Token(_TOKEN_NUMBER, String(take!(buf)))
+        return Token(_TOKEN_NUMBER, String(take!(buf)), pos)
     elseif _is_starting_identifier(c) # Identifier / keyword
         buf = IOBuffer()
         while (c = peek(state, Char)) !== nothing && _is_identifier(c)
@@ -301,33 +364,37 @@ function _peek_inner(state::LexerState)
         val = String(take!(buf))
         l_val = lowercase(val)
         if l_val == "subject"
             t = peek(state, Token)
             if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "to"
                 _ = read(state, Token) # Skip "to"
-                return Token(_TOKEN_KEYWORD, "CONSTRAINTS")
+                return Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos)
             end
         elseif l_val == "such"
             t = peek(state, Token)
             if t.kind == _TOKEN_IDENTIFIER && lowercase(t.value) == "that"
                 _ = read(state, Token) # Skip "such"
-                return Token(_TOKEN_KEYWORD, "CONSTRAINTS")
+                return Token(_TOKEN_KEYWORD, "CONSTRAINTS", pos)
             end
         end
         if (kw = get(_KEYWORDS, l_val, nothing)) !== nothing
-            return Token(_TOKEN_KEYWORD, string(kw))
+            return Token(_TOKEN_KEYWORD, string(kw), pos)
         end
-        return Token(_TOKEN_IDENTIFIER, val)
+        return Token(_TOKEN_IDENTIFIER, val, pos)
     elseif (op = get(_OPERATORS, c, nothing)) !== nothing
         _ = read(state, Char) # Skip c
         if c == '-' && peek(state, Char) == '>'
             _ = read(state, Char)
-            return Token(_TOKEN_IMPLIES, nothing)
+            return Token(_TOKEN_IMPLIES, nothing, pos)
         elseif c == '=' && peek(state, Char) in ('<', '>')
             c = read(state, Char) # Allow =< and => as <= and >=
-            return Token(_OPERATORS[c], nothing)
+            return Token(_OPERATORS[c], nothing, pos)
         elseif c in ('<', '>', '=') && peek(state, Char) == '='
             _ = read(state, Char) # Allow <=, >=, and ==
         end
-        return Token(op, nothing)
+        return Token(op, nothing, pos)
     else
-        throw(UnexpectedToken(Token(_TOKEN_UNKNOWN, "$c")))
+        _throw_unexpected_token(
+            state,
+            Token(_TOKEN_UNKNOWN, "$c", pos),
+            "This character is not supported in an LP file.",
+        )
     end
         end
     end
     return
@@ -391,13 +458,21 @@ function _parse_number(state::LexerState, cache::Cache{T})::T where {T}
         if v == "inf" || v == "infinity"
             return typemax(T)
         else
-            throw(UnexpectedToken(token))
+            _throw_unexpected_token(
+                state,
+                token,
+                "We expected this to be a number.",
+            )
         end
     end
-    _expect(token, _TOKEN_NUMBER)
+    _expect(state, token, _TOKEN_NUMBER)
     ret = tryparse(T, token.value)
     if ret === nothing
-        throw(UnexpectedToken(token))
+        _throw_unexpected_token(
+            state,
+            token,
+            "We expected this to be a number.",
+        )
     end
     return ret
 end
@@ -435,7 +510,7 @@ function _parse_quad_term(
         _skip_newlines(state)
         n = read(state, Token, _TOKEN_NUMBER)
         if n.value != "2"
-            throw(UnexpectedToken(n))
+            _throw_unexpected_token(state, n, "Only `^ 2` is supported.")
         end
         return MOI.ScalarQuadraticTerm(T(2) * coef, x1, x1)
     end
@@ -471,7 +546,11 @@ function _parse_quad_expression(
             _ = read(state, Token)
             break
         else
-            return throw(UnexpectedToken(p))
+            _throw_unexpected_token(
+                state,
+                p,
+                "We expected this to be a ] to end the quadratic expression.",
+            )
         end
     end
     _skip_newlines(state)
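
To show what this machinery produces, a sketch of a rendered error for a malformed quadratic term (the layout follows the `showerror` method above; the line number, source snippet, and caret position are illustrative):

    Error parsing LP file. Got an unexpected token on line 2:
    obj: [ x ^ 3 ] / 2
               ^
    Only `^ 2` is supported.
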
@@ -480,7 +559,11 @@ function _parse_quad_expression(
         # Must be /2
         n = read(state, Token, _TOKEN_NUMBER)
         if n.value != "2"
-            throw(UnexpectedToken(n))
+            _throw_unexpected_token(
+                state,
+                n,
+                "The only supported value here is `] / 2`.",
+            )
         end
         for (i, term) in enumerate(f.quadratic_terms)
             f.quadratic_terms[i] = MOI.ScalarQuadraticTerm(
@@ -530,7 +613,9 @@ function _parse_term(
         _ = read(state, Token, _TOKEN_MULTIPLICATION)
         x = _parse_variable(state, cache)
         return MOI.ScalarAffineTerm(coef, x)
-    else
+    elseif _next_token_is(state, _TOKEN_NEWLINE) ||
+           _next_token_is(state, _TOKEN_ADDITION) ||
+           _next_token_is(state, _TOKEN_SUBTRACTION)
         # NUMBER
         return coef
     end
@@ -538,7 +623,12 @@ function _parse_term(
         # QUADRATIC_EXPRESSION
         return _parse_quad_expression(state, cache, prefix)
     end
-    return throw(UnexpectedToken(peek(state, Token)))
+    token = peek(state, Token)
+    return _throw_unexpected_token(
+        state,
+        token,
+        "Got $(_KIND_TO_MSG[token.kind]), But we expected this to be a new term in the expression.",
+    )
 end
 
 function _add_to_expression!(f::MOI.ScalarQuadraticFunction{T}, x::T) where {T}
@@ -611,7 +701,11 @@ function _parse_set_suffix(state, cache)
         rhs = _parse_number(state, cache)
         return MOI.EqualTo(rhs)
     else
-        throw(UnexpectedToken(p))
+        _throw_unexpected_token(
+            state,
+            p,
+            "We expected this to be an inequality like `>=`, `<=`, or `==`.",
+        )
     end
 end
 
@@ -633,7 +727,11 @@ function _parse_set_prefix(state, cache)
     elseif p.kind == _TOKEN_EQUAL_TO
         return MOI.EqualTo(lhs)
     else
-        throw(UnexpectedToken(p))
+        _throw_unexpected_token(
+            state,
+            p,
+            "We expected this to be an inequality like `>=`, `<=`, or `==`.",
+        )
     end
 end
 
@@ -731,12 +829,24 @@ end
 function _parse_sos_constraint(state::LexerState, cache::Cache{T}) where {T}
     t = read(state, Token, _TOKEN_IDENTIFIER) # Si
     if !(t.value == "S1" || t.value == "S2")
-        throw(UnexpectedToken(t))
+        _throw_unexpected_token(
+            state,
+            t,
+            "This must be either `S1` for SOS-I or `S2` for SOS-II.",
+        )
     end
     _ = read(state, Token, _TOKEN_COLON)
     _ = read(state, Token, _TOKEN_COLON)
     f, w = MOI.VectorOfVariables(MOI.VariableIndex[]), T[]
     while true
+        if _next_token_is(state, _TOKEN_NEWLINE)
+            t = peek(state, Token)
+            _throw_unexpected_token(
+                state,
+                t,
+                "SOS constraints cannot be spread across lines.",
+            )
+        end
         push!(f.variables, _parse_variable(state, cache))
         _ = read(state, Token, _TOKEN_COLON)
         push!(w, _parse_number(state, cache))
@@ -773,7 +883,11 @@ function _parse_indicator_constraint(
     elseif t.value == "1"
         MOI.ACTIVATE_ON_ONE
     else
-        throw(UnexpectedToken(t))
+        _throw_unexpected_token(
+            state,
+            t,
+            "This must be either `= 0` or `= 1`.",
+        )
     end
     _ = read(state, Token, _TOKEN_IMPLIES)
     f = _parse_expression(state, cache)
diff --git a/test/FileFormats/LP/LP.jl b/test/FileFormats/LP/LP.jl
index 50a33a5cf8..85f6523449 100644
--- a/test/FileFormats/LP/LP.jl
+++ b/test/FileFormats/LP/LP.jl
@@ -1091,7 +1091,16 @@ function test_invalid_token_in_sos()
         """,
     )
     seekstart(io)
-    @test_throws LP.UnexpectedToken read!(io, model)
+    contents = try
+        read!(io, model)
+    catch err
+        sprint(showerror, err)
+    end
+    @test contents == """
+        Error parsing LP file. 
Got an unexpected token on line 5: + c11: S1:: x 1.0 y 2.0 + ^ + We expected this token to be the symbol `:`""" return end From b07898c33e14878a21323685e1c7d2e681a06ac0 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Thu, 11 Sep 2025 16:46:22 +1200 Subject: [PATCH 10/10] Update --- src/FileFormats/LP/read.jl | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/FileFormats/LP/read.jl b/src/FileFormats/LP/read.jl index 8a905d29cd..ceeed12e4a 100644 --- a/src/FileFormats/LP/read.jl +++ b/src/FileFormats/LP/read.jl @@ -255,7 +255,7 @@ function _throw_unexpected_token(state::LexerState, token::Token, msg::String) line = String(read(state.io, 2 * offset)) i = something(findprev('\n', line, offset-1), 0) j = something(findnext('\n', line, offset), length(line) + 1) - help = string(line[i+1:j-1], "\n", " "^(offset - i + - 1), "^\n", msg) + help = string(line[(i+1):(j-1)], "\n", " "^(offset - i + - 1), "^\n", msg) return throw(UnexpectedToken(token, state.line, help)) end @@ -293,7 +293,7 @@ end function Base.read(state::LexerState, ::Type{Token}) token = peek(state, Token, 1) - if isempty(state.peek_tokens) + if isempty(state.peek_tokens) _throw_unexpected_token( state, Token(_TOKEN_UNKNOWN, "EOF", position(state.io)), @@ -627,7 +627,7 @@ function _parse_term( return _throw_unexpected_token( state, token, - "Got $(_KIND_TO_MSG[token.kind]), But we expected this to be a new term in the expression.", + "Got $(_KIND_TO_MSG[token.kind]), but we expected this to be a new term in the expression.", ) end @@ -883,11 +883,7 @@ function _parse_indicator_constraint( elseif t.value == "1" MOI.ACTIVATE_ON_ONE else - _throw_unexpected_token( - state, - t, - "This must be either `= 0` or `= 1`.", - ) + _throw_unexpected_token(state, t, "This must be either `= 0` or `= 1`.") end _ = read(state, Token, _TOKEN_IMPLIES) f = _parse_expression(state, cache)
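
Putting the series together, an end-to-end sketch of the new reader (the file contents are illustrative; the write-back at the end uses the same pattern as the tests above):

    import MathOptInterface as MOI

    model = MOI.FileFormats.LP.Model()
    io = IOBuffer("""
    Minimize
    obj: x + 2 y
    subject to
    c1: x + y >= 1
    Bounds
    x free
    End
    """)
    seekstart(io)
    read!(io, model)          # the recursive descent parser from this series
    out = IOBuffer()
    write(out, model)         # round-trip through the existing writer
    seekstart(out)
    print(read(out, String))
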