Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove EOL Options (rely on xparse correctly parsing up to the invalid delim) #28

Merged
merged 5 commits into from
Oct 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
DataFrames = "1"
DocStringExtensions = "0.8.5"
InlineStrings = "1"
Parsers = "2"
Parsers = "2.1"
PrettyTables = "1"
Tables = "1"
julia = "1"
Expand Down
54 changes: 18 additions & 36 deletions src/parsing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,6 @@ const OPTIONS = Parsers.Options(
closequotechar='\'',
delim=',',
)
# Change the delimiter as a way to handle end-of-line comments.
const EOL_OPTIONS = Parsers.Options(
sentinel=missing,
quoted=true,
openquotechar='\'',
closequotechar='\'',
delim='/',
)

"""
    getbytes(source) -> (bytes, pos, len)

Return a 3-tuple describing the bytes held by `source`.

- For a `Vector{UInt8}`: the vector itself, `1`, and `length(source)`.
- For an `IOBuffer`: its `data`, `ptr` and `size` fields, in that order.
"""
function getbytes(source::Vector{UInt8})
    return source, 1, length(source)
end

function getbytes(source::IOBuffer)
    return source.data, source.ptr, source.size
end
Expand All @@ -41,29 +33,27 @@ function parse_network(source)

nrows = count_nrow(bytes, pos, len, OPTIONS)
@debug "buses" nrows pos
buses, pos = parse_records!(Buses(nrows), bytes, pos, len, OPTIONS, EOL_OPTIONS)
buses, pos = parse_records!(Buses(nrows), bytes, pos, len, OPTIONS)

nrows = count_nrow(bytes, pos, len, OPTIONS)
@debug "loads" nrows pos
loads, pos = parse_records!(Loads(nrows), bytes, pos, len, OPTIONS, EOL_OPTIONS)
loads, pos = parse_records!(Loads(nrows), bytes, pos, len, OPTIONS)

nrows = count_nrow(bytes, pos, len, OPTIONS)
@debug "gens" nrows pos
gens, pos = parse_records!(Generators(nrows), bytes, pos, len, OPTIONS, EOL_OPTIONS)
gens, pos = parse_records!(Generators(nrows), bytes, pos, len, OPTIONS)

nrows = count_nrow(bytes, pos, len, OPTIONS)
@debug "branches" nrows pos
branches, pos = parse_records!(Branches(nrows), bytes, pos, len, OPTIONS, EOL_OPTIONS)
branches, pos = parse_records!(Branches(nrows), bytes, pos, len, OPTIONS)

# 2-winding Transformers data is 4 lines each... so this will be correct when all
# transformers are 2-winding, and become incorrect once there are any 3-winding transformers.
# TODO: ditch counting of rows and use `push!`
# https://github.com/nickrobinson251/PowerFlowData.jl/issues/5
nrows = count_nrow(bytes, pos, len, OPTIONS) ÷ 4
@debug "2-winding transformers" nrows pos
transformers, pos = parse_records!(
Transformers(nrows), bytes, pos, len, OPTIONS, EOL_OPTIONS
)
transformers, pos = parse_records!(Transformers(nrows), bytes, pos, len, OPTIONS)
return Network(caseid, buses, loads, gens, branches, transformers)
end

Expand Down Expand Up @@ -92,11 +82,11 @@ function parse_caseid(bytes, pos, len, options)
return CaseID(ic, sbase), pos
end

function parse_records!(rec::R, bytes, pos, len, options, eol_options)::Tuple{R, Int} where {R <: Records}
function parse_records!(rec::R, bytes, pos, len, options)::Tuple{R, Int} where {R <: Records}
nrows = length(getfield(rec, 1))
nrows == 0 && return rec, pos
for row in 1:nrows
pos, code = parse_row!(rec, row, bytes, pos, len, options, eol_options)
pos, code = parse_row!(rec, row, bytes, pos, len, options)
end

# Data input is terminated by specifying a bus number of zero.
Expand Down Expand Up @@ -148,24 +138,18 @@ function next_line(bytes, pos, len)
return pos
end

function parse_row!(rec::Records, row::Int, bytes, pos, len, options, eol_options)
ncols = nfields(rec)
function parse_row!(rec::R, row::Int, bytes, pos, len, options) where {R <: Records}
ncols = fieldcount(R)
local code::Parsers.ReturnCode
for col in 1:ncols
eltyp = eltype(fieldtype(typeof(rec), col))
opts = ifelse(col == ncols, eol_options, options)
# TODO: come up with a way to avoid type instability/dynamic dispatch
# in this call to parse_value (this will affect performance a lot!)
val, pos, code = parse_value(eltyp, bytes, pos, len, opts)
val, pos, code = parse_value(eltyp, bytes, pos, len, options)
@inbounds getfield(rec, col)[row] = val

@debug codes(code) row col pos newline=newline(code)
end
# Because we're working around end-of-line comments,
# rows with comments won't have hit the newline character yet
if !newline(code)
pos = next_line(bytes, pos, len)
end
return pos, code
end

Expand All @@ -177,7 +161,7 @@ end
# To hold "three-winding" data we need `sum((14, 11, 16, 16, 16)) == 73` columns, and
# column 14+3=17 and column 14+11+16+2=43 are "special" in that they may or may not be at
# the end of a line.
function parse_row!(rec::Transformers, row::Int, bytes, pos, len, options, eol_options)
function parse_row!(rec::Transformers, row::Int, bytes, pos, len, options)
ncols = fieldcount(Transformers)
@assert ncols == last(EOL_COLS)

Expand All @@ -186,8 +170,7 @@ function parse_row!(rec::Transformers, row::Int, bytes, pos, len, options, eol_o
is_t2 = false
while col ≤ ncols
eltyp = nonmissingtype(eltype(fieldtype(typeof(rec), col)))
opts = ifelse(col in EOL_COLS, eol_options, options)
val, pos, code = parse_value(eltyp, bytes, pos, len, opts)
val, pos, code = parse_value(eltyp, bytes, pos, len, options)
@inbounds getfield(rec, col)[row] = val

@debug codes(code) row col pos newline=newline(code)
Expand All @@ -209,12 +192,6 @@ function parse_row!(rec::Transformers, row::Int, bytes, pos, len, options, eol_o
end
end

# Because we're working around end-of-line comments,
# rows with comments won't have hit the newline character yet
if col in EOL_COLS && !newline(code)
pos = next_line(bytes, pos, len)
end

col += 1
end
return pos, code
Expand All @@ -223,7 +200,12 @@ end
function parse_value(T, bytes, pos, len, options)
res = xparse(T, bytes, pos, len, options)

invalid(res.code) && @warn codes(res.code) pos
code = res.code
if invalid(code)
if !(newline(code) && invaliddelimiter(code)) # not due to end-of-line comments
@warn codes(res.code) pos
end
end

pos += res.tlen
code = res.code
Expand Down