Skip to content

Commit

Permalink
pl.data.read: in csv mode, empty fields considered zero. data.write: …
Browse files Browse the repository at this point in the history
…fix for default delimiter. Updated documentation using ldoc subparams
  • Loading branch information
Steve Donovan committed Sep 9, 2015
1 parent 0306941 commit e15d8ba
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 11 deletions.
32 changes: 21 additions & 11 deletions lua/pl/data.lua
Expand Up @@ -201,34 +201,42 @@ end

--- read a delimited file in a Lua table.
-- By default, attempts to treat first line as separated list of fieldnames.
-- @param file a filename or a file-like object (default stdin)
-- @tab cnfg options table: can override `delim` (a string pattern), `fieldnames` (a list),
-- specify `no_convert` (default is to conversion), `numfields` (indices of columns known
-- to be numbers) and `thousands_dot` (thousands separator in Excel CSV is '.').
-- If `csv` is set then fields may be double-quoted and contain commas;
-- @param file a filename or a file-like object
-- @tab cnfg parsing options
-- @string cnfg.delim a string pattern to split fields
-- @array cnfg.fieldnames (i.e. don't read from first line)
-- @bool cnfg.no_convert (default is to try conversion on first data line)
-- @tab cnfg.convert table of custom conversion functions with column keys
-- @int cnfg.numfields indices of columns known to be numbers
-- @bool cnfg.last_field_collect only split as many fields as fieldnames.
-- @int cnfg.thousands_dot thousands separator in Excel CSV is '.'
-- @bool cnfg.csv fields may be double-quoted and contain commas;
-- Also, empty fields are considered to be equivalent to zero.
-- @return `data` object, or `nil`
-- @return error message. May be a file error, 'not a file-like object'
-- or a conversion error
function data.read(file,cnfg)
local err,opened,count,line,csv
local err,opened,count,line
local D = {}
if not cnfg then cnfg = {} end
local f,err,opened = open_file(file,'r')
if not f then return nil, err end
local thousands_dot = cnfg.thousands_dot
local csv = cnfg.csv
if csv then cnfg.delim = ',' end

-- note that using dot as the thousands separator (@thousands_dot)
-- requires a special conversion function!
-- requires a special conversion function! For CSV, _empty fields_ are
-- considered to default to numerial zeroes.
local tonumber = tonumber
local function try_number(x)
if thousands_dot then x = x:gsub('%.(...)','%1') end
if csv and x == '' then x = '0' end
local v = tonumber(x)
if v == nil then return nil,"not a number" end
return v
end

csv = cnfg.csv
if csv then cnfg.delim = ',' end
count = 1
line = f:read()
if not line then return nil, "empty file" end
Expand Down Expand Up @@ -297,7 +305,7 @@ function data.read(file,cnfg)
if not cnfg.no_convert then
local fields = split(line,D.delim,csv,nfields)
for i = 1,#fields do
if not find(numfields,i) and tonumber(fields[i]) then
if not find(numfields,i) and try_number(fields[i]) then
append_conversion(i,try_number)
end
end
Expand Down Expand Up @@ -346,20 +354,22 @@ end
-- @param file filename or file-like object
-- @tparam[opt] {string} fieldnames list of fields (optional)
-- @string[opt='\t'] delim delimiter (default tab)
-- @return true or nil, error
function data.write (data,file,fieldnames,delim)
local f,err,opened = open_file(file,'w')
if not f then return nil, err end
if not fieldnames then
fieldnames = data.fieldnames
end
delim = delim or '\t'
if fieldnames and #fieldnames > 0 then
f:write(concat(fieldnames,delim),'\n')
end
delim = delim or '\t'
for i = 1,#data do
write_row(data,f,data[i],delim)
end
if opened then f:close() end
return true
end


Expand Down
28 changes: 28 additions & 0 deletions tests/test-data.lua
Expand Up @@ -218,3 +218,31 @@ d = data.read(f,{convert={[1]=date_convert},last_field_collect=true})
asserteq(#d[1],2)
asserteq(d[2][1]:year(),2010)
d = {{1,2,3},{10,20,30}}
out = stringio.create()
data.write(d,out,{'A','B','C'},',')
asserteq(out:value(),
[[
A,B,C
1,2,3
10,20,30
]])
out = stringio.create()
d.fieldnames = {'A','B','C'}
data.write(d,out)
asserteq(out:value(),
[[
A B C
1 2 3
10 20 30
]])
d = data.read(stringio.open 'One,Two\n1,\n,20\n',{csv=true})
asserteq(d,{
{1,0},{0,20},
original_fieldnames={"One","Two"},fieldnames={"One","Two"},delim=","
})

0 comments on commit e15d8ba

Please sign in to comment.