diff --git a/lua/pl/data.lua b/lua/pl/data.lua index 24cd33ed..2bc443f7 100644 --- a/lua/pl/data.lua +++ b/lua/pl/data.lua @@ -201,34 +201,42 @@ end --- read a delimited file in a Lua table. -- By default, attempts to treat first line as separated list of fieldnames. --- @param file a filename or a file-like object (default stdin) --- @tab cnfg options table: can override `delim` (a string pattern), `fieldnames` (a list), --- specify `no_convert` (default is to conversion), `numfields` (indices of columns known --- to be numbers) and `thousands_dot` (thousands separator in Excel CSV is '.'). --- If `csv` is set then fields may be double-quoted and contain commas; +-- @param file a filename or a file-like object +-- @tab cnfg parsing options +-- @string cnfg.delim a string pattern to split fields +-- @array cnfg.fieldnames (i.e. don't read from first line) +-- @bool cnfg.no_convert (default is to try conversion on first data line) +-- @tab cnfg.convert table of custom conversion functions with column keys +-- @int cnfg.numfields indices of columns known to be numbers +-- @bool cnfg.last_field_collect only split as many fields as fieldnames. +-- @int cnfg.thousands_dot thousands separator in Excel CSV is '.' +-- @bool cnfg.csv fields may be double-quoted and contain commas; +-- Also, empty fields are considered to be equivalent to zero. -- @return `data` object, or `nil` -- @return error message. May be a file error, 'not a file-like object' -- or a conversion error function data.read(file,cnfg) - local err,opened,count,line,csv + local err,opened,count,line local D = {} if not cnfg then cnfg = {} end local f,err,opened = open_file(file,'r') if not f then return nil, err end local thousands_dot = cnfg.thousands_dot + local csv = cnfg.csv + if csv then cnfg.delim = ',' end -- note that using dot as the thousands separator (@thousands_dot) - -- requires a special conversion function! + -- requires a special conversion function! For CSV, _empty fields_ are + -- considered to default to numerial zeroes. local tonumber = tonumber local function try_number(x) if thousands_dot then x = x:gsub('%.(...)','%1') end + if csv and x == '' then x = '0' end local v = tonumber(x) if v == nil then return nil,"not a number" end return v end - csv = cnfg.csv - if csv then cnfg.delim = ',' end count = 1 line = f:read() if not line then return nil, "empty file" end @@ -297,7 +305,7 @@ function data.read(file,cnfg) if not cnfg.no_convert then local fields = split(line,D.delim,csv,nfields) for i = 1,#fields do - if not find(numfields,i) and tonumber(fields[i]) then + if not find(numfields,i) and try_number(fields[i]) then append_conversion(i,try_number) end end @@ -346,20 +354,22 @@ end -- @param file filename or file-like object -- @tparam[opt] {string} fieldnames list of fields (optional) -- @string[opt='\t'] delim delimiter (default tab) +-- @return true or nil, error function data.write (data,file,fieldnames,delim) local f,err,opened = open_file(file,'w') if not f then return nil, err end if not fieldnames then fieldnames = data.fieldnames end + delim = delim or '\t' if fieldnames and #fieldnames > 0 then f:write(concat(fieldnames,delim),'\n') end - delim = delim or '\t' for i = 1,#data do write_row(data,f,data[i],delim) end if opened then f:close() end + return true end diff --git a/tests/test-data.lua b/tests/test-data.lua index 3a9c2922..9db0d855 100644 --- a/tests/test-data.lua +++ b/tests/test-data.lua @@ -218,3 +218,31 @@ d = data.read(f,{convert={[1]=date_convert},last_field_collect=true}) asserteq(#d[1],2) asserteq(d[2][1]:year(),2010) + +d = {{1,2,3},{10,20,30}} +out = stringio.create() +data.write(d,out,{'A','B','C'},',') +asserteq(out:value(), +[[ +A,B,C +1,2,3 +10,20,30 +]]) + +out = stringio.create() +d.fieldnames = {'A','B','C'} +data.write(d,out) + +asserteq(out:value(), +[[ +A B C +1 2 3 +10 20 30 +]]) + + +d = data.read(stringio.open 'One,Two\n1,\n,20\n',{csv=true}) +asserteq(d,{ + {1,0},{0,20}, + original_fieldnames={"One","Two"},fieldnames={"One","Two"},delim="," +})