NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Indentation and blank context lines are inferred; hunk line counts were
recomputed where the hunk was revised or blank context could not be placed.
The src/csv_writer.jl hunk was revised, so its `index` blob hash is stale.
Verify against the working tree before applying.

diff --git a/Project.toml b/Project.toml
index 8a6ebec..672940f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -16,9 +16,10 @@ TextParse = "e0df1984-e451-5cb5-8b61-797a481e67e3"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 
 [targets]
-test = ["Test"]
+test = ["Test", "DataFrames"]
 
 [compat]
 CodecZlib = "≥ 0.5.2"
diff --git a/src/csv_writer.jl b/src/csv_writer.jl
index d62f660..0d2a2b3 100644
--- a/src/csv_writer.jl
+++ b/src/csv_writer.jl
@@ -95,3 +95,92 @@ end
 function fileio_save(s::FileIO.Stream{FileIO.format"TSV"}, data; delim='\t', quotechar='"', escapechar='"', nastring="NA", header=true)
     return _save(s.io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header)
 end
+
+#
+# Streaming version writes header (if any) on first call, then appends on subsequent calls.
+#
+const CSV_or_TSV = Union{FileIO.format"CSV", FileIO.format"TSV"}
+
+# Default delimiter for a FileIO format type: ',' for CSV, '\t' otherwise (TSV).
+_delim(T) = T <: FileIO.format"CSV" ? ',' : '\t'
+
+"""
+    CSVFileSaveStream{T}
+
+Handle returned by `fileio_savestreaming`: the output `io` plus the formatting options
+that every later `write(s, data)` call reuses. `first_data_written` records whether any
+data has been written yet, i.e. whether the header row may still have to be emitted.
+"""
+mutable struct CSVFileSaveStream{T}
+    io::T
+    first_data_written::Bool
+    delim::Char
+    quotechar::Char
+    escapechar::Char
+    nastring::AbstractString
+    header::Bool
+end
+
+"""
+    fileio_savestreaming(f::FileIO.File, data=nothing; kwargs...)
+    fileio_savestreaming(s::FileIO.Stream, data=nothing; kwargs...)
+
+Start a streaming CSV/TSV save and return a `CSVFileSaveStream`.
+
+The `File` method opens `f.filename` in `"w"` mode; the `Stream` method writes to `s.io`.
+If `data` is given it is written immediately with `_save`. Append more rows with
+`write(stream, data)` and finish with `close(stream)`.
+
+Keywords: `delim` (`','` for CSV, tab for TSV), `quotechar`, `escapechar`, `nastring`,
+and `header` (default `true` for files, `false` for streams).
+"""
+function fileio_savestreaming(f::FileIO.File{T}, data=nothing; delim=_delim(T), quotechar='"', escapechar='"', nastring="NA",
+        header=true) where T <: CSV_or_TSV
+    io = open(f.filename, "w")
+
+    try
+        if data !== nothing
+            _save(io, data; delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header)
+        end
+    catch
+        # Do not leak the file handle if the initial write fails.
+        close(io)
+        rethrow()
+    end
+
+    return CSVFileSaveStream(io, data !== nothing, delim, quotechar, escapechar, nastring, header)
+end
+
+function fileio_savestreaming(s::FileIO.Stream{T}, data=nothing; delim=_delim(T), quotechar='"', escapechar='"', nastring="NA",
+        header=false) where T <: CSV_or_TSV
+
+    if data !== nothing
+        _save(s.io, data; delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header)
+    end
+
+    return CSVFileSaveStream(s.io, data !== nothing, delim, quotechar, escapechar, nastring, header)
+end
+
+"""
+    write(s::CSVFileSaveStream, data)
+
+Append `data` to the stream using the options stored in `s`. The header row is emitted
+only when `s.header` is true and nothing has been written through `s` before.
+"""
+function Base.write(s::CSVFileSaveStream, data)
+    # Read the flag stored on the stream; this method has no local `header`.
+    writeheader = s.header && !s.first_data_written
+    result = _save(s.io, data; delim=s.delim, quotechar=s.quotechar, escapechar=s.escapechar, nastring=s.nastring, header=writeheader)
+    # Record that output has started so later calls never repeat the header.
+    s.first_data_written = true
+    return result
+end
+
+"""
+    close(s::CSVFileSaveStream)
+
+Close the underlying `io`.
+"""
+function Base.close(s::CSVFileSaveStream)
+    close(s.io)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 8007efc..f1a3c2c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -92,7 +92,6 @@ end
     output_filename4 = tempname() * ".csv"
 
     try
-        @show output_filename4
         array |> save(output_filename4, quotechar=nothing)
 
     finally
@@ -212,6 +211,29 @@ end
     @test showable("text/html", x2) == true
     @test showable("application/vnd.dataresource+json", x2) == true
 end
+
+end
+
+@testset "savestreaming" begin
+    using DataFrames
+    df = DataFrame(A = 1:2:1000, B = repeat(1:10, inner=50), C = 1:500)
+    df1 = df[1:5, :]
+    df2 = df[6:10, :]
+
+    # Test both csv and tsv formats
+    for ext in ("csv", "tsv")
+        fname = "output.$ext"
+        s = savestreaming(fname, df1)
+        write(s, df2)
+        write(s, df2)    # add this slice twice
+        close(s)
+        new_df = DataFrame(load(fname))
+        @test new_df[1:5,:] == df1
+        @test new_df[6:10,:] == df2
+        @test new_df[11:15,:] == df2
+
+        rm(fname)
+    end
 end
 
 end # Outer-most testset