Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ TextParse = "e0df1984-e451-5cb5-8b61-797a481e67e3"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"

[targets]
test = ["Test"]
test = ["Test", "DataFrames"]

[compat]
CodecZlib = "≥ 0.5.2"
Expand Down
46 changes: 46 additions & 0 deletions src/csv_writer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,49 @@ end
# FileIO save hook for TSV streams. Hands the stream's underlying IO object and
# every formatting option straight to `_save`; the delimiter defaults to a tab.
function fileio_save(s::FileIO.Stream{FileIO.format"TSV"}, data;
        delim='\t', quotechar='"', escapechar='"', nastring="NA", header=true)
    target = s.io
    return _save(
        target, data;
        delim=delim,
        quotechar=quotechar,
        escapechar=escapechar,
        nastring=nastring,
        header=header
    )
end

#
# Streaming version writes header (if any) on first call, then appends on subsequent calls.
#
# Union of the two FileIO format tags accepted by the streaming save methods
# below (used as the upper bound of their `T` type parameter).
const CSV_or_TSV = Union{FileIO.format"CSV", FileIO.format"TSV"}

# Default column delimiter for a format tag: a comma for the CSV format,
# a tab for anything else.
function _delim(T)
    if T <: FileIO.format"CSV"
        return ','
    end
    return '\t'
end

# State carried between successive writes of a streaming CSV/TSV save.
#
# Fields:
# - `io`: the IO object rows are written to.
# - `first_data_written`: `true` once data has been written to `io`.
# - `delim`, `quotechar`, `escapechar`, `nastring`, `header`: the formatting
#   options forwarded to `_save` on every write.
#
# `quotechar` may be `nothing` so that a stream can be created with the same
# `quotechar=nothing` option the non-streaming `save` accepts; a plain `Char`
# field would make the constructor throw for that option.
mutable struct CSVFileSaveStream{T}
    io::T
    first_data_written::Bool
    delim::Char
    quotechar::Union{Char,Nothing}
    escapechar::Char
    nastring::AbstractString
    header::Bool
end

# Streaming-save entry point for a file path. Opens `f.filename` for writing
# (truncating it), optionally writes `data` right away, and returns a
# `CSVFileSaveStream` that further chunks can be appended to with `write`.
# The caller is responsible for calling `close` on the returned stream.
#
# Keyword arguments are forwarded to `_save`; `delim` defaults to the
# delimiter matching the file's format tag.
function fileio_savestreaming(f::FileIO.File{T}, data=nothing; delim=_delim(T),
        quotechar='"', escapechar='"', nastring="NA", header=true) where T <: CSV_or_TSV
    io = open(f.filename, "w")

    try
        if data !== nothing
            _save(io, data; delim=delim, quotechar=quotechar, escapechar=escapechar,
                  nastring=nastring, header=header)
        end

        return CSVFileSaveStream(io, data !== nothing, delim, quotechar, escapechar,
                                 nastring, header)
    catch
        # The handle was opened here and has not been handed to the caller yet,
        # so it must be released before the error propagates.
        close(io)
        rethrow()
    end
end

# Streaming-save entry point for an already-open FileIO stream. Optionally
# writes `data` immediately, then returns a `CSVFileSaveStream` wrapping the
# stream's IO so that further chunks can be appended with `write`.
# NOTE(review): `header` defaults to `false` here but to `true` in the
# file-based method — confirm this difference is intentional.
function fileio_savestreaming(s::FileIO.Stream{T}, data=nothing; delim=_delim(T),
        quotechar='"', escapechar='"', nastring="NA", header=false) where T <: CSV_or_TSV
    target = s.io
    wrote_initial = data !== nothing

    wrote_initial && _save(target, data; delim=delim, quotechar=quotechar,
                           escapechar=escapechar, nastring=nastring, header=header)

    return CSVFileSaveStream(target, wrote_initial, delim, quotechar, escapechar,
                             nastring, header)
end

# Append `data` to a streaming save. A header row is requested from `_save`
# only when the stream was created with `header=true` and nothing has been
# written to it yet; every later call appends rows without a header.
# Returns whatever `_save` returns.
function Base.write(s::CSVFileSaveStream, data)
    # The header option lives on the stream; the bare name `header` is not
    # defined in this scope and raised an UndefVarError on the first write.
    write_header = !s.first_data_written && s.header

    result = _save(s.io, data; delim=s.delim, quotechar=s.quotechar,
                   escapechar=s.escapechar, nastring=s.nastring, header=write_header)

    # Record that output has started so the header is never repeated.
    s.first_data_written = true
    return result
end

# Finish a streaming save by closing the IO object the stream wraps.
Base.close(s::CSVFileSaveStream) = close(s.io)
24 changes: 23 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ end
output_filename4 = tempname() * ".csv"

try
@show output_filename4
array |> save(output_filename4, quotechar=nothing)

finally
Expand Down Expand Up @@ -212,8 +211,31 @@ end
@test showable("text/html", x2) == true
@test showable("application/vnd.dataresource+json", x2) == true
end

end

@testset "savestreaming" begin
    using DataFrames

    df = DataFrame(A = 1:2:1000, B = repeat(1:10, inner=50), C = 1:500)
    df1 = df[1:5, :]
    df2 = df[6:10, :]

    # Test both csv and tsv formats
    for ext in ("csv", "tsv")
        # Write to a temporary path instead of the current working directory,
        # and always clean up, even when a step below throws.
        fname = tempname() * ".$ext"

        try
            s = savestreaming(fname, df1)
            try
                write(s, df2)
                write(s, df2) # add this slice twice
            finally
                close(s)
            end

            new_df = DataFrame(load(fname))
            @test new_df[1:5,:] == df1
            @test new_df[6:10,:] == df2
            @test new_df[11:15,:] == df2
        finally
            rm(fname, force=true)
        end
    end
end

end # Outer-most testset
Expand Down