From 0f22fd43938637f1de7c610714f5b34990b885c2 Mon Sep 17 00:00:00 2001 From: David Anthoff Date: Thu, 8 Feb 2018 10:37:49 -0800 Subject: [PATCH 1/5] Add support for loading and saving from streams --- README.md | 15 ++++++++++++++- src/CSVFiles.jl | 29 +++++++++++++++++++++++++++++ src/csv_writer.jl | 36 +++++++++++++++++++++++++----------- test/runtests.jl | 18 ++++++++++++++++++ 4 files changed, 86 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index cd4351b..cbd18aa 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,13 @@ plot(load("data.csv"), x=:a, y=:b, Geom.line) ```` One can load both local files and files that can be downloaded via either http or https. To download -from a remote URL, simply pass a URL to the ``load`` function instead of just a filename. +from a remote URL, simply pass a URL to the ``load`` function instead of just a filename. In addition, +one can also load data from an ``IO`` object, i.e. any stream. The syntax +for that scenario is + +````julia +df = DataFrame(load(Stream(format"CSV", io))) +```` The ``load`` function also takes a number of parameters: @@ -80,6 +86,13 @@ save("output.csv", it) ```` This will work as long as ``it`` is any of the types supported as sources in [IterableTables.jl](https://github.com/davidanthoff/IterableTables.jl). +One can also save into an arbitrary stream: +````julia +using FileIO, CSVFiles + +save(Stream(format"CSV", io), it) +```` + The ``save`` function takes a number of arguments: ````julia save(f::FileIO.File{FileIO.format"CSV"}, data; delim=',', quotechar='"', escapechar='\\', header=true) diff --git a/src/CSVFiles.jl b/src/CSVFiles.jl index 50d96a0..8b9d749 100644 --- a/src/CSVFiles.jl +++ b/src/CSVFiles.jl @@ -11,6 +11,12 @@ struct CSVFile keywords end +struct CSVStream + io + delim + keywords +end + function load(f::FileIO.File{FileIO.format"CSV"}, delim=','; args...) 
return CSVFile(f.filename, delim, args) end @@ -19,9 +25,20 @@ function load(f::FileIO.File{FileIO.format"TSV"}, delim='\t'; args...) return CSVFile(f.filename, delim, args) end +function load(s::FileIO.Stream{FileIO.format"CSV"}, delim=','; args...) + return CSVStream(s.io, delim, args) +end + +function load(s::FileIO.Stream{FileIO.format"TSV"}, delim='\t'; args...) + return CSVStream(s.io, delim, args) +end + TableTraits.isiterable(x::CSVFile) = true TableTraits.isiterabletable(x::CSVFile) = true +TableTraits.isiterable(x::CSVStream) = true +TableTraits.isiterabletable(x::CSVStream) = true + function TableTraits.getiterator(file::CSVFile) if startswith(file.filename, "https://") || startswith(file.filename, "http://") response = HTTP.get(file.filename) @@ -36,10 +53,22 @@ function TableTraits.getiterator(file::CSVFile) return it end +function TableTraits.getiterator(s::CSVStream) + res = TextParse.csvread(s.io, s.delim, s.keywords...) + + it = TableTraitsUtils.create_tableiterator([i for i in res[1]], [Symbol(i) for i in res[2]]) + + return it +end + function Base.collect(x::CSVFile) return collect(getiterator(x)) end +function Base.collect(x::CSVStream) + return collect(getiterator(x)) +end + include("csv_writer.jl") end # module diff --git a/src/csv_writer.jl b/src/csv_writer.jl index 89a4fe0..1409a83 100644 --- a/src/csv_writer.jl +++ b/src/csv_writer.jl @@ -46,7 +46,7 @@ end end end -function _save(filename, data; delim=',', quotechar='"', escapechar='\\', header=true) +function _save(io, data; delim=',', quotechar='"', escapechar='\\', header=true) isiterabletable(data) || error("Can't write this data to a CSV file.") it = getiterator(data) @@ -54,17 +54,23 @@ function _save(filename, data; delim=',', quotechar='"', escapechar='\\', header quotechar_internal = quotechar==nothing ? 
Nullable{Char}() : Nullable{Char}(quotechar) - open(filename, "w") do io - if header - if isnull(quotechar_internal) - join(io,[string(colname) for colname in colnames],delim) - else - join(io,["$(quotechar)" *replace(string(colname), quotechar, "$(escapechar)$(quotechar)") * "$(quotechar)" for colname in colnames],delim) - end - println(io) + if header + if isnull(quotechar_internal) + join(io,[string(colname) for colname in colnames],delim) + else + join(io,["$(quotechar)" *replace(string(colname), quotechar, "$(escapechar)$(quotechar)") * "$(quotechar)" for colname in colnames],delim) end - _writecsv(io, it, eltype(it), delim, quotechar_internal, escapechar) - end + println(io) + end + _writecsv(io, it, eltype(it), delim, quotechar_internal, escapechar) +end + +function _save(filename::AbstractString, data; delim=',', quotechar='"', escapechar='\\', header=true) + isiterabletable(data) || error("Can't write this data to a CSV file.") + + open(filename, "w") do io + _save(io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, header=header) + end end function save(f::FileIO.File{FileIO.format"CSV"}, data; delim=',', quotechar='"', escapechar='\\', header=true) @@ -74,3 +80,11 @@ end function save(f::FileIO.File{FileIO.format"TSV"}, data; delim='\t', quotechar='"', escapechar='\\', header=true) return _save(f.filename, data, delim=delim, quotechar=quotechar, escapechar=escapechar, header=header) end + +function save(s::FileIO.Stream{FileIO.format"CSV"}, data; delim=',', quotechar='"', escapechar='\\', header=true) + return _save(s.io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, header=header) +end + +function save(s::FileIO.Stream{FileIO.format"TSV"}, data; delim='\t', quotechar='"', escapechar='\\', header=true) + return _save(s.io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, header=header) +end diff --git a/test/runtests.jl b/test/runtests.jl index fbbe59e..0639069 100644 --- a/test/runtests.jl +++ 
b/test/runtests.jl @@ -66,5 +66,23 @@ finally rm(output_filename4) end +data = [@NT(Name="John",Age=34.,Children=2),@NT(Name="Sally",Age=54.,Children=1),@NT(Name="Jim",Age=23.,Children=0)] + +stream = IOBuffer() +mark(stream) +fileiostream = FileIO.Stream(format"CSV", stream) +save(fileiostream, data) +reset(stream) +reloaded_data = collect(load(fileiostream)) +@test reloaded_data == data + +stream = IOBuffer() +mark(stream) +fileiostream = FileIO.Stream(format"TSV", stream) +save(fileiostream, data) +reset(stream) +reloaded_data = collect(load(fileiostream)) +@test reloaded_data == data end + From dc27b327a562413a89038104be6c9ed6cb573214 Mon Sep 17 00:00:00 2001 From: David Anthoff Date: Thu, 8 Feb 2018 10:50:55 -0800 Subject: [PATCH 2/5] Update NEWS --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index ef2eedc..19a6fdb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +# CSVFiles.jl v0.5.0 +* Support for FileIO Stream objects + # CSVFiles.jl v0.4.1 * Various small bug fixes From 1ed38a68931b0e1d8b7eabba3f6d6cdc025d7536 Mon Sep 17 00:00:00 2001 From: David Anthoff Date: Thu, 8 Feb 2018 10:53:59 -0800 Subject: [PATCH 3/5] Use IteratorInterfaceExtensions methods --- src/CSVFiles.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/CSVFiles.jl b/src/CSVFiles.jl index 8b9d749..2797868 100644 --- a/src/CSVFiles.jl +++ b/src/CSVFiles.jl @@ -1,6 +1,7 @@ module CSVFiles -using TextParse, TableTraits, TableTraitsUtils, DataValues +using TextParse, IteratorInterfaceExtensions, TableTraits, TableTraitsUtils, + DataValues import FileIO using HTTP import IterableTables @@ -33,10 +34,10 @@ function load(s::FileIO.Stream{FileIO.format"TSV"}, delim='\t'; args...) 
return CSVStream(s.io, delim, args) end -TableTraits.isiterable(x::CSVFile) = true +IteratorInterfaceExtensions.isiterable(x::CSVFile) = true TableTraits.isiterabletable(x::CSVFile) = true -TableTraits.isiterable(x::CSVStream) = true +IteratorInterfaceExtensions.isiterable(x::CSVStream) = true TableTraits.isiterabletable(x::CSVStream) = true function TableTraits.getiterator(file::CSVFile) From 7065843d7a456a3d51ebbf08c8e7ff437797fed3 Mon Sep 17 00:00:00 2001 From: David Anthoff Date: Thu, 8 Feb 2018 10:54:06 -0800 Subject: [PATCH 4/5] Update REQUIRE --- REQUIRE | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/REQUIRE b/REQUIRE index 2bae7b0..1f58c08 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,8 +1,9 @@ julia 0.6 -TextParse 0.1.6 -TableTraits 0.0.1 -TableTraitsUtils 0.0.1 +TextParse 0.4.0 +IteratorInterfaceExtensions 0.0.2 +TableTraits 0.0.3 +TableTraitsUtils 0.1.3 DataValues 0.1.0 FileIO 0.4.0 HTTP 0.6.0 -IterableTables 0.5.0 +IterableTables 0.6.1 From 0a4129860cc540f1b421eeee93561ae41576a145 Mon Sep 17 00:00:00 2001 From: David Anthoff Date: Thu, 8 Feb 2018 10:54:16 -0800 Subject: [PATCH 5/5] Increase test coverage --- test/runtests.jl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 0639069..e752460 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -73,7 +73,10 @@ mark(stream) fileiostream = FileIO.Stream(format"CSV", stream) save(fileiostream, data) reset(stream) -reloaded_data = collect(load(fileiostream)) +csvstream = load(fileiostream) +reloaded_data = collect(csvstream) +@test isiterable(csvstream) +@test isiterabletable(csvstream) @test reloaded_data == data stream = IOBuffer() @@ -81,7 +84,10 @@ mark(stream) fileiostream = FileIO.Stream(format"TSV", stream) save(fileiostream, data) reset(stream) -reloaded_data = collect(load(fileiostream)) +csvstream = load(fileiostream) +reloaded_data = collect(csvstream) +@test isiterable(csvstream) +@test 
isiterabletable(csvstream) @test reloaded_data == data end