Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ using CSVFiles, DataFrames
df = DataFrame(load("data.csv"))
````

To read a gzipped CSV file into a ``DataFrame``:

````julia
using CSVFiles, DataFrames

df = DataFrame(load(File(format"CSV", "data.csv.gz")))
````

The call to ``load`` returns a ``struct`` that is an [IterableTables.jl](https://github.com/queryverse/IterableTables.jl) source, so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTables.jl](https://github.com/queryverse/IterableTables.jl). Here are some examples of materializing a CSV file into data structures that are not a ``DataFrame``:

````julia
Expand Down Expand Up @@ -87,6 +95,14 @@ save("output.csv", it)
````
This will work as long as ``it`` is any of the types supported as sources in [IterableTables.jl](https://github.com/queryverse/IterableTables.jl).

Compressed CSV files can be created by specifying the ``.gz`` file extension:

````julia
using CSVFiles

save(File(format"CSV", "output.csv.gz"), df)
````

One can also save into an arbitrary stream:
````julia
using CSVFiles
Expand Down
1 change: 1 addition & 0 deletions REQUIRE
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ FileIO 1.0.1
HTTP 0.6.14
IterableTables 0.8.3
TableShowUtils 0.1.1
CodecZlib 0.5.2
2 changes: 1 addition & 1 deletion src/CSVFiles.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module CSVFiles

using TextParse, IteratorInterfaceExtensions, TableTraits, TableTraitsUtils,
DataValues, FileIO, HTTP, TableShowUtils
DataValues, FileIO, HTTP, TableShowUtils, CodecZlib
import IterableTables

export load, save, File, @format_str
Expand Down
12 changes: 10 additions & 2 deletions src/csv_writer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,16 @@ end
"""
    _save(filename::AbstractString, data; delim, quotechar, escapechar, nastring, header)

Write the iterable table `data` to the file at `filename`.

When `filename` ends in `.gz` the output is written through a
`GzipCompressorStream`; otherwise it is written to a plain file opened in
write mode. All keyword arguments are forwarded unchanged to the
stream-based `_save(io, data; ...)` method, which does the actual writing.

Raises an error (via `error`) if `data` is not an iterable table.
"""
function _save(filename::AbstractString, data; delim=',', quotechar='"', escapechar='"', nastring="NA", header=true)
    isiterabletable(data) || error("Can't write this data to a CSV file.")

    # Single place that forwards the formatting options to the stream writer,
    # so the compressed and uncompressed paths cannot drift apart.
    write_table(io) = _save(io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header)

    # Test the suffix including the dot: comparing only the text after the last
    # '.' would also match an extension-less file that is literally named "gz".
    if endswith(filename, ".gz")
        open(GzipCompressorStream, filename, "w") do io
            write_table(io)
        end
    else
        open(filename, "w") do io
            write_table(io)
        end
    end
end

Expand Down
26 changes: 26 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,32 @@ end
end
end

# Round-trip tests for gzip-compressed output: save a small table to a `.gz`
# file, load it back, and check that the rows are unchanged.
@testset "Compression" begin
    data = [(Name="John",Age=34.,Children=2),(Name="Sally",Age=54.,Children=1),(Name="Jim",Age=23.,Children=0)]

    @testset "CSV" begin
        output_filename = "output.csv.gz"
        try
            save(File(format"CSV", output_filename), data)
            reloaded_data = collect(load(File(format"CSV", output_filename)))
            @test reloaded_data == data
        finally
            # force=true: if `save` failed before creating the file, a plain
            # `rm` would throw here and hide the original error.
            rm(output_filename, force=true)
        end
    end

    @testset "TSV" begin
        output_filename = "output.tsv.gz"
        try
            save(File(format"TSV", output_filename), data)
            reloaded_data = collect(load(File(format"TSV", output_filename)))
            @test reloaded_data == data
        finally
            # force=true: see the CSV case; cleanup must not mask a real failure.
            rm(output_filename, force=true)
        end
    end
end

@testset "show" begin
x = load(joinpath(@__DIR__, "data.csv"))

Expand Down