diff --git a/README.md b/README.md
index 8fb6680..1a44f91 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,14 @@ using CSVFiles, DataFrames
 df = DataFrame(load("data.csv"))
 ````
 
+To read a gzipped CSV file into a ``DataFrame``:
+
+````julia
+using CSVFiles, DataFrames
+
+df = DataFrame(load(File(format"CSV", "data.csv.gz")))
+````
+
 The call to ``load`` returns a ``struct`` that is an [IterableTable.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTable.jl](https://github.com/queryverse/IterableTables.jl). Here are some examples of materializing a CSV file into data structures that are not a ``DataFrame``:
 
 ````julia
@@ -87,6 +95,14 @@ save("output.csv", it)
 ````
 This will work as long as ``it`` is any of the types supported as sources in [IterableTables.jl](https://github.com/queryverse/IterableTables.jl).
 
+Compressed CSV files can be created by specifying the ``.gz`` file extension:
+
+````julia
+using CSVFiles
+
+save(File(format"CSV", "output.csv.gz"), it)
+````
+
 One can also save into an arbitrary stream:
 ````julia
 using CSVFiles
diff --git a/REQUIRE b/REQUIRE
index 2a22d71..5fefc2d 100644
--- a/REQUIRE
+++ b/REQUIRE
@@ -8,3 +8,4 @@ FileIO 1.0.1
 HTTP 0.6.14
 IterableTables 0.8.3
 TableShowUtils 0.1.1
+CodecZlib 0.5.2
diff --git a/src/CSVFiles.jl b/src/CSVFiles.jl
index 9f4995f..cc4b887 100644
--- a/src/CSVFiles.jl
+++ b/src/CSVFiles.jl
@@ -1,7 +1,7 @@
 module CSVFiles
 
 using TextParse, IteratorInterfaceExtensions, TableTraits, TableTraitsUtils,
-    DataValues, FileIO, HTTP, TableShowUtils
+    DataValues, FileIO, HTTP, TableShowUtils, CodecZlib
 import IterableTables
 
 export load, save, File, @format_str
diff --git a/src/csv_writer.jl b/src/csv_writer.jl
index 960dbb9..d62f660 100644
--- a/src/csv_writer.jl
+++ b/src/csv_writer.jl
@@ -65,8 +65,14 @@ end
 function _save(filename::AbstractString, data; delim=',', quotechar='"', escapechar='"', nastring="NA", header=true)
     isiterabletable(data) || error("Can't write this data to a CSV file.")
 
-    open(filename, "w") do io
-        _save(io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header)
+    # Gzip only when the name ends in a literal ".gz" suffix; a dot-less file that is
+    # merely named "gz" is written as plain text. Both branches share one writer.
+    write_rows(io) = _save(io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header)
+
+    if endswith(filename, ".gz")
+        open(write_rows, GzipCompressorStream, filename, "w")
+    else
+        open(write_rows, filename, "w")
     end
 end
 
diff --git a/test/runtests.jl b/test/runtests.jl
index c40f193..8007efc 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -146,6 +146,34 @@ end
     end
 end
 
+@testset "Compression" begin
+    data = [(Name="John",Age=34.,Children=2),(Name="Sally",Age=54.,Children=1),(Name="Jim",Age=23.,Children=0)]
+
+    @testset "CSV" begin
+        output_filename = "output.csv.gz"
+        try
+            save(File(format"CSV", output_filename), data)
+            reloaded_data = collect(load(File(format"CSV", output_filename)))
+            @test reloaded_data == data
+        finally
+            # force=true: if save failed before creating the file, cleanup must not mask that error.
+            rm(output_filename, force=true)
+        end
+    end
+
+    @testset "TSV" begin
+        output_filename = "output.tsv.gz"
+        try
+            save(File(format"TSV", output_filename), data)
+            reloaded_data = collect(load(File(format"TSV", output_filename)))
+            @test reloaded_data == data
+        finally
+            # force=true: if save failed before creating the file, cleanup must not mask that error.
+            rm(output_filename, force=true)
+        end
+    end
+end
+
 @testset "show" begin
     x = load(joinpath(@__DIR__, "data.csv"))
 