From 564f468e3fffd24d258c46edd6d368b6acdc787d Mon Sep 17 00:00:00 2001 From: Rick Cobb Date: Fri, 18 Oct 2019 06:30:30 +0900 Subject: [PATCH] ARROW-6813: [Ruby] Arrow::Table.load with headers=true leads to exception in Arrow 0.15 After reviewing the commentary on https://github.com/apache/arrow/pull/5206 , and examining the new read options, I've concluded that supporting a boolean `:headers` option at this level is not a good idea. In particular, all the interpretations of `headers: false` that I could think of would be surprising to a typical Ruby CSV programmer (does it mean `:autogenerate_column_names`? are you required to add `:column_names` instead?). So I've just made the exception clearer. It would be great if there were a way to pull the [documentation of the options](https://github.com/apache/arrow/blob/02d1e9736808d9a9624bef5577c880d8c165e853/python/pyarrow/_csv.pyx#L42) over from the `.py` files into the Ruby doc. Closes #5609 from cobbr2/bug/ARROW-6813-ruby-headers-option-fail and squashes the following commits: c941b1091 Retry the docker-compose build now that I see it succeed for anybody 0d67b6521 Retry the docker-compose build, can't find button in Github UX 0134d5eb4 simplify headers handling 2dedfad58 and now with a headers: string test 76551389b Merge work that makes headers work more like CSV 1215bfe52 truthy headers should work like CSV c7fe77f56 Make `headers:` compatible with Ruby's CSV.new a4934b6ab Should never have committed Gemfile.lock 4af01118f Make a better error message than NoMethodError 03416528e WIP: is this the correct test... and will skip_rows work? Authored-by: Rick Cobb Signed-off-by: Sutou Kouhei --- ruby/red-arrow/lib/arrow/csv-loader.rb | 13 +++-- ruby/red-arrow/test/test-csv-loader.rb | 75 ++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/ruby/red-arrow/lib/arrow/csv-loader.rb b/ruby/red-arrow/lib/arrow/csv-loader.rb index c63ab89eb099b..5e244e2334afb 100644 --- a/ruby/red-arrow/lib/arrow/csv-loader.rb +++ b/ruby/red-arrow/lib/arrow/csv-loader.rb @@ -93,10 +93,17 @@ def reader_options @options.each do |key, value| case key when :headers - if value - options.n_header_rows = 1 + case value + when ::Array + options.column_names = value + when String + return nil else - options.n_header_rows = 0 + if value + options.generate_column_names = false + else + options.generate_column_names = true + end end when :column_types value.each do |name, type| diff --git a/ruby/red-arrow/test/test-csv-loader.rb b/ruby/red-arrow/test/test-csv-loader.rb index 96de9c8634540..42eef7f67ba57 100644 --- a/ruby/red-arrow/test/test-csv-loader.rb +++ b/ruby/red-arrow/test/test-csv-loader.rb @@ -121,6 +121,81 @@ def load_csv(data, options) Arrow::CSVLoader.load(data, options) end + sub_test_case(":headers") do + test("true") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(value: values), + load_csv(<<-CSV, headers: true)) +value +a +b +c + CSV + end + + test(":first_line") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(value: values), + load_csv(<<-CSV, headers: :first_line)) +value +a +b +c + CSV + end + + test("truthy") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(value: values), + load_csv(<<-CSV, headers: 0)) +value +a +b +c + CSV + end + + test("Array of column names") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(column: values), + load_csv(<<-CSV, headers: ["column"])) +a +b +c + CSV + end + + test("false") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(f0: values), + load_csv(<<-CSV, headers: false)) +a +b +c + CSV + end + + test("nil") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(f0: values), + load_csv(<<-CSV, headers: nil)) +a +b +c + CSV + end + + test("string") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(column: values), + load_csv(<<-CSV, headers: "column")) +a +b +c + CSV + end + end + test(":column_types") do assert_equal(Arrow::Table.new(:count => Arrow::UInt16Array.new([1, 2, 4])), load_csv(<<-CSV, column_types: {count: :uint16}))