Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

adding code to auto-detect :row_sep (issue #22)

  • Loading branch information...
commit 5b3cfc873fb753a6eba4f12474ea651dea45c61b 1 parent 03139de
@tilo authored
4 README.md
@@ -135,6 +135,7 @@ The options and the block are optional.
---------------------------------------------------------------------------------------------------------------------------------
| :col_sep | ',' | column separator |
| :row_sep | $/ ,"\n" | row separator or record separator , defaults to system's $/ , which defaults to "\n" |
+ | | | this can also be set to :auto , but will process the whole csv file first |
| :quote_char | '"' | quotation character |
| :comment_regexp | /^#/ | regular expression which matches comment lines (see NOTE about the CSV header) |
| :chunk_size | nil | if set, determines the desired chunk-size (defaults to nil, no chunk processing) |
@@ -228,6 +229,9 @@ Or install it yourself as:
## Changes
+#### 1.0.17 (2014-01-13)
+ * added option to set :row_sep to :auto , for automatic detection of the row-separator (issue #22)
+
#### 1.0.16 (2014-01-13)
* :convert_values_to_numeric option can now be qualified with :except or :only (thanks to Hugo Lepetit)
* removed deprecated `process_csv` method
View
20 lib/smarter_csv/smarter_csv.rb
@@ -18,9 +18,14 @@ def SmarterCSV.process(input, options={}, &block) # first parameter: filename
old_row_sep = $/
line_count = 0
begin
- $/ = options[:row_sep]
f = input.respond_to?(:readline) ? input : File.open(input, "r:#{options[:file_encoding]}")
+ if options[:row_sep] == :auto
+ options[:row_sep] = SmarterCSV.guess_line_ending( f )
+ f.rewind
+ end
+ $/ = options[:row_sep]
+
if options[:headers_in_file] # extract the header line
# process the header line in the CSV file..
# the first line of a CSV file contains the header .. it might be commented out, so we need to read it anyhow
@@ -185,5 +190,18 @@ def self.only_or_except_limit_execution( options, option_name, key )
end
return false
end
+
# Guesses the row separator used by the data readable from +filehandle+.
#
# Counts how often each of the pre-defined line endings ("\n", "\r", "\r\n")
# occurs and returns the most frequent one. A "\r" that is immediately
# followed by "\n" is counted once as "\r\n" -- not as one "\r" plus one "\n" --
# so that CRLF input is detected as "\r\n".
#
# limitation: this currently reads the whole file in before making a decision
#
# filehandle - any object responding to #each_char (e.g. File, StringIO).
#              It is read to the end and NOT rewound here.
#
# Returns one of "\n", "\r" or "\r\n". When several candidates share the
# highest count (including input with no line ending at all), the first
# candidate in the order "\n", "\r", "\r\n" is returned.
def self.guess_line_ending( filehandle )
  counts = {"\n" => 0 , "\r" => 0, "\r\n" => 0}
  pending_cr = false # true when the previous char was "\r" and is not yet classified

  filehandle.each_char do |c|
    if pending_cr
      pending_cr = false
      if c == "\n"
        counts["\r\n"] += 1 # the "\r" + "\n" pair forms a single CRLF separator
        next
      end
      counts["\r"] += 1 # previous "\r" stood on its own
    end

    case c
    when "\r" then pending_cr = true # decide once we have seen the next char
    when "\n" then counts["\n"] += 1
    end
  end
  counts["\r"] += 1 if pending_cr # input ended with a lone "\r"

  # the key with the largest counter is the most frequent line ending
  counts.max_by { |_sep, count| count }.first
end
end
View
2  lib/smarter_csv/version.rb
@@ -1,3 +1,3 @@
module SmarterCSV
- VERSION = "1.0.16"
+ VERSION = "1.0.17"
end
View
4 spec/fixtures/line_endings_n.csv
@@ -0,0 +1,4 @@
+name,count,price
+hammer,4,12.50
+axe,2,7.30
+crowbar,3,17.50
View
1  spec/fixtures/line_endings_r.csv
@@ -0,0 +1 @@
+name,count,price hammer,4,12.50 axe,2,7.30 crowbar,3,17.50
View
4 spec/fixtures/line_endings_rn.csv
@@ -0,0 +1,4 @@
+name,count,price
+hammer,4,12.50
+axe,2,7.30
+crowbar,3,17.50
View
43 spec/smarter_csv/line_ending_spec.rb
@@ -0,0 +1,43 @@
# Specs for row-separator handling in SmarterCSV.process.
# Every example processes one of the line_endings_*.csv fixtures and expects
# three data rows to be returned.
+require 'spec_helper'
+
# Directory holding the CSV fixtures, relative to the directory the specs are run from.
+fixture_path = 'spec/fixtures'
+
# The caller states the row separator explicitly via the :row_sep option.
+describe 'process files with line endings explicitly pre-specified' do
+ it 'reads file with \n line endings' do
+ options = {:row_sep => "\n"}
+ data = SmarterCSV.process("#{fixture_path}/line_endings_n.csv", options)
+ data.size.should == 3
+ end
+
+ it 'reads file with \r line endings' do
+ options = {:row_sep => "\r"}
+ data = SmarterCSV.process("#{fixture_path}/line_endings_r.csv", options)
+ data.size.should == 3
+ end
+
+ it 'reads file with \r\n line endings' do
+ options = {:row_sep => "\r\n"}
+ data = SmarterCSV.process("#{fixture_path}/line_endings_rn.csv", options)
+ data.size.should == 3
+ end
+end
+
# :row_sep => :auto is passed instead of a literal separator; the same fixtures
# and the same expected row count are used as in the explicit examples.
+describe 'process files with line endings in automatic mode' do
+ it 'reads file with \n line endings' do
+ options = {:row_sep => :auto}
+ data = SmarterCSV.process("#{fixture_path}/line_endings_n.csv", options)
+ data.size.should == 3
+ end
+
+ it 'reads file with \r line endings' do
+ options = {:row_sep => :auto}
+ data = SmarterCSV.process("#{fixture_path}/line_endings_r.csv", options)
+ data.size.should == 3
+ end
+
+ it 'reads file with \r\n line endings' do
+ options = {:row_sep => :auto}
+ data = SmarterCSV.process("#{fixture_path}/line_endings_rn.csv", options)
+ data.size.should == 3
+ end
+end
Please sign in to comment.
Something went wrong with that request. Please try again.