diff --git a/lib/dullard/reader.rb b/lib/dullard/reader.rb index 7e6f36b..28a34c4 100755 --- a/lib/dullard/reader.rb +++ b/lib/dullard/reader.rb @@ -6,6 +6,10 @@ class Error < StandardError; end OOXMLEpoch = DateTime.new(1899,12,30) SharedStringPath = 'xl/sharedStrings.xml' StylesPath = 'xl/styles.xml' + + class Time < Struct.new('Time', :hours, :minutes, :seconds) + end + end class Dullard::Workbook @@ -26,8 +30,8 @@ class Dullard::Workbook 'd-mmm-yy' => :date, 'd-mmm' => :date, 'mmm-yy' => :date, - 'h:mm am/pm' => :date, - 'h:mm:ss am/pm' => :date, + 'h:mm am/pm' => :time, + 'h:mm:ss am/pm' => :time, 'h:mm' => :time, 'h:mm:ss' => :time, 'm/d/yy h:mm' => :date, @@ -160,27 +164,27 @@ def read_styles # Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb) # convert internal excelx attribute to a format - def attribute2format(s) - id = @cell_xfs[s.to_i].to_i - result = @num_formats[id] + def attribute_to_type(t, s) + if t == 's' + :shared + elsif t == 'b' + :boolean + else + id = @cell_xfs[s.to_i].to_i + result = @num_formats[id] - if result == nil - if STANDARD_FORMATS.has_key? id - result = STANDARD_FORMATS[id] + if result == nil + if STANDARD_FORMATS.has_key? id + result = STANDARD_FORMATS[id] + end end - end - - result.downcase - end + format = result.downcase - # Code borrowed from Roo (https://github.com/hmcgowan/roo/blob/master/lib/roo/excelx.rb) - def format2type(format) - if FORMATS.has_key? format - FORMATS[format] - elsif @user_defined_formats.has_key? format - @user_defined_formats[format] - else - :float + if @user_defined_formats.has_key? format + @user_defined_formats[format] + else + FORMATS[format] || :float + end end end @@ -229,17 +233,13 @@ def rows next when 'c' node_type = node.attributes['t'] + node_style = node.attributes['s'] cell_index = node.attributes['r'] if !cell_index raise Dullard::Error, 'Invalid spreadsheet XML.' end - - if node_type != 's' && node_type != 'b' - cell_format_index = node.attributes['s'].to_i - cell_type = @workbook.format2type(@workbook.attribute2format(cell_format_index)) - end - column = cell_index.delete('0-9') + cell_type = @workbook.attribute_to_type(node_type, node_style) shared = (node_type == 's') next end @@ -251,24 +251,34 @@ def rows end if node.value - value = (shared ? string_lookup(value.to_i) : value) - case cell_type - when :datetime + value = case cell_type + when :shared + string_lookup(node.value.to_i) + when :boolean + node.value.to_i != 0 + when :datetime, :date + Dullard::OOXMLEpoch + node.value.to_f when :time - when :date - value = (Dullard::OOXMLEpoch + node.value.to_f) + parse_time(node.value.to_f) when :float - value = node.value.to_f + node.value.to_f else # leave as string - end - cell_type = nil + node.value + end cell_map[column] = value end end end end + def parse_time(float) + hours = (float * 24).floor + minutes = (float * 24 * 60).floor % 60 + seconds = (float * 24 * 60 * 60).floor % 60 + Dullard::Time.new(hours, minutes, seconds) + end + def process_row(cell_map) max = cell_map.keys.map {|c| self.class.column_name_to_index c }.max row = [] diff --git a/spec/dullard_spec.rb b/spec/dullard_spec.rb index f1ff08c..c0ef21a 100755 --- a/spec/dullard_spec.rb +++ b/spec/dullard_spec.rb @@ -41,7 +41,7 @@ row[0].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:00:00" row[1].strftime("%Y-%m-%d %H:%M:%S").should == "2012-10-18 00:17:58" row[2].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:48" - row[3].strftime("%Y-%m-%d %H:%M:%S").should == "2012-07-01 21:18:52" + [row[3].hours, row[3].minutes, row[3].seconds].should == [13, 0, 0] end end count.should == 117 @@ -86,6 +86,18 @@ end end +describe "date_bool.xlsx" do + before(:each) do + @file = File.open(File.expand_path("../date_bool.xlsx", __FILE__)) + end + + it "should read boolean cells following dates" do + rows = Dullard::Workbook.new(@file).sheets[0].rows + rows.next.should == [DateTime.new(2015, 1, 2)] + rows.next.should == [true] + end +end + describe "error handling" do it "should raise an error when a cell is missing r attr" do @file = File.expand_path("../error_missing_r.xlsx", __FILE__) diff --git a/spec/test.xlsx b/spec/test.xlsx old mode 100755 new mode 100644 index eb9b33d..a685e53 Binary files a/spec/test.xlsx and b/spec/test.xlsx differ