Skip to content

Commit

Permalink
Added some minor performance improvements to date format parsing and …
Browse files Browse the repository at this point in the history
…to column index generation as well as adding enumeration to workbook and worksheet.
  • Loading branch information
Robert Brazier committed Jan 13, 2012
1 parent 0beb025 commit a9507aa
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 10 deletions.
2 changes: 1 addition & 1 deletion lib/rubyXL/cell.rb
Expand Up @@ -30,7 +30,7 @@ def is_date?
if @workbook.num_fmts
num_fmt_id = xf_id()[:numFmtId]
num_fmt = @workbook.num_fmts[:numFmt].select { |f| f[:attributes][:numFmtId] == num_fmt_id }[0].andand[:attributes].andand[:formatCode]
if num_fmt && is_date_format?(num_fmt)
if num_fmt && workbook.date_num_fmt?(num_fmt)
return true
end
end
Expand Down
23 changes: 15 additions & 8 deletions lib/rubyXL/parser.rb
Expand Up @@ -7,22 +7,29 @@ module RubyXL

class Parser
attr_reader :data_only, :num_sheets

@@parsed_column_hash ={}
# converts cell string (such as "AA1") to matrix indices
def Parser.convert_to_index(cell_string)
index = Array.new(2)
index[0]=-1
index[1]=-1
if(cell_string =~ /^([A-Z]+)(\d+)$/)
one = $1.to_s
row = Integer($2) - 1 #-1 for 0 indexing

one = $1
row = $2.to_i - 1 #-1 for 0 indexing
col = 0
i = 0
one = one.reverse #because of 26^i calculation
one.each_byte do |c|
int_val = c - 64 #converts A to 1
col += int_val * 26**(i)
i=i+1
if @@parsed_column_hash[one].nil?
puts "||#{one}||"
two = one.reverse #because of 26^i calculation
two.each_byte do |c|
int_val = c - 64 #converts A to 1
col += int_val * 26**(i)
i=i+1
end
@@parsed_column_hash[one] = col
else
col = @@parsed_column_hash[one]
end
col -= 1 #zer0 index
index[0] = row
Expand Down
73 changes: 72 additions & 1 deletion lib/rubyXL/workbook.rb
Expand Up @@ -13,11 +13,13 @@

module RubyXL
class Workbook
include Enumerable
attr_accessor :worksheets, :filepath, :creator, :modifier, :created_at,
:modified_at, :company, :application, :appversion, :num_fmts, :fonts, :fills,
:borders, :cell_xfs, :cell_style_xfs, :cell_styles, :shared_strings, :calc_chain,
:num_strings, :size, :date1904, :external_links, :style_corrector, :drawings,
:worksheet_rels, :printer_settings, :macros, :colors, :shared_strings_XML, :defined_names
:worksheet_rels, :printer_settings, :macros, :colors, :shared_strings_XML, :defined_names, :column_lookup_hash


APPLICATION = 'Microsoft Macintosh Excel'
APPVERSION = '12.0000'
Expand Down Expand Up @@ -57,6 +59,7 @@ def initialize(worksheets=[], filepath=nil, creator=nil, modifier=nil, created_a
@colors = nil
@shared_strings_XML = nil
@defined_names = nil
@column_lookup_hash = {}

begin
@created_at = DateTime.parse(created_at).strftime('%Y-%m-%dT%TZ')
Expand All @@ -75,6 +78,10 @@ def [](worksheet)
return worksheets[worksheet]
end

# Yields every worksheet of this workbook, in the order they are stored in
# +worksheets+. This is the primitive the Enumerable mixin relies on.
#
# When called without a block an Enumerator over the worksheets is returned
# (instead of raising LocalJumpError from a bare +yield+), which matches the
# convention followed by Ruby's core collections.
#
# Returns the +worksheets+ collection when a block is given.
def each(&block)
  return to_enum(:each) unless block_given?

  worksheets.each(&block)
end

#filepath of xlsx file (including file itself)
def write(filepath=@filepath)
validate_before_write
Expand Down Expand Up @@ -214,6 +221,70 @@ def num_to_date(num)
compare_date - 1 + num
end

# Memoizing front-end for is_date_format?.
#
# The verdict for each distinct format code is stored in @num_fmt_date_hash
# (created lazily on first use), so a given code is analysed only once.
# A stored +false+ counts as a cache hit; only a missing (nil) entry
# triggers a fresh call to is_date_format?.
#
# num_fmt - the number-format code to classify.
#
# Returns whatever is_date_format? returned for that code.
def date_num_fmt?(num_fmt)
  cache = (@num_fmt_date_hash ||= {})
  verdict = cache[num_fmt]
  verdict = cache[num_fmt] = is_date_format?(num_fmt) if verdict.nil?
  verdict
end

# Heuristically decides whether a number-format code describes a date/time.
#
# The code is first reduced:
# * text inside double quotes is dropped,
# * the character following a backslash, underscore or asterisk is dropped
#   (together with that escape character),
# * the punctuation listed in +skip_chars+ is dropped,
# * anything inside square brackets is dropped.
#
# If the reduced code is one of the known non-date formats the answer is
# false. Otherwise date letters (y, m, d, h, s) and numeric placeholders
# (0, #, ?) are counted and the code is a date format only when date letters
# strictly outnumber numeric placeholders.
#
# Date letters are matched case-insensitively so that codes written in upper
# case (e.g. "YYYY-MM-DD") are classified the same way as their lower-case
# equivalents.
#
# num_fmt - the format code String.
#
# Returns true or false.
def is_date_format?(num_fmt)
  skip_chars = ['$', '-', '+', '/', '(', ')', ':', ' ']
  escape_chars = ['\\', '_', '*']
  num_chars = ['0', '#', '?']
  non_date_formats = ['0.00E+00', '##0.0E+0', 'General', 'GENERAL', 'general', '@']
  date_chars = ['y', 'm', 'd', 'h', 's']

  # Pass 1: strip quoted text, escaped characters and ignorable punctuation.
  kept = []
  state = :plain
  num_fmt.each_char do |c|
    case state
    when :plain
      if c == '"'
        state = :quoted
      elsif escape_chars.include?(c)
        state = :escaped
      elsif !skip_chars.include?(c)
        kept << c
      end
    when :quoted
      state = :plain if c == '"'
    when :escaped
      # The character right after an escape is literal text; ignore it.
      state = :plain
    end
  end

  # Bracketed sections are removed after the first pass, as before.
  reduced = kept.join.gsub(/\[[^\]]*\]/, '')
  return false if non_date_formats.include?(reduced)

  # Pass 2: weigh date letters against numeric placeholders.
  date_count = 0
  num_count = 0
  reduced.each_char do |c|
    if date_chars.include?(c.downcase)
      date_count += 1
    elsif num_chars.include?(c)
      num_count += 1
    end
  end

  # Covers every case: only date letters => true, only numeric
  # placeholders => false, mixed or empty => simple majority.
  date_count > num_count
end

#gets style object from style array given index
def get_style(style_index)
if !@cell_xfs[:xf].is_a?Array
Expand Down
7 changes: 7 additions & 0 deletions lib/rubyXL/worksheet.rb
@@ -1,5 +1,6 @@
module RubyXL
class Worksheet < PrivateClass
include Enumerable

attr_accessor :sheet_name, :sheet_data, :cols, :merged_cells, :pane,
:validations, :sheet_view, :legacy_drawing, :extLst, :workbook, :row_styles
Expand All @@ -26,6 +27,10 @@ def [](row=0)
return @sheet_data[row]
end

# Yields each element of @sheet_data (one per stored row), in order. This is
# the primitive the Enumerable mixin relies on.
#
# When called without a block an Enumerator over the rows is returned
# (instead of raising LocalJumpError from a bare +yield+), which matches the
# convention followed by Ruby's core collections.
#
# Returns @sheet_data when a block is given.
def each(&block)
  return to_enum(:each) unless block_given?

  @sheet_data.each(&block)
end

# Builds a plain two-dimensional array mirroring @sheet_data, holding only
# each cell's value (no style or formula information). Any entry that is not
# a Cell becomes nil in the result.
def extract_data
  @sheet_data.map do |row|
    row.map { |entry| entry.is_a?(Cell) ? entry.value : nil }
  end
end
Expand All @@ -51,6 +56,8 @@ def get_table(headers=[])
header_row.each_with_index do |header_cell, index|
next if header_cell.nil? || header_cell.value.nil?
header = header_cell.value.to_s
table_hash[:sorted_headers]||=[]
table_hash[:sorted_headers] << header
table_hash[header] = []

original_row = row_num + 1
Expand Down

0 comments on commit a9507aa

Please sign in to comment.