-
Notifications
You must be signed in to change notification settings - Fork 500
/
base.rb
800 lines (710 loc) · 21.3 KB
/
base.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
# encoding: utf-8
require 'tmpdir'
require 'stringio'
require 'nokogiri'
Roo::ZipFile =
begin
require 'zip/zipfilesystem'
Zip::ZipFile
rescue LoadError
# For rubyzip >= 1.0.0
require 'zip/filesystem'
Zip::File
end
# Base class for all other types of spreadsheets
class Roo::Base
include Enumerable
TEMP_PREFIX = "oo_"
LETTERS = ('A'..'Z').to_a
attr_reader :default_sheet, :headers
# sets the line with attribute names (default: 1)
attr_accessor :header_line
class << self
def split_coordinate(str)
letter,number = split_coord(str)
x = letter_to_number(letter)
y = number
return y, x
end
def split_coord(s)
if s =~ /([a-zA-Z]+)([0-9]+)/
letter = $1
number = $2.to_i
else
raise ArgumentError
end
return letter, number
end
# convert a number to something like 'AB' (1 => 'A', 2 => 'B', ...)
def number_to_letter(n)
letters=""
if n > 26
while n % 26 == 0 && n != 0
letters << 'Z'
n = ((n - 26) / 26).to_i
end
while n > 0
num = n%26
letters = LETTERS[num-1] + letters
n = (n / 26).to_i
end
else
letters = LETTERS[n-1]
end
letters
end
# convert letters like 'AB' to a number ('A' => 1, 'B' => 2, ...)
def letter_to_number(letters)
result = 0
while letters && letters.length > 0
character = letters[0,1].upcase
num = LETTERS.index(character)
raise ArgumentError, "invalid column character '#{letters[0,1]}'" if num == nil
num += 1
result = result * 26 + num
letters = letters[1..-1]
end
result
end
end
def initialize(filename, options={}, file_warning=:error, tmpdir=nil)
@filename = filename
@options = options
@cell = {}
@cell_type = {}
@cells_read = {}
@first_row = {}
@last_row = {}
@first_column = {}
@last_column = {}
@header_line = 1
@default_sheet = self.sheets.first
end
# sets the working sheet in the document
# 'sheet' can be a number (1 = first sheet) or the name of a sheet.
def default_sheet=(sheet)
validate_sheet!(sheet)
@default_sheet = sheet
@first_row[sheet] = @last_row[sheet] = @first_column[sheet] = @last_column[sheet] = nil
@cells_read[sheet] = false
end
# first non-empty column as a letter
def first_column_as_letter(sheet=nil)
self.class.number_to_letter(first_column(sheet))
end
# last non-empty column as a letter
def last_column_as_letter(sheet=nil)
self.class.number_to_letter(last_column(sheet))
end
# returns the number of the first non-empty row
def first_row(sheet=nil)
sheet ||= @default_sheet
read_cells(sheet)
@first_row[sheet] ||=
begin
impossible_value = 999_999 # more than a spreadsheet can hold
result = impossible_value
@cell[sheet].each_pair {|key,value|
result = [result, key.first.to_i].min if value
} if @cell[sheet]
result unless result == impossible_value
end
end
# returns the number of the last non-empty row
def last_row(sheet=nil)
sheet ||= @default_sheet
read_cells(sheet)
@last_row[sheet] ||=
begin
impossible_value = 0
result = impossible_value
@cell[sheet].each_pair {|key,value|
result = [result, key.first.to_i].max if value
} if @cell[sheet]
result unless result == impossible_value
end
end
# returns the number of the first non-empty column
def first_column(sheet=nil)
sheet ||= @default_sheet
read_cells(sheet)
@first_column[sheet] ||=
begin
impossible_value = 999_999 # more than a spreadsheet can hold
result = impossible_value
@cell[sheet].each_pair {|key,value|
result = [result, key.last.to_i].min if value
} if @cell[sheet]
result unless result == impossible_value
end
end
# returns the number of the last non-empty column
def last_column(sheet=nil)
sheet ||= @default_sheet
read_cells(sheet)
@last_column[sheet] ||=
begin
impossible_value = 0
result = impossible_value
@cell[sheet].each_pair {|key,value|
result = [result, key.last.to_i].max if value
} if @cell[sheet]
result unless result == impossible_value
end
end
# returns a rectangular area (default: all cells) as yaml-output
# you can add additional attributes with the prefix parameter like:
# oo.to_yaml({"file"=>"flightdata_2007-06-26", "sheet" => "1"})
def to_yaml(prefix={}, from_row=nil, from_column=nil, to_row=nil, to_column=nil,sheet=nil)
sheet ||= @default_sheet
return '' unless first_row # empty result if there is no first_row in a sheet
from_row ||= first_row(sheet)
to_row ||= last_row(sheet)
from_column ||= first_column(sheet)
to_column ||= last_column(sheet)
result = "--- \n"
from_row.upto(to_row) do |row|
from_column.upto(to_column) do |col|
unless empty?(row,col,sheet)
result << "cell_#{row}_#{col}: \n"
prefix.each {|k,v|
result << " #{k}: #{v} \n"
}
result << " row: #{row} \n"
result << " col: #{col} \n"
result << " celltype: #{self.celltype(row,col,sheet)} \n"
value = cell(row,col,sheet)
if celltype(row,col,sheet) == :time
value = integer_to_timestring(value)
end
result << " value: #{value} \n"
end
end
end
result
end
# write the current spreadsheet to stdout or into a file
def to_csv(filename=nil,sheet=nil,separator=',')
sheet ||= @default_sheet
if filename
File.open(filename,"w") do |file|
write_csv_content(file,sheet,separator)
end
return true
else
sio = StringIO.new
write_csv_content(sio,sheet,separator)
sio.rewind
return sio.read
end
end
# returns a matrix object from the whole sheet or a rectangular area of a sheet
def to_matrix(from_row=nil, from_column=nil, to_row=nil, to_column=nil,sheet=nil)
require 'matrix'
sheet ||= @default_sheet
return Matrix.empty unless first_row
from_row ||= first_row(sheet)
to_row ||= last_row(sheet)
from_column ||= first_column(sheet)
to_column ||= last_column(sheet)
Matrix.rows(from_row.upto(to_row).map do |row|
from_column.upto(to_column).map do |col|
cell(row,col,sheet)
end
end)
end
# call to_s method defined on subclasses
def inspect
to_s
end
# find a row either by row number or a condition
# Caution: this works only within the default sheet -> set default_sheet before you call this method
# (experimental. see examples in the test_roo.rb file)
def find(*args) # :nodoc
options = (args.last.is_a?(Hash) ? args.pop : {})
case args[0]
when Fixnum
find_by_row(args[0])
when :all
find_by_conditions(options)
else
raise ArgumentError, "unexpected arg #{args[0].inspect}, pass a row index or :all"
end
end
# returns all values in this row as an array
# row numbers are 1,2,3,... like in the spreadsheet
def row(rownumber,sheet=nil)
sheet ||= @default_sheet
read_cells(sheet)
first_column(sheet).upto(last_column(sheet)).map do |col|
cell(rownumber,col,sheet)
end
end
# returns all values in this column as an array
# column numbers are 1,2,3,... like in the spreadsheet
def column(columnnumber,sheet=nil)
if columnnumber.class == String
columnnumber = self.class.letter_to_number(columnnumber)
end
sheet ||= @default_sheet
read_cells(sheet)
first_row(sheet).upto(last_row(sheet)).map do |row|
cell(row,columnnumber,sheet)
end
end
# set a cell to a certain value
# (this will not be saved back to the spreadsheet file!)
def set(row,col,value,sheet=nil) #:nodoc:
sheet ||= @default_sheet
read_cells(sheet)
row, col = normalize(row,col)
cell_type =
case value
when Fixnum then :float
when String, Float then :string
else
raise ArgumentError, "Type for #{value} not set"
end
set_value(row,col,value,sheet)
set_type(row,col,cell_type,sheet)
end
# reopens and read a spreadsheet document
def reload
ds = @default_sheet
reinitialize
self.default_sheet = ds
end
# true if cell is empty
def empty?(row, col, sheet=nil)
sheet ||= @default_sheet
read_cells(sheet)
row,col = normalize(row,col)
contents = cell(row, col, sheet)
!contents || (celltype(row, col, sheet) == :string && contents.empty?) \
|| (row < first_row(sheet) || row > last_row(sheet) || col < first_column(sheet) || col > last_column(sheet))
end
# returns information of the spreadsheet document and all sheets within
# this document.
def info
without_changing_default_sheet do
result = "File: #{File.basename(@filename)}\n"+
"Number of sheets: #{sheets.size}\n"+
"Sheets: #{sheets.join(', ')}\n"
n = 1
sheets.each {|sheet|
self.default_sheet = sheet
result << "Sheet " + n.to_s + ":\n"
unless first_row
result << " - empty -"
else
result << " First row: #{first_row}\n"
result << " Last row: #{last_row}\n"
result << " First column: #{self.class.number_to_letter(first_column)}\n"
result << " Last column: #{self.class.number_to_letter(last_column)}"
end
result << "\n" if sheet != sheets.last
n += 1
}
result
end
end
# returns an XML representation of all sheets of a spreadsheet file
def to_xml
Nokogiri::XML::Builder.new do |xml|
xml.spreadsheet {
self.sheets.each do |sheet|
self.default_sheet = sheet
xml.sheet(:name => sheet) { |x|
if first_row and last_row and first_column and last_column
# sonst gibt es Fehler bei leeren Blaettern
first_row.upto(last_row) do |row|
first_column.upto(last_column) do |col|
unless empty?(row,col)
x.cell(cell(row,col),
:row =>row,
:column => col,
:type => celltype(row,col))
end
end
end
end
}
end
}
end.to_xml
end
# when a method like spreadsheet.a42 is called
# convert it to a call of spreadsheet.cell('a',42)
def method_missing(m, *args)
# #aa42 => #cell('aa',42)
# #aa42('Sheet1') => #cell('aa',42,'Sheet1')
if m =~ /^([a-z]+)(\d)$/
col = self.class.letter_to_number($1)
row = $2.to_i
if args.empty?
cell(row,col)
else
cell(row,col,args.first)
end
else
super
end
end
# access different worksheets by calling spreadsheet.sheet(1)
# or spreadsheet.sheet('SHEETNAME')
def sheet(index,name=false)
@default_sheet = String === index ? index : self.sheets[index]
name ? [@default_sheet,self] : self
end
# iterate through all worksheets of a document
def each_with_pagename
self.sheets.each do |s|
yield sheet(s,true)
end
end
# by passing in headers as options, this method returns
# specific columns from your header assignment
# for example:
# xls.sheet('New Prices').parse(:upc => 'UPC', :price => 'Price') would return:
# [{:upc => 123456789012, :price => 35.42},..]
# the queries are matched with regex, so regex options can be passed in
# such as :price => '^(Cost|Price)'
# case insensitive by default
# by using the :header_search option, you can query for headers
# and return a hash of every row with the keys set to the header result
# for example:
# xls.sheet('New Prices').parse(:header_search => ['UPC*SKU','^Price*\sCost\s'])
# that example searches for a column titled either UPC or SKU and another
# column titled either Price or Cost (regex characters allowed)
# * is the wildcard character
# you can also pass in a :clean => true option to strip the sheet of
# odd unicode characters and white spaces around columns
def each(options={})
if options.empty?
1.upto(last_row) do |line|
yield row(line)
end
else
if options[:clean]
options.delete(:clean)
@cleaned ||= {}
@cleaned[@default_sheet] || clean_sheet(@default_sheet)
end
if options[:header_search]
@headers = nil
@header_line = row_with(options[:header_search])
elsif [:first_row,true].include?(options[:headers])
@headers = []
row(first_row).each_with_index {|x,i| @headers << [x,i + 1]}
else
set_headers(options)
end
headers = @headers ||
Hash[(first_column..last_column).map do |col|
[cell(@header_line,col), col]
end]
@header_line.upto(last_row) do |line|
yield(Hash[headers.map {|k,v| [k,cell(line,v)]}])
end
end
end
def parse(options={})
ary = []
if block_given?
each(options) {|row| ary << yield(row)}
else
each(options) {|row| ary << row}
end
ary
end
def row_with(query,return_headers=false)
query.map! {|x| Array(x.split('*'))}
line_no = 0
each do |row|
line_no += 1
# makes sure headers is the first part of wildcard search for priority
# ex. if UPC and SKU exist for UPC*SKU search, UPC takes the cake
headers = query.map do |q|
q.map {|i| row.grep(/#{i}/i)[0]}.compact[0]
end.compact
if headers.length == query.length
@header_line = line_no
return return_headers ? headers : line_no
elsif line_no > 100
raise "Couldn't find header row."
end
end
end
protected
def load_xml(path)
::File.open(path, 'rb') do |file|
::Nokogiri::XML(file)
end
end
def file_type_check(filename, ext, name, warning_level, packed=nil)
if packed == :zip
# lalala.ods.zip => lalala.ods
# hier wird KEIN unzip gemacht, sondern nur der Name der Datei
# getestet, falls es eine gepackte Datei ist.
filename = File.basename(filename,File.extname(filename))
end
case ext
when '.ods', '.xls', '.xlsx', '.csv', '.xml'
correct_class = "use #{Roo::CLASS_FOR_EXTENSION[ext]}.new to handle #{ext} spreadsheet files. This has #{File.extname(filename).downcase}"
else
raise "unknown file type: #{ext}"
end
if uri?(filename) && qs_begin = filename.rindex('?')
filename = filename[0..qs_begin-1]
end
if File.extname(filename).downcase != ext
case warning_level
when :error
warn correct_class
raise TypeError, "#{filename} is not #{name} file"
when :warning
warn "are you sure, this is #{name} spreadsheet file?"
warn correct_class
when :ignore
# ignore
else
raise "#{warning_level} illegal state of file_warning"
end
end
end
# konvertiert einen Key in der Form "12,45" (=row,column) in
# ein Array mit numerischen Werten ([12,45])
# Diese Methode ist eine temp. Loesung, um zu erforschen, ob der
# Zugriff mit numerischen Keys schneller ist.
def key_to_num(str)
r,c = str.split(',')
[r.to_i,c.to_i]
end
# see: key_to_num
def key_to_string(arr)
"#{arr[0]},#{arr[1]}"
end
private
def find_by_row(row_index)
row_index += (header_line - 1) if @header_line
row(row_index).size.times.map do |cell_index|
cell(row_index, cell_index + 1)
end
end
def find_by_conditions(options)
rows = first_row.upto(last_row)
header_for = Hash[1.upto(last_column).map do |col|
[col, cell(@header_line,col)]
end]
# are all conditions met?
conditions = options[:conditions]
if conditions && !conditions.empty?
column_with = header_for.invert
rows = rows.select do |i|
conditions.all? { |key,val| cell(i,column_with[key]) == val }
end
end
if options[:array]
rows.map {|i| self.row(i) }
else
rows.map do |i|
Hash[1.upto(self.row(i).size).map do |j|
[header_for.fetch(j), cell(i,j)]
end]
end
end
end
def without_changing_default_sheet
original_default_sheet = default_sheet
yield
ensure
self.default_sheet = original_default_sheet
end
def reinitialize
initialize(@filename)
end
def make_tmpdir(tmp_root = nil)
Dir.mktmpdir(TEMP_PREFIX, tmp_root || ENV['ROO_TMP']) do |tmpdir|
yield tmpdir
end
end
def clean_sheet(sheet)
read_cells(sheet)
@cell[sheet].each_pair do |coord,value|
if String === value
@cell[sheet][coord] = sanitize_value(value)
end
end
@cleaned[sheet] = true
end
def sanitize_value(v)
v.unpack('U*').select {|b| b < 127}.pack('U*').strip
end
def set_headers(hash={})
# try to find header row with all values or give an error
# then create new hash by indexing strings and keeping integers for header array
@headers = row_with(hash.values,true)
@headers = Hash[hash.keys.zip(@headers.map {|x| header_index(x)})]
end
def header_index(query)
row(@header_line).index(query) + first_column
end
def set_value(row,col,value,sheet=nil)
sheet ||= @default_sheet
@cell[sheet][[row,col]] = value
end
def set_type(row,col,type,sheet=nil)
sheet ||= @default_sheet
@cell_type[sheet][[row,col]] = type
end
# converts cell coordinate to numeric values of row,col
def normalize(row,col)
if row.class == String
if col.class == Fixnum
# ('A',1):
# ('B', 5) -> (5, 2)
row, col = col, row
else
raise ArgumentError
end
end
if col.class == String
col = self.class.letter_to_number(col)
end
return row,col
end
def uri?(filename)
begin
filename.start_with?("http://", "https://")
rescue
false
end
end
def download_uri(uri, tmpdir)
require 'open-uri'
tempfilename = File.join(tmpdir, File.basename(uri))
response = ''
begin
File.open(tempfilename,"wb") do |file|
open(uri, "User-Agent" => "Ruby/#{RUBY_VERSION}") { |net|
file.write(net.read)
}
end
rescue OpenURI::HTTPError
raise "could not open #{uri}"
end
tempfilename
end
def open_from_stream(stream, tmpdir)
tempfilename = File.join(tmpdir, "spreadsheet")
File.open(tempfilename,"wb") do |file|
file.write(stream[7..-1])
end
File.join(tmpdir, "spreadsheet")
end
def unzip(filename, tmpdir)
Roo::ZipFile.open(filename) do |zip|
process_zipfile_packed(zip, tmpdir)
end
end
# check if default_sheet was set and exists in sheets-array
def validate_sheet!(sheet)
case sheet
when nil
raise ArgumentError, "Error: sheet 'nil' not valid"
when Fixnum
self.sheets.fetch(sheet-1) do
raise RangeError, "sheet index #{sheet} not found"
end
when String
if !sheets.include? sheet
raise RangeError, "sheet '#{sheet}' not found"
end
else
raise TypeError, "not a valid sheet type: #{sheet.inspect}"
end
end
def process_zipfile_packed(zip, tmpdir, path='')
if zip.file.file? path
# extract and return filename
File.open(File.join(tmpdir, path),"wb") do |file|
file.write(zip.read(path))
end
File.join(tmpdir, path)
else
ret=nil
path += '/' unless path.empty?
zip.dir.foreach(path) do |filename|
ret = process_zipfile_packed(zip, tmpdir, path + filename)
end
ret
end
end
# Write all cells to the csv file. File can be a filename or nil. If the this
# parameter is nil the output goes to STDOUT
def write_csv_content(file=nil,sheet=nil,separator=',')
file ||= STDOUT
if first_row(sheet) # sheet is not empty
1.upto(last_row(sheet)) do |row|
1.upto(last_column(sheet)) do |col|
file.print(separator) if col > 1
file.print cell_to_csv(row,col,sheet)
end
file.print("\n")
end # sheet not empty
end
end
# The content of a cell in the csv output
def cell_to_csv(row, col, sheet)
if empty?(row,col,sheet)
''
else
onecell = cell(row,col,sheet)
case celltype(row,col,sheet)
when :string
%{"#{onecell.tr('"','""')}"} unless onecell.empty?
when :boolean
%{"#{onecell.tr('"','""').downcase}"}
when :float, :percentage
if onecell == onecell.to_i
onecell.to_i.to_s
else
onecell.to_s
end
when :formula
case onecell
when String
%{"#{onecell.tr('"','""')}"} unless onecell.empty?
when Float
if onecell == onecell.to_i
onecell.to_i.to_s
else
onecell.to_s
end
when DateTime
onecell.to_s
else
raise "unhandled onecell-class #{onecell.class}"
end
when :date, :datetime
onecell.to_s
when :time
integer_to_timestring(onecell)
when :link
%{"#{onecell.url.tr('"','""')}"}
else
raise "unhandled celltype #{celltype(row,col,sheet)}"
end || ""
end
end
private
# converts an integer value to a time string like '02:05:06'
def integer_to_timestring(content)
h = (content/3600.0).floor
content = content - h*3600
m = (content/60.0).floor
content = content - m*60
s = content
sprintf("%02d:%02d:%02d",h,m,s)
end
end