Skip to content

Commit

Permalink
Support comparing xls and xlsx files
Browse files Browse the repository at this point in the history
  • Loading branch information
ngiger committed Jan 15, 2014
1 parent 23d1534 commit a5f8494
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 27 deletions.
59 changes: 59 additions & 0 deletions lib/compatibility.rb
@@ -0,0 +1,59 @@
#!/usr/bin/env ruby
# encoding: utf-8
require 'spreadsheet'
require 'rubyXL'

# Monkey patches that let Spreadsheet open .xlsx files (through RubyXL) in
# addition to the workbooks handled by Excel::Workbook.
module Spreadsheet
  class << self
    # Opens a workbook from a path or an IO-like object.
    #
    # io_or_path:: a path (a String, or anything responding to +to_path+) or
    #              an object responding to +seek+.
    # mode::       mode passed to File.open when a non-xlsx path is opened.
    #
    # * A path whose extension is ".xlsx" (case-insensitive) is handed to
    #   RubyXL::Parser.parse.
    # * An object responding to +seek+ is handed to Excel::Workbook.open.
    # * Any other path is opened with File.open and then treated as an IO.
    #
    # When a block is given the workbook is yielded and the value of the
    # block is returned; otherwise the workbook itself is returned.
    def open io_or_path, mode="rb+"
      # File.extname raises TypeError for IO objects that carry no path
      # (e.g. StringIO), so only inspect the extension when a path exists.
      path = io_or_path.respond_to?(:to_path) ? io_or_path.to_path : io_or_path
      if path.is_a?(String) && File.extname(path).downcase == '.xlsx'
        book = RubyXL::Parser.parse(io_or_path)
        # Honour a caller-supplied block the same way the File.open branch does.
        block_given? ? yield(book) : book
      elsif io_or_path.respond_to? :seek
        Excel::Workbook.open(io_or_path)
      elsif block_given?
        File.open(io_or_path, mode) do |fh|
          yield open(fh)
        end
      else
        # NOTE(review): the File handle is not closed on this path; presumably
        # the workbook keeps reading from it -- confirm before changing.
        open File.open(io_or_path, mode)
      end
    end
  end

  # Returns the date found in column +idx+ of +row+.
  #
  # For a Spreadsheet::Excel::Row the row's own +date+ reader is used, and a
  # falsy cell is returned unchanged. For any other row the entry at +idx+ is
  # converted only when it is a RubyXL::Cell: its value (via +to_i+) is added
  # as a number of days to 1899-12-30. Every other entry yields nil.
  def Spreadsheet.date_cell(row, idx)
    if row.kind_of?(Spreadsheet::Excel::Row)
      row.at(idx) && row.date(idx)
    else
      data = row[idx]
      return Date.new(1899, 12, 30) + data.value.to_i if data.is_a?(RubyXL::Cell)
    end
  end
end

# Additions to RubyXL so that its workbook, worksheet and cell objects answer
# the calls this project otherwise makes on Spreadsheet objects
# (worksheet, row, date, to_i, to_s).
module RubyXL
  class Worksheet < PrivateClass
    # Returns the entry of @sheet_data at +row_index+.
    #
    # A non-nil entry that does not yet answer +date+ receives a singleton
    # +date(column_index)+ method: when the entry at +column_index+ is a
    # RubyXL::Cell, its value (via +to_i+) is added as a number of days to
    # 1899-12-30; otherwise +date+ returns nil.
    def row(row_index)
      x = @sheet_data[row_index]
      # Defining a singleton method on nil would add it to NilClass and so to
      # every nil in the process; hand a missing row back untouched instead.
      return x if x.nil?
      def x.date(column_index)
        data = self[column_index]
        return Date.new(1899, 12, 30) + data.value.to_i if data.is_a?(RubyXL::Cell)
      end unless defined?(x.date)
      x
    end
  end

  class Workbook
    # Returns the worksheet at +idx+ by delegating to Workbook#[].
    def worksheet(idx)
      self[idx]
    end
  end

  class Cell
    # Integer form of the cell's value.
    def to_i
      self.value.to_i
    end

    # String form of the cell's value.
    def to_s
      self.value.to_s
    end
  end
end

32 changes: 11 additions & 21 deletions lib/swissmedic-diff.rb
Expand Up @@ -5,6 +5,10 @@
require 'ostruct'
require 'spreadsheet'
require 'rubyXL'
require 'pp'

# add some monkey patches for Spreadsheet and rubyXL
require File.join(File.dirname(__FILE__), 'compatibility.rb')

#= diff command (compare two xls files) for swissmedic xls file.
#
Expand Down Expand Up @@ -96,11 +100,7 @@ def diff(target, latest, ignore = [])
@diff.changes = changes = {}
@diff.newest_rows = newest_rows
Spreadsheet.client_encoding = 'UTF-8'
if File.extname(target).eql?('.xlsx')
tbook = RubyXL::Parser.parse(File.expand_path(target))
else
tbook = Spreadsheet.open(target)
end
tbook = Spreadsheet.open(target)
sheet = tbook.worksheet(0)
if new_column = cell(sheet.row(2), COLUMNS.size)
raise "New column #{COLUMNS.size} (#{new_column})"
Expand Down Expand Up @@ -181,11 +181,7 @@ def known_data(latest)
[known_regs, known_seqs, known_pacs, newest_rows]
end
def _known_data(latest, known_regs, known_seqs, known_pacs, newest_rows)
if File.extname(latest).eql?('.xlsx')
lbook = RubyXL::Parser.parse(File.expand_path(latest)).worksheets[0]
else
lbook = Spreadsheet.open(latest)
end
lbook = Spreadsheet.open(latest)
idx, prr, prp = nil
multiples = {}
each_valid_row(lbook) { |row|
Expand Down Expand Up @@ -222,6 +218,8 @@ def rows_diff(row, other, ignore = [])
COLUMNS.each_with_index { |key, idx|
if(!ignore.include?(key) \
&& _comparable(key, row, idx) != _comparable(key, other, idx))
# binding.pry if key == :expiry_date

flags.push key
end
}
Expand Down Expand Up @@ -273,7 +271,7 @@ def _comparable(key, row, idx)
if cell = row[idx]
case key
when :registration_date, :expiry_date
row[idx]
Spreadsheet.date_cell(row, idx)
when :seqnr
sprintf "%02i", cell.to_i
else
Expand All @@ -297,11 +295,7 @@ def _comparable(key, row, idx)
#return ::
def each_valid_row(spreadsheet)
skipRows = rows_to_skip(spreadsheet)
if spreadsheet.class.eql?(RubyXL::Worksheet)
worksheet = spreadsheet
else
worksheet = spreadsheet.worksheet(0)
end
worksheet = spreadsheet.worksheet(0)
row_nr = 0
worksheet.each() {
|row|
Expand All @@ -323,11 +317,7 @@ def rows_to_skip(spreadsheet)
# Packungen.xls of swissmedic before October 2013 had 3 leading rows
# Packungen.xls of swissmedic after October 2013 have 4 leading rows
j = 0
if spreadsheet.class.eql?(RubyXL::Worksheet)
j += 1 while spreadsheet[j][0] and spreadsheet[j][0].value.to_i == 0
else
j += 1 while spreadsheet.worksheet(0).row(j)[0].to_i == 0
end
j += 1 while spreadsheet.worksheet(0).row(j)[0].to_i == 0
j
end

Expand Down
Binary file removed test/data/Packungen-2014.xlsx
Binary file not shown.
Binary file added test/data/Packungen_2013_small.xls
Binary file not shown.
Binary file added test/data/Packungen_2014_small.xlsx
Binary file not shown.
60 changes: 54 additions & 6 deletions test/test_swissmedic-diff.rb
Expand Up @@ -26,19 +26,67 @@ def setup
File.dirname(__FILE__)
@workbook = Spreadsheet.open(@data)
end
# This is not a unit test as it takes way too long (> 1 minute)
# Instead it might just tell you how to test with real data

# Diffs an .xls workbook against an .xlsx workbook, ignoring the :atc_class
# and :sequence_date columns, and checks the resulting news/changes/updates.
# NOTE(review): this block looks like a diff view that interleaves superseded
# and current lines -- confirm which statements are live before relying on it.
def test_diff_xls_and_xlsx
@diff = SwissmedicDiff.new
# These two paths are reassigned just below before they are ever used.
last_month = File.expand_path 'data/Packungen.xls', File.dirname(__FILE__)
this_month = File.expand_path 'data/Packungen-2014.xlsx', File.dirname(__FILE__)
# The paths actually passed to diff.
last_month = File.expand_path 'data/Packungen_2013_small.xls', File.dirname(__FILE__)
this_month = File.expand_path 'data/Packungen_2014_small.xlsx', File.dirname(__FILE__)
result = @diff.diff last_month, this_month, [:atc_class, :sequence_date]
# The header label must not appear among the changes.
assert(result.changes.flatten.index('Zulassungs-Nummer') == nil, "Should not find Zulassungs-Nummer in changes")
# NOTE(review): presumably left over from the previous fixture pair -- verify
# that these two assertions still apply to the small fixtures.
assert(result.news.first.index('00275'), "Should find 00275 in news")
assert(result.news.first.index('00277') == nil, "Should not find 00277 in news")
# puts "Got #{result.news.size} news, #{result.changes.size} changes, #{result.updates.size} updates."
assert_equal(8, result.news.size)
assert_equal(3, result.changes.size)
assert_equal(1, result.updates.size)
assert(result.news.first.index('00280'), "Should find 00280 in news")
assert(result.news.flatten.index('65034'), "Should find 65034 in news")
assert(result.news.flatten.index('00277') == nil, "Should not find 00277 in news")
# This checks result.news although the message text says "changes".
assert(result.news.flatten.index('Zulassungs-Nummer') == nil, "Should not find Zulassungs-Nummer in changes")
end

# Column 8 (sequence_date) of an .xls workbook: empty in row 0, and in row 4 a
# date that reads the same through Row#date and Spreadsheet.date_cell.
def test_date_xls
  tbook = Spreadsheet.open(File.expand_path('data/Packungen.xls', File.dirname(__FILE__)))
  sheet = tbook.worksheet(0)
  assert_nil(sheet.row(0)[8]) # sequence_date

  via_row = sheet.row(4).date(8)
  assert_equal(2010, via_row.year)
  assert_equal(26, via_row.day)

  via_helper = Spreadsheet.date_cell(sheet.row(4), 8)
  assert_equal(26, via_helper.day)
  assert_equal(2010, via_helper.year)
end

# Column 8 (sequence_date) of an .xlsx workbook opened through Spreadsheet.open:
# empty in row 0, and in row 4 a date that reads the same through the row's
# date method and Spreadsheet.date_cell.
def test_date_xlsx
  tbook = Spreadsheet.open(File.expand_path('data/Packungen_2014_small.xlsx', File.dirname(__FILE__)))
  sheet = tbook.worksheet(0)
  assert_nil(sheet.row(0)[8]) # sequence_date

  via_row = sheet.row(4).date(8)
  assert_equal(2010, via_row.year)
  assert_equal(26, via_row.day)

  via_helper = Spreadsheet.date_cell(sheet.row(4), 8)
  assert_equal(26, via_helper.day)
  assert_equal(2010, via_helper.year)
end

# Diffs an .xlsx workbook (as the older file) against an .xls workbook,
# ignoring the :atc_class and :sequence_date columns.
def test_diff_xlsx_and_xls
  @diff = SwissmedicDiff.new
  last_month = File.expand_path 'data/Packungen_2014_small.xlsx', File.dirname(__FILE__)
  this_month = File.expand_path 'data/Packungen.xls', File.dirname(__FILE__)
  result = @diff.diff last_month, this_month, [:atc_class, :sequence_date]
  assert_equal(7, result.news.size)
  assert_equal(2, result.updates.size)
  assert_equal(10, result.changes.size)
  assert(result.changes.flatten.index('00275'), "Should find 00275 in changes")
  assert(result.changes.flatten.index('00277'), "Should find 00277 in changes")
  # The header label must not be reported as news; the message now names the
  # collection that is actually inspected.
  assert_nil(result.news.flatten.index('Zulassungs-Nummer'), "Should not find Zulassungs-Nummer in news")
end
# Diffing an .xlsx workbook against itself must report nothing at all.
def test_diff_xlsx_and_xlsx
  @diff = SwissmedicDiff.new
  data_dir = File.dirname(__FILE__)
  packungen = File.expand_path('data/Packungen_2014_small.xlsx', data_dir)
  ignored = [:atc_class, :sequence_date]
  result = @diff.diff(packungen, packungen, ignored)
  assert_equal({}, result.changes)
  assert_equal([], result.news)
  assert_equal([], result.updates)
end

# This is not a unit test as it takes way too long (> 1 minute)
# Instead it might just tell you how to test with real data
def test_real_diff
Expand Down

0 comments on commit a5f8494

Please sign in to comment.