Skip to content

Commit

Permalink
Adapt swissmedic-import for xlsx files. Remove some unused methods
Browse files Browse the repository at this point in the history
  • Loading branch information
ngiger committed Jan 15, 2014
1 parent 1460099 commit 69c0c28
Showing 1 changed file with 40 additions and 133 deletions.
173 changes: 40 additions & 133 deletions src/plugin/swissmedic.rb
Expand Up @@ -3,9 +3,6 @@
# ODDB::SwissmedicPlugin -- oddb.org -- 11.03.2013 -- yasaka@ywesee.com
# ODDB::SwissmedicPlugin -- oddb.org -- 27.12.2011 -- mhatakeyama@ywesee.com
# ODDB::SwissmedicPlugin -- oddb.org -- 18.03.2008 -- hwyss@ywesee.com
# TODO: ngngng. fix_compositions ould probably be removed, as I did not find any reference
# TODO: ngngng. fix_sequences ould probably be removed, as I did not find any reference
# TODO: ngngng. fix_packages ould probably be removed, as I did not find any reference

require 'fileutils'
require 'mechanize'
Expand All @@ -14,7 +11,6 @@
require 'pp'
require 'util/persistence'
require 'util/today'
require 'rubyXL'
require 'swissmedic-diff'
require 'util/logfile'

Expand All @@ -30,9 +26,10 @@ class SwissmedicPlugin < Plugin
def initialize(app=nil, archive=ARCHIVE_PATH)
super app
@index_url = 'https://www.swissmedic.ch/arzneimittel/00156/00221/00222/00230/index.html?lang=de'
@archive = File.join archive, 'xlsx'
@archive = File.join archive, 'xls'
FileUtils.mkdir_p @archive
@latest = File.join @archive, 'Packungen-latest.xlsx'
@latest_preparations = File.join @archive, "Präparateliste-latest.xlsx"
@known_export_registrations = 0
@known_export_sequences = 0
@export_registrations = {}
Expand All @@ -43,7 +40,7 @@ def initialize(app=nil, archive=ARCHIVE_PATH)
@empty_compositions = []
end
def debug_msg(msg)
# $stdout.puts Time.now.to_s + ': ' + msg
# $stdout.puts Time.now.to_s + ': ' + msg; $stdout.flush
if not defined?(@checkLog) or not @checkLog
name = LogFile.filename(File.basename(__FILE__), Time.now)
FileUtils.makedirs(File.dirname(name))
Expand All @@ -54,18 +51,21 @@ def debug_msg(msg)
@checkLog.flush
end
def update(agent=Mechanize.new, target=get_latest_file(agent))
debug_msg "#{Time.now} #{__FILE__} update target #{target.inspect}"
debug_msg "#{__FILE__}: #{__LINE__} update target #{target.inspect} latest #{@latest.inspect}"
if(target)
start_time = Time.new
initialize_export_registrations agent
# keep_active_registrations_praeparateliste
# keep_active_registrations_praeparateliste_with_export_flag_true
debug_msg "calling diff #{target} #{@latest}"
diff target, @latest, [:atc_class, :sequence_date]
if File.exists?(@latest.sub('.xlsx', '.xls'))
debug_msg "#{__FILE__}: #{__LINE__} calling diff #{target} #{@latest}"
diff target, @latest.sub('.xlsx', '.xls'), [:atc_class, :sequence_date]
else
diff target, @latest, [:atc_class, :sequence_date]
end
# check diff from stored data about date-fields of Registration
check_date!
debug_msg "Found #{@diff.news.size} news, #{@diff.updates.size} updates and #{@diff.replacements.size} replacements"
debug_msg "first news: #{@diff.news.first.inspect}"
debug_msg "#{__FILE__}: #{__LINE__} Found #{@diff.news.size} news, #{@diff.updates.size} updates, #{@diff.replacements.size} replacements and #{@diff.package_deletions.size} package_deletions"
debug_msg "#{__FILE__}: #{__LINE__} changes: #{@diff.changes.inspect}"
# debug_msg "#{__FILE__}: #{__LINE__} first news: #{@diff.news.first.inspect}"
update_registrations @diff.news + @diff.updates, @diff.replacements
set_all_export_flag_false
update_export_sequences @export_sequences
Expand All @@ -80,7 +80,7 @@ def update(agent=Mechanize.new, target=get_latest_file(agent))
deactivate @diff.registration_deletions
end_time = Time.now - start_time
@update_time = (end_time / 60.0)
FileUtils.cp target, @latest
FileUtils.cp target, @latest, :verbose => true
@change_flags = @diff.changes.inject({}) { |memo, (iksnr, flags)|
memo.store Persistence::Pointer.new([:registration, iksnr]), flags
memo
Expand Down Expand Up @@ -131,7 +131,7 @@ def cell(row, pos)
end
end
def date_cell(row, idx)
row.at(idx) && row.date(idx)
Spreadsheet.date_cell(row, idx)
end
def recheck_deletions(deletions)
key_list = []
Expand All @@ -149,7 +149,7 @@ def recheck_deletions(deletions)
def deactivate(deactivations)
deactivations.each { |row|
seqnr = "%02i" % cell(row, column(:seqnr)).to_i
debug_msg "#{__FILE__}: #{__LINE__} #{Time.now}: deactivate iksnr '#{row[0]}' seqnr #{seqnr} pack #{row[10]}"
debug_msg "#{__FILE__}: #{__LINE__}: deactivate iksnr '#{row[0]}' seqnr #{seqnr} pack #{row[10]}"
if row.length == 1 # only in the case of registration_deletions
@app.update pointer(row), {:inactive_date => @@today, :renewal_flag => nil, :renewal_flag_swissmedic => nil}, :swissmedic
else # the case of sequence_deletions
Expand All @@ -160,8 +160,9 @@ def deactivate(deactivations)
def delete(deletions)
deletions.each { |row|
seqnr = "%02i" % cell(row, column(:seqnr)).to_i
debug_msg "#{__FILE__}: #{__LINE__} #{Time.now}: delete iksnr '#{row[0]}' seqnr #{seqnr} pack #{row[10]}"
debug_msg "#{__FILE__}: #{__LINE__}: delete iksnr '#{row[0]}' seqnr #{seqnr} pack #{row[10]}"
ptr = pointer(row)
debug_msg "#{__FILE__}: #{__LINE__}: delete iksnr '#{row[0]}' ptr #{ptr.inspect} row #{row.inspect}"
@app.delete ptr if ptr
}
end
Expand Down Expand Up @@ -223,68 +224,7 @@ def _known_data(latest, known_regs, known_seqs, known_pacs, newest_rows)
}
end
end
# TODO: ngngng. fix_packages ould probably be removed, as I did not find any reference
def fix_packages(opts={})
row = nil
handle = SwissmedicDiff.new
handle.each_valid_row(Spreadsheet.open(@latest)) { |row|
reg = update_registration(row, opts) if row
seq = update_sequence(reg, row) if reg
if seq
comps = update_compositions(seq, row)
comps.each_with_index do |comp, idx|
update_galenic_form(seq, comp, row, opts)
end
end
update_package(reg, seq, row, {}, opts) if reg
}
rescue StandardError => err
puts "#{err.class} when fixing #{source_row(row).pretty_inspect if row}"
puts err.message
puts err.backtrace[0,10]
end
# TODO: ngngng. fix_compositions ould probably be removed, as I did not find any reference
def fix_compositions(opts={})
row = nil
handle = SwissmedicDiff.new
handle.each_valid_row(Spreadsheet.open(@latest)) { |row|
reg = update_registration(row, opts) if row
seq = update_sequence(reg, row) if reg
if seq
comps = update_compositions(seq, row)
comps.each_with_index do |comp, idx|
update_galenic_form(seq, comp, row, opts)
end
end
}
rescue StandardError => err
puts "#{err.class} when fixing #{source_row(row).pretty_inspect if row}"
puts err.message
puts err.backtrace[0,10]
end
def fix_registrations(opts={})
row = nil
tbook = Spreadsheet.open(@latest)
tbook.worksheet(0).each(rows_to_skip(tbook)) { |row|
update_registration(row, opts) if row
}
rescue SystemStackError => err
puts "System Stack Error when fixing #{source_row(row).pretty_inspect}"
puts err.backtrace[-100..-1]
end
# TODO: ngngng. fix_sequences ould probably be removed, as I did not find any reference
def fix_sequences(opts={})
row = nil
tbook = Spreadsheet.open(@latest)
tbook.worksheet(0).each(rows_to_skip(tbook)) { |row|
reg = update_registration(row, opts) if row
seq = update_sequence(reg, row) if reg
}
rescue SystemStackError => err
puts "System Stack Error when fixing #{source_row(row).pretty_inspect}"
puts err.backtrace[-100..-1]
end
def get_latest_file(agent, keyword='Packungen')
def get_latest_file(agent, keyword='Packungen', extension = '.xlsx')
page = agent.get @index_url
links = page.links.select do |link|
ptrn = keyword.gsub /[^A-Za-z]/u, '.'
Expand All @@ -293,7 +233,13 @@ def get_latest_file(agent, keyword='Packungen')
link = links.first or raise "could not identify url to #{keyword}.xlsx"
file = agent.get(link.href)
download = file.body
latest_name = File.join @archive, "#{keyword}-latest.xlsx"
latest_name = File.join @archive, "#{keyword}-latest"+extension
if extension == '.xlsx'
latest_xls = latest_name.sub('.xlsx', '.xls')
if File.exist?(latest_xls)
latest_name = latest_xls
end
end
latest = ''
if(File.exist? latest_name)
latest = File.read latest_name
Expand All @@ -304,15 +250,13 @@ def get_latest_file(agent, keyword='Packungen')
target = File.join @archive, @@today.strftime("#{keyword}-%Y.%m.%d.xlsx")
if(!File.exist?(latest_name) or download.size != File.size(latest_name))
File.open(target, 'w') { |fh| fh.puts(download) }
target
target
end
end
def initialize_export_registrations(agent)
latest_name = File.join @archive, "Präparateliste-latest.xlsx"
if target = get_latest_file(agent, 'Präparateliste')
FileUtils.cp target, latest_name
if target = get_latest_file(agent, 'Präparateliste', '.xlsx')
FileUtils.cp target, @latest_preparations, :verbose => true
end
debug_msg "target #{target} latest_name is #{latest_name}"
seq_indices = {}
[ :seqnr, :export_flag ].each do |key|
seq_indices.store key, PREPARATIONS_COLUMNS.index(key)
Expand All @@ -321,11 +265,11 @@ def initialize_export_registrations(agent)
[ :iksnr ].each do |key|
reg_indices.store key, PREPARATIONS_COLUMNS.index(key)
end
RubyXL::Parser.parse(File.expand_path(latest_name)) do |workbook|
Spreadsheet.open(@latest_preparations) do |workbook|
iksnr_idx = reg_indices.delete(:iksnr)
seqnr_idx = seq_indices.delete(:seqnr)
export_flag_idx = seq_indices.delete(:export_flag)
workbook.worksheet[0].each(rows_to_skip(workbook)) do |row|
workbook.worksheet(0).each(rows_to_skip(workbook)) do |row|
iksnr = "%05i" % row[iksnr_idx].to_i
seqnr = row[seqnr_idx]
export = row[export_flag_idx]
Expand All @@ -341,45 +285,6 @@ def initialize_export_registrations(agent)
end
@export_registrations
end
def keep_active_registrations_praeparateliste
latest_name = File.join @archive, "Präparateliste-latest.xlsx"
indices = {
:iksnr => PREPARATIONS_COLUMNS.index(:iksnr),
:seqnr => PREPARATIONS_COLUMNS.index(:seqnr)
}
Spreadsheet.open(latest_name) do |workbook|
iksnr_idx = indices[:iksnr]
seqnr_idx = indices[:seqnr]
workbook.worksheet(0).each(rows_to_skip(workbook)) do |row|
iksnr = "%05i" % row[iksnr_idx].to_i
seqnr = row[seqnr_idx]
@active_registrations_praeparateliste[iksnr] = true
end
end
@active_registrations_praeparateliste
end
def keep_active_registrations_praeparateliste_with_export_flag_true
latest_name = File.join @archive, "Präparateliste-latest.xlsx"
indices = {
:iksnr => PREPARATIONS_COLUMNS.index(:iksnr),
:seqnr => PREPARATIONS_COLUMNS.index(:seqnr),
:export_flag => PREPARATIONS_COLUMNS.index(:export_flag),
}
Spreadsheet.open(latest_name) do |workbook|
iksnr_idx = indices[:iksnr]
seqnr_idx = indices[:seqnr]
export_flag_idx = indices[:export_flag]
workbook.worksheet(0).each(rows_to_skip(workbook)) do |row|
iksnr = "%05i" % row[iksnr_idx].to_i
seqnr = row[seqnr_idx]
export = row[export_flag_idx]
if export =~ /E/
@active_registrations_praeparateliste[iksnr] = true
end
end
end
@active_registrations_praeparateliste
end
def mail_notifications
salutations = {}
flags = {}
Expand All @@ -388,7 +293,8 @@ def mail_notifications
companies = all_flags.inject({}) { |memo, (pointer, flgs)|
if((reg = pointer.resolve(@app)) && (cmp = reg.company) \
&& (email = cmp.swissmedic_email))
salutations.store(email, cmp.swissmedic_salutation)

salutations.store(email, cmp.swissmedic_salutation)
flags.store(pointer, flgs)
(memo[email] ||= []).push(reg)
end
Expand Down Expand Up @@ -576,7 +482,8 @@ def update_compositions(seq, row, opts={:create_only => false})
names = namestr.split(/\s*,(?!\d|[^(]+\))\s*/u).collect { |name| capitalize(name) }.uniq
substances = names.collect { |name| update_substance(name) }
cell_content = cell(row, column(:composition))
debug_msg("update_compositions: iksnr #{row[0]} #{seq.seqnr} seq #{seq} opts #{opts} cell_content #{cell_content}")
iksnr = cell(row, column(:iksnr)).to_i
debug_msg("update_compositions: row[0] #{row[0]} iksnr #{iksnr} #{seq.seqnr} seq #{seq} opts #{opts} cell_content #{cell_content}")
unless cell_content
@empty_compositions << "iksnr #{"%05i" % cell(row, column(:iksnr)).to_i} seq #{seq}"
return []
Expand Down Expand Up @@ -707,7 +614,7 @@ def update_package(reg, seq, row, replacements={},
opts={:create_only => false})
cd = cell(row, column(:ikscd))
unless seq.pointer
debug_msg "#{__FILE__}: #{__LINE__} #{Time.now}: update_package problem '#{row[0]}' cd #{cd} sequence with pointer"
debug_msg "#{__FILE__}: #{__LINE__}: update_package problem '#{row[0]}' cd #{cd} sequence with pointer"
return
end
pidx = cell(row, COLUMNS.size).to_i
Expand Down Expand Up @@ -741,7 +648,7 @@ def update_package(reg, seq, row, replacements={},
:size => [cell(row, column(:size)), cell(row, column(:unit))].compact.join(' '),
}
if package.sequence and package.sequence.seqnr != seq.seqnr
debug_msg "#{__FILE__}: #{__LINE__} #{Time.now}: update_package iksnr '#{row[0]}' cd #{cd} should correct seqnr #{package.sequence.seqnr} -> #{seq.seqnr}?"
debug_msg "#{__FILE__}: #{__LINE__}: update_package iksnr '#{row[0]}' cd #{cd} should correct seqnr #{package.sequence.seqnr} -> #{seq.seqnr}?"
end
if(comform = @app.commercial_form_by_name(cell(row, column(:unit))))
args.store :commercial_form, comform.pointer
Expand Down Expand Up @@ -820,7 +727,7 @@ def update_registrations(rows, replacements)
:date => @@today, }
rows.each { |row|
seqnr = "%02i" % cell(row, column(:seqnr)).to_i
debug_msg "#{__FILE__}: #{__LINE__} #{Time.now}: update_registrations iksnr '#{row[0]}' seqnr #{seqnr} pack #{row[10]}"
next if row[0].to_s.eql?('00000')
reg = update_registration(row, opts) if row
seq = update_sequence(reg, row, opts) if reg
if seq
Expand All @@ -838,7 +745,7 @@ def update_sequence(registration, row, opts={:create_only => false})
if registration.sequence('00')
ptr = registration.sequence('00').pointer
if ptr
debug_msg"#{__FILE__}: #{__LINE__} #{Time.now}: delete sequence('00') seqnr #{seqnr} ptr #{ptr}"
debug_msg"#{__FILE__}: #{__LINE__}: delete sequence('00') seqnr #{seqnr} ptr #{ptr}"
registration.sequence('00').delete_package('000')
registration.delete_sequence('00')
end
Expand Down Expand Up @@ -889,7 +796,7 @@ def update_sequence(registration, row, opts={:create_only => false})
args.store :indication, indication.pointer
end
res = @app.update ptr, args, :swissmedic
debug_msg "#{__FILE__}: #{__LINE__} #{Time.now}: res #{res} == #{sequence}? seqnr #{sequence ? sequence.seqnr : 'nil'}"
debug_msg "#{__FILE__}: #{__LINE__}: res #{res} == #{sequence}? seqnr #{sequence ? sequence.seqnr : 'nil'}"
res
end
def update_substance(name)
Expand Down

0 comments on commit 69c0c28

Please sign in to comment.