Skip to content

Commit

Permalink
Add selling units to generated gtin2atc.csv file. Code
Browse files Browse the repository at this point in the history
  • Loading branch information
ngiger committed Mar 4, 2015
1 parent 0dd12c3 commit d89768c
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 17 deletions.
2 changes: 2 additions & 0 deletions History.txt
@@ -1,5 +1,7 @@
=== 0.1.4 03.03.2015

* Add selling units to generated gtin2atc.csv file
* Use ';' as column separator for generated CSV files
* Create file pharmacode_gtin_not_found.txt

=== 0.1.3 27.01.2015
Expand Down
47 changes: 37 additions & 10 deletions lib/gtin2atc/builder.rb
Expand Up @@ -14,6 +14,7 @@ class Builder
AtcNotInSwissmedic = 'atc not in swissmedic'
AtcNotInBag = 'atc not in bag'
AtcDifferent = 'atc differed'
CsvOutputOptions = { :col_sep => ';', :encoding => 'UTF-8'}
def initialize(opts)
Util.set_logging(opts[:log])
@do_compare = opts[:compare]
Expand Down Expand Up @@ -93,6 +94,29 @@ def swissindex_xml_extractor
Util.debug_msg "swissindex_xml_extractor extracted #{data.size} items"
data
end
# Reads the package information stored in oddb_calc.xml (looked up in the
# current working directory) and indexes it by GTIN.
#
# Returns a Hash that maps the integer GTIN of every ARTICLE to a Hash with
# the keys :gtin, :PKG_SIZE, :SELLING_UNITS and :MEASURE. When the file does
# not exist a message is printed and an empty Hash is returned.
def oddb_calc_xml_extractor
  filename = 'oddb_calc.xml'
  data = {}
  # File.exist? replaces File.exists?, which is deprecated and no longer
  # available in Ruby 3.2+. The check uses the same variable as the read below.
  unless File.exist?(filename)
    puts "Unable to open #{filename}"
    return data
  end

  xml = IO.read(filename)
  Util.debug_msg "oddb_calc_xml_extractor xml is #{xml.size} bytes long"
  result = ARTICLESEntry.parse(xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  result.ARTICLES.ARTICLE.each do |article|
    gtin = article.GTIN.to_i
    item = {
      :gtin          => gtin,
      :PKG_SIZE      => article.PKG_SIZE,
      :SELLING_UNITS => article.SELLING_UNITS,
      :MEASURE       => article.MEASURE,
    }
    data[gtin] = item
    # Per-article dump goes through the debug log instead of stdout.
    Util.debug_msg "#{gtin.inspect} : #{item}"
  end
  Util.debug_msg "oddb_calc_xml_extractor extracted #{data.size} items"
  data
end
def bag_xml_extractor
data = {}
@bag = BagDownloader.new
Expand Down Expand Up @@ -121,24 +145,27 @@ def bag_xml_extractor
Util.debug_msg "bag_xml_extractor extracted #{data.size} items. Skipped #{@bag_entries_without_gtin} entries without gtin"
data
end

def run(gtins_to_parse=[])
Util.debug_msg("run #{gtins_to_parse}")
Util.debug_msg("@use_swissindex true")
@oddb_calc = oddb_calc_xml_extractor
@data_epha_atc = epha_atc_extractor
@data_swissindex = swissindex_xml_extractor
emitted_ids = []
output_name = File.join(Util.get_archive, @do_compare ? 'gtin2atc_swissindex.csv' : 'gtin2atc.csv')
CSV.open(output_name,'w+') do |csvfile|
csvfile << ["gtin", "ATC", 'pharmacode', 'description', 'daily drug dose']
CSV.open(output_name,'w+', CsvOutputOptions) do |csvfile|
csvfile << ["gtin", "ATC", 'pharmacode', 'description', 'daily drug dose', 'selling units']
@data_swissindex.sort.each do |gtin, item|
if @do_compare or gtins_to_parse.size == 0 or
gtins_to_parse.index(gtin.to_s) or
gtins_to_parse.index(item[:pharmacode])
gtins_to_parse.index(item[:pharmacode].to_s)
atc = item[:atc_code]
ddd = @data_epha_atc[atc]
emitted_ids << gtin if gtin
emitted_ids << item[:pharmacode] if item[:pharmacode]
csvfile << [gtin, atc, item[:pharmacode], item[:description], ddd]
selling_units = @oddb_calc[gtin] ? @oddb_calc[gtin][:SELLING_UNITS] : nil
emitted_ids << gtin.to_i if gtin
emitted_ids << item[:pharmacode].to_i if item[:pharmacode]
csvfile << [gtin, atc, item[:pharmacode], item[:description], ddd, selling_units]
end
end
end
Expand All @@ -147,16 +174,16 @@ def run(gtins_to_parse=[])
missing_ids = []
gtins_to_parse.each{
|id|
next if emitted_ids.index(id)
next if emitted_ids.index(id.to_i)
missing_ids << id
}
File.open('pharmacode_gtin_not_found.txt', 'w+') { |f| f.write missing_ids.join("\n") }
File.open('pharmacode_gtin_not_found.txt', 'w+', CsvOutputOptions) { |f| f.write missing_ids.join("\n") }
msg = "swissindex: Could not find info for #{missing_ids.size} missing ids see file pharmacode_gtin_not_found.txt"
Util.debug_msg(msg)
return unless @do_compare
@data_bag = bag_xml_extractor
output_name = File.join(Util.get_archive, 'gtin2atc_bag.csv')
CSV.open(output_name,'w+') do |csvfile|
CSV.open(output_name,'w+', CsvOutputOptions) do |csvfile|
csvfile << ["gtin", "ATC", 'description']
@data_bag.sort.each do |gtin, item|
csvfile << [gtin, item[:atc_code], item[:description]]
Expand All @@ -165,7 +192,7 @@ def run(gtins_to_parse=[])
Util.debug_msg "bag: Extracted #{gtins_to_parse.size} of #{@data_bag.size} items into #{output_name} for #{gtins_to_parse}"
@data_swissmedic = swissmedic_xls_extractor
output_name = File.join(Util.get_archive, 'gtin2atc_swissmedic.csv')
CSV.open(output_name,'w+') do |csvfile|
CSV.open(output_name,'w+', CsvOutputOptions) do |csvfile|
csvfile << ["gtin", "ATC", 'description']
@data_swissmedic.sort.each do |gtin, item|
csvfile << [gtin, item[:atc_code], item[:pharmacode], item[:description]]
Expand Down
41 changes: 41 additions & 0 deletions lib/gtin2atc/xml_definitions.rb
Expand Up @@ -248,3 +248,44 @@ class MedicalInformationsEntry
element :medicalInformations, :class => MedicalInformationsContent
end

# SAXMachine mapping that declares the NAME, QTY and UNIT child elements.
# It is referenced as the :class of ARTICLEContent's COMPOSITIONS element.
class COMPOSITIONContent
  include SAXMachine
  [:NAME, :QTY, :UNIT].each { |tag| element tag }
end

# SAXMachine wrapper declaring a single ARTICLEs element parsed as
# COMPOSITIONContent.
# NOTE(review): the element name :ARTICLEs looks like a copy/paste leftover
# and nothing in the visible code references this class -- confirm before
# relying on it.
class COMPOSITIONEntry
  include SAXMachine
  element :ARTICLEs, class: COMPOSITIONContent
end

# SAXMachine mapping for one ARTICLE: the plain child elements declared below
# plus a COMPOSITIONS element that is parsed as COMPOSITIONContent.
class ARTICLEContent
  include SAXMachine
  [
    :GTIN,
    :NAME,
    :PKG_SIZE,
    :MEASURE,
    :SELLING_UNITS,
    :GALENIC_FORM,
    :GALENIC_GROUP,
  ].each { |tag| element tag }
  element :COMPOSITIONS, class: COMPOSITIONContent
end


# SAXMachine wrapper declaring a single ARTICLE element parsed as
# ARTICLEContent.
# NOTE(review): not referenced by any code visible here; the XX suffix
# suggests it is a leftover experiment -- confirm before removing or using.
class ARTICLEEntryXX
  include SAXMachine
  element :ARTICLE, class: ARTICLEContent
end

# SAXMachine mapping for the ARTICLES container: its ReleaseDate attribute
# and the repeated ARTICLE children, each parsed as ARTICLEContent.
class ARTICLESContent
  include SAXMachine
  attribute :ReleaseDate
  elements :ARTICLE, class: ARTICLEContent
end

# Top-level SAXMachine entry point: declares the ARTICLES element, parsed as
# ARTICLESContent. Builder#oddb_calc_xml_extractor calls ARTICLESEntry.parse.
class ARTICLESEntry
  include SAXMachine
  element :ARTICLES, class: ARTICLESContent
end

14 changes: 7 additions & 7 deletions spec/builder_spec.rb
Expand Up @@ -51,8 +51,8 @@ def check_csv(filename)
puts inhalt
/^\d{13},\w{4}/.should match inhalt[1]
# Packungsgrösse, Dosierung, DDD, Route of Administration
/^gtin,ATC,pharmacode,description,daily drug dose/.should match inhalt.first
/^7680316440115,B03AA07,20244,FERRO-GRADUMET Depottabl,"0,2 g O Fe2\+"/.should match inhalt.join("\n")
/^gtin;ATC;pharmacode;description;daily drug dose/.should match inhalt.first
/^7680316440115;B03AA07;20244;FERRO-GRADUMET Depottabl,"0,2 g O Fe2\+"/.should match inhalt.join("\n")
end

context 'when 20273 41803 (Pharmacodes) is given' do
Expand Down Expand Up @@ -102,8 +102,8 @@ def check_csv(filename)
check_csv(CSV_NAME)
inhalt = IO.readlines(CSV_NAME)
inhalt.size.should eq 2+1 # one header lines + two items
inhalt[1].chomp.should eq '7680147690482,N07BC02,41803,KETALGIN Inj Lös 10 mg/ml,"25 mg O,P"'
inhalt[2].chomp.should eq '7680353660163,B03AE10,20273,KENDURAL Depottabl,'
inhalt[1].chomp.should eq '7680147690482;N07BC02;41803;KETALGIN Inj Lös 10 mg/ml;25 mg O,P;'
inhalt[2].chomp.should eq '7680353660163;B03AE10;20273;KENDURAL Depottabl;;'
end
end

Expand All @@ -123,15 +123,15 @@ def check_csv(filename)
/7680353660163/.match(inhalt[1]).should == nil
/7680147690482/.match(inhalt[2]).should == nil
/7680353660163/.match(inhalt[2]).should_not == nil
/7680353660163,B03AE10,20273,KENDURAL Depottabl/.match(inhalt[2]).should_not == nil
/7680353660163;B03AE10;20273;KENDURAL Depottabl/.match(inhalt[2]).should_not == nil
end
end

def check_csv(filename)
File.exists?(filename).should eq true
inhalt = IO.readlines(filename)
/^gtin,ATC/.match(inhalt.first).should_not == nil
/^\d{13},\w{4}/.match(inhalt[1]).should_not == nil
/^gtin;ATC/.match(inhalt.first).should_not == nil
/^\d{13};\w{4}/.match(inhalt[1]).should_not == nil
end

context 'when --compare is given' do
Expand Down

0 comments on commit d89768c

Please sign in to comment.