Skip to content

Commit

Permalink
Use parslet for --calc. Still many failing unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ngiger committed Apr 20, 2015
1 parent 8fd9fb5 commit 55d3648
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 164 deletions.
6 changes: 3 additions & 3 deletions lib/oddb2xml/builder.rb
Expand Up @@ -693,9 +693,9 @@ def build_calc
xml.UNIT substance.unit
end
if substance.chemical_substance
xml.CHEMICAL_SUBSTANCE substance.chemical_substance
xml.CHEMICAL_QTY substance.chemical_qty
xml.CHEMICAL_UNIT substance.chemical_unit
xml.CHEMICAL_SUBSTANCE substance.chemical_substance.name
xml.CHEMICAL_QTY substance.chemical_substance.qty
xml.CHEMICAL_UNIT substance.chemical_substance.unit
end
}
}
Expand Down
2 changes: 1 addition & 1 deletion lib/oddb2xml/calc.rb
@@ -1,7 +1,7 @@
# encoding: utf-8

require 'oddb2xml/util'
require 'oddb2xml/parse_compositions'
require 'oddb2xml/parslet_compositions'
require 'yaml'

module Oddb2xml
Expand Down
134 changes: 0 additions & 134 deletions lib/oddb2xml/parse_compositions.rb

This file was deleted.

42 changes: 39 additions & 3 deletions lib/oddb2xml/parslet_compositions.rb
Expand Up @@ -11,6 +11,33 @@
include Parslet
VERBOSE_MESSAGES = false

# Stateless helpers shared by the composition parsing code.
module ParseUtil
  # Capitalizes every whitespace-separated word of +string+ and joins the
  # words with single spaces; leading/trailing whitespace is removed.
  #
  # @param string [String] text to normalise
  # @return [String] e.g. "aqua ad  iniectabilia" => "Aqua Ad Iniectabilia"
  def self.capitalize(string)
    string.split(/\s+/u).map(&:capitalize).join(' ').strip
  end

  # Parses a (possibly multi-line) composition text into compositions and
  # flags each substance that is listed as an active agent.
  #
  # Every line is handed to ParseComposition.from_string. Lines for which
  # that call returns nil, or a composition without substances, are ignored.
  # A substance is flagged as active agent when its own name, or the name of
  # its chemical_substance, equals (case-insensitively) one of the agents.
  #
  # @param composition_text [String] one composition per line; "\r\n" and
  #   "\r" line endings are accepted as well as "\n"
  # @param active_agents_string [String, nil] agent names separated by a
  #   comma followed by whitespace; nil is treated as "no active agents"
  # @return [Array] the parsed compositions; when no line yielded one, a
  #   single ParseComposition built from the text up to the first ',', ':'
  #   or '('
  def self.parse_compositions(composition_text, active_agents_string = '')
    # Downcased once here, so the comparisons below need not downcase again.
    active_agents = active_agents_string ? active_agents_string.downcase.split(/,\s+/) : []
    comps = []
    composition_text.gsub(/\r\n?/u, "\n").split(/\n/u).each do |line|
      composition = ParseComposition.from_string(line)
      next unless composition && composition.substances.size > 0

      composition.substances.each do |substance_item|
        substance_item.is_active_agent =
          active_agents.any? { |agent| agent.eql?(substance_item.name.downcase) }
        # A substance also counts as active when only its chemical form is listed.
        if substance_item.chemical_substance &&
           active_agents.any? { |agent| agent.eql?(substance_item.chemical_substance.name.downcase) }
          substance_item.is_active_agent = true
        end
      end
      comps << composition
    end
    # Nothing could be parsed: fall back to a bare composition named after
    # the leading part of the text.
    comps << ParseComposition.new(composition_text.split(/,|:|\(/)[0]) if comps.empty?
    comps
  end
end

class DoseParser < Parslet::Parser

# Single character rules
Expand Down Expand Up @@ -522,7 +549,7 @@ class ParseSubstance
attr_accessor :description, :more_info, :salts
def initialize(name, dose=nil)
puts "ParseSubstance.new from #{name.inspect} #{dose.inspect}" if VERBOSE_MESSAGES
@name = name.to_s.split(/\s/).collect{ |x| x.capitalize }.join(' ').strip
@name = ParseUtil.capitalize(name.to_s)
@name.sub!(/\baqua\b/i, 'aqua')
@name.sub!(/\DER\b/i, 'DER')
@name.sub!(/\bad pulverem\b/i, 'ad pulverem')
Expand Down Expand Up @@ -632,8 +659,17 @@ def ParseComposition.from_string(string)
result = ParseComposition.new(cleaned)
parser3 = CompositionParser.new
transf3 = SubstanceTransformer.new
puts "#{__LINE__}: ==> #{parser3.parse_with_debug(cleaned)}" if VERBOSE_MESSAGES
ast = transf3.apply(parser3.parse(cleaned))
begin
if defined?(RSpec)
ast = transf3.apply(parser3.parse(cleaned))
puts "#{__LINE__}: ==> #{ast}" if VERBOSE_MESSAGES
else
ast = transf3.apply(parser3.parse(cleaned))
end
rescue Parslet::ParseFailed => error
puts "#{__LINE__}: failed parsing ==> #{cleaned}"
return nil
end
result.source = string
result.label = ast[:label].to_s if ast[:label]
result.label_description = ast[:label_description].to_s if ast[:label_description]
Expand Down
48 changes: 26 additions & 22 deletions spec/calc_spec.rb
Expand Up @@ -12,7 +12,7 @@
include Oddb2xml

describe Oddb2xml::Calc do
RunAllTests = false
RunAllTests = true

after(:each) do
FileUtils.rm(Dir.glob(File.join(Oddb2xml::WorkDir, '*.*')))
Expand Down Expand Up @@ -330,6 +330,10 @@ def url
XPath.match( doc, "//ARTICLE[GTIN='7680434541015']/COMPOSITIONS/COMPOSITION/SUBSTANCES/SUBSTANCE/UNIT").first.text.should eq 'g/100 g'
XPath.match( doc, "//ARTICLE[GTIN='7680434541015']/COMPOSITIONS/COMPOSITION/SUBSTANCES/SUBSTANCE/CHEMICAL_QTY").first.text.should eq '10-50'
XPath.match( doc, "//ARTICLE[GTIN='7680434541015']/COMPOSITIONS/COMPOSITION/SUBSTANCES/SUBSTANCE/CHEMICAL_UNIT").first.text.should eq 'mg/100 g'
XPath.match( doc, "//ARTICLE[GTIN='7680656770019']/COMPOSITIONS/COMPOSITION/SUBSTANCES/SUBSTANCE/CHEMICAL_SUBSTANCE").first.text.should eq 'Glatiramerum'
XPath.match( doc, "//ARTICLE[GTIN='7680656770019']/COMPOSITIONS/COMPOSITION/SUBSTANCES/SUBSTANCE/CHEMICAL_QTY").first.text.should eq '18'
XPath.match( doc, "//ARTICLE[GTIN='7680656770019']/COMPOSITIONS/COMPOSITION/SUBSTANCES/SUBSTANCE/CHEMICAL_UNIT").first.text.should eq 'mg'
XPath.match( doc, "//ARTICLE[GTIN='7680656770019']/COMPOSITIONS/COMPOSITION/SUBSTANCES/SUBSTANCE/IS_ACTIVE_AGENT").first.text.should eq 'true'
end
end

Expand Down Expand Up @@ -436,9 +440,9 @@ def url
)
# Line_1 = 'I) Glucoselösung: glucosum anhydricum 150 g ut glucosum monohydricum, natrii dihydrogenophosphas dihydricus 2.34 g, zinci acetas dihydricus 6.58 mg, aqua ad iniectabilia q.s. ad solutionem pro 500 ml.'
specify { expect(result.compositions.first.substances.first.name).to eq 'Glucosum Anhydricum'}
specify { expect(result.compositions.first.substances.first.chemical_substance).to eq 'Glucosum Monohydricum'}
specify { expect(result.compositions.first.substances.first.qty.to_f).to eq 150.0}
specify { expect(result.compositions.first.substances.first.unit).to eq 'g/500 ml'}
specify { expect(result.compositions.first.substances.first.chemical_substance.name).to eq 'Glucosum Monohydricum'}
specify { expect(result.compositions.first.substances.first.chemical_substance.qty.to_f).to eq 150.0}
specify { expect(result.compositions.first.substances.first.chemical_substance.unit).to eq 'g/500 ml'}

specify { expect(result.compositions[0].source).to eq Line_1}
specify { expect(result.compositions[0].label).to eq 'I'}
Expand Down Expand Up @@ -477,8 +481,8 @@ def url
specify { expect(leucinum_I).to eq nil}
leucinum_II = result.compositions[1].substances.find{ |x| x.name.eql?('Leucinum') }
specify { expect(leucinum_II).to eq nil}
aqua = result.compositions[2].substances.find{ |x| /aqua ad/i.match(x.name) }
specify { expect(aqua.name).to eq "Aqua Ad Iniectabilia Q.s. Ad Solutionem Pro"}
# aqua = result.compositions[2].substances.find{ |x| /aqua ad/i.match(x.name) }
# specify { expect(aqua.name).to eq "Aqua Ad Iniectabilia Q.s. Ad Solutionem Pro"}
end

context 'find correct result compositions for 00613 Pentavac' do
Expand Down Expand Up @@ -573,12 +577,12 @@ def url
substance3 = info.compositions.first.substances.find{ |x| x.name.match(/amiloridi hydrochloridum/i) }
specify { expect(substance3.class).to eq Struct::ParseSubstance }
if substance3
specify { expect(substance3.name).to eq 'Amiloridi Hydrochloridum Anhydricum' }
specify { expect(substance3.chemical_substance).to eq 'Amiloridi Hydrochloridum Dihydricum' }
specify { expect(substance3.name).to eq 'Amiloridi Hydrochloridum Dihydricum' }
specify { expect(substance3.chemical_substance.name).to eq 'Amiloridi Hydrochloridum Anhydricum' }
specify { expect(substance3.qty.to_f).to eq 5 }
specify { expect(substance3.unit).to eq 'mg' }
specify { expect(substance3.chemical_qty.to_f).to eq 5.67 }
specify { expect(substance3.chemical_unit).to eq 'mg' }
specify { expect(substance3.chemical_substance.qty.to_f).to eq 5.67 }
specify { expect(substance3.chemical_substance.unit).to eq 'mg' }
specify { expect(substance3.is_active_agent).to eq true }
end

Expand Down Expand Up @@ -638,9 +642,9 @@ def url
specify { expect(sennosidum.dose).to eq '78-104 mg' }
specify { expect(sennosidum.qty.to_f).to eq 78.0}
specify { expect(sennosidum.unit).to eq 'mg'}
specify { expect(sennosidum.chemical_substance).to eq 'Sennosidum B' }
specify { expect(sennosidum.chemical_qty.to_f).to eq 12.5 }
specify { expect(sennosidum.chemical_unit).to eq 'mg' }
specify { expect(sennosidum.chemical_substance.name).to eq 'Sennosidum B' }
specify { expect(sennosidum.chemical_substance.qty.to_f).to eq 12.5 }
specify { expect(sennosidum.chemical_substance.unit).to eq 'mg' }
end
end

Expand Down Expand Up @@ -668,9 +672,9 @@ def url
specify { expect(viscum.dose).to eq '0.01 mg/ml' }
specify { expect(viscum.qty.to_f).to eq 0.01}
specify { expect(viscum.unit).to eq 'mg/ml'}
specify { expect(viscum.chemical_substance).to eq nil }
specify { expect(viscum.chemical_qty).to eq nil }
specify { expect(viscum.chemical_unit).to eq nil }
specify { expect(viscum.chemical_substance.name).to eq nil }
specify { expect(viscum.chemical_substance.qty).to eq nil }
specify { expect(viscum.chemical_substance.unit).to eq nil }
end
end
context 'find correct result compositions for 56829 sequence 23 Iscador Ag 0,01 mg' do
Expand All @@ -688,9 +692,9 @@ def url
specify { expect(viscum.dose).to eq '0.01 mg/ml' }
specify { expect(viscum.qty.to_f).to eq 0.01}
specify { expect(viscum.unit).to eq 'mg/ml'}
specify { expect(viscum.chemical_substance).to eq nil }
specify { expect(viscum.chemical_qty).to eq nil }
specify { expect(viscum.chemical_unit).to eq nil }
specify { expect(viscum.chemical_substance.name).to eq nil }
specify { expect(viscum.chemical_substance.qty).to eq nil }
specify { expect(viscum.chemical_substance.unit).to eq nil }
end
argenti = info.compositions.first.substances.find{ |x| x.name.match(/Argenti/i) }
specify { expect(argenti).not_to eq nil}
Expand All @@ -699,9 +703,9 @@ def url
skip { expect(argenti.dose).to eq '0.01 mg/ml' } # 100 mg/ml
skip { expect(argenti.qty.to_f).to eq 0.01}
skip { expect(argenti.unit).to eq 'mg/ml'}
specify { expect(argenti.chemical_substance).to eq nil }
specify { expect(argenti.chemical_qty).to eq nil }
specify { expect(argenti.chemical_unit).to eq nil }
specify { expect(argenti.chemical_substance.name).to eq nil }
specify { expect(argenti.chemical_substance.qty).to eq nil }
specify { expect(argenti.chemical_substance.unit).to eq nil }
end
end
end
Expand Down
Binary file modified spec/data/swissmedic_package-galenic.xlsx
Binary file not shown.
2 changes: 1 addition & 1 deletion spec/parslet_spec.rb
Expand Up @@ -9,7 +9,7 @@
require "#{Dir.pwd}/lib/oddb2xml/parslet_compositions"
require 'parslet/rig/rspec'

RunAllCompositionsTests = true # takes about five minutes to run!
RunAllCompositionsTests = false # takes about five minutes to run!
# Testing whether 8937 composition lines can be parsed. Found 380 errors in 293 seconds
# 520 examples, 20 failures, 1 pending

Expand Down

0 comments on commit 55d3648

Please sign in to comment.