Skip to content

Commit

Permalink
Added support to recognize praeparatio. Fix 'et'
Browse files Browse the repository at this point in the history
  • Loading branch information
ngiger committed Apr 15, 2015
1 parent 1e207fe commit 48fe298
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 50 deletions.
41 changes: 33 additions & 8 deletions lib/oddb2xml/parslet_compositions.rb
Expand Up @@ -38,7 +38,7 @@ class DoseParser < Parslet::Parser
rule(:radio_isotop) { match['a-zA-Z'].repeat(1) >> lparen >> digits >> str('-') >> match['a-zA-Z'].repeat(1-3) >> rparen >>
((space? >> match['a-zA-Z']).repeat(1)).repeat(0)
} # e.g. Xenonum (133-Xe) or yttrii(90-Y) chloridum zum Kalibrierungszeitpunkt
rule(:identifier) { (match['a-zA-Zéàèèçïöäüâ'] | digit >> str('-')) >> match['0-9a-zA-Z\-éàèèçïöäüâ\'\/\.:%'].repeat(0) }
rule(:identifier) { (match['a-zA-Zéàèèçïöäüâ'] | digit >> str('-')) >> match['0-9a-zA-Z\-éàèèçïöäüâ\'\/\.%'].repeat(0) }
# handle stuff like acidum 9,11-linolicum specially. it must contain at least one a-z
rule(:identifier_with_comma) { match['0-9,\-'].repeat(0) >> match['a-zA-Z'].repeat(1) >> match['0-9a-zA-Z,\-\(\)'].repeat(0) }
rule(:one_word) { match(['a-zA-Zé,']).repeat(1) >> match(['0-9a-zA-Z\-éàèèçïöäüâ\'\/\.:%']).repeat(0) >> space? }
Expand Down Expand Up @@ -161,16 +161,21 @@ class SubstanceParser < DoseParser
(space >> (identifier >> space? >> dose.maybe >> (space? >> str('et') >> space?).maybe).repeat(1)).maybe}
rule(:substance_residui) { str('residui:') >> space >> substance }
rule(:substance_conserv) { str('conserv.:') >> space >> substance }
rule(:substance_corresp) { substance.as(:substance) >> space >> str('corresp.') >> space >> (str('suspensio reconstituta') >> space).maybe >>
rule(:substance_corresp) { substance.as(:substance) >>str('corresp.') >> space >> (str('suspensio reconstituta') >> space).maybe >>
(substance_et | substance).as(:substance_corresp) }
rule(:substance_ut) { substance.as(:substance_ut) >> space >> str('ut') >> space >> substance }
rule(:substance_et) { (substance.as(:substance_et) >> space >> str('et') >> space).repeat(1) >> (substance_corresp | substance) }
rule(:substance_et) { (substance.as(:substance) >> str('et') >> space).repeat(1) >> (substance_corresp | substance).as(:substance) }
# Grammar rule for a "praeparatio"-style entry. In order it matches:
#   1. optionally: one or more identifiers captured as :description,
#      followed by a literal ':' and a space
#   2. one or more identifiers captured as :substance_name
#   3. a number captured as :qty, a space, the literal 'U.:' and a space
#   4. one or more identifiers captured as :more_info
#   5. optional trailing space
# Example input used by the spec for this commit:
#   "Praeparatio cryodesiccata: pollinis allergeni extractum 25'000 U.: urtica dioica"
# NOTE(review): the `number` rule is not visible here; presumably it accepts
# the apostrophe thousands separator (25'000) - confirm.
rule(:praeparatio) { ((identifier >> space?).repeat(1).as(:description) >> str(':') >> space).maybe>>
(identifier >> space?).repeat(1).as(:substance_name) >>
number.as(:qty) >> space >> str('U.:') >> space >>
(identifier >> space?).repeat(1).as(:more_info) >>
space?
}

#rule(:one_substance) { (str(',') >> space).maybe >> (excipiens) } # Sometimes it is handy for debugging to be able to debug just one of the different variants
rule(:one_substance) { (str(',') >> space).maybe >> (der | excipiens | histamin | named_substance | substance_residui | substance_conserv | substance_et | substance_ut | substance_corresp | substance ) }
rule(:one_substance) { (str(',') >> space).maybe >> (der | excipiens | praeparatio | histamin | named_substance | substance_residui | substance_conserv | substance_et | substance_ut | substance_corresp | substance ) }
# rule(:one_substance) { (str(',') >> space).maybe >> ( substance | excipiens) }
rule(:one_substance) { ( excipiens | substance ) >> (str(',') >> space).maybe }
rule(:one_substance) { ( substance ) >> (str(',') >> space).maybe }
rule(:one_substance) { ( substance_et | praeparatio | substance | excipiens) >> (str(',') >> space).maybe }
rule(:all_substances) { one_substance.repeat(1) }
root :all_substances
end
Expand All @@ -181,7 +186,7 @@ def SubstanceTransformer.clear_substances
@@substances = []
end
# Returns the substances collected so far in the class variable @@substances.
# The last expression is `@@substances.clone`, so callers receive a shallow
# copy of the array rather than the array itself.
# NOTE(review): the bare `@@substances` line has no effect on the result; it
# looks like the pre-change side of the diff shown next to its replacement.
def SubstanceTransformer.substances
@@substances
@@substances.clone
end
def SubstanceTransformer.add_substance(substance)
@@substances << substance
Expand Down Expand Up @@ -212,6 +217,18 @@ def SubstanceTransformer.add_substance(substance)
puts "#{__LINE__}: dictionary #{dictionary}"
ParseSubstance.new(dictionary[:substance_name].to_s.sub(/^excipiens /i, ''), dictionary[:dose_corresp])
}
# Transformer rule for a parse tree holding exactly the keys :description,
# :substance_name, :qty and :more_info (each a simple value).
# Builds a ParseSubstance from the name and a ParseDose created from the
# quantity string, copies more_info and description onto it as strings,
# and returns the substance as the block's value.
# NOTE(review): the pattern lists :description as a required key, while the
# description part of the praeparatio grammar rule is wrapped in `.maybe` -
# confirm that inputs without a description are still transformed.
rule(:description => simple(:description),
:substance_name => simple(:substance_name),
:qty => simple(:qty),
:more_info => simple(:more_info),
) {
|dictionary|
# NOTE(review): unconditional debug output; other diagnostics in this file
# are guarded by `if VERBOSE_MESSAGES`.
puts "#{__LINE__}: dictionary #{dictionary}"
# Only the quantity is handed to ParseDose; no unit argument is passed.
substance = ParseSubstance.new(dictionary[:substance_name], ParseDose.new(dictionary[:qty].to_s))
substance.more_info = dictionary[:more_info].to_s
substance.description = dictionary[:description].to_s
substance
}
rule(:der => simple(:der),
) {
|dictionary|
Expand Down Expand Up @@ -275,7 +292,8 @@ class ParseDose
def initialize(qty=nil, unit=nil)
puts "ParseDose.new from #{qty.inspect} #{unit.inspect} #{unit.inspect}" if VERBOSE_MESSAGES
if qty and (qty.is_a?(String) || qty.is_a?(Parslet::Slice))
@qty = qty.to_s.index('.') ? qty.to_s.to_f : qty.to_s.to_i
string = qty.to_s.gsub("'", '')
@qty = string.index('.') ? string.to_f : string.to_i
elsif qty
@qty = qty.eval
else
Expand Down Expand Up @@ -303,6 +321,7 @@ def ParseDose.from_string(string)

class ParseSubstance
attr_accessor :name, :qty, :unit, :chemical_substance, :chemical_qty, :chemical_unit, :is_active_agent, :dose, :cdose, :is_excipiens
attr_accessor :description, :more_info
def initialize(name, dose=nil)
puts "ParseSubstance.new from #{name.inspect} #{dose.inspect}" if VERBOSE_MESSAGES
@name = name.to_s.split(/\s/).collect{ |x| x.capitalize }.join(' ').strip
Expand Down Expand Up @@ -474,6 +493,9 @@ def initialize(source)
puts "ParseComposition.new from #{source.inspect} @substances #{@substances.inspect}" if VERBOSE_MESSAGES
@source = source.to_s
end
# Replaces the class-level error handler with a fresh HandleSwissmedicErrors
# built from ErrorsToFix, so later calls that use @@errorHandler work with
# the new instance. Returns the newly assigned handler (the value of the
# assignment).
def ParseComposition.reset
@@errorHandler = HandleSwissmedicErrors.new( ErrorsToFix )
end
# Returns whatever the class-level error handler's #report returns.
# NOTE(review): the spec indexes the result (report[0], report[1]), so it is
# presumably an array of strings - confirm against HandleSwissmedicErrors#report.
def ParseComposition.report
@@errorHandler.report
end
Expand All @@ -484,6 +506,9 @@ def ParseComposition.from_string(string)
cleaned = string.gsub(/^"|["\n\.]+$/, '')
value = nil
puts "ParseComposition.from_string #{string}" if VERBOSE_MESSAGES
cleaned = @@errorHandler.apply_fixes(cleaned)
puts "ParseComposition.new cleaned #{cleaned}" if VERBOSE_MESSAGES and not cleaned.eql?(string)

SubstanceTransformer.clear_substances
result = ParseComposition.new(cleaned)
parser3 = CompositionParser.new
Expand Down
105 changes: 63 additions & 42 deletions spec/parslet_spec.rb
Expand Up @@ -10,10 +10,11 @@
require 'parslet/rig/rspec'

RunAllCompositionsTests = false # takes over a minute!
RunAllTests = true
RunFailingSpec = false
RunMostImportantParserTests = true
RunExcipiensTest = true
RunDoseTests = true
RunAllTests = true
RunMostImportantParserTests = true
TryRun = true

describe HandleSwissmedicErrors do
Expand All @@ -33,17 +34,17 @@
# Checks that ParseComposition.from_string splits a string in which two
# substances run together without a separating comma, and that the error
# handler's report mentions the replacement that was applied.
# NOTE(review): `composition` and `report` are each assigned more than once
# below; this listing appears to show pre- and post-change lines of the diff
# side by side, so only the last assignment of each is what the examples see.
context 'should be used when calling ParseComposition' do
replacement = '\1, \2'
test_string = 'sulfuris D6 2,2 mg hypericum perforatum D2 0,66'
composition = ParseComposition.from_string(test_string)
report = ParseComposition.reset
composition = ParseComposition.from_string(test_string).clone
report = ParseComposition.report
specify { expect(composition.substances.size).to eq 2 }
specify { expect(composition.substances.first.name).to eq 'Sulfuris D6' }
specify { expect(composition.substances.last.name).to eq 'Hypericum Perforatum D2' }
report = ParseComposition.report
# The first report entry must match /report/i.
specify { expect(/report/i.match(report[0]).class).to eq MatchData }
# The second report entry must contain the replacement pattern.
# NOTE(review): Fixnum was deprecated in Ruby 2.4 and removed in 3.2;
# on current Rubies String#index returns an Integer.
specify { expect(report[1].index(replacement).class).to eq Fixnum }
end


end
end if RunDoseTests

def run_composition_tests(strings)
strings.each {
Expand All @@ -70,6 +71,12 @@ def run_substance_tests(hash_string_to_name)

describe ParseDose do

# A quantity written with an apostrophe (50'000) must be read as 50000 and
# the unit must be 'U.I.'.
# NOTE(review): ParseDose.from_string is not visible here; presumably the
# apostrophe is removed by the gsub("'", '') in ParseDose#initialize - confirm.
# The context is only defined when RunMostImportantParserTests is truthy.
context "should return correct dose for '50'000 U.I.' (number has ')" do
dose = ParseDose.from_string("50'000 U.I.")
specify { expect(dose.qty).to eq 50000.0 }
specify { expect(dose.unit).to eq 'U.I.' }
end if RunMostImportantParserTests

context "should return correct dose for '3,45' (number has comma, no decimal point)" do
dose = ParseDose.from_string("3,45")
specify { expect(dose.qty).to eq 3.45 }
Expand Down Expand Up @@ -171,7 +178,7 @@ def run_substance_tests(hash_string_to_name)
specify { expect(dose.unit).to eq 'g' }
end if RunFailingSpec

end
end if RunDoseTests


describe ParseSubstance do
Expand Down Expand Up @@ -286,7 +293,7 @@ def run_substance_tests(hash_string_to_name)

"conserv.: E 217, E 219, natrii dehydroacetas" => "E 217",
"excipiens ad solutionem pro 1 ml corresp. 50 µg pro dosi" => "Excipiens Ad Solutionem Pro 1 Ml Corresp. 50 µg Pro Dosi",
"acari allergeni extractum 50'000 U.:" => 'Acari Allergeni Extractum',
"acari allergeni extractum 50'000 U." => 'Acari Allergeni Extractum',
"acari allergeni extractum (acarus siro) 50'000 U." => 'Acari Allergeni Extractum (acarus Siro)',
'silybum marianum D3 0.3 ml ad solutionem pro 2 ml' => "Silybum Marianum D3",
"excipiens ad solutionem pro 1 ml corresp. 50 µg pro dosi" => "line #{__LINE__}",
Expand All @@ -301,18 +308,58 @@ def run_substance_tests(hash_string_to_name)
run_substance_tests(failing_tests) if RunFailingSpec
run_substance_tests(excipiens_tests) if RunExcipiensTest
# run_substance_tests(tests) if RunAllTests
# run_substance_tests( { "acari allergeni extractum 50'000 U.:" => 'Acari Allergeni Extractum', })
run_composition_tests( ["acari allergeni extractum 50'000 U.",
"pollinis allergeni extractum 50'000 U.: fraxinus excelsior, conserv.: phenolum, excipiens ad solutionem pro 1 ml."]) if false
if RunMostImportantParserTests
# Two substances joined by "et" must be parsed into two separate entries of
# composition.substances, each with its own name, quantity and unit.
context "should return correct substance for given with et (IKSNR 11879)" do
string = "calcii lactas pentahydricus 25 mg et calcii hydrogenophosphas anhydricus 300 mg"
composition = ParseComposition.from_string(string)
specify { expect(composition.substances.size).to eq 2 }
# Look the substances up by name fragment so the checks do not depend on
# their order in composition.substances.
pentahydricus = composition.substances.find{ |x| /pentahydricus/i.match(x.name) }
anhydricus = composition.substances.find{ |x| /anhydricus/i.match(x.name) }
specify { expect(pentahydricus.name).to eq 'Calcii Lactas Pentahydricus' }
specify { expect(pentahydricus.qty).to eq 25.0}
specify { expect(pentahydricus.unit).to eq 'mg' }
specify { expect(anhydricus.name).to eq 'Calcii Hydrogenophosphas Anhydricus' }
specify { expect(anhydricus.qty).to eq 300.0 }
specify { expect(anhydricus.unit).to eq 'mg' }
end

# Two substances joined by "et" and followed by "corresp. <substance>" must
# yield three entries in composition.substances: both "et" operands and the
# substance named after "corresp.".
context "should return correct substance for given with et and corresp. (IKSNR 11879)" do
string = "calcii lactas pentahydricus 25 mg et calcii hydrogenophosphas anhydricus 300 mg corresp. calcium 100 mg"

composition = ParseComposition.from_string(string)
specify { expect(composition.substances.size).to eq 3 }
# Look the substances up by name fragment so the checks do not depend on
# their order in composition.substances.
calcium = composition.substances.find{ |x| /calcium/i.match(x.name) }
pentahydricus = composition.substances.find{ |x| /pentahydricus/i.match(x.name) }
anhydricus = composition.substances.find{ |x| /anhydricus/i.match(x.name) }
specify { expect(pentahydricus.name).to eq 'Calcii Lactas Pentahydricus' }
specify { expect(pentahydricus.qty).to eq 25.0}
specify { expect(pentahydricus.unit).to eq 'mg' }
specify { expect(anhydricus.name).to eq 'Calcii Hydrogenophosphas Anhydricus' }
specify { expect(anhydricus.qty).to eq 300.0 }
specify { expect(anhydricus.unit).to eq 'mg' }
specify { expect(calcium.name).to eq 'Calcium' }
specify { expect(calcium.qty).to eq 100.0 }
specify { expect(calcium.unit).to eq 'mg' }
end

# A "description: name qty U.: more info" input must produce a substance
# whose name is the capitalised substance name and whose description is the
# text before the first colon, with its case unchanged.
context "should parse a complex composition" do
source = "Praeparatio cryodesiccata: pollinis allergeni extractum 25'000 U.: urtica dioica"
substance = ParseSubstance.from_string(source)
specify { expect(substance.name).to eq 'Pollinis Allergeni Extractum' }
specify { expect(substance.description).to eq 'Praeparatio cryodesiccata' }
end

# Calls ParseSubstance.from_string on a substance name that contains a
# parenthesised, comma-separated remark.
# This context declares no `specify` examples, so nothing is asserted about
# the result; the call runs while the context body is evaluated.
# NOTE(review): `source` is assigned twice and only the second (longer)
# string reaches from_string; the local is named `composition` although it
# holds the result of ParseSubstance.from_string.
context "should parse a complex composition" do
source = 'globulina equina (immunisé avec coeur) 8 mg'
source = 'globulina equina (immunisé avec coeur, tissu pulmonaire, reins de porcins) 8 mg'
composition = ParseSubstance.from_string(source)
end

context "should return correct substance for 9,11-linolicum " do
substance = nil; composition = nil
[ "9,11-linolicum",
# "9,11-linolicum 3.25 mg"
"9,11-linolicum 3.25 mg"
].each {
|string|
substance = ParseSubstance.from_string(string)
Expand All @@ -323,7 +370,7 @@ def run_substance_tests(hash_string_to_name)

specify { expect(substance.qty).to eq 3.25}
specify { expect(substance.unit).to eq 'mg' }
end if TryRun
end

context "should return correct substance for 'pyrazinamidum 500 mg'" do
string = "pyrazinamidum 500 mg"
Expand All @@ -339,20 +386,7 @@ def run_substance_tests(hash_string_to_name)
specify { expect(substance.name).to eq 'Xenonum(133-xe)' }
specify { expect(substance.qty).to eq 74 }
specify { expect(substance.unit).to eq 'MBq' }
end if TryRun

context "should return correct substance for given with et" do
string = "calcii lactas pentahydricus 25 mg et calcii hydrogenophosphas anhydricus 300 mg"
substance = ParseSubstance.from_string(string)
pp substance
# binding.pry
specify { expect(substance.name).to eq 'Calcii Lactas Pentahydricus' }
specify { expect(substance.qty).to eq 25.0}
specify { expect(substance.unit).to eq 'mg' }
specify { expect(substance.chemical_substance.name).to eq 'Calcii Hydrogenophosphas Ahydricus' }
specify { expect(substance.chemical_substance.qty).to eq 300.0 }
specify { expect(substance.chemical_substance.unit).to eq 'mg' }
end if TryRun
end

context "should return correct substance for 'excipiens ad solutionem pro 1 ml corresp. ethanolum 59.5 % V/V'" do
string = "excipiens ad solutionem pro 1 ml corresp. ethanolum 59.5 % V/V"
Expand All @@ -364,21 +398,7 @@ def run_substance_tests(hash_string_to_name)
specify { expect(substance.cdose.to_s).to eq ParseDose.new('59.5', '% V/V').to_s }
specify { expect(substance.qty).to eq 1.0}
specify { expect(substance.unit).to eq 'ml' }
end if TryRun

context "should return correct substance for given with et" do
string = "calcii lactas pentahydricus 25 mg et calcii hydrogenophosphas anhydricus 300 mg corresp. calcium 100 mg"
substance = ParseSubstance.from_string(string)
pp substance

composition = ParseComposition.from_string(string)
specify { expect(substance.name).to eq 'Calcii Lactas Pentahydricus' }
specify { expect(substance.qty).to eq 25.0}
specify { expect(substance.unit).to eq 'mg' }
specify { expect(substance.chemical_substance.name).to eq 'Calcii Hydrogenophosphas Ahydricus' }
specify { expect(substance.chemical_substance.qty).to eq 300.0 }
specify { expect(substance.chemical_substance.unit).to eq 'mg' }
end if TryRun
end

context "should return correct substance for 'excipiens pro compresso'" do
string = "excipiens pro compresso"
Expand All @@ -394,7 +414,7 @@ def run_substance_tests(hash_string_to_name)
specify { expect(substance.name).to eq 'Excipiens Ad Solutionem Pro 3 Ml Corresp. 50 µg' }
specify { expect(substance.qty).to eq 3.0}
specify { expect(substance.unit).to eq 'ml' }
end if TryRun
end

context "should return correct substance for 'excipiens ad pulverem pro 1000 mg'" do
string = "excipiens ad pulverem pro 1000 mg"
Expand Down Expand Up @@ -499,6 +519,7 @@ def run_substance_tests(hash_string_to_name)
specify { expect(haema.qty).to eq 25 }
specify { expect(haema.unit).to eq 'µg' }
end

if RunAllTests
context "should return correct composition for 'minoxidilum'" do
source = 'minoxidilum 2.5 mg, pyrazinamidum 500 mg'
Expand Down

0 comments on commit 48fe298

Please sign in to comment.