Skip to content

Commit

Permalink
Fixed various errors, eg. 2,2'-methylen-bis(6-tert.-butyl-4-methyl-ph…
Browse files Browse the repository at this point in the history
…enolum)
  • Loading branch information
ngiger committed Apr 23, 2015
1 parent e4f8924 commit 2d8454e
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 18 deletions.
21 changes: 17 additions & 4 deletions lib/oddb2xml/compositions_syntax.rb
Expand Up @@ -39,16 +39,22 @@ class CompositionParser < Parslet::Parser
((space? >> match['a-zA-Z']).repeat(1)).repeat(0)
} # e.g. Xenonum (133-Xe) or yttrii(90-Y) chloridum zum Kalibrierungszeitpunkt
rule(:ratio_value) { match['0-9:\-\.'].repeat(1) >> space?} # eg. ratio: 1:1, ratio: 1:1.5-2.4., ratio: 1:0.68-0.95
rule(:identifier) { (match['a-zA-Zéàèèçïöäüâ'] | digit >> str('-')) >> match['0-9a-zA-Z\-éàèèçïöäüâ\'\/\.'].repeat(0) }
# handle stuff like acidum 9,11-linolicum specially. it must contain at least one a-z

# handle stuff like acidum 9,11-linolicum or 2,2'-methylen-bis(6-tert.-butyl-4-methyl-phenolum) specially. it must contain at least one a-z
rule(:umlaut) { match(['éàèèçïöäüâ']) }
rule(:identifier_D12) { match['a-zA-Z'] >> match['0-9'].repeat(1) }
rule(:identifier) { str('spag.') | str('spp.') | str('A + B') | str('ca.') | str('var.') | str('spec.') | identifier_D12 | identifier_without_comma }
rule(:identifier) { str('spag.') | str('spp.') | str('ssp.') |
str('A + B') | str('ca.') | str('var.') | str('spec.') >>
identifier_D12 | identifier_without_comma | identifier_with_comma
}

# Identifier variant that tolerates separators inside the name.
# Shape, left to right:
#   1. an optional run of digits, commas and hyphens,
#   2. exactly one ASCII letter or umlaut (so a purely numeric token cannot match),
#   3. any number of name characters (digits, ASCII letters, - ' / or umlaut),
#      each optionally preceded by one separator.
# A separator is only consumed when a name character follows it, so a match
# never ends on a separator.
# NOTE(review): match(['_,']) is called with an Array, whereas every other
# character class here uses the match['...'] form -- confirm that it matches
# exactly '_' and ',' and nothing else.
rule(:identifier_with_comma) {
match['0-9,\-'].repeat(0) >> (match['a-zA-Z']|umlaut) >> (match(['_,']).maybe >> (match['0-9a-zA-Z\-\'\/'] | umlaut)).repeat(0)
}

rule(:identifier_without_comma) {
match['0-9,\-'].repeat(0) >> (match['a-zA-Z']|umlaut) >> (match(['_']).maybe >> (match['0-9a-zA-Z\-\'\/'] | umlaut)).repeat(0)
match['0-9\',\-'].repeat(0) >> (match['a-zA-Z']|umlaut) >> (match(['_']).maybe >> (match['0-9a-zA-Z\-\'\/'] | umlaut)).repeat(0) >>
lparen >> (rparen.absent? >> any).repeat(1) >> rparen
}
# A single word: either one ASCII letter followed by one or more digits,
# or a run of one or more ASCII letters. `>>` binds tighter than `|`, so the
# letter+digits sequence is the first alternative and is tried first.
rule(:one_word) { match['a-zA-Z'] >> match['0-9'].repeat(1) | match['a-zA-Z'].repeat(1) }
# One or more consecutive one_word matches enclosed between the lparen and
# rparen rules (both defined outside this excerpt).
rule(:in_parent) { lparen >> one_word.repeat(1) >> rparen }
Expand All @@ -60,6 +66,7 @@ class CompositionParser < Parslet::Parser
str('g/l') |
str('g/L') |
str('% V/V') |
str('µg/24 h') |
str('µg/g') |
str('µg') |
str('guttae') |
Expand Down Expand Up @@ -113,6 +120,7 @@ class CompositionParser < Parslet::Parser
# Grammar parts
rule(:useage) { (any >> str('berzug:')) | # match Überzug
str('antiox.:') |
str('potenziert mit.:') |
str('arom.:') |
str('conserv.:') |
str('color.:')
Expand All @@ -134,7 +142,9 @@ class CompositionParser < Parslet::Parser
str('Mio ') |
str('et ') |
str('ut ') |
str('Beutel: ') |
str('ut alia: ') |
str('per centum ') |
str('pro dosi') |
str('pro capsula') |
(digits.repeat(1) >> space >> str(':')) | # match 50 %
Expand Down Expand Up @@ -174,6 +184,7 @@ class CompositionParser < Parslet::Parser
}
# A substance name, then the space? rule, then an optional dose.
# Captures :substance_name and, only when a dose is present, :dose.
rule(:simple_substance) { substance_name.as(:substance_name) >> space? >> dose.as(:dose).maybe}
# Substance whose name ends in a number (optionally with a percent sign) and is
# followed by a mandatory dose with unit -- presumably input such as
# "acidum lacticum 90 % 4.55 mg" (TODO confirm against the specs).
# An optional substance_lead prefix may come first.
# Captures:
#   :substance_name -- name_without_parenthesis plus the trailing digits, where the
#                      digits may be followed by ' %' or '%', or stand alone
#   :dose           -- whatever dose_with_unit matches after one `space`
rule(:simple_subtance_with_digits_in_name_and_dose) {
substance_lead.maybe >> space? >>
(name_without_parenthesis >> space? >> ((digits.repeat(1) >> (str(' %') | str('%')) | digits.repeat(1)))).as(:substance_name) >>
space >> dose_with_unit.as(:dose)
}
Expand Down Expand Up @@ -205,6 +216,7 @@ class CompositionParser < Parslet::Parser
str('aqua q.s. ad suspensionem pro ') |
str('q.s. ad pulverem pro ') |
str('pro vase ') |
str('per centum ') |
str('excipiens ad emulsionem pro ') |
str('excipiens ad pulverem pro ') |
str('aqua ad iniectabilia q.s. ad solutionem pro ')
Expand Down Expand Up @@ -232,6 +244,7 @@ class CompositionParser < Parslet::Parser
}

rule(:substance_lead) { useage.as(:more_info) >> space? |
str('Beutel:').as(:more_info) >> space? |
str('residui:').as(:more_info) >> space? |
str('mineralia').as(:mineralia) >> str(':') >> space? |
str('Solvens:').as(:solvens) >> space? |
Expand Down
12 changes: 12 additions & 0 deletions lib/oddb2xml/parslet_compositions.rb
Expand Up @@ -396,6 +396,18 @@ def CompositionTransformer.corresp
@@excipiens = dose
@@substances << substance
}
# Transformation for a parse-tree hash that has exactly the keys
# :substance_name, :dose and :dose_pro, each bound to a simple value.
# Side effects: appends a new ParseSubstance to @@substances and overwrites
# @@excipiens with the dose built from :dose_pro.
rule(:substance_name => simple(:substance_name),
:dose => simple(:dose),
:dose_pro => simple(:dose_pro),
) {
|dictionary|
# Debug trace, only emitted when VERBOSE_MESSAGES is truthy.
puts "#{File.basename(__FILE__)}:#{__LINE__}: dictionary #{dictionary}" if VERBOSE_MESSAGES
# Reuse the value if it is already a ParseDose, otherwise build one from its string form.
# NOTE(review): `dose` is derived from dictionary[:dose_pro], not dictionary[:dose];
# the bound :dose value is never read in this block. Confirm whether this is
# intentional or a copy-paste slip from the line below.
dose = dictionary[:dose_pro].is_a?(ParseDose) ? dictionary[:dose_pro] : ParseDose.new(dictionary[:dose_pro].to_s)
dose_pro = dictionary[:dose_pro].is_a?(ParseDose) ? dictionary[:dose_pro] : ParseDose.new(dictionary[:dose_pro].to_s)
substance = ParseSubstance.new(dictionary[:substance_name], dose)
@@excipiens = dose_pro
@@substances << substance
}

rule(:dose_pro => simple(:dose_pro),
) {
Expand Down
18 changes: 10 additions & 8 deletions spec/composition_syntax_spec.rb
Expand Up @@ -8,18 +8,18 @@
RunAllParsingExamples = false # Takes over 3 minutes to run, all the other ones just a few seconds
GoIntoPry = false
describe CompositionParser do
let(:parser) { CompositionParser.new }
context "identifier parsing" do
let(:identifier_parser) { parser.identifier }
let(:identifier_parser) { parser.substance }

it "parses identifier" do
res1 = identifier_parser.parse_with_debug('D2')
res1 = identifier_parser.parse_with_debug("acidum lacticum 90 % 4.55 mg")
pp res1
res2 = identifier_parser.parse_with_debug('calendula' )
pp res2
binding.pry
end
end if false

end
describe CompositionParser do
let(:parser) { CompositionParser.new }
context "should help me find problems" do
let(:substance_name_parser) { parser.substance_name }
Expand Down Expand Up @@ -258,10 +258,10 @@
expect(identifier_parser).to parse("calcium")
expect(identifier_parser).to parse("D2")
expect(identifier_parser).to parse("9,11-linolicum")
expect(identifier_parser).to parse("xenonum(133-Xe)")
expect(identifier_parser).to_not parse("10")
expect(identifier_parser).to_not parse("pro asdf")
expect(identifier_parser).to_not parse("calcium,")
expect(identifier_parser).to_not parse("xenonum(133-Xe)")
end
end

Expand Down Expand Up @@ -324,6 +324,8 @@
should_pass = [
'calcium',
'calcium 10 mg',
'ferrum-quarz 50% 20 mg',
'macrogolum 3350',
'pollinis allergeni extractum (Phleum pratense) 10 U.',
'phenoxymethylpenicillinum kalicum 1 U.I.',
'phenoxymethylpenicillinum kalicum 1 Mio. U.I.',
Expand Down Expand Up @@ -352,7 +354,6 @@
'calcium',
# 'macrogolum 3350',
'calendula officinalis D2',
# 'ferrum-quarz 50%',
'pollinis allergeni extractum (Phleum pratense)',
'retinoli palmitas',
].each {
Expand Down Expand Up @@ -382,6 +383,7 @@
context "simple_substance parsing" do
let(:simple_substance_parser) { parser.simple_substance }
should_pass = [
"2,2'-methylen-bis(6-tert.-butyl-4-methyl-phenolum)",
"calcium part_b",
"calcium 10",
"calcium 10 mg",
Expand Down Expand Up @@ -496,4 +498,4 @@
end

end
end
end
44 changes: 38 additions & 6 deletions spec/parslet_spec.rb
Expand Up @@ -10,14 +10,14 @@
require 'parslet/rig/rspec'

hostname = Socket.gethostbyname(Socket.gethostname).first
RunAllCompositionsTests = /travis/i.match(hostname) != nil # takes about five minutes to run!
RunAllCompositionsTests = /travis|localhost/i.match(hostname) != nil # takes about five minutes to run!
puts "hostname is #{hostname} RunAllCompositionsTests #{RunAllCompositionsTests}"
# Testing whether 8937 composition lines can be parsed. Found 380 errors in 293 seconds
# 520 examples, 20 failures, 1 pending

RunCompositionExamples = true
RunSubstanceExamples = true
RunFailingSpec = true
RunFailingSpec = false
RunExcipiensTest = true
RunSpecificTests = true
RunMostImportantParserTests = true
Expand All @@ -27,17 +27,49 @@
VERBOSE_MESSAGES = true
pp composition; binding.pry
)

end

# Scratch group for compositions that are still being worked on. The trailing
# `if RunFailingSpec` modifier means the whole group is only defined when that
# constant is truthy.
# NOTE(review): both contexts carry the same description string, and each calls
# binding.pry in the context body (i.e. while the group is being defined, not
# inside an example) -- confirm this is meant for interactive debugging only.
describe ParseComposition do
context "allow substance complicated, calculated dose '6.0 +/-1.2 µg'" do
string =
"piscis oleum 500 mg corresp. acida carboxylica omega-3 oligoinsaturata 150 mg ut acidum eicosapentaenoicum 90 mg et acidum docosahexaenoicum 60 mg, excipiens pro capsula"
composition = ParseComposition.from_string(string)
pp composition; binding.pry
# NOTE(review): the expected names below do not occur in the input string above;
# the first two lines are already marked TODO, so they look like placeholders.
specify { expect(composition.substances.first.name).to eq 'Magnesii Aspartas Dihydricus' } # TODO:
specify { expect(composition.substances.first.chemical_substance.name).to eq 'Magnesium' } # TODO:
specify { expect(composition.substances.size).to eq 5 }
end

context "allow substance complicated, calculated dose '6.0 +/-1.2 µg'" do
string =
"enzephalitidis japanensis virus antigenum (Stamm: SA-14-2) 6.0 +/-1.2 µg, aluminium ut aluminii oxidum hydricum, kalii dihydrogenophosphas, dinatrii phosphas anhydricus, natrii chloridum, aqua q.s. ad solutionem pro 0.5 ml"
composition = ParseComposition.from_string(string)
pp composition; binding.pry
# NOTE(review): as in the context above, the expected names are not present in
# this input string -- presumably placeholders still to be replaced.
specify { expect(composition.substances.first.name).to eq 'Magnesii Aspartas Dihydricus' }
specify { expect(composition.substances.first.chemical_substance.name).to eq 'Magnesium' }
specify { expect(composition.substances.size).to eq 5 }
end
end if RunFailingSpec

describe ParseComposition do
context "allow 2,2'-methylen-bis(6-tert.-butyl-4-methyl-phenolum)" do
string = "2,2'-methylen-bis(6-tert.-butyl-4-methyl-phenolum)"
composition = ParseComposition.from_string(string)
specify { expect(composition.substances.first.name).to eq "2,2'-methylen-bis(6-tert.-butyl-4-methyl-phenolum)" }
specify { expect(composition.substances.size).to eq 1 }
end

context "allow substancename with 90 % " do
string =
"acidum nitricum 70 per centum 580.66 mg, acidum aceticum glaciale 41.08 mg, acidum oxalicum dihydricum 57.32 mg, acidum lacticum 90 % 4.55 mg, cupri(II) nitras trihydricus 0.048 mg, excipiens ad solutionem pro 1 ml"
string = "acidum lacticum 90 % 4.55 mg"
composition = ParseComposition.from_string(string)
specify { expect(composition.substances.first.name).to eq 'Acidum Lacticum 90 %' }
specify { expect(composition.substances.first.dose.to_s).to eq '4.55 mg' }
composition = ParseComposition.from_string(string)
specify { expect(composition.substances.size).to eq 5 }
substance = composition.substances.find{ |x| /lacticum/i.match(x.name) }
specify { expect(composition.substances.first.name).to eq "Acidum Nitricum" }
specify { expect(composition.substances.first.dose.to_s).to eq "580.66 mg/ml" }
specify { expect(substance.name).to eq 'Acidum Lacticum 90 %' }
specify { expect(substance.dose.to_s).to eq '4.55 mg/ml' }
end
context "allow substance with two corresp" do
string = "magnesii aspartas dihydricus 3.895 g corresp. magnesium 292 mg corresp. 12 mmol, arom.: bergamottae aetheroleum et alia, natrii cyclamas, saccharinum natricum, excipiens ad granulatum pro charta"
Expand Down

0 comments on commit 2d8454e

Please sign in to comment.