Skip to content

Commit

Permalink
Parsing Pseudo_FI works for german
Browse files Browse the repository at this point in the history
  • Loading branch information
ngiger committed Mar 3, 2014
1 parent a4455f2 commit 8b701ef
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -1,7 +1,7 @@
.bundle
.rbenv-version
.ruby-version

.lock*
data/csv/
data/downloads/
data/html/
Expand Down
104 changes: 104 additions & 0 deletions ext/fiparse/src/textinfo_pseudo_fachinfo.rb
@@ -0,0 +1,104 @@
#!/usr/bin/env ruby
# encoding: utf-8

require 'model/text'
require 'util/logfile'

module ODDB
  module FiParse
    # Builds a PseudoFachinfoDocument from a .docx file by matching the
    # document's chapter headings against per-language regular expressions.
    class TextinfoPseudoFachinfo
      # Languages tried, in this order, when detecting a document's language.
      LANGUAGES = [:de, :fr].freeze

      # Maps language => chapter name => pattern matched against heading text.
      # Chapter names double as writer names on the resulting document
      # (see #to_textinfo).
      LOCALIZED_CHAPTER_EXPRESSION = {
        :de => {
          # Only the first alternative is anchored at the line start; the
          # other two match anywhere in the heading.
          :composition        => /^Zusammensetzung|Wirkstoffe|Hilfsstoffe/u, # 2
          # :indications      => /^Indikationen(\s+|\s*(\/|und)\s*)Anwendungsmöglichkeiten$/u, # 4
          :usage              => /^Dosierung\s*(\/|und)\s*Anwendung/u, # 5
          :contra_indications => /^Kontraindikationen($|\s*\(\s*absolute\s+Kontraindikationen\s*\)$)/u, # 6
          :restrictions       => /^Warnhinweise\s+und\s+Vorsichtsmassnahmen($|\s*\/\s*(relative\s+Kontraindikationen|Warnhinweise\s*und\s*Vorsichtsmassnahmen)$)/u, # 7
          :interactions       => /^Interaktionen$/u, # 8
          :unwanted_effects   => /^Unerw.nschte Wirkungen/, # 11
          :effects            => /^Eigenschaften\s*\/\s*Wirkungen($|\s*\(\s*(ATC\-Code|Wirkungsmechanismus|Pharmakodyamik|Klinische\s+Wirksamkeit)\s*\)\s*$)/iu, # 13
          :other_advice       => /^Sonstige\s*Hinweise($|\s*\(\s*(Inkompatibilitäten|Beeinflussung\s*diagnostischer\s*Methoden|Haltbarkeit|Besondere\s*Lagerungshinweise|Hinweise\s+für\s+die\s+Handhabung)\s*\)$)/u, # 16
          :packages           => /^Packungen($|\s*\(\s*mit\s+Angabe\s+der\s+Abgabekategorie\s*\)$)/u, # 18
          # :date used to be listed twice; a Hash literal keeps only the last
          # value, so the two patterns are merged into one that covers both.
          :date               => /^Stand\s+der\s+Information/iu, # 20
          :fabrication        => /^Herstellerin/u,
          :distributor        => /^Vertriebsfirma/u,
        },
        :fr => {
          :composition        => /^Composition$/u, # 2
          # :indications      => /^Indications/u, # 4
          :usage              => /^Posologie/iu, # 5
          :contra_indications => /^Contre\-indications/iu, # 6
          :restrictions       => /^Mises/u, # 7
          :interactions       => /^Interactions/u, # 8
          :unwanted_effects   => /^Effets/u, # 11
          :effects            => /^Propriétés/iu, # 13
          :other_advice       => /^Remarques/u, # 16
          # NOTE(review): the next pattern and :registration_owner contain no
          # apostrophe ("dautorisation", "lautorisation"), so they probably
          # never match real headings. Confirm PseudoFachinfoDocument accepts
          # these chapters before loosening the patterns.
          :iksnrs             => /^Numéro\s+dautorisation$/u, # 17
          :packages           => /^Présentation/iu, # 18
          :registration_owner => /^Titulaire\s+de\s+lautorisation$/u, # 19
          # Merged from two former :date entries (see the :de note above).
          :date               => /^Mise\s+à\s+jour/iu, # 20
          :fabrication        => /^Fabricant$/u,
          :distributor        => /^Distributeur/u,
        }
      }.freeze

      # Copies every chapter onto a new PseudoFachinfoDocument.
      #
      # allChapters:: Hash of chapter name (Symbol) => chapter object; each
      #               name must have a public writer on the document.
      # Returns the populated PseudoFachinfoDocument.
      def to_textinfo(allChapters)
        fi = PseudoFachinfoDocument.new
        # public_send calls the same writer the former eval did, without
        # evaluating generated source code.
        allChapters.each { |name, chapter| fi.public_send("#{name}=", chapter) }
        fi
      end

      # Parses +docx_file+ into a PseudoFachinfoDocument.
      #
      # docx_file:: an object responding to #path (e.g. an open File) that
      #             refers to the .docx to parse.
      # Returns false when the file does not exist, nil when no heading
      # matches any known language, otherwise the document. Its +iksnrs+ is
      # filled with the first 13-digit number of each packages paragraph.
      def extract(docx_file)
        present = File.exist?(docx_file)
        LogFile.debug("extract #{docx_file.path} #{present}")
        return false unless present

        doc = load_xml(docx_file)
        lang = detect_language(doc)
        LogFile.debug("lang #{lang.inspect}")
        return nil unless lang

        info = to_textinfo(collect_chapters(doc, lang))
        info.iksnrs = []
        if info.packages
          info.packages.paragraphs.each do |pack|
            found = pack.match(/\d{13}/)
            info.iksnrs << found[0] if found
          end
        end
        info
      end

      private

      # Has YDocx write an XML rendering of the .docx to a sibling .xml file,
      # then parses that file with Nokogiri. The block form of File.open
      # closes the handle once parsing is done.
      def load_xml(docx_file)
        ydocx = YDocx::Document.open(docx_file, {:format => :plain})
        xml_file = docx_file.path.sub('.docx', '.xml')
        ydocx.to_xml(xml_file)
        File.open(xml_file) { |io| Nokogiri::XML(io) }
      end

      # Returns the first language (in LANGUAGES order) owning a pattern that
      # matches the first recognisable chapter heading, or nil if none does.
      def detect_language(doc)
        doc.xpath("//chapters/chapter/heading").each do |heading|
          title = heading.text
          hit = LANGUAGES.find do |candidate|
            LOCALIZED_CHAPTER_EXPRESSION[candidate].values.any? { |expression| title.match(expression) }
          end
          return hit if hit
        end
        nil
      end

      # Builds a Hash of chapter name => Text::Chapter for every heading that
      # matches a pattern of +lang+. A chapter is recorded only when at least
      # one paragraph is found; a later heading matching the same name
      # replaces the earlier chapter.
      def collect_chapters(doc, lang)
        chapters = {}
        doc.xpath("//heading").each do |heading|
          title = heading.text
          LOCALIZED_CHAPTER_EXPRESSION[lang].each do |name, expression|
            next unless title.match(expression)
            query = "//chapters/chapter[contains(heading, #{xpath_literal(title)})]/paragraph"
            paragraphs = doc.xpath(query)
            next if paragraphs.empty?
            chapter = Text::Chapter.new
            chapter.heading = name.to_s.strip
            # Every paragraph is stored in a section of its own.
            paragraphs.each { |para| chapter.next_section.next_paragraph << para.text }
            chapters[name] = chapter
          end
        end
        chapters
      end

      # Quotes +text+ as an XPath 1.0 string literal. XPath has no escape
      # character, so text containing both quote kinds is assembled with
      # concat(). Needed because headings such as "Numéro d'autorisation"
      # would otherwise terminate a single-quoted literal early.
      def xpath_literal(text)
        return "'#{text}'" unless text.include?("'")
        return "\"#{text}\"" unless text.include?('"')
        pieces = text.split("'", -1).map { |part| "'#{part}'" }
        "concat(#{pieces.join(%q(, "'", ))})"
      end
    end
  end
end
Binary file added ext/fiparse/test/data/docx/Sinovial_DE.docx
Binary file not shown.
Binary file added ext/fiparse/test/data/docx/Sinovial_FR.docx
Binary file not shown.
Binary file added ext/fiparse/test/data/docx/simple.docx
Binary file not shown.
74 changes: 74 additions & 0 deletions ext/fiparse/test/test_pseudo_fachinfo.rb
@@ -0,0 +1,74 @@
#!/usr/bin/env ruby
# encoding: utf-8
# FiParse::TestPatinfoHpricot -- oddb -- 09.04.2012 -- yasaka@ywesee.com
# FiParse::TestPatinfoHpricot -- oddb -- 17.08.2006 -- hwyss@ywesee.com

require 'hpricot'

$: << File.expand_path('../src', File.dirname(__FILE__))
$: << File.expand_path('../../../src', File.dirname(__FILE__))
$: << File.expand_path('../../../test', File.dirname(__FILE__))

gem 'minitest'
require 'minitest/autorun'
require 'fachinfo_hpricot'
require 'fiparse'
require 'textinfo_pseudo_fachinfo'
require 'plugin/text_info'
require 'stub/cgi'
require 'flexmock'

module ODDB
  class FachinfoDocument
    # Test stub: every document reports a fixed odba_id of 1.
    # NOTE(review): presumably avoids needing real ODBA storage in tests - confirm.
    def odba_id
      1
    end
  end
  module FiParse
    # Exercises TextinfoPseudoFachinfo#extract against the .docx fixtures in
    # data/docx next to this file.
    class TestPseudoFachinfoDocument <Minitest::Test
      DOCX_DIR = File.expand_path('data/docx', File.dirname(__FILE__))
      EXPECTED_IKSNRS = ["7612291078458", "7612291078472"]

      def test_fachinfo_simple
        fachinfo = extract_fixture('simple.docx')
        assert_instance_of(PseudoFachinfoDocument, fachinfo)
        assert_equal('April 2010.', fachinfo.date.paragraphs.first.to_s)
        assert_equal(2, fachinfo.composition.paragraphs.size)
        assert_equal('1 vorgefüllte 2,25 ml-Einweg-Fertigspritze aus Glas enthält: 32 mg Hyaluronsäure-Natriumsalz in 2 ml gepufferter physiologischer Lösung.',
                     fachinfo.composition.paragraphs.first.to_s)
        assert_equal('Der Inhalt der Spritzen ist steril und pyrogenfrei.',
                     fachinfo.composition.paragraphs[1].to_s)
      end

      def test_fachinfo_sinovial_DE
        fachinfo = extract_fixture('Sinovial_DE.docx')
        assert_instance_of(PseudoFachinfoDocument, fachinfo)
        assert_equal('April 2010.', fachinfo.date.paragraphs.first.to_s)
        assert_equal(5, fachinfo.composition.paragraphs.size)
        assert_chapters_present(fachinfo)
        assert_equal(EXPECTED_IKSNRS, fachinfo.iksnrs)
      end

      def test_fachinfo_sinovial_FR
        fachinfo = extract_fixture('Sinovial_FR.docx')
        assert_instance_of(PseudoFachinfoDocument, fachinfo)
        assert(fachinfo.date)
        assert_equal('Avril 2010.', fachinfo.date.paragraphs.first.to_s)
        assert_equal(5, fachinfo.composition.paragraphs.size)
        assert_chapters_present(fachinfo)
        assert_equal(EXPECTED_IKSNRS, fachinfo.iksnrs)
      end

      private

      # Opens the named fixture and returns whatever #extract produces for it.
      # Locals replace the former @@class variables, so no state is shared
      # between test methods.
      def extract_fixture(file_name)
        path = File.join(DOCX_DIR, file_name)
        File.open(path) { |fh| TextinfoPseudoFachinfo.new.extract(fh) }
      end

      # Asserts that every chapter listed in PseudoFachinfoDocument::CHAPTERS,
      # except :unwanted_effects, is set on +fachinfo+.
      def assert_chapters_present(fachinfo)
        ODDB::PseudoFachinfoDocument::CHAPTERS.each do |chapter|
          next if chapter == :unwanted_effects
          refute_nil(fachinfo.public_send(chapter), "fachinfo.#{chapter} may not be nil")
        end
      end
    end
  end
end
2 changes: 1 addition & 1 deletion src/model/fachinfo.rb
Expand Up @@ -246,7 +246,7 @@ class PseudoFachinfoDocument < FachinfoDocument
attr_accessor :contra_indications, :distributor
CHAPTERS = [
:composition,
:indications,
# :indications,
:usage,
:contra_indications,
:restrictions,
Expand Down

0 comments on commit 8b701ef

Please sign in to comment.