Skip to content

Commit

Permalink
Adapted parser to PseudoFachinfo
Browse files Browse the repository at this point in the history
  • Loading branch information
ngiger committed Mar 3, 2014
1 parent 607e275 commit 33682cd
Show file tree
Hide file tree
Showing 10 changed files with 273 additions and 247 deletions.
4 changes: 3 additions & 1 deletion ext/fiparse/src/textinfo_pseudo_fachinfo.rb
Expand Up @@ -7,6 +7,7 @@
module ODDB
module FiParse
class TextinfoPseudoFachinfo
attr_accessor :name
LANGUAGES = [:de, :fr]
LOCALIZED_CHAPTER_EXPRESSION = {
:de => {
Expand Down Expand Up @@ -93,7 +94,8 @@ def extract(docx_file)
info = self.to_textinfo(allChapters)
info.iksnrs = []
info.packages.paragraphs.each{ |pack| m=pack.match(/\d{13}/); info.iksnrs << m[0] if m } if info.packages
info
info.name = doc.xpath("//paragraph").first.text.gsub("\n",'').gsub(/\s+/,' ').gsub(' ®','®').strip
info
end
end
private
Expand Down
3 changes: 3 additions & 0 deletions ext/fiparse/test/test_pseudo_fachinfo.rb
Expand Up @@ -31,6 +31,7 @@ def test_fachinfo_simple
@@writer = TextinfoPseudoFachinfo.new
open(@@path) { |fh| @@fachinfo = @@writer.extract(fh)}
assert_instance_of(PseudoFachinfoDocument, @@fachinfo)
assert_equal('Sinovial® HighVisc 1,6%' , @@fachinfo.name)
assert_equal(2, @@fachinfo.composition.paragraphs.size)
assert_equal('Zusammensetzung', @@fachinfo.composition.heading.to_s)
assert_equal('1 vorgefüllte 2,25 ml-Einweg-Fertigspritze aus Glas enthält: 32 mg Hyaluronsäure-Natriumsalz in 2 ml gepufferter physiologischer Lösung.', @@fachinfo.composition.paragraphs.first.to_s)
Expand All @@ -44,6 +45,7 @@ def test_fachinfo_sinovial_FR
open(@@path) { |fh| @@fachinfo = @@writer.extract(fh)}
assert_instance_of(PseudoFachinfoDocument, @@fachinfo)
assert(@@fachinfo.date)
assert_equal('Sinovial® HighVisc 1,6%' , @@fachinfo.name)
assert_equal( @@fachinfo.date.paragraphs.first.to_s, 'Avril 2010.')
ODDB::PseudoFachinfoDocument::CHAPTERS.each {
|chapter|
Expand All @@ -59,6 +61,7 @@ def test_fachinfo_sinovial_DE
@@writer = TextinfoPseudoFachinfo.new
open(@@path) { |fh| @@fachinfo = @@writer.extract(fh)}
assert_instance_of(PseudoFachinfoDocument, @@fachinfo)
assert_equal('Sinovial® HighVisc 1,6%' , @@fachinfo.name)
assert_equal( @@fachinfo.date.paragraphs.first.to_s, 'April 2010.')
ODDB::PseudoFachinfoDocument::CHAPTERS.each {
|chapter|
Expand Down
3 changes: 1 addition & 2 deletions src/model/fachinfo.rb
Expand Up @@ -243,10 +243,9 @@ class FachinfoDocument2001 < FachinfoDocument
]
end
class PseudoFachinfoDocument < FachinfoDocument
attr_accessor :contra_indications, :distributor
attr_accessor :contra_indications, :distributor, :name
CHAPTERS = [
:composition,
# :indications,
:usage,
:contra_indications,
:restrictions,
Expand Down
119 changes: 16 additions & 103 deletions src/plugin/medical_products.rb
Expand Up @@ -2,69 +2,22 @@
# encoding: UTF-8
$: << File.expand_path('../..', File.dirname(__FILE__))
$: << File.expand_path('../../src', File.dirname(__FILE__))
$: << File.expand_path('../../ext/fiparse/src', File.dirname(__FILE__))

require 'plugin/plugin'
require 'model/text'
require 'util/oddbconfig'
require 'util/persistence'
require 'drb'
require 'ydocx'
require 'src/plugin/text_info'
require 'ydocx/templates/fachinfo'
require 'textinfo_pseudo_fachinfo'

module ODDB
class MedicalProductPlugin < Plugin
@@errors = []
@@products = []
# Heading patterns used to recognise the chapters of a medical-product document.
#
# Layout: language code ('de' / 'fr') => { chapter key => Regexp }.
# A chapter heading is tested against the patterns of the selected language in
# insertion order and the first pattern that matches wins, so the order of the
# entries is significant. The trailing numbers are the original chapter indices.
#
# Fixes compared with the previous revision:
# * fr 'usage': the case-insensitive flag had slipped into the pattern
#   ("Posologiei"), so no "Posologie ..." heading could ever match.
# * fr 'driving_ability': the pattern only accepted the HTML entity "&agrave;";
#   it now accepts the literal "à" as well (the entity form is still accepted).
# * de 'composition', 'effects', 'kinetic': spelling errors in the patterns
#   ("Hilsstoffe", "Pharmakodyamik", "Metabolisms") prevented matches against
#   correctly spelled headings.
ChapterDef = {
  'de' => {
    'name'                => /^Name\s+des\s+Präparates$/u, # 1
    # NOTE(review): only the first alternative is anchored with ^; the other
    # two match anywhere in the heading. Kept as is to avoid narrowing matches.
    'composition'         => /^Zusammensetzung|Wirkstoffe|Hilfsstoffe/u, # 2
    'galenic_form'        => /^Galenische\s+Form\s*(und|\/)\s*Wirkstoffmenge\s+pro\s+Einheit$/iu, # 3
    'indications'         => /^Indikationen(\s+|\s*(\/|und)\s*)Anwendungsmöglichkeiten$/u, # 4
    'usage'               => /^Dosierung\s*(\/|und)\s*Anwendung/u, # 5
    'contra_indications'  => /^Kontraindikationen($|\s*\(\s*absolute\s+Kontraindikationen\s*\)$)/u, # 6
    'restrictions'        => /^Warnhinweise\s+und\s+Vorsichtsmassnahmen($|\s*\/\s*(relative\s+Kontraindikationen|Warnhinweise\s*und\s*Vorsichtsmassnahmen)$)/u, # 7
    'interactions'        => /^Interaktionen$/u, # 8
    'pregnancy'           => /^Schwangerschaft(,\s*|\s*\/\s*|\s+und\s+)Stillzeit$/u, # 9
    'driving_ability'     => /^Wirkung\s+auf\s+die\s+Fahrtüchtigkeit\s+und\s+auf\s+das\s+Bedienen\s+von\s+Maschinen$/u, # 10
    'unwanted_effects'    => /^Unerwünschte\s+Wirkungen$/u, # 11
    'overdose'            => /^Überdosierung$/u, # 12
    'effects'             => /^Eigenschaften\s*\/\s*Wirkungen($|\s*\(\s*(ATC\-Code|Wirkungsmechanismus|Pharmakodynamik|Klinische\s+Wirksamkeit)\s*\)\s*$)/iu, # 13
    'kinetic'             => /^Pharmakokinetik($|\s*\((Absorption,\s*Distribution,\s*Metabolismus,\s*Elimination\s|Kinetik\s+spezieller\s+Patientengruppen)*\)$)/iu, # 14
    'preclinic'           => /^Präklinische\s+Daten$/u, # 15
    'other_advice'        => /^Sonstige\s*Hinweise($|\s*\(\s*(Inkompatibilitäten|Beeinflussung\s*diagnostischer\s*Methoden|Haltbarkeit|Besondere\s*Lagerungshinweise|Hinweise\s+für\s+die\s+Handhabung)\s*\)$)|^Remarques/u, # 16
    'iksnrs'              => /^Zulassungsnummer(n|:|$|\s*\(\s*Swissmedic\s*\)$)/u, # 17
    'packages'            => /^Packungen($|\s*\(\s*mit\s+Angabe\s+der\s+Abgabekategorie\s*\)$)/u, # 18
    'registration_owner'  => /^Zulassungsinhaberin($|\s*\(\s*Firma\s+und\s+Sitz\s+gemäss\s*Handelsregisterauszug\s*\))/u, # 19
    'date'                => /^Stand\s+der\s+Information$/iu, # 20
    'fabrication'         => /^Herstellerin/u,
    'distributor'         => /^Vertriebsfirma/u,
  },
  'fr' => {
    'name'                => /^Nom$/u, # 1
    'composition'         => /^Composition$/u, # 2
    'galenic_form'        => /^Forme\s+galénique\s+et\s+quantité\s+de\s+principe\s+actif\s+par\s+unité|^Forme\s*gal.nique/iu, # 3
    'indications'         => /^Indications/u, # 4
    'usage'               => /^Posologie/iu, # 5
    'contra_indications'  => /^Contre\-indications/iu, # 6
    'restrictions'        => /^Mises/u, # 7
    'interactions'        => /^Interactions/u, # 8
    'pregnancy'           => /^Grossesse\s*\/\s*Allaitement/u, # 9
    'driving_ability'     => /^Effet\s+sur\s+l'aptitude\s+(à|&agrave;)\s+la\s+conduite\s+et\s+l'utilisation\s+de\s+machines/u, # 10
    'unwanted_effects'    => /^Effets/u, # 11
    'overdose'            => /^Surdosage$/u, # 12
    'effects'             => /^Propriétés/iu, # 13
    'kinetic'             => /^Pharmacocinétique$/iu, # 14
    'preclinic'           => /^Données\s+précliniques$/u, # 15
    'other_advice'        => /^Remarques/u, # 16
    'iksnrs'              => /^Numéro\s+d'autorisation$/u, # 17
    'packages'            => /^Présentation/iu, # 18
    'registration_owner'  => /^Titulaire\s+de\s+l'autorisation$/u, # 19
    'date'                => /^Mise à jour/iu, # 20
    'fabrication'         => /^Fabricant$/u,
    'distributor'         => /^Distributeur/u,
  }
}

def initialize(app, opts = {:files => ['*.docx'], :lang => 'de'})
super(app)
@options = opts
Expand All @@ -82,82 +35,42 @@ def report
def update
@options[:lang] = 'de' unless @options[:lang]
lang = @options[:lang].to_s
data_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', defined?(Minitest) ? 'test' : '.', 'data', 'medical_products', lang))
data_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', defined?(Minitest) ? 'test' : '.', 'data', 'medical_products'))
pp data_dir
LogFile.debug "file #{@options[:files]} lang #{lang} #{lang.class} YDocx #{YDocx::VERSION}"
@options[:files].each{
|param|
files = (Dir.glob(param) + Dir.glob(File.join(data_dir, param))).collect{ |file| File.expand_path(file) }.uniq
files.each {
|file|
LogFile.debug "file is #{file}"
doc = YDocx::Document.open(file, {:lang => @options[:lang]})
xml_file = file.sub('.docx', '.xml')
doc.to_xml(xml_file, {})
out = doc.output_file('xml')
doc_xml = Nokogiri::XML(open(xml_file))
chapters = {}
ean13s = []
parts = {}
number = -1
name = doc_xml.xpath('//chapters/paragraph').text
doc_xml.xpath('//chapters/chapter').each{
|x|
next if x.xpath('heading').size == 0;
chapterName = nil
localizedName = x.xpath('heading').text
ChapterDef[lang].each{
|key, value|
if localizedName.match(value)
chapterName = key
break
end
}
next unless chapterName
chapters[chapterName] = x.xpath('paragraph').text
if chapterName.match(/packages|other_advice/)
x.xpath('paragraph').each {
|p| m = p.text.match(/(\d{13})($|\s|\W)/); ean13s << m[1] if m
m2 = p.text.match(/(\d{13})($|\s|\W)(.+)(\d+)\s+(\w+)/)
LogFile.debug "#{lang} parts m2 #{m2.inspect}" if m2
parts[m2[1]] = [m2[3].strip, m2[4].strip, m2[5].strip ] if m2
}
end
}
unless ean13s.size > 0
msg = "File #{file} does not contain a chapter Packages with an ean13 inside"
@@errors << msg
LogFile.debug "#{msg}"
next
end
distributor = chapters['distributor']
idx = distributor.index(/,|\n/)
distributor = distributor[0..idx-1]
reg = nil
ean13s.each{
LogFile.debug "file is #{file}"
writer = ODDB::FiParse::TextinfoPseudoFachinfo.new
fachinfo = nil
open(file) { |fh| fachinfo = writer.extract(fh)}
fachinfo.iksnrs.each{
|ean|
number = ean[2..2+6] # 7 digits
packNr = ean[9..11] # 3 digits
info = SwissmedicMetaInfo.new(number, nil, name, distributor, nil)
info = SwissmedicMetaInfo.new(number, nil, fachinfo.name, fachinfo.distributor, nil)
reg = TextInfoPlugin::create_registration(@app, info, '00', packNr)
@@products << "#{lang} #{number} #{packNr}: #{name}"
@@products << "#{lang} #{number} #{packNr}: #{fachinfo.name}"
if parts[ean]
package = reg.sequence('00').package(packNr)
pInfo = parts[ean]
pSize = "#{pInfo[0]} #{pInfo[1]} #{pInfo[2]}"
end
}
parser = DRb::DRbObject.new nil, FIPARSE_URI
registration = @app.registration(number)
unless registration
@app.registrations.store(number, reg)
@app.registrations.odba_store
registration = @app.registration(number)
end
parsed_info = parser.parse_fachinfo_docx(file, number, lang)
fachinfo = nil
fachinfo ||= TextInfoPlugin::store_fachinfo(@app, reg, {lang => parsed_info})
TextInfoPlugin::replace_textinfo(@app, fachinfo, reg, :fachinfo)
ean13s.each{
TextInfoPlugin::replace_textinfo(@app, fachinfo, registration, :fachinfo)
fachinfo.iksnrs.each{
|ean|
number = ean[2..2+6] # 7 digits
packNr = ean[9..11] # 3 digits
Expand All @@ -172,7 +85,7 @@ def update
if oldParts == nil or oldParts.size == 0
newPart = package.create_part
elsif oldParts.size != 1
msg = "Found #{oldParts.size} parts. Problem in database with #{lang} #{number} #{packNr}: #{name}"
# msg = "Found #{oldParts.size} parts. Problem in database with #{lang} #{number} #{packNr}: #{fachinfo.name}"
@@errors << msg
LogFile.debug "#{msg}"
next
Expand Down
1 change: 1 addition & 0 deletions test/data/medical_products
Binary file removed test/data/medical_products/de/Sinovial_DE.docx
Binary file not shown.
1 change: 0 additions & 1 deletion test/data/medical_products/de/Sinovial_DE.xml

This file was deleted.

Binary file removed test/data/medical_products/fr/Sinovial_FR.docx
Binary file not shown.

0 comments on commit 33682cd

Please sign in to comment.