Skip to content

Commit

Permalink
optimise sed usage in ONIX::Normaliser
Browse files Browse the repository at this point in the history
- 1 sed call instead of > 1000
  • Loading branch information
yob committed Oct 1, 2009
1 parent 37d8194 commit 036b689
Showing 1 changed file with 14 additions and 8 deletions.
22 changes: 14 additions & 8 deletions lib/onix/normaliser.rb
Expand Up @@ -71,12 +71,14 @@ def run
@curfile = dest

# remove entities
replace_named_entities(@curfile)
dest = next_tempfile
replace_named_entities(@curfile, dest)
@curfile = dest

FileUtils.cp(@curfile, @newfile)
end

private
#private

# check the specified app is available on the system
#
Expand Down Expand Up @@ -133,7 +135,7 @@ def to_utf8(src, dest)
FileUtils.cp(inpath, outpath)
else
FileUtils.cp(inpath, outpath)
`sed -i 's/<?xml.*?>/<?xml version=\"1.0\" encoding=\"UTF-8\"?>/' #{outpath}`
`sed -i 's/<?xml.*?>/<?xml version=\"1.0\" encoding=\"UTF-8\"?>/g' #{outpath}`
end
elsif src_enc
`iconv --from-code=#{src_enc} --to-code=UTF-8 #{inpath} > #{outpath}`
Expand All @@ -154,11 +156,15 @@ def remove_control_chars(src, dest)
# replace all named entities in the specified file with
# numeric entities.
#
def replace_named_entities(path)
# TODO: this is horrible. 1500 sed calls.
entity_map.each do |named, numeric|
`sed -i 's/\\&#{named};/\\&#{numeric};/g' #{path}`
end
def replace_named_entities(src, dest)
inpath = File.expand_path(src)
outpath = File.expand_path(dest)

cmd = "sed " + entity_map.map do |named, numeric|
"-e 's/\\&#{named};/\\&#{numeric};/g'"
end.join(" ") + " #{inpath} > #{outpath}"
#raise cmd
`#{cmd}`
end

  # return a named entity to numeric entity mapping, built by extracting
Expand Down

0 comments on commit 036b689

Please sign in to comment.