Permalink
Browse files

Extract Encoding definitions from oniguruma files.

We follow MRI's basic approach here. It would make more sense to me to manage
these definitions in a single suitable form rather than having them sprinkled
through various files and requiring extraction, but I also want to keep our
deviation from the oniguruma code small, and all the data is already there.
  • Loading branch information...
1 parent cf16a57 commit 2ccd544478335be98c36616b45ac2dc41f08ff3a @brixen brixen committed Dec 15, 2011
Showing with 25 additions and 35 deletions.
  1. +1 −1 rakelib/vm.rake
  2. +24 −34 vm/codegen/encoding_extract.rb
View
@@ -189,7 +189,7 @@ files TYPE_GEN, field_extract_headers + %w[vm/codegen/field_extract.rb] + [:run_
end
# Rake file task for the encoding database target, with
# vm/codegen/encoding_extract.rb as its prerequisite. The task runs that
# script, passing the oniguruma directory and the target's name (t.name).
file encoding_database => 'vm/codegen/encoding_extract.rb' do |t|
# The old line expanded the relative path against the current working
# directory. Passing __FILE__ as the base resolves it against this rake file
# instead, so the result no longer depends on where rake is invoked from.
- dir = File.expand_path "../vendor/oniguruma"
+ dir = File.expand_path "../../vendor/oniguruma", __FILE__
ruby 'vm/codegen/encoding_extract.rb', dir, t.name
end
@@ -1,38 +1,28 @@
dir, definitions = ARGV
# readlines(name) returns the lines of the file +name+ as an Array, via
# IO.readlines. Which definition is installed depends on whether the running
# Ruby defines the Encoding constant.
+if defined? Encoding
# Encoding-aware Ruby: read the file as "ascii-8bit".
# NOTE(review): presumably so bytes that are invalid in the default external
# encoding cannot break the regexp match on each line -- confirm.
+ def readlines(name)
+ IO.readlines(name, :encoding => "ascii-8bit")
+ end
+else
# No Encoding constant: IO.readlines is called without an :encoding option.
+ def readlines(name)
+ IO.readlines(name)
+ end
+end
+
File.open definitions, "wb" do |f|
- f.puts <<-EOD
- // Encodings
- // This is really a place holder
- define(state, "ASCII-8BIT", ONIG_ENCODING_ASCII);
- define(state, "ISO-8859-1", ONIG_ENCODING_ISO_8859_1);
- define(state, "ISO-8859-2", ONIG_ENCODING_ISO_8859_2);
- define(state, "ISO-8859-3", ONIG_ENCODING_ISO_8859_3);
- define(state, "ISO-8859-4", ONIG_ENCODING_ISO_8859_4);
- define(state, "ISO-8859-5", ONIG_ENCODING_ISO_8859_5);
- define(state, "ISO-8859-6", ONIG_ENCODING_ISO_8859_6);
- define(state, "ISO-8859-7", ONIG_ENCODING_ISO_8859_7);
- define(state, "ISO-8859-8", ONIG_ENCODING_ISO_8859_8);
- define(state, "ISO-8859-9", ONIG_ENCODING_ISO_8859_9);
- define(state, "ISO-8859-10", ONIG_ENCODING_ISO_8859_10);
- define(state, "ISO-8859-11", ONIG_ENCODING_ISO_8859_11);
- define(state, "ISO-8859-13", ONIG_ENCODING_ISO_8859_13);
- define(state, "ISO-8859-14", ONIG_ENCODING_ISO_8859_14);
- define(state, "ISO-8859-15", ONIG_ENCODING_ISO_8859_15);
- define(state, "ISO-8859-16", ONIG_ENCODING_ISO_8859_16);
- // This is really a place holder
- define(state, "UTF-7", ONIG_ENCODING_UTF_8);
- define(state, "UTF-16BE", ONIG_ENCODING_UTF_16BE);
- define(state, "UTF-16LE", ONIG_ENCODING_UTF_16LE);
- define(state, "UTF-32BE", ONIG_ENCODING_UTF_32BE);
- define(state, "UTF-32LE", ONIG_ENCODING_UTF_32LE);
- define(state, "EUC-JP", ONIG_ENCODING_EUC_JP);
- define(state, "EUC-TW", ONIG_ENCODING_EUC_TW);
- define(state, "EUC-KR", ONIG_ENCODING_EUC_KR);
- define(state, "Shift_JIS", ONIG_ENCODING_Shift_JIS);
- define(state, "KOI8-R", ONIG_ENCODING_KOI8_R);
- define(state, "CP1251", ONIG_ENCODING_Windows_1251);
- define(state, "Big5", ONIG_ENCODING_BIG5);
- define(state, "GB18030", ONIG_ENCODING_GB18030);
- EOD
# For every .h or .c file in the enc/ subdirectory of dir, write a "// <path>"
# header line to f, copy each line that matches an ENC_<NAME>("...") macro
# call, and finish the file's section with an empty line.
+ Dir["#{dir}/enc/*.[hc]"].each do |name|
+ f.puts " // #{name}"
+
+ readlines(name).each do |line|
# Captures: m[1] is the upper-case macro suffix after ENC_, m[2] is the first
# quoted argument, and m[4] is the optional second argument without its
# surrounding quotes. Lines that do not start with such a call are skipped.
+ m = line.match /^ENC_([A-Z]+)\("([^"]+)"(,\s"?([^")]+)"?)?\);?/
+ next unless m
+
# Drop ENC_DEFINE lines whose second argument is ASCII, UTF_8 or US_ASCII;
# every other matching line is written out unchanged.
# NOTE(review): presumably those three encodings are defined elsewhere -- confirm.
+ unless m[1] == "DEFINE" and ["ASCII", "UTF_8", "US_ASCII"].include?(m[4])
+ f.puts " #{line}"
+ end
+ end
+
+ f.puts
+ end
end

0 comments on commit 2ccd544

Please sign in to comment.