Permalink
Browse files

Nevermind, too hard to make work consistently and reliably: Revert "magically properly respect Encoding.default_internal or a file handle with an internal_encoding, woo"

This reverts commit 0a3e9dd.
  • Loading branch information...
1 parent 0a3e9dd commit 89beecc02800b5335aaf9951dd77c9c39d10fef7 @jrochkind jrochkind committed Apr 19, 2012
Showing with 24 additions and 33 deletions.
  1. +2 −10 lib/marc/reader.rb
  2. +22 −23 test/tc_reader_char_encodings.rb
View
@@ -46,7 +46,7 @@ def initialize(file, options = {})
else
throw "must pass in path or file"
end
-
+
if (! @encoding_options[:external_encoding] ) && @handle.respond_to?(:external_encoding)
# use file encoding only if we didn't already have an explicit one,
# explicit one takes precedence.
@@ -55,15 +55,7 @@ def initialize(file, options = {})
# with binary marc data, the transcode can mess up the byte count
# and make it unreadable.
@encoding_options[:external_encoding] ||= @handle.external_encoding
- end
- # If the File handle has an internal_encoding (because it was explicitly
- # set, or because of Encoding.default_internal), mark it so we
- # can transcode and make it look right in the end.
- if (! @encoding_options[:internal_encoding]) && @handle.respond_to?(:internal_encoding)
- @encoding_options[:internal_encoding] = @handle.internal_encoding unless @handle.internal_encoding.nil?
- end
-
-
+ end
end
# to support iteration:
@@ -86,7 +86,6 @@ def test_load_file_opened_with_external_encoding
# Make sure it's got the encoding it's supposed to.
assert_equal("IBM866", record['001'].value.encoding.name )
assert_equal(["d09d"], record['001'].value.encode('utf-8').unpack('H4')) # russian capital N
- assert_equal("2005", record['007'].value)
end
def test_explicit_encoding_beats_file_encoding
@@ -173,28 +172,28 @@ def test_bad_source_bytes_with_custom_replace
end
- def test_default_internal_encoding
- #Some people WILL be changing their Encoding.default_internal
- #It's even recommended by wycats
- #http://yehudakatz.com/2010/05/05/ruby-1-9-encodings-a-primer-and-the-solution-for-rails/
- # We want this to:
- # 1) NOT trans-code under the hood, possibly corrupting byte offset/length counts
- # 2) Give you a record that properly respected that and converted for you, correctly.
- begin
- original = Encoding.default_internal
- Encoding.default_internal = "UTF-8"
-
- reader = MARC::Reader.new(File.open('test/cp866_unimarc.marc', "r:cp866"))
-
- record = reader.first
- assert_equal("UTF-8", record['001'].value.encoding.name )
- assert_equal(["d09d"], record['001'].value.unpack('H4')) # russian capital N
- ensure
- Encoding.default_internal = original
- end
- end
-
-
+ #def test_default_internal_encoding
+ # Some people WILL be changing their Encoding.default_internal
+ # It's even recommended by wycats
+ # http://yehudakatz.com/2010/05/05/ruby-1-9-encodings-a-primer-and-the-solution-for-rails/
+ # This will in some cases make ruby File object trans-code
+ # by default. Trans-coding a serial marc binary can change the
+ # byte count and mess it up. We may need to try and make ruby-marc
+ # take special measures to prevent this. This test is important.
+ # begin
+ # original = Encoding.default_internal
+ # Encoding.default_internal = "UTF-8"
+ #
+ # reader = MARC::Reader.new(File.open('test/cp866_unimarc.marc', 'r:cp866'))
+ #
+ # record = reader.first
+ # assert_equal("IBM866", record['001'].value.encoding.name )
+ # assert_equal(["d09d"], record['001'].value.encode('utf-8').unpack('H4')) # russian capital N
+ # ensure
+ # Encoding.default_internal = original
+ # end
+ # end
+ #
end
else
require 'pathname'

0 comments on commit 89beecc

Please sign in to comment.