Closed
Description
Please describe the bug
Nokogiri::XML::Reader#inner_xml returns NCR-encoded attributes even if the encoding is set to utf-8 in the #from_memory call.
It does not happen if the XML input sets the encoding with <?xml version="1.0" encoding="UTF-8"?>.
It only happens to attributes; elements and text nodes are correctly encoded.
Help us reproduce what you're seeing
# Minimal reproduction: pins nokogiri 1.13.8 through an inline Gemfile.
require 'bundler/inline'
gemfile do
source 'https://rubygems.org'
gem 'nokogiri', '1.13.8'
end
require 'nokogiri'
# Case 1: the document has no XML declaration; 'utf-8' is passed only as the
# third argument of Reader.from_memory.
xml = <<~XML
<test><anotación tipo="inspiración">(inspiración)</anotación></test>
XML
reader = Nokogiri::XML::Reader.from_memory(xml, nil, 'utf-8')
# Print inner_xml for each node read; the loop stops at the first node that is
# not an element.
puts reader.inner_xml while reader.read && reader.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
# Output with NCR encoded attributes:
# <anotación tipo="inspiraci&#xF3;n">(inspiración)</anotación>
# (inspiración)
# NOTE(review): the numeric character reference in the attribute value above was
# reconstructed (the extracted page showed it already decoded) — confirm the
# exact form against the original report.
# Case 2: same document, but with an XML declaration that names the encoding.
xml_with_encoding = <<~XML
<?xml version="1.0" encoding="UTF-8"?>
<test><anotación tipo="inspiración">(inspiración)</anotación></test>
XML
reader = Nokogiri::XML::Reader.from_memory(xml_with_encoding, nil, 'utf-8')
# Same read-and-print loop as in case 1.
puts reader.inner_xml while reader.read && reader.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
# Output with correct encoding:
# <anotación tipo="inspiración">(inspiración)</anotación>
# (inspiración)
Environment
# Nokogiri (1.13.8)
---
warnings: []
nokogiri:
version: 1.13.8
cppflags:
- "-I/home/david/.rbenv/versions/3.1.2/lib/ruby/gems/3.1.0/gems/nokogiri-1.13.8-x86_64-linux/ext/nokogiri"
- "-I/home/david/.rbenv/versions/3.1.2/lib/ruby/gems/3.1.0/gems/nokogiri-1.13.8-x86_64-linux/ext/nokogiri/include"
- "-I/home/david/.rbenv/versions/3.1.2/lib/ruby/gems/3.1.0/gems/nokogiri-1.13.8-x86_64-linux/ext/nokogiri/include/libxml2"
ldflags: []
ruby:
version: 3.1.2
platform: x86_64-linux
gem_platform: x86_64-linux
description: ruby 3.1.2p20 (2022-04-12 revision 4491bb740a) [x86_64-linux]
engine: ruby
libxml:
source: packaged
precompiled: true
patches:
- 0001-Remove-script-macro-support.patch
- 0002-Update-entities-to-remove-handling-of-ssi.patch
- 0003-libxml2.la-is-in-top_builddir.patch
- 0004-use-glibc-strlen.patch
- 0005-avoid-isnan-isinf.patch
- 0006-update-automake-files-for-arm64.patch
- '0008-htmlParseComment-handle-abruptly-closed-comments.patch'
- '0009-allow-wildcard-namespaces.patch'
libxml2_path: "/home/david/.rbenv/versions/3.1.2/lib/ruby/gems/3.1.0/gems/nokogiri-1.13.8-x86_64-linux/ext/nokogiri"
memory_management: ruby
iconv_enabled: true
compiled: 2.9.14
loaded: 2.9.14
libxslt:
source: packaged
precompiled: true
patches:
- 0001-update-automake-files-for-arm64.patch
datetime_enabled: true
compiled: 1.1.35
loaded: 1.1.35
other_libraries:
zlib: 1.2.12
libgumbo: 1.0.0-nokogiri
Additional context