Skip to content
Permalink
Browse files
8268457: XML Transformer outputs Unicode supplementary character incorrectly to HTML

Reviewed-by: lancea, naoto, iris, joehw
  • Loading branch information
Masanori Yano authored and JoeWang-Java committed Jun 30, 2021
1 parent 1810b1c commit 83bce94cc8a7fb45b0604598411fbecc62000dfd
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -41,7 +41,7 @@
* because it is used from another package.
*
* @xsl.usage internal
* @LastModified: Aug 2019
* @LastModified: June 2021
*/
public final class ToHTMLStream extends ToStream
{
@@ -1441,32 +1441,23 @@ else if (
}
}
}

// The next is kind of a hack to keep from escaping in the case
// of Shift_JIS and the like.

/*
else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
&& (ch != 160))
{
writer.write(ch); // no escaping in this case
}
else
*/
String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
if (null != outputStringForChar)
{
writer.write(outputStringForChar);
}
else if (escapingNotNeeded(ch))
{
writer.write(ch); // no escaping in this case
}
else
{
writer.write("&#");
writer.write(Integer.toString(ch));
writer.write(';');
String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
if (null != outputStringForChar)
{
writer.write(outputStringForChar);
}
else if (escapingNotNeeded(ch))
{
writer.write(ch); // no escaping in this case
}
else
{
writer.write("&#");
writer.write(Integer.toString(ch));
writer.write(';');
}
}
}
cleanStart = i + 1;
@@ -0,0 +1,104 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

package transform;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.InputStream;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import static jaxp.library.JAXPTestUtilities.compareWithGold;
import static jaxp.library.JAXPTestUtilities.compareStringWithGold;
import org.testng.Assert;
import org.testng.annotations.Listeners;
import org.testng.annotations.Test;

/*
* @test
* @bug 8268457
* @library /javax/xml/jaxp/libs
* @run testng transform.SurrogateTest
* @summary XML Transformer outputs Unicode supplementary character incorrectly to HTML
*/
@Listeners({jaxp.library.FilePolicy.class})
public class SurrogateTest {

    /** Directory containing the test fixtures; defaults to the current directory. */
    static final String TEST_SRC = System.getProperty("test.src", ".");

    /**
     * Transforms SurrogateTest1.xml with SurrogateTest1.xsl using the "html"
     * output method and checks the written file against the SurrogateTest1.html
     * gold file.
     *
     * @throws Exception if the stylesheet or input cannot be read, the
     *         transformation fails, or the gold comparison cannot be performed
     */
    @Test
    public void toHTMLTest() throws Exception {
        String out = "SurrogateTest1out.html";
        String expected = TEST_SRC + File.separator + "SurrogateTest1.html";
        String xsl = TEST_SRC + File.separator + "SurrogateTest1.xsl";

        try (FileInputStream tFis = new FileInputStream(xsl);
             InputStream fis = this.getClass().getResourceAsStream("SurrogateTest1.xml");
             FileOutputStream fos = new FileOutputStream(out)) {

            Source tSrc = new StreamSource(tFis);
            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer t = tf.newTransformer(tSrc);
            t.setOutputProperty("method", "html");

            Source src = new StreamSource(fis);
            Result res = new StreamResult(fos);
            t.transform(src, res);
        }
        // The comparison result must be asserted; ignoring it lets the test
        // pass even when the generated HTML differs from the gold file.
        Assert.assertTrue(compareWithGold(expected, out),
                "Transformer output " + out + " does not match gold file " + expected);
    }

    /**
     * Parses SurrogateTest2.xml with a namespace-aware SAX parser and checks
     * the element/attribute trace collected by {@link TestHandler} against the
     * SurrogateTest2.txt gold file.
     *
     * @throws Exception if the parser cannot be created, parsing fails, or the
     *         gold comparison cannot be performed
     */
    @Test
    public void handlerTest() throws Exception {
        File xmlFile = new File(TEST_SRC, "SurrogateTest2.xml");
        String expected = TEST_SRC + File.separator + "SurrogateTest2.txt";

        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setNamespaceAware(true);
        SAXParser sp = spf.newSAXParser();
        TestHandler th = new TestHandler();
        sp.parse(xmlFile, th);

        // Assert the comparison result so that a mismatch actually fails the test.
        Assert.assertTrue(compareStringWithGold(expected, th.sb.toString()),
                "SAX attribute values do not match gold file " + expected);
    }

    /**
     * SAX handler that records one line per start tag in the form
     * {@code <localName>@attr:<value of attribute "attr">}.
     */
    private static class TestHandler extends DefaultHandler {
        private final StringBuilder sb = new StringBuilder();

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            // A missing "attr" attribute yields null, which is appended as the text "null".
            sb.append(localName)
              .append("@attr:")
              .append(attributes.getValue("attr"))
              .append('\n');
        }
    }
}
@@ -0,0 +1,12 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>
<form>
<input id="tag1" value="𠮟">
</form>
</body>
</html>
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<root>
<tag1>𠮟</tag1>
</root>
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output doctype-public="-//W3C//DTD HTML 4.01 Transitional//EN"
doctype-system="http://www.w3.org/TR/html4/loose.dtd"
encoding="UTF-8" indent="yes" method="html" omit-xml-declaration="yes"/>
<!-- Root template: emits an HTML page whose head carries a Content-Type META element.
For each root element it writes a form, and for each tag1 child of that root it writes
an input element with id "tag1" whose value attribute is the string value of the tag1 element. -->
<xsl:template match="/">
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<xsl:for-each select="root">
<form>
<xsl:for-each select="tag1">
<input id="tag1">
<xsl:attribute name="value">
<xsl:value-of select="."/>
</xsl:attribute>
</input>
</xsl:for-each>
</form>
</xsl:for-each>
</body>
</html>
</xsl:template>
</xsl:stylesheet>
@@ -0,0 +1,4 @@
root@attr:null
tag1@attr:𠮟
tag2@attr:𠀋
tag3@attr:𣱿
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<root>
<tag1 attr="𠮟"/>
<tag2 attr="𠀋"/>
<tag3 attr="𣱿"/>
</root>

1 comment on commit 83bce94

@openjdk-notifier

This comment has been minimized.

Copy link

@openjdk-notifier openjdk-notifier bot commented on 83bce94 Jun 30, 2021

Please sign in to comment.