Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8268457: XML Transformer outputs Unicode supplementary character incorrectly to HTML #4474

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Expand Down Expand Up @@ -41,7 +41,7 @@
* because it is used from another package.
*
* @xsl.usage internal
* @LastModified: Aug 2019
* @LastModified: June 2021
*/
public final class ToHTMLStream extends ToStream
{
Expand Down Expand Up @@ -1441,32 +1441,23 @@ else if (
}
}
}

// The next is kind of a hack to keep from escaping in the case
// of Shift_JIS and the like.

/*
else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
&& (ch != 160))
{
writer.write(ch); // no escaping in this case
}
else
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was suggesting removing the entire commented-out block if it is not needed (and confusing), but I will defer the decision to Joe.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. It's very obsolete. The commented-out block from line 1445 to 1454 can be removed.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was mistaken. I deleted the entire comment.

*/
String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
if (null != outputStringForChar)
{
writer.write(outputStringForChar);
}
else if (escapingNotNeeded(ch))
{
writer.write(ch); // no escaping in this case
}
else
{
writer.write("&#");
writer.write(Integer.toString(ch));
writer.write(';');
String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
if (null != outputStringForChar)
{
writer.write(outputStringForChar);
}
else if (escapingNotNeeded(ch))
{
writer.write(ch); // no escaping in this case
}
else
{
writer.write("&#");
writer.write(Integer.toString(ch));
writer.write(';');
}
}
}
cleanStart = i + 1;
Expand Down
104 changes: 104 additions & 0 deletions test/jaxp/javax/xml/jaxp/unittest/transform/SurrogateTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

package transform;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import static jaxp.library.JAXPTestUtilities.compareWithGold;
import static jaxp.library.JAXPTestUtilities.compareStringWithGold;
import org.testng.Assert;
import org.testng.annotations.Listeners;
import org.testng.annotations.Test;

/*
* @test
* @bug 8268457
* @library /javax/xml/jaxp/libs
* @run testng transform.SurrogateTest
* @summary XML Transformer outputs Unicode supplementary character incorrectly to HTML
*/
@Listeners({jaxp.library.FilePolicy.class})
public class SurrogateTest {

    /** Directory containing the test inputs and gold files; falls back to the working directory. */
    static final String TEST_SRC = System.getProperty("test.src", ".");

    /**
     * Transforms SurrogateTest1.xml with SurrogateTest1.xsl using the "html"
     * output method and verifies that the generated file matches the gold
     * file SurrogateTest1.html.
     *
     * @throws Exception if the stylesheet cannot be compiled, the
     *         transformation fails, or a file cannot be read or written
     */
    @Test
    public void toHTMLTest() throws Exception {
        String out = "SurrogateTest1out.html";
        String expected = TEST_SRC + File.separator + "SurrogateTest1.html";
        String xsl = TEST_SRC + File.separator + "SurrogateTest1.xsl";

        try (FileInputStream tFis = new FileInputStream(xsl);
             InputStream fis = this.getClass().getResourceAsStream("SurrogateTest1.xml");
             FileOutputStream fos = new FileOutputStream(out)) {

            // getResourceAsStream signals a missing resource with null; fail
            // with a clear message instead of an obscure transformer error.
            Assert.assertNotNull(fis, "SurrogateTest1.xml was not found next to the test class");

            Source tSrc = new StreamSource(tFis);
            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer t = tf.newTransformer(tSrc);
            t.setOutputProperty("method", "html");

            Source src = new StreamSource(fis);
            Result res = new StreamResult(fos);
            t.transform(src, res);
        }

        // The comparison helper reports a mismatch only through its boolean
        // result, so the result has to be asserted for the test to be able to fail.
        Assert.assertTrue(compareWithGold(expected, out),
                "Transformer output " + out + " differs from gold file " + expected);
    }

    /**
     * Parses SurrogateTest2.xml with a SAX parser and verifies that the
     * "attr" attribute values reported to the handler match the lines of the
     * gold file SurrogateTest2.txt.
     *
     * @throws Exception if the parser cannot be created, parsing fails, or
     *         the gold file cannot be read
     */
    @Test
    public void handlerTest() throws Exception {
        File xmlFile = new File(TEST_SRC, "SurrogateTest2.xml");
        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setNamespaceAware(true);
        SAXParser sp = spf.newSAXParser();
        TestHandler th = new TestHandler();
        sp.parse(xmlFile, th);

        // Compare line by line so the check does not depend on the platform
        // line separator or on a trailing newline in the collected output.
        List<String> expected = Files.readAllLines(Paths.get(TEST_SRC, "SurrogateTest2.txt"));
        Assert.assertEquals(th.lines, expected);
    }

    /**
     * SAX handler that records one line per start tag, in document order,
     * formatted as {@code <localName>@attr:<value of "attr">}.
     */
    private static class TestHandler extends DefaultHandler {
        private final List<String> lines = new ArrayList<>();

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            // getValue returns null when the element has no "attr" attribute,
            // which string concatenation renders as the text "null".
            lines.add(localName + "@attr:" + attributes.getValue("attr"));
        }
    }
}
12 changes: 12 additions & 0 deletions test/jaxp/javax/xml/jaxp/unittest/transform/SurrogateTest1.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>
<form>
<input id="tag1" value="𠮟">
</form>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<root>
<tag1>𠮟</tag1>
</root>
26 changes: 26 additions & 0 deletions test/jaxp/javax/xml/jaxp/unittest/transform/SurrogateTest1.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Test stylesheet: renders each root/tag1 element of the input as an HTML form input. -->
<!-- Output is UTF-8 encoded, indented HTML with the HTML 4.01 Transitional doctype. -->
<xsl:output doctype-public="-//W3C//DTD HTML 4.01 Transitional//EN"
doctype-system="http://www.w3.org/TR/html4/loose.dtd"
encoding="UTF-8" indent="yes" method="html" omit-xml-declaration="yes"/>
<!-- Single template matching the document root; builds the whole HTML page. -->
<xsl:template match="/">
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<!-- One form per root element. -->
<xsl:for-each select="root">
<form>
<!-- One input per tag1 child; its value attribute carries the text content of tag1. -->
<xsl:for-each select="tag1">
<input id="tag1">
<xsl:attribute name="value">
<xsl:value-of select="."/>
</xsl:attribute>
</input>
</xsl:for-each>
</form>
</xsl:for-each>
</body>
</html>
</xsl:template>
</xsl:stylesheet>
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
root@attr:null
tag1@attr:𠮟
tag2@attr:𠀋
tag3@attr:𣱿
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<root>
<tag1 attr="𠮟"/>
<tag2 attr="𠀋"/>
<tag3 attr="𣱿"/>
</root>