Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8268457: XML Transformer outputs Unicode supplementary character incorrectly to HTML #162

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -41,7 +41,7 @@
* because it is used from another package.
*
* @xsl.usage internal
* @LastModified: Aug 2019
* @LastModified: June 2021
*/
public final class ToHTMLStream extends ToStream
{
@@ -1441,32 +1441,23 @@ else if (
}
}
}

// The next is kind of a hack to keep from escaping in the case
// of Shift_JIS and the like.

/*
else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
&& (ch != 160))
{
writer.write(ch); // no escaping in this case
}
else
*/
String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
if (null != outputStringForChar)
{
writer.write(outputStringForChar);
}
else if (escapingNotNeeded(ch))
{
writer.write(ch); // no escaping in this case
}
else
{
writer.write("&#");
writer.write(Integer.toString(ch));
writer.write(';');
String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
if (null != outputStringForChar)
{
writer.write(outputStringForChar);
}
else if (escapingNotNeeded(ch))
{
writer.write(ch); // no escaping in this case
}
else
{
writer.write("&#");
writer.write(Integer.toString(ch));
writer.write(';');
}
}
}
cleanStart = i + 1;
@@ -0,0 +1,104 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

package transform;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.InputStream;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import static jaxp.library.JAXPTestUtilities.compareWithGold;
import static jaxp.library.JAXPTestUtilities.compareStringWithGold;
import org.testng.Assert;
import org.testng.annotations.Listeners;
import org.testng.annotations.Test;

/*
* @test
* @bug 8268457
* @library /javax/xml/jaxp/libs
* @run testng transform.SurrogateTest
* @summary XML Transformer outputs Unicode supplementary character incorrectly to HTML
*/
/**
 * Regression tests for JDK-8268457: Unicode supplementary characters
 * (surrogate pairs) must survive both an XSLT transformation to HTML and a
 * SAX parse of attribute values.
 *
 * <p>Each test compares what was produced against a gold file located in the
 * test source directory and fails when the two differ.
 */
@Listeners({jaxp.library.FilePolicy.class})
public class SurrogateTest {

    /**
     * Directory containing the stylesheet, input and gold files. Taken from
     * the {@code test.src} system property, falling back to the current
     * directory when the property is not set.
     */
    static final String TEST_SRC = System.getProperty("test.src", ".");

    /**
     * Transforms {@code SurrogateTest1.xml} with {@code SurrogateTest1.xsl}
     * using the {@code html} output method and checks the generated file
     * against {@code SurrogateTest1.html}.
     *
     * @throws Exception if reading, transforming or comparing fails
     */
    @Test
    public void toHTMLTest() throws Exception {
        String out = "SurrogateTest1out.html";
        String expected = TEST_SRC + File.separator + "SurrogateTest1.html";
        String xsl = TEST_SRC + File.separator + "SurrogateTest1.xsl";

        try (FileInputStream tFis = new FileInputStream(xsl);
             InputStream fis = this.getClass().getResourceAsStream("SurrogateTest1.xml");
             FileOutputStream fos = new FileOutputStream(out)) {

            // getResourceAsStream returns null when the resource is missing;
            // fail here with a clear message instead of deep inside the
            // transformer.
            Assert.assertNotNull(fis, "resource SurrogateTest1.xml not found");

            Source tSrc = new StreamSource(tFis);
            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer t = tf.newTransformer(tSrc);
            t.setOutputProperty("method", "html");

            Source src = new StreamSource(fis);
            Result res = new StreamResult(fos);
            t.transform(src, res);
        }

        // The comparison helper reports its outcome through its return value,
        // so the result has to be asserted for a mismatch to fail the test.
        Assert.assertTrue(compareWithGold(expected, out),
                "transformer output differs from gold file " + expected);
    }

    /**
     * Parses {@code SurrogateTest2.xml} with a namespace-aware SAX parser,
     * records the {@code attr} attribute of every element and checks the
     * recorded text against {@code SurrogateTest2.txt}.
     *
     * @throws Exception if parsing or comparing fails
     */
    @Test
    public void handlerTest() throws Exception {
        File xmlFile = new File(TEST_SRC, "SurrogateTest2.xml");
        String gold = TEST_SRC + File.separator + "SurrogateTest2.txt";

        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setNamespaceAware(true);
        SAXParser sp = spf.newSAXParser();

        TestHandler th = new TestHandler();
        sp.parse(xmlFile, th);

        // As above: the boolean result must be asserted, otherwise a
        // difference from the gold file goes unnoticed.
        Assert.assertTrue(compareStringWithGold(gold, th.sb.toString()),
                "SAX attribute values differ from gold file " + gold);
    }

    /**
     * SAX handler that appends one line per start tag, in the form
     * {@code <localName>@attr:<value of attribute "attr">}.
     */
    private static class TestHandler extends DefaultHandler {
        /** Accumulates the lines produced by {@link #startElement}. */
        private final StringBuilder sb = new StringBuilder();

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            // A missing attribute yields null, which is appended as "null".
            sb.append(localName)
              .append("@attr:")
              .append(attributes.getValue("attr"))
              .append('\n');
        }
    }
}
@@ -0,0 +1,12 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>
<form>
<input id="tag1" value="𠮟">
</form>
</body>
</html>
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<root>
<tag1>𠮟</tag1>
</root>
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Stylesheet for the surrogate-character HTML output test.
  Matches the document root and emits an HTML 4.01 Transitional page
  (UTF-8, indented, html output method). For every tag1 element under
  root it writes an <input id="tag1"> inside a <form>, copying the
  element's text into the input's "value" attribute, so any
  supplementary character in the source ends up in an HTML attribute value.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output doctype-public="-//W3C//DTD HTML 4.01 Transitional//EN"
doctype-system="http://www.w3.org/TR/html4/loose.dtd"
encoding="UTF-8" indent="yes" method="html" omit-xml-declaration="yes"/>
<xsl:template match="/">
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<xsl:for-each select="root">
<form>
<xsl:for-each select="tag1">
<input id="tag1">
<xsl:attribute name="value">
<xsl:value-of select="."/>
</xsl:attribute>
</input>
</xsl:for-each>
</form>
</xsl:for-each>
</body>
</html>
</xsl:template>
</xsl:stylesheet>
@@ -0,0 +1,4 @@
root@attr:null
tag1@attr:𠮟
tag2@attr:𠀋
tag3@attr:𣱿
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<root>
<tag1 attr="𠮟"/>
<tag2 attr="𠀋"/>
<tag3 attr="𣱿"/>
</root>