Skip to content

Commit

Permalink
Remove BOM from UTF-8 encoded file (String) if present
Browse files Browse the repository at this point in the history
-SITE-3007 ETT GG "Validator broke" - UTF-8-BOM issue
  • Loading branch information
drbgfc committed Jun 6, 2019
1 parent 970bce6 commit b2dce3c
Show file tree
Hide file tree
Showing 4 changed files with 39,910 additions and 560 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,13 @@ private List<RefCCDAValidationResult> runValidators(String validationObjective,
InputStream ccdaFileInputStream = null;
try {
ccdaFileInputStream = ccdaFile.getInputStream();
String ccdaFileContents = IOUtils.toString(new BOMInputStream(ccdaFileInputStream));
BOMInputStream bomInputStream = new BOMInputStream(ccdaFileInputStream);
if(bomInputStream.hasBOM()) {
logger.warn(
"The C-CDA file has a BOM which is supposed to be removed by BOMInputStream - encoding w/o BOM: "
+ bomInputStream.getBOMCharsetName());
}
String ccdaFileContents = IOUtils.toString(bomInputStream, "UTF-8");

List<RefCCDAValidationResult> mdhtResults = doMDHTValidation(validationObjective, referenceFileName, ccdaFileContents);
if(mdhtResults != null && !mdhtResults.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,36 @@
package org.sitenv.referenceccda.validators;

import java.io.IOException;
import java.io.StringReader;

import org.apache.log4j.Logger;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.IOException;
import java.io.StringReader;

public abstract class BaseCCDAValidator {
protected static void trackXPathsInXML(XPathIndexer xpathIndexer, String xmlString) throws SAXException{
public static final String UTF8_BOM = "\uFEFF";
private static Logger logger = Logger.getLogger(BaseCCDAValidator.class);

/**
 * Parses the given XML text with a SAX reader so that the supplied indexer
 * receives the document's content events.
 *
 * <p>A leading UTF-8 BOM character is stripped from the text before parsing.
 * An {@link IOException} raised while parsing is logged (with its stack trace)
 * and otherwise ignored, so the method returns normally in that case.
 *
 * @param xpathIndexer content handler registered on the reader before parsing
 * @param xmlString    the XML document as text
 * @throws SAXException if the reader cannot be created or the parse fails
 */
protected static void trackXPathsInXML(XPathIndexer xpathIndexer, String xmlString) throws SAXException{
    // NOTE(review): the reader runs with default settings; if xmlString can come
    // from untrusted uploads, consider disabling DTDs/external entities - confirm.
    XMLReader parser = XMLReaderFactory.createXMLReader();
    parser.setContentHandler(xpathIndexer);
    try {
        xmlString = ifHasUtf8BomThenRemove(xmlString);
        InputSource inputSource = new InputSource(new StringReader(xmlString));
        parser.parse(inputSource);
    } catch (IOException e) {
        // Report once through the logger and attach the exception so the stack
        // trace lands in the log instead of being split across stderr and stdout.
        logger.error("Error In Line Number Routine: Bad filename, path or invalid document.", e);
    }
}

/**
 * Returns the given XML text without a leading BOM character, if one is present.
 *
 * <p>When the text starts with {@link #UTF8_BOM}, a warning is logged and the
 * remainder of the text is returned; otherwise the text is returned unchanged.
 *
 * @param xml the XML document as text
 * @return {@code xml} with its leading BOM character removed, or {@code xml} itself
 */
private static String ifHasUtf8BomThenRemove(String xml) {
    final boolean startsWithBom = xml.startsWith(UTF8_BOM);
    if (!startsWithBom) {
        return xml;
    }
    logger.warn("Found UTF-8 BOM, removing...");
    // UTF8_BOM is a single char, so this drops exactly the first character.
    return xml.substring(UTF8_BOM.length());
}
}
Loading

0 comments on commit b2dce3c

Please sign in to comment.