diff --git a/core/src/main/java/org/semarglproject/source/SaxSource.java b/core/src/main/java/org/semarglproject/source/SaxSource.java index 5bd3445..70ae780 100644 --- a/core/src/main/java/org/semarglproject/source/SaxSource.java +++ b/core/src/main/java/org/semarglproject/source/SaxSource.java @@ -71,15 +71,23 @@ public void process(InputStream inputStream, String mimeType, String baseUri) th private void initXmlReader() throws SAXException { if (xmlReader == null) { - xmlReader = XMLReaderFactory.createXMLReader(); - xmlReader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + xmlReader = getDefaultXmlReader(); } xmlReader.setContentHandler(sink); xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", sink); } - public void setXmlReader(XMLReader xmlReader) { - this.xmlReader = xmlReader; + public void setXmlReader(XMLReader xmlReader) throws SAXException { + if(xmlReader == null) { + this.xmlReader = getDefaultXmlReader(); + } else { + this.xmlReader = xmlReader; + } } + public static XMLReader getDefaultXmlReader() throws SAXException { + XMLReader result = XMLReaderFactory.createXMLReader(); + result.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + return result; + } } diff --git a/core/src/main/java/org/semarglproject/source/StreamProcessor.java b/core/src/main/java/org/semarglproject/source/StreamProcessor.java index 667a90e..b74174d 100644 --- a/core/src/main/java/org/semarglproject/source/StreamProcessor.java +++ b/core/src/main/java/org/semarglproject/source/StreamProcessor.java @@ -17,6 +17,7 @@ import org.semarglproject.rdf.ParseException; import org.semarglproject.sink.DataSink; +import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import java.io.InputStream; @@ -90,8 +91,14 @@ public void processInternal(Reader reader, String mimeType, String baseUri) thro public boolean setProperty(String key, Object value) { boolean result = false; if 
(XML_READER_PROPERTY.equals(key) && value instanceof XMLReader && source instanceof SaxSource) { - ((SaxSource) source).setXmlReader((XMLReader) value); - result = true; + try { + if (value != null) { + ((SaxSource) source).setXmlReader((XMLReader) value); + result = true; + } + } catch(SAXException e) { + throw new IllegalArgumentException("XMLReader was not able to be initialized", e); + } } return sink.setProperty(key, value) || result; } diff --git a/integration/sesame/src/main/java/org/semarglproject/sesame/rdf/rdfa/RDFaFormat.java b/integration/sesame/src/main/java/org/semarglproject/sesame/rdf/rdfa/RDFaFormat.java deleted file mode 100644 index 0ef5097..0000000 --- a/integration/sesame/src/main/java/org/semarglproject/sesame/rdf/rdfa/RDFaFormat.java +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Copyright 2012-2013 Lev Khomich - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.semarglproject.sesame.rdf.rdfa; - -import org.openrdf.rio.RDFFormat; - -import java.nio.charset.Charset; -import java.util.Arrays; - -/** - * @author Peter Ansell p_ansell@yahoo.com - * - */ -public final class RDFaFormat { - - public static final RDFFormat RDFA = new RDFFormat("RDFa", Arrays.asList( - "application/xhtml+xml", "text/html", "image/svg+xml"), - Charset.forName("UTF-8"), Arrays.asList("xhtml, html, svg"), true, false); - - private RDFaFormat() { - } - -} diff --git a/integration/sesame/src/main/java/org/semarglproject/sesame/rdf/rdfa/RdfaParserConfig.java b/integration/sesame/src/main/java/org/semarglproject/sesame/rdf/rdfa/RdfaParserConfig.java deleted file mode 100644 index a49fe26..0000000 --- a/integration/sesame/src/main/java/org/semarglproject/sesame/rdf/rdfa/RdfaParserConfig.java +++ /dev/null @@ -1,83 +0,0 @@ -/** - * Copyright 2012-2013 Lev Khomich - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.semarglproject.sesame.rdf.rdfa; - -import org.openrdf.rio.ParserConfig; -import org.openrdf.rio.RDFParser; - -/** - * Holds configuration of {@link SesameRDFaParser}. 
- */ -public class RdfaParserConfig extends ParserConfig { - - private final boolean processorGraphEnabled; - private final boolean vocabExpansionEnabled; - private final short rdfaCompatibility; - - /** - * Creates configuration with disabled data verification, enabled stop at first error, enabled preserving - * of bnode IDs and disabled datatype handling. - * @param enableProcessorGraph see {@link SesameRDFaParser#setProcessorGraphEnabled(boolean)} - * @param enableVocabExpansion see {@link SesameRDFaParser#setVocabExpansionEnabled(boolean)} - * @param rdfaCompatibility see {@link SesameRDFaParser#setRdfaCompatibility(short)} - */ - public RdfaParserConfig(boolean enableProcessorGraph, boolean enableVocabExpansion, short rdfaCompatibility) { - super(false, true, true, RDFParser.DatatypeHandling.IGNORE); - this.processorGraphEnabled = enableProcessorGraph; - this.vocabExpansionEnabled = enableVocabExpansion; - this.rdfaCompatibility = rdfaCompatibility; - } - - /** - * Creates custom {@link SesameRDFaParser} configuration. 
- * @param verifyData see {@link SesameRDFaParser#setVerifyData(boolean)} - * @param stopAtFirstError see {@link SesameRDFaParser#setStopAtFirstError(boolean)} - * @param preserveBNodeIDs see {@link SesameRDFaParser#setPreserveBNodeIDs(boolean)} - * @param dtHandling see {@link SesameRDFaParser#setDatatypeHandling(org.openrdf.rio.RDFParser.DatatypeHandling)} - * @param enableProcessorGraph see {@link SesameRDFaParser#setProcessorGraphEnabled(boolean)} - * @param enableVocabExpansion see {@link SesameRDFaParser#setVocabExpansionEnabled(boolean)} - * @param rdfaCompatibility see {@link SesameRDFaParser#setRdfaCompatibility(short)} - */ - public RdfaParserConfig(boolean verifyData, boolean stopAtFirstError, - boolean preserveBNodeIDs, RDFParser.DatatypeHandling dtHandling, - boolean enableProcessorGraph, boolean enableVocabExpansion, short rdfaCompatibility) { - super(verifyData, stopAtFirstError, preserveBNodeIDs, dtHandling); - this.processorGraphEnabled = enableProcessorGraph; - this.vocabExpansionEnabled = enableVocabExpansion; - this.rdfaCompatibility = rdfaCompatibility; - } - - /** - * @return {@link org.semarglproject.rdf.rdfa.RdfaParser#ENABLE_PROCESSOR_GRAPH} setting - */ - public final boolean isProcessorGraphEnabled() { - return processorGraphEnabled; - } - - /** - * @return {@link org.semarglproject.rdf.rdfa.RdfaParser#ENABLE_VOCAB_EXPANSION} setting - */ - public final boolean isVocabExpansionEnabled() { - return vocabExpansionEnabled; - } - - /** - * @return {@link org.semarglproject.rdf.rdfa.RdfaParser#RDFA_VERSION_PROPERTY} setting - */ - public final short getRdfaCompatibility() { - return rdfaCompatibility; - } -} diff --git a/integration/sesame/src/main/java/org/semarglproject/sesame/rdf/rdfa/SemarglParserSettings.java b/integration/sesame/src/main/java/org/semarglproject/sesame/rdf/rdfa/SemarglParserSettings.java new file mode 100644 index 0000000..b4dce32 --- /dev/null +++ 
b/integration/sesame/src/main/java/org/semarglproject/sesame/rdf/rdfa/SemarglParserSettings.java @@ -0,0 +1,71 @@ +/** + * Copyright 2012-2013 Lev Khomich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.semarglproject.sesame.rdf.rdfa; + +import org.openrdf.rio.ParserSetting; +import org.openrdf.rio.helpers.ParserSettingImpl; +import org.semarglproject.rdf.rdfa.RdfaParser; +import org.semarglproject.source.StreamProcessor; +import org.semarglproject.vocab.RDFa; +import org.xml.sax.XMLReader; + +/** + * Settings specific to Semargl that are not in {@link org.openrdf.rio.helpers.BasicParserSettings}. + * + * @author Peter Ansell p_ansell@yahoo.com + * @since 0.5 + */ +public final class SemarglParserSettings { + + /** + * TODO: Javadoc this setting + *
+ * Defaults to false
+ * @since 0.5
+ */
+ public static final ParserSetting
+ * Defaults to false
+ * @since 0.5
+ */
+ public static final ParserSetting
+ * Defaults to 1.1
+ * @since 0.5
+ */
+ public static final ParserSetting
+ * Defaults to null
+ * @since 0.5
+ */
+ public static final ParserSetting
+ * Supported settings can be found using {@link #getSupportedSettings()} and can be modified using
+ * the {@link ParserConfig} object returned from the {@link #getParserConfig()} method.
*/
public SesameRDFaParser() {
- preserveBNodeIDs = true;
- vocabExpansionEnabled = false;
- processorGraphEnabled = false;
- rdfaCompatibility = RDFa.VERSION_11;
- parseErrorListener = null;
+ setParserConfig(new ParserConfig());
streamProcessor = new StreamProcessor(RdfaParser.connect(SesameSink.connect(null)));
- streamProcessor.setProperty(RdfaParser.ENABLE_PROCESSOR_GRAPH, processorGraphEnabled);
- streamProcessor.setProperty(RdfaParser.ENABLE_VOCAB_EXPANSION, vocabExpansionEnabled);
streamProcessor.setProperty(StreamProcessor.PROCESSOR_GRAPH_HANDLER_PROPERTY, this);
+ // BNode ID preservation would otherwise default to false; enable it explicitly to keep this parser's previous default of true
+ setPreserveBNodeIDs(true);
+ parseErrorListener = null;
}
/**
@@ -79,12 +77,12 @@ public SesameRDFaParser() {
*/
public SesameRDFaParser(XMLReader xmlReader) {
this();
- streamProcessor.setProperty(StreamProcessor.XML_READER_PROPERTY, xmlReader);
+ setXmlReader(xmlReader);
}
@Override
public RDFFormat getRDFFormat() {
- return RDFaFormat.RDFA;
+ return RDFFormat.RDFA;
}
@Override
@@ -103,6 +101,7 @@ public void parse(InputStream in, String baseURI) throws RDFParseException, RDFH
@Override
public void parse(Reader reader, String baseURI) throws RDFParseException, RDFHandlerException {
+ refreshSettings();
try {
streamProcessor.process(reader, baseURI);
} catch (ParseException e) {
@@ -132,41 +131,47 @@ public void setParseLocationListener(ParseLocationListener ll) {
@Override
public void setParserConfig(ParserConfig config) {
- if (config instanceof RdfaParserConfig) {
- RdfaParserConfig rdfaParserConfig = (RdfaParserConfig) config;
- setProcessorGraphEnabled(rdfaParserConfig.isProcessorGraphEnabled());
- setVocabExpansionEnabled(rdfaParserConfig.isVocabExpansionEnabled());
- setRdfaCompatibility(rdfaParserConfig.getRdfaCompatibility());
- }
- this.preserveBNodeIDs = config.isPreserveBNodeIDs();
+ this.parserConfig = config;
}
@Override
- public RdfaParserConfig getParserConfig() {
- return new RdfaParserConfig(false, false, preserveBNodeIDs, DatatypeHandling.IGNORE,
- processorGraphEnabled, vocabExpansionEnabled, rdfaCompatibility);
+ public ParserConfig getParserConfig() {
+ return this.parserConfig;
+ }
+
+ @Override
+ public Collection