Permalink
Browse files

Initial checkin of RDFa step

  • Loading branch information...
ndw committed Oct 12, 2013
1 parent fe145e7 commit cea6033c081529d2648e4eb346609c4b89a2a144
Showing with 270 additions and 0 deletions.
  1. +3 −0 resources/etc/configuration.xml
  2. +6 −0 resources/etc/extension-library.xml
  3. +261 −0 src/com/xmlcalabash/extensions/RDFa.java
@@ -163,6 +163,9 @@
<implementation type="cx:message"
class-name="com.xmlcalabash.extensions.Message"/>
<!-- Binds the cx:rdfa step type to the class com.xmlcalabash.extensions.RDFa. -->
<implementation type="cx:rdfa"
class-name="com.xmlcalabash.extensions.RDFa"/>
<implementation type="cx:wait-for-update"
class-name="com.xmlcalabash.extensions.WaitForUpdate"/>
@@ -57,6 +57,12 @@
<p:option name="message" required="true"/>
</p:declare-step>
<!-- Declares the cx:rdfa step: a "source" input, a "result" output that may
     carry a sequence of documents, and a max-triples-per-document option
     whose default value is 100. -->
<p:declare-step type="cx:rdfa">
<p:input port="source"/>
<p:output port="result" sequence="true"/>
<p:option name="max-triples-per-document" select="100"/>
</p:declare-step>
<p:declare-step type="cx:wait-for-update">
<p:output port="result"/>
<p:option name="href" required="true"/>
@@ -0,0 +1,261 @@
package com.xmlcalabash.extensions;
import com.xmlcalabash.core.XProcConstants;
import com.xmlcalabash.core.XProcException;
import com.xmlcalabash.core.XProcRuntime;
import com.xmlcalabash.io.ReadablePipe;
import com.xmlcalabash.io.WritablePipe;
import com.xmlcalabash.library.DefaultStep;
import com.xmlcalabash.runtime.XAtomicStep;
import com.xmlcalabash.util.S9apiUtils;
import com.xmlcalabash.util.TreeWriter;
import net.sf.saxon.s9api.Axis;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmSequenceIterator;
import org.semarglproject.rdf.ParseException;
import org.semarglproject.rdf.rdfa.RdfaParser;
import org.semarglproject.sink.TripleSink;
import org.semarglproject.source.StreamProcessor;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.util.Calendar;
import java.util.Random;
import java.util.Vector;
/**
 * Implementation of the <code>cx:rdfa</code> extension step.
 *
 * <p>The step reads one document from its input, serializes it, and feeds the
 * serialization to the Semargl RDFa parser. Every triple reported by the parser
 * is written as a <code>sem:triple</code> element inside a
 * <code>sem:triples</code> wrapper. A new result document is started each time
 * the number of triples in the current one reaches the value of the
 * <code>max-triples-per-document</code> option.</p>
 *
 * @author ndw
 */
public class RDFa extends DefaultStep {
    private static final String SEM_NS = "http://marklogic.com/semantics";
    private static final String XSD_STRING = "http://www.w3.org/2001/XMLSchema#string";

    private static final QName sem_triples = new QName("sem", SEM_NS, "triples");
    private static final QName sem_triple = new QName("sem", SEM_NS, "triple");
    private static final QName sem_subject = new QName("sem", SEM_NS, "subject");
    private static final QName sem_predicate = new QName("sem", SEM_NS, "predicate");
    private static final QName sem_object = new QName("sem", SEM_NS, "object");
    private static final QName _datatype = new QName("", "datatype");
    private static final QName _max_triples = new QName("", "max-triples-per-document");

    private ReadablePipe source = null;
    private WritablePipe result = null;

    /** Maximum number of triples written to a single result document. */
    private long limit = 100;

    /** Number of triples written to the result document currently being built. */
    private long count = 0;

    /**
     * Creates a new instance of the RDFa step.
     *
     * @param runtime the runtime passed on to the superclass
     * @param step the atomic step passed on to the superclass
     */
    public RDFa(XProcRuntime runtime, XAtomicStep step) {
        super(runtime, step);
    }

    /**
     * Records the pipe that documents are read from. The port name is not
     * inspected; whichever pipe is passed last becomes the source.
     */
    public void setInput(String port, ReadablePipe pipe) {
        source = pipe;
    }

    /**
     * Records the pipe that results are written to. The port name is not
     * inspected; whichever pipe is passed last becomes the result.
     */
    public void setOutput(String port, WritablePipe pipe) {
        result = pipe;
    }

    /**
     * Resets both pipes and the per-document triple counter so that the step
     * can be run again from a clean state.
     */
    public void reset() {
        source.resetReader();
        result.resetWriter();
        count = 0;
    }

    /**
     * Parses the source document as RDFa and writes the extracted triples to
     * the result pipe.
     *
     * @throws SaxonApiException if the superclass or serialization reports one
     * @throws XProcException if the option is not an integer, or if
     *         serialization or RDFa parsing fails
     */
    public void run() throws SaxonApiException {
        super.run();

        String limitStr = getOption(_max_triples).getString();
        try {
            // limit is a long, so parse it as one instead of capping at int range.
            limit = Long.parseLong(limitStr);
        } catch (NumberFormatException nfe) {
            throw XProcException.dynamicError(19, "The max-triples-per-document on cx:rdfa must be an integer");
        }

        // A previous run may have left a partial count behind; start from zero.
        count = 0;

        XdmNode doc = source.read();

        try {
            Sink sink = new Sink();
            StreamProcessor sp = new StreamProcessor(RdfaParser.connect(sink));

            // HACK!!! The parser consumes a byte stream, so the document is
            // serialized to a string and re-read from memory.
            // FIXME: set serializer properties appropriately!
            Serializer serializer = makeSerializer();
            StringWriter writer = new StringWriter();
            serializer.setOutputWriter(writer);
            S9apiUtils.serialize(runtime, doc, serializer);
            writer.close();

            ByteArrayInputStream bais = new ByteArrayInputStream(writer.toString().getBytes("UTF-8"));
            sp.process(bais, doc.getBaseURI().toASCIIString());
        } catch (IOException e) {
            throw new XProcException(e);
        } catch (ParseException e) {
            throw new XProcException(e);
        }
    }

    /**
     * Receives triples from the RDFa parser and turns them into
     * <code>sem:triples</code> documents on the enclosing step's result pipe.
     */
    private class Sink implements TripleSink {
        private TreeWriter tree = null;
        private String baseURI = null;

        /** URI prefix substituted for blank node identifiers; fixed per sink. */
        private final String blankNodePrefix;

        public Sink() {
            long randomValue = new Random().nextLong();
            long milliSecs = System.currentTimeMillis();
            // Computed once: the prefix does not depend on the individual node.
            blankNodePrefix = "http://marklogic.com/semantics/blank/"
                    + Long.toHexString(fuse(scramble(milliSecs), randomValue))
                    + "/";
        }

        /** Writes a triple whose object is a resource (URI or blank node). */
        @Override
        public void addNonLiteral(String subj, String pred, String obj) {
            startTriple(subj, pred);
            endTriple(patchURI(obj));
        }

        /**
         * Writes a triple whose object is a plain literal. The object carries
         * an xml:lang attribute when a language is given, otherwise a
         * datatype attribute naming xs:string.
         */
        @Override
        public void addPlainLiteral(String subj, String pred, String obj, String lang) {
            startTriple(subj, pred);
            if (lang == null || "".equals(lang)) {
                tree.addAttribute(_datatype, XSD_STRING);
            } else {
                tree.addAttribute(XProcConstants.xml_lang, lang);
            }
            endTriple(obj);
        }

        /**
         * Writes a triple whose object is a typed literal; a null datatype is
         * treated as xs:string.
         */
        @Override
        public void addTypedLiteral(String subj, String pred, String obj, String datatype) {
            if (datatype == null) {
                datatype = XSD_STRING;
            }
            startTriple(subj, pred);
            tree.addAttribute(_datatype, datatype);
            endTriple(obj);
        }

        @Override
        public void setBaseUri(String s) {
            baseURI = s;
        }

        /** Opens the first result document. */
        @Override
        public void startStream() throws ParseException {
            openDocument();
        }

        /**
         * Closes the document in progress and writes it to the result pipe
         * only if it contains at least one triple.
         */
        @Override
        public void endStream() throws ParseException {
            tree.addEndElement();
            tree.endDocument();
            if (count > 0) {
                result.write(tree.getResult());
                count = 0;
            }
        }

        @Override
        public boolean setProperty(String key, Object value) {
            return false;
        }

        /** Starts a new document and opens its sem:triples wrapper element. */
        private void openDocument() {
            tree = new TreeWriter(runtime);
            tree.startDocument(step.getNode().getBaseURI());
            tree.addStartElement(sem_triples);
            tree.startContent();
        }

        /**
         * Opens a sem:triple, writes its subject and predicate, and opens the
         * sem:object start tag so that the caller may still add attributes.
         */
        private void startTriple(String subj, String pred) {
            tree.addStartElement(sem_triple);
            tree.startContent();
            addTextElement(sem_subject, patchURI(subj));
            addTextElement(sem_predicate, patchURI(pred));
            tree.addStartElement(sem_object);
        }

        /**
         * Writes the object text, closes sem:object and sem:triple, and
         * accounts for the finished triple.
         */
        private void endTriple(String objectText) {
            tree.startContent();
            tree.addText(objectText);
            tree.addEndElement(); // sem:object
            tree.addEndElement(); // sem:triple
            nextFile();
        }

        /** Writes an element that contains only the given text. */
        private void addTextElement(QName name, String text) {
            tree.addStartElement(name);
            tree.startContent();
            tree.addText(text);
            tree.addEndElement();
        }

        /**
         * Counts one finished triple and, once the limit is reached, writes
         * the current document and opens a fresh one.
         */
        private void nextFile() {
            count += 1;
            if (count >= limit) {
                tree.addEndElement();
                tree.endDocument();
                result.write(tree.getResult());
                openDocument();
                count = 0;
            }
        }

        /**
         * Rewrites a blank node identifier (one starting with "_:") as a URI
         * under this sink's prefix; any other value is returned unchanged.
         */
        private String patchURI(String uri) {
            if (uri.startsWith("_:")) {
                return blankNodePrefix + uri;
            }
            return uri;
        }

        // NOTE(review): ">>" is a signed shift, so for negative x this is not
        // a true bit rotation. Left unchanged because the result is only used
        // as an opaque identifier component; confirm before swapping in
        // Long.rotateLeft.
        private long rotl(long x, long y) {
            return (x << y) ^ (x >> (64 - y));
        }

        private long fuse(long a, long b) {
            return rotl(a, 8) ^ b;
        }

        private long scramble(long x) {
            return x ^ rotl(x, 20) ^ rotl(x, 40);
        }
    }
}

0 comments on commit cea6033

Please sign in to comment.