diff --git a/README.md b/README.md index 9ddc09c..9a9e006 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,8 @@ JSONLD-Java Clerezza Integration module [![Build Status](https://travis-ci.org/jsonld-java/jsonld-java-clerezza.svg?branch=master)](https://travis-ci.org/jsonld-java/jsonld-java-clerezza) [![Coverage Status](https://coveralls.io/repos/jsonld-java/jsonld-java-clerezza/badge.svg?branch=master)](https://coveralls.io/r/jsonld-java/jsonld-java-clerezza?branch=master) +This module provides a `ParsingProvider` and a `SerializingProvider` for Apache Clerezza. These providers plug into the Clerezza `Parser` and `Serializer` service infrastructure, so adding this bundle allows Clerezza to parse and serialize JSON-LD. + USAGE ===== @@ -24,3 +26,32 @@ ClerezzaTripleCallback The ClerezzaTripleCallback returns an instance of `org.apache.clerezza.rdf.core.MGraph` See [ClerezzaTripleCallbackTest.java](./src/test/java/com/github/jsonldjava/clerezza/ClerezzaTripleCallbackTest.java) for example Usage. + + +From OSGI +--------- + +Assuming the above bundle is active in the OSGi environment, one can simply inject the `Serializer` and/or `Parser` service. + + @Reference + private Serializer serializer; + + @Reference + private Parser parser; + + +Normal Java +----------- + +Both the `Parser` and `Serializer` also support `java.util.ServiceLoader`. So when running outside an OSGi environment, one can use the static `getInstance()` method to obtain an instance. + + Serializer serializer = Serializer.getInstance(); + + Parser parser = Parser.getInstance(); + +Supported Formats +----------------- + +The JSON-LD parser implementation supports `application/ld+json`. The serializer supports both `application/ld+json` and `application/json`. + +The rationale behind this is that the parser cannot parse arbitrary JSON, whereas the serializer always generates valid JSON.
diff --git a/pom.xml b/pom.xml index 708b5f7..133e87c 100644 --- a/pom.xml +++ b/pom.xml @@ -22,6 +22,9 @@ Peter Ansell + + Rupert Westenthaler + @@ -36,6 +39,19 @@ jar compile + + org.apache.clerezza + rdf.core + ${clerezza.version} + + + + org.apache.felix + org.apache.felix.scr.annotations + 1.9.12 + + + ${project.groupId} jsonld-java @@ -45,8 +61,9 @@ org.apache.clerezza - rdf.core - ${clerezza.version} + rdf.ontologies + 0.12 + test junit @@ -74,6 +91,48 @@ org.jacoco jacoco-maven-plugin + + org.apache.felix + maven-bundle-plugin + true + + + https://github.com/jsonld-java/jsonld-java-clerezza + JSONLD-JAVA Clerezza Integration + ${project.artifactId} + <_versionpolicy>$${version;===;${@}} + <_provider-policy>[$(version;===;$(@)),$(version;=+;$(@))) + <_consumer-policy>[$(version;===;$(@)),$(version;+;$(@))) + + + + + org.apache.felix + maven-scr-plugin + 1.21.0 + + + + org.slf4j + slf4j-simple + ${slf4j.version} + + + + + generate-scr-scrdescriptor + + scr + + + + JSONLD-JAVA + + + + + diff --git a/src/main/java/com/github/jsonldjava/clerezza/ClerezzaJsonLdParsingProvider.java b/src/main/java/com/github/jsonldjava/clerezza/ClerezzaJsonLdParsingProvider.java new file mode 100644 index 0000000..0923399 --- /dev/null +++ b/src/main/java/com/github/jsonldjava/clerezza/ClerezzaJsonLdParsingProvider.java @@ -0,0 +1,65 @@ +package com.github.jsonldjava.clerezza; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.clerezza.rdf.core.MGraph; +import org.apache.clerezza.rdf.core.UriRef; +import org.apache.clerezza.rdf.core.serializedform.ParsingProvider; +import org.apache.clerezza.rdf.core.serializedform.SupportedFormat; +import org.apache.felix.scr.annotations.Component; +import org.apache.felix.scr.annotations.ConfigurationPolicy; +import org.apache.felix.scr.annotations.Service; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.github.jsonldjava.core.JsonLdError; +import com.github.jsonldjava.core.JsonLdProcessor; 
+import com.github.jsonldjava.utils.JsonUtils; + +/** + * A {@link org.apache.clerezza.rdf.core.serializedform.ParsingProvider} for + * JSON-LD (application/ld+json) based on the java-jsonld library + * + * @author Rupert Westenthaler + * + */ +@Component(immediate = true, policy = ConfigurationPolicy.OPTIONAL) +@Service +@SupportedFormat("application/ld+json") +public class ClerezzaJsonLdParsingProvider implements ParsingProvider { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Override + public void parse(MGraph target, InputStream serializedGraph, String formatIdentifier, + UriRef baseUri) { + // The callback will add parsed triples to the target MGraph + final ClerezzaTripleCallback ctc = new ClerezzaTripleCallback(); + ctc.setMGraph(target); + Object input; + int startSize = 0; + if (logger.isDebugEnabled()) { + startSize = target.size(); + } + final long start = System.currentTimeMillis(); + try { + input = JsonUtils.fromInputStream(serializedGraph, "UTF-8"); + } catch (final IOException e) { + logger.error("Unable to read from the parsed input stream", e); + throw new RuntimeException(e.getMessage(), e); + } + try { + JsonLdProcessor.toRDF(input, ctc); + } catch (final JsonLdError e) { + logger.error("Unable to parse JSON-LD from the parsed input stream", e); + throw new RuntimeException(e.getMessage(), e); + } + if (logger.isDebugEnabled()) { + logger.debug(" - parsed {} triples in {}ms", target.size() - startSize, + System.currentTimeMillis() - start); + } + } + +} + diff --git a/src/main/java/com/github/jsonldjava/clerezza/ClerezzaJsonLdSerializingProvider.java b/src/main/java/com/github/jsonldjava/clerezza/ClerezzaJsonLdSerializingProvider.java new file mode 100644 index 0000000..6a14084 --- /dev/null +++ b/src/main/java/com/github/jsonldjava/clerezza/ClerezzaJsonLdSerializingProvider.java @@ -0,0 +1,183 @@ +package com.github.jsonldjava.clerezza; + +import java.io.IOException; +import java.io.OutputStream; +import 
java.io.OutputStreamWriter; +import java.io.Writer; +import java.nio.charset.Charset; +import java.util.Collections; +import java.util.Dictionary; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.clerezza.rdf.core.TripleCollection; +import org.apache.clerezza.rdf.core.serializedform.SerializingProvider; +import org.apache.clerezza.rdf.core.serializedform.SupportedFormat; +import org.apache.felix.scr.annotations.Activate; +import org.apache.felix.scr.annotations.Component; +import org.apache.felix.scr.annotations.ConfigurationPolicy; +import org.apache.felix.scr.annotations.Deactivate; +import org.apache.felix.scr.annotations.Property; +import org.apache.felix.scr.annotations.PropertyOption; +import org.apache.felix.scr.annotations.Service; +import org.osgi.service.component.ComponentContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.github.jsonldjava.core.JsonLdError; +import com.github.jsonldjava.core.JsonLdOptions; +import com.github.jsonldjava.core.JsonLdProcessor; +import com.github.jsonldjava.utils.JsonUtils; + +/** + * A {@link org.apache.clerezza.rdf.core.serializedform.SerializingProvider} for + * JSON-LD (application/ld+json) based on the java-jsonld library + * + * @author Rupert Westenthaler + */ +@Component(immediate = true, policy = ConfigurationPolicy.OPTIONAL) +@Service +@SupportedFormat(value = { "application/ld+json", "application/json" }) +public class ClerezzaJsonLdSerializingProvider implements SerializingProvider { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private static final Charset UTF8 = Charset.forName("UTF-8"); + + private static final String MODE_EXPAND = "expand"; + private static final String MODE_FLATTEN = "flatten"; + private static final String MODE_COMPACT = "compact"; + + @Property(value = "", options = { + @PropertyOption(value = "%mode.option.none", name = ""), + @PropertyOption(value = "%mode.option.flatten", 
name = "flatten"), + @PropertyOption(value = "%mode.option.compact", name = "compact"), + @PropertyOption(value = "%mode.option.expand", name = MODE_EXPAND) }) + private static final String PROP_MODE = "mode"; + + @Property(boolValue = false) + private static final String PROP_USE_RDF_TYPE = "useRdfTye"; + + @Property(boolValue = false) + private static final String PROP_USE_NATIVE_TYPES = "useNativeTypes"; + + @Property(boolValue = true) + private static final String PROP_PRETTY_PRINT = "prettyPrint"; + + // TODO: make configurable or read the whole prefix.cc list from a file and + // search for really used namespaces while parsing the triples in the + // ClerezzaRDFParser + private static Map DEFAULT_NAMESPACES; + static { + // core ontologies, top from prefixcc and some stanbol specific + final Map ns = new LinkedHashMap(); + // core schemas + ns.put("xsd", "http://www.w3.org/2001/XMLSchema#"); + ns.put("owl", "http://www.w3.org/2002/07/owl#"); + ns.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); + ns.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#"); + // well known ontologies + ns.put("skos", "http://www.w3.org/2004/02/skos/core#"); + ns.put("geo", "http://www.w3.org/2003/01/geo/wgs84_pos#"); + ns.put("dc", "http://purl.org/dc/elements/1.1/"); + ns.put("foaf", "http://xmlns.com/foaf/0.1/"); + ns.put("ma", "http://www.w3.org/ns/ma-ont#"); + // big datasets + ns.put("dbo", "http://dbpedia.org/ontology/"); + ns.put("dbp", "http://dbpedia.org/property/"); + ns.put("yago", "http://yago-knowledge.org/resource/"); + ns.put("fb", "http://rdf.freebase.com/ns/"); + ns.put("geonames", "http://www.geonames.org/ontology#"); + // stanbol specific + ns.put("fise", "http://fise.iks-project.eu/ontology/"); + ns.put("enhancer", "http://stanbol.apache.org/ontology/enhancer/enhancer#"); + ns.put("entityhub", "http://stanbol.apache.org/ontology/entityhub/entityhub#"); + + DEFAULT_NAMESPACES = Collections.unmodifiableMap(ns); + } + + private JsonLdOptions opts = null; 
+ private String mode; + + private boolean prettyPrint; + + @Override + public void serialize(OutputStream serializedGraph, TripleCollection tc, String formatIdentifier) { + final ClerezzaRDFParser serializer = new ClerezzaRDFParser(); + try { + final long start = System.currentTimeMillis(); + Object output = JsonLdProcessor.fromRDF(tc, serializer); + + if (MODE_EXPAND.equalsIgnoreCase(mode)) { + logger.debug(" - mode: {}", MODE_EXPAND); + output = JsonLdProcessor.expand(output, opts); + } + if (MODE_FLATTEN.equalsIgnoreCase(mode)) { + logger.debug(" - mode: {}", MODE_FLATTEN); + // TODO: Allow inframe config + final Object inframe = null; + output = JsonLdProcessor.flatten(output, inframe, opts); + } + if (MODE_COMPACT.equalsIgnoreCase(mode)) { + logger.debug(" - mode: {}", MODE_COMPACT); + // TODO: collect namespaces used in the triples in the + // ClerezzaRDFParser + final Map localCtx = new HashMap(); + localCtx.put("@context", DEFAULT_NAMESPACES); + output = JsonLdProcessor.compact(output, localCtx, opts); + } + final Writer writer = new OutputStreamWriter(serializedGraph, UTF8); + logger.debug(" - prettyPrint: {}", prettyPrint); + if (prettyPrint) { + JsonUtils.writePrettyPrint(writer, output); + } else { + JsonUtils.write(writer, output); + } + if (logger.isDebugEnabled()) { + logger.debug(" - serialized {} triples in {}ms", serializer.getCount(), + System.currentTimeMillis() - start); + } + } catch (final JsonLdError e) { + throw new RuntimeException(e.getMessage(), e); + } catch (final IOException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Activate + protected void activate(ComponentContext ctx) { + opts = new JsonLdOptions(); + @SuppressWarnings("unchecked") + final Dictionary config = ctx.getProperties(); + // boolean properties + opts.setUseRdfType(getState(config.get(PROP_USE_RDF_TYPE), false)); + opts.setUseNativeTypes(getState(config.get(PROP_USE_NATIVE_TYPES), false)); + prettyPrint = getState(config.get(PROP_PRETTY_PRINT), 
true); + // parse the string mode + final Object value = config.get(PROP_MODE); + mode = value == null ? null : value.toString(); + } + + @Deactivate + protected void deactivate(ComponentContext ctx) { + opts = null; + mode = null; + prettyPrint = false; + } + + /** + * @param value + */ + private boolean getState(Object value, boolean defaultState) { + if (value instanceof Boolean) { + return ((Boolean) value).booleanValue(); + } else if (value != null) { + return Boolean.parseBoolean(value.toString()); + } else { + return defaultState; + } + } + +} + diff --git a/src/main/java/com/github/jsonldjava/clerezza/ClerezzaRDFParser.java b/src/main/java/com/github/jsonldjava/clerezza/ClerezzaRDFParser.java new file mode 100644 index 0000000..7a27d10 --- /dev/null +++ b/src/main/java/com/github/jsonldjava/clerezza/ClerezzaRDFParser.java @@ -0,0 +1,113 @@ +package com.github.jsonldjava.clerezza; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.clerezza.rdf.core.BNode; +import org.apache.clerezza.rdf.core.Language; +import org.apache.clerezza.rdf.core.Literal; +import org.apache.clerezza.rdf.core.NonLiteral; +import org.apache.clerezza.rdf.core.PlainLiteral; +import org.apache.clerezza.rdf.core.Resource; +import org.apache.clerezza.rdf.core.Triple; +import org.apache.clerezza.rdf.core.TripleCollection; +import org.apache.clerezza.rdf.core.TypedLiteral; +import org.apache.clerezza.rdf.core.UriRef; + +import com.github.jsonldjava.core.JsonLdError; +import com.github.jsonldjava.core.JsonLdProcessor; +import com.github.jsonldjava.core.RDFDataset; +import com.github.jsonldjava.core.RDFParser; + +/** + * Converts a Clerezza {@link TripleCollection} to the {@link RDFDataset} used + * by the {@link JsonLdProcessor} + * + * @author Rupert Westenthaler + * + */ +public class ClerezzaRDFParser implements RDFParser { + + private static String RDF_LANG_STRING = "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"; + + private long count = 0; + + @Override + 
public RDFDataset parse(Object input) throws JsonLdError { + count = 0; + final Map bNodeMap = new HashMap(1024); + final RDFDataset result = new RDFDataset(); + if (input instanceof TripleCollection) { + for (final Triple t : ((TripleCollection) input)) { + handleStatement(result, t, bNodeMap); + } + } + bNodeMap.clear(); // help gc + return result; + } + + private void handleStatement(RDFDataset result, Triple t, Map bNodeMap) { + final String subject = getResourceValue(t.getSubject(), bNodeMap); + final String predicate = getResourceValue(t.getPredicate(), bNodeMap); + final Resource object = t.getObject(); + + if (object instanceof Literal) { + + final String value = ((Literal) object).getLexicalForm(); + final String language; + final String datatype; + if (object instanceof TypedLiteral) { + language = null; + datatype = getResourceValue(((TypedLiteral) object).getDataType(), bNodeMap); + } else if (object instanceof PlainLiteral) { + // we use RDF 1.1 literals so we do set the RDF_LANG_STRING + // datatype + datatype = RDF_LANG_STRING; + final Language l = ((PlainLiteral) object).getLanguage(); + if (l == null) { + language = null; + } else { + language = l.toString(); + } + } else { + throw new IllegalStateException("Unknown Literal class " + + object.getClass().getName()); + } + result.addTriple(subject, predicate, value, datatype, language); + count++; + } else { + result.addTriple(subject, predicate, getResourceValue((NonLiteral) object, bNodeMap)); + count++; + } + + } + + /** + * The count of processed triples (not thread save) + * + * @return the count of triples processed by the last {@link #parse(Object)} + * call + */ + public long getCount() { + return count; + } + + private String getResourceValue(NonLiteral nl, Map bNodeMap) { + if (nl == null) { + return null; + } else if (nl instanceof UriRef) { + return ((UriRef) nl).getUnicodeString(); + } else if (nl instanceof BNode) { + String bNodeId = bNodeMap.get(nl); + if (bNodeId == null) { + bNodeId 
= Integer.toString(bNodeMap.size()); + bNodeMap.put((BNode) nl, bNodeId); + } + return new StringBuilder("_:b").append(bNodeId).toString(); + } else { + throw new IllegalStateException("Unknwon NonLiteral type " + nl.getClass().getName() + + "!"); + } + } +} + diff --git a/src/main/java/com/github/jsonldjava/clerezza/ClerezzaTripleCallback.java b/src/main/java/com/github/jsonldjava/clerezza/ClerezzaTripleCallback.java index cc91d36..89ac284 100644 --- a/src/main/java/com/github/jsonldjava/clerezza/ClerezzaTripleCallback.java +++ b/src/main/java/com/github/jsonldjava/clerezza/ClerezzaTripleCallback.java @@ -20,6 +20,8 @@ public class ClerezzaTripleCallback implements JsonLdTripleCallback { + private static final String RDF_LANG_STRING = "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"; + private MGraph mGraph = new SimpleMGraph(); private Map bNodeMap = new HashMap(); @@ -51,12 +53,10 @@ private void triple(String s, String p, String value, String datatype, String la Resource object; if (language != null) { object = new PlainLiteralImpl(value, new Language(language)); + } else if (datatype == null || RDF_LANG_STRING.equals(datatype)) { + object = new PlainLiteralImpl(value); } else { - if (datatype != null) { - object = new TypedLiteralImpl(value, new UriRef(datatype)); - } else { - object = new PlainLiteralImpl(value); - } + object = new TypedLiteralImpl(value, new UriRef(datatype)); } mGraph.add(new TripleImpl(subject, predicate, object)); diff --git a/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.ParsingProvider b/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.ParsingProvider new file mode 100644 index 0000000..2473510 --- /dev/null +++ b/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.ParsingProvider @@ -0,0 +1 @@ +com.github.jsonldjava.clerezza.ClerezzaJsonLdParsingProvider \ No newline at end of file diff --git 
a/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider b/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider new file mode 100644 index 0000000..e65291c --- /dev/null +++ b/src/main/resources/META-INF/services/org.apache.clerezza.rdf.core.serializedform.SerializingProvider @@ -0,0 +1 @@ +com.github.jsonldjava.clerezza.ClerezzaJsonLdSerializingProvider \ No newline at end of file diff --git a/src/test/java/com/github/jsonldjava/clerezza/ClerezzaJsonLdParserSerializerTest.java b/src/test/java/com/github/jsonldjava/clerezza/ClerezzaJsonLdParserSerializerTest.java new file mode 100644 index 0000000..41bff24 --- /dev/null +++ b/src/test/java/com/github/jsonldjava/clerezza/ClerezzaJsonLdParserSerializerTest.java @@ -0,0 +1,91 @@
+package com.github.jsonldjava.clerezza;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.ServiceLoader;
+
+import org.apache.clerezza.rdf.core.Graph;
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.serializedform.Parser;
+import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.clerezza.rdf.ontologies.FOAF;
+import org.apache.clerezza.rdf.ontologies.RDF;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tests the JSON-LD parsing and serializing providers through the Clerezza
+ * {@link Parser} and {@link Serializer} obtained via {@code getInstance()}.
+ */
+public class ClerezzaJsonLdParserSerializerTest {
+
+    private static final Logger log = LoggerFactory
+            .getLogger(ClerezzaJsonLdParserSerializerTest.class);
+
+    private static final Charset UTF8 = Charset.forName("UTF8");
+
+    private static final String FORMAT = "application/ld+json";
+
+    private static final String TEST_FILE = "testfiles/product.jsonld";
+
+    private static Graph rdfData;
+
+    /**
+     * Typical Clerezza Parser initialization. The JSON-LD parsing provider
+     * will be found by using the java {@link ServiceLoader}
+     */
+    private Parser parser = Parser.getInstance();
+    /**
+     * Typical Clerezza Serializer initialization. The JSON-LD serializing provider
+     * will be found by using the java {@link ServiceLoader}
+     */
+    private Serializer serializer = Serializer.getInstance();
+
+    /** Builds the graph that is serialized and re-parsed by {@link #serializerTest()}. */
+    @BeforeClass
+    public static void init(){
+        LiteralFactory lf = LiteralFactory.getInstance();
+        UriRef pers1 = new UriRef("http://www.example.org/test#pers1");
+        UriRef pers2 = new UriRef("http://www.example.org/test#pers2");
+        MGraph data = new SimpleMGraph();
+        //NOTE: This tests a plain literal with and without language as
+        //      well as a xsd:string typed literal. To test correct handling of
+        //      RDF1.1
+        data.add(new TripleImpl(pers1, RDF.type, FOAF.Person));
+        data.add(new TripleImpl(pers1, FOAF.name, new PlainLiteralImpl("Rupert Westenthaler",
+                new Language("de"))));
+        data.add(new TripleImpl(pers1, FOAF.nick, new PlainLiteralImpl("westei")));
+        data.add(new TripleImpl(pers1, FOAF.mbox, lf.createTypedLiteral("rwesten@apache.org")));
+        data.add(new TripleImpl(pers1, FOAF.age, lf.createTypedLiteral(38)));
+        data.add(new TripleImpl(pers1, FOAF.knows, pers2));
+        data.add(new TripleImpl(pers2, FOAF.name, new PlainLiteralImpl("Reto Bachmann-Gmür")));
+        rdfData = data.getGraph();
+    }
+
+    /** Parses the bundled JSON-LD test file and checks the number of triples. */
+    @Test
+    public void parserTest() {
+        final InputStream in = getClass().getClassLoader().getResourceAsStream(TEST_FILE);
+        // fail with a clear message instead of an error from inside the parser
+        Assert.assertNotNull("Test resource not found on classpath: " + TEST_FILE, in);
+        SimpleMGraph graph = new SimpleMGraph();
+        parser.parse(graph, in, FORMAT);
+        Assert.assertEquals(13, graph.size());
+    }
+
+    /** Serializes the test graph, re-parses the output and compares the triples. */
+    @Test
+    public void serializerTest(){
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        serializer.serialize(out, rdfData, FORMAT);
+        byte[] data = out.toByteArray();
+        log.info("Serialized Graph: \n {}",new String(data,UTF8));
+
+        //Now we reparse the graph to validate it was serialized correctly
+        SimpleMGraph reparsed = new SimpleMGraph();
+        parser.parse(reparsed, new ByteArrayInputStream(data), FORMAT);
+        Assert.assertEquals(7, reparsed.size());
+        for(Triple t : rdfData){
+            Assert.assertTrue("Re-parsed graph is missing triple " + t, reparsed.contains(t));
+        }
+
+    }
+}