This repository has been archived by the owner on Jun 23, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 8
/
RdfModelFileWriter.java
147 lines (127 loc) · 4.39 KB
/
RdfModelFileWriter.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/* Copyright 2013 Pascal Christoph, hbz.
* Licensed under the Eclipse Public License 1.0 */
package org.lobid.lodmill;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.NoSuchElementException;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.RDFLanguages;
import org.culturegraph.mf.exceptions.MetafactureException;
import org.culturegraph.mf.framework.DefaultObjectReceiver;
import org.culturegraph.mf.framework.annotations.Description;
import org.culturegraph.mf.framework.annotations.In;
import org.culturegraph.mf.framework.annotations.Out;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.hp.hpl.jena.rdf.model.Model;
/**
 * A sink, writing triples into files. The filenames are constructed from the
 * literal of a given property.
 * <p>
 * For every received model the first object of the configured property is
 * used as identifier. The identifier becomes the main part of the file name
 * and, cut down to the range {@code [startIndex, endIndex)}, the name of a
 * sub-directory below the target directory.
 * <p>
 * The default constructor configures the property
 * {@code http://purl.org/dc/terms/identifier}, the file suffix {@code nt} and
 * the serialization {@code NTRIPLES}. Note that the option called 'stopIndex'
 * in the description text is set through {@link #setEndIndex(int)}.
 *
 * @author Pascal Christoph
 */
@Description("Writes the object value of an RDF model into a file. Default serialization is 'NTRIPLES'. The filename is "
    + "constructed from the literal of an given property (recommended properties are identifier)."
    + " Variable are " + "- 'target' (determining the output directory)"
    + "- 'property' (the property in the RDF model. The object of this property"
    + " will be the main part of the file's name.) "
    + "- 'startIndex' ( a subfolder will be extracted out of the filename. This marks the index' beginning )"
    + "- 'stopIndex' ( a subfolder will be extracted out of the filename. This marks the index' end )"
    + "- 'serialization (e.g. one of 'NTRIPLES', 'TURTLE', 'RDFXML','RDFJSON'")
@In(Model.class)
@Out(Void.class)
public final class RdfModelFileWriter extends DefaultObjectReceiver<Model>
    implements FilenameExtractor, RDFSink {
  private static final Logger LOG =
      LoggerFactory.getLogger(RdfModelFileWriter.class);

  /** Holds target directory, property, suffix, encoding and index settings. */
  private final FilenameUtil filenameUtil = new FilenameUtil();

  /** The RDF language used to serialize the incoming models. */
  private Lang serialization;

  /**
   * Default constructor. Uses dcterms:identifier as the filename property,
   * "nt" as file suffix and "NTRIPLES" as serialization.
   */
  public RdfModelFileWriter() {
    setProperty("http://purl.org/dc/terms/identifier");
    setFileSuffix("nt");
    setSerialization("NTRIPLES");
  }

  /**
   * @return the name of the character encoding used when writing files
   */
  @Override
  public String getEncoding() {
    return filenameUtil.encoding;
  }

  /**
   * @param encoding the name of the character encoding used when writing
   *          files
   */
  @Override
  public void setEncoding(final String encoding) {
    filenameUtil.encoding = encoding;
  }

  /**
   * @param target the directory below which the files are written
   */
  @Override
  public void setTarget(final String target) {
    filenameUtil.target = target;
  }

  /**
   * @param property the URI of the property whose first object value is used
   *          as identifier, i.e. as the main part of the file name
   */
  @Override
  public void setProperty(final String property) {
    filenameUtil.property = property;
  }

  /**
   * @param fileSuffix the suffix appended (after a dot) to the identifier
   */
  @Override
  public void setFileSuffix(final String fileSuffix) {
    filenameUtil.fileSuffix = fileSuffix;
  }

  /**
   * @param startIndex the (inclusive) index in the identifier at which the
   *          name of the sub-directory begins
   */
  @Override
  public void setStartIndex(final int startIndex) {
    filenameUtil.startIndex = startIndex;
  }

  /**
   * @param endIndex the (exclusive) index in the identifier at which the name
   *          of the sub-directory ends
   */
  @Override
  public void setEndIndex(final int endIndex) {
    filenameUtil.endIndex = endIndex;
  }

  /**
   * @param serialization the name of the RDF serialization, resolved through
   *          {@link RDFLanguages#nameToLang(String)}
   */
  @Override
  public void setSerialization(final String serialization) {
    this.serialization = RDFLanguages.nameToLang(serialization);
  }

  /**
   * Serializes the model and writes it to
   * {@code <target>/<sub-directory>/<identifier>.<fileSuffix>}. Missing parent
   * directories are created. Models lacking the configured property are
   * skipped with a warning.
   *
   * @param model the RDF model to write
   * @throws MetafactureException if the file cannot be written
   */
  @Override
  public void process(final Model model) {
    final String identifier;
    try {
      // Only the first object of the property is considered.
      identifier = model
          .listObjectsOfProperty(model.createProperty(filenameUtil.property))
          .next().toString();
      LOG.debug("Going to store identifier={}", identifier);
    } catch (NoSuchElementException e) {
      LOG.warn(
          "No identifier => cannot derive a filename for " + model.toString());
      return;
    }
    // The sub-directory is a slice of the identifier. Identifiers shorter
    // than endIndex are used as a whole.
    // NOTE(review): assumes 0 <= startIndex <= endIndex, otherwise
    // substring() throws - confirm against the configuration.
    String directory = identifier;
    if (directory.length() >= filenameUtil.endIndex) {
      directory =
          directory.substring(filenameUtil.startIndex, filenameUtil.endIndex);
    }
    final String fileName = identifier + "." + filenameUtil.fileSuffix;
    // An empty sub-directory must not be turned into a bare separator:
    // FilenameUtils.concat would treat the result as an absolute path and
    // drop the target directory, writing into the filesystem root.
    final String relativePath = directory.isEmpty() ? fileName
        : FilenameUtils.concat(directory + File.separator, fileName);
    final String file =
        FilenameUtils.concat(filenameUtil.target, relativePath);
    LOG.debug("Write to " + file);
    // Serialize before touching the file system, so that a failing
    // serialization does not leave an empty, truncated file behind.
    final StringWriter tripleWriter = new StringWriter();
    RDFDataMgr.write(tripleWriter, model, this.serialization);
    ensurePathExists(file);
    // try-with-resources closes the stream and the writer on every path,
    // including an unsupported encoding name and failures while writing.
    try (FileOutputStream out = new FileOutputStream(file);
        Writer writer = new OutputStreamWriter(out, filenameUtil.encoding)) {
      IOUtils.write(tripleWriter.toString(), writer);
    } catch (IOException e) {
      LOG.error("Could not write " + file, e);
      throw new MetafactureException(e);
    }
  }

  /**
   * Creates the parent directories of the given path if they are missing.
   *
   * @param path the path of the file that is about to be written
   */
  private static void ensurePathExists(final String path) {
    final File parent = new File(path).getAbsoluteFile().getParentFile();
    // A path without a parent (a filesystem root) needs no directories.
    if (parent != null) {
      parent.mkdirs();
    }
  }
}