-
Notifications
You must be signed in to change notification settings - Fork 4
/
RDF2CSVModule.java
139 lines (117 loc) · 5.77 KB
/
RDF2CSVModule.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
package cz.cvut.spipes.modules;
import cz.cvut.spipes.constants.CSVW;
import cz.cvut.spipes.constants.KBSS_CSVW;
import cz.cvut.spipes.constants.KBSS_MODULE;
import cz.cvut.spipes.engine.ExecutionContext;
import cz.cvut.spipes.engine.ExecutionContextFactory;
import cz.cvut.spipes.modules.annotations.SPipesModule;
import org.apache.jena.rdf.model.*;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.RDFFormat;
import org.apache.jena.vocabulary.RDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.supercsv.io.CsvListWriter;
import org.supercsv.prefs.CsvPreference;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
/**
* Module for converting RDF (representing table) to CSV
* <p>
* The module is responsible for converting the input RDF data into a CSV format and saving the output to a file.
* The table is constructed from column and row resources defined in TableSchema and saves it as a new CSV file.
* </p>
*/
@SPipesModule(label = "RDF2CSV", comment = "Module for converting RDF (representing table) to CSV. " +
"The module is responsible for converting the input RDF data into a CSV format and saving the output to a file." +
"The table is constructed from column and row resources defined in TableSchema and saves it as a new CSV file.")
public class RDF2CSVModule extends AnnotatedAbstractModule {
public static final String TYPE_URI = KBSS_MODULE.uri + "RDF2CSV";
public static final String TYPE_PREFIX = TYPE_URI + "/";
private static final Logger LOG = LoggerFactory.getLogger(RDF2CSVModule.class);
@Parameter(urlPrefix = TYPE_PREFIX, name = "file-output-path",
comment = "The parameter representing where the output file will be stored.")
private String fileOutputPath;
@Override
ExecutionContext executeSelf(){
Model inputRDF = this.getExecutionContext().getDefaultModel();
try(CsvListWriter simpleWriter = new CsvListWriter
(new FileWriter(fileOutputPath, false),
CsvPreference.STANDARD_PREFERENCE)
){
Resource table = inputRDF.listResourcesWithProperty(RDF.type, CSVW.Table)
.next();
if (table == null) {
LOG.warn("No Table resource found in the input RDF.");
return ExecutionContextFactory.createContext(inputRDF);
}
Resource tableSchema = table.getProperty(CSVW.tableSchema).getObject().asResource();
if (tableSchema == null) {
LOG.warn("No TableSchema resource found in the input RDF.");
return ExecutionContextFactory.createContext(inputRDF);
}
Statement columnsStatement = tableSchema.getProperty(CSVW.columns);
if (columnsStatement == null) {
LOG.warn("Columns statement not found in the table schema.");
return ExecutionContextFactory.createContext(inputRDF);
}
Resource columnsList = columnsStatement.getObject().asResource();
RDFList columns = columnsList.as(RDFList.class);
if (columns == null || columns.isEmpty()) {
LOG.warn("Columns list not found or is empty in the columns statement.");
return ExecutionContextFactory.createContext(inputRDF);
}
List<String> header = columns.asJavaList().stream()
.map(rdfNode -> {
Resource columnResource = rdfNode.asResource();
Statement nameStatement = columnResource.getProperty(CSVW.name);
if (nameStatement == null) {
LOG.warn("Name property not found for column resource.");
return "";
}
RDFNode titleNode = nameStatement.getObject();
if (titleNode == null) {
LOG.warn("Name node not found in the name statement.");
return "";
}
return titleNode.toString();
})
.collect(Collectors.toList());
simpleWriter.write(header);
List<RDFNode> rowList = table.listProperties(CSVW.row)
.mapWith(Statement::getObject)
.toList();
rowList.sort(Comparator.comparingInt(o -> o.asResource().getProperty(CSVW.rowNum).getInt()));
for (RDFNode node: rowList){
List<String> row = new ArrayList<>();
Resource rowResource = node.asResource();
Resource res = rowResource.getProperty(CSVW.describes).getObject().asResource();
for (RDFNode col : columns.asJavaList()) {
Property property = inputRDF.getProperty(col.asResource().getProperty(KBSS_CSVW.property).getObject().toString());
row.add(res.hasProperty(property) ? getObjectValueFromStatement(res.getProperty(property)) : "");
}
simpleWriter.write(row);
}
}catch (IOException e){
e.printStackTrace();
}
return ExecutionContextFactory.createContext(inputRDF);
}
@Override
public String getTypeURI() {
return TYPE_URI;
}
private String getObjectValueFromStatement(Statement st){
if (st == null) return "";
RDFNode node = st.getObject();
if(node == null) return "";
return node.isLiteral()
? Optional.ofNullable(node.asNode().getLiteralValue().toString()).orElse("")
: Optional.ofNullable(node.toString()).orElse("");
}
}