-
Notifications
You must be signed in to change notification settings - Fork 4
/
ImproveSPOWithMarginalsModule.java
402 lines (338 loc) · 16.3 KB
/
ImproveSPOWithMarginalsModule.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
package cz.cvut.spipes.modules;
import cz.cvut.spipes.ModelLogger;
import cz.cvut.spipes.constants.KBSS_MODULE;
import cz.cvut.spipes.engine.ExecutionContext;
import cz.cvut.spipes.engine.ExecutionContextFactory;
import cz.cvut.spipes.engine.VariablesBinding;
import cz.cvut.spipes.modules.annotations.SPipesModule;
import cz.cvut.spipes.tdb.TDBTempFactory;
import cz.cvut.spipes.util.JenaUtils;
import cz.cvut.spipes.util.QueryUtils;
import static cz.cvut.spipes.util.VariableBindingUtils.restrict;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.query.QueryParseException;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.QuerySolutionMap;
import org.apache.jena.query.ResultSet;
import org.apache.jena.query.Syntax;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.util.FileUtils;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* TODO Order of queries is not enforced.
*/
@SPipesModule(label = "improve spo with marginals", comment = "Constructs improved spo-summary descriptor with knowledge " +
"of provided marginals of weakly described resources. This module expects as an input graph computed spo-summary " +
"patterns (or possibly whole spo-summary descriptor) compliant with data provided in ?data-service-url. Within the " +
"input graph it identifies 'breakable patterns', i.e. spo-summary patterns that can be improved with knowledge of " +
"marginals computed in ?marginals-defs-file-url. The output of the module is a spo-summary descriptor that contains " +
"original spo-summary patterns whenever possible and new spo-summary patterns that were created with additional " +
"knowledge of marginals.")
public class ImproveSPOWithMarginalsModule extends AnnotatedAbstractModule {
/** Class logger; also handed to the ModelLogger created in executeSelf(). */
private static final Logger LOG = LoggerFactory.getLogger(ImproveSPOWithMarginalsModule.class);
/** Identifier of this module, used to build its type URI and as the ModelLogger id. */
private static final String MODULE_ID = "improve-spo-with-marginals";
/** Type URI of this module (returned by getTypeURI()). */
private static final String TYPE_URI = KBSS_MODULE.uri + MODULE_ID;
/** Common prefix of the URIs of this module's parameters. */
private static final String TYPE_PREFIX = TYPE_URI + "/";
/**
 * Ad-hoc cache of models loaded by loadModelFromFile(), keyed by file URL plus last-modified time.
 * The reference is never reassigned, hence final; the map itself is a plain HashMap.
 * NOTE(review): the map is shared by all module instances and is not synchronized -- confirm that
 * modules are never executed concurrently.
 */
private static final Map<String, Model> marginalDefsModelCache = new HashMap<>();
/** Text substituted for the MARGINAL_CONSTRAINT marker of the pattern-data query template. */
@Parameter(urlPrefix = TYPE_PREFIX, name = "marginal-constraint", comment = "Marginal constraint")
private String marginalConstraint;
/** URL of the file with marginal definitions; loaded through loadModelFromFile(). */
@Parameter(urlPrefix = TYPE_PREFIX, name = "marginals-defs-file-url", comment = "Marginal definitions file url")
private String marginalsDefsFileUrl;
/** URL of the file with marginals; loaded through loadModelFromFile(). */
@Parameter(urlPrefix = TYPE_PREFIX, name = "marginals-file-url", comment = "Marginals file url")
private String marginalsFileUrl;
/** URL bound to the "dataServiceUrl" variable of the pattern-data query. */
@Parameter(urlPrefix = TYPE_PREFIX, name = "data-service-url", comment = "Data service url")
private String dataServiceUrl;
/** Name of the only input variable passed on to the SPO-computing queries. */
private static final String VAR_EXECUTION_ID = "executionId";
/**
 * Parses the given query text using the SPARQL 1.1 syntax.
 *
 * @param queryStr text of the query
 * @return the parsed query
 */
private static Query parseQuery(String queryStr) {
    final Query emptyQuery = QueryFactory.create();
    final String baseUri = "";
    return QueryFactory.parse(emptyQuery, queryStr, baseUri, Syntax.syntaxSPARQL_11);
}
/**
 * Reads a Turtle file from the hard-coded marginals directory into a new in-memory model.
 * Not called from within this class.
 *
 * @param fileName name of the file, relative to the marginals directory
 * @return model with the content of the file
 * @throws IllegalStateException if the file cannot be opened or closed
 */
private static Model loadModelFromTemporaryFile(String fileName) {
    String filePath = "/home/blcha/projects/gacr/git/16gacr-model/descriptor/marginals/" + fileName;
    // try-with-resources so the stream is released even when parsing fails
    try (FileInputStream in = new FileInputStream(filePath)) {
        return ModelFactory.createDefaultModel().read(in, "", FileUtils.langTurtle);
    } catch (IOException e) {
        // FileNotFoundException is an IOException, so a missing file still ends up here
        throw new IllegalStateException("Could not load model from file " + filePath, e);
    }
}
/**
 * Builds the improved spo-summary descriptor.
 * <p>
 * The method
 * <ol>
 * <li>retrieves relevant snapshots and loads the marginals and the marginal definitions,</li>
 * <li>selects the breakable patterns from the input model,</li>
 * <li>for every breakable pattern downloads its data, types it with the marginals and computes
 * the SPO patterns with weights and with snapshots,</li>
 * <li>merges the newly computed patterns with the non-breakable ones and adds their datasources.</li>
 * </ol>
 * Intermediate models are traced through a ModelLogger.
 *
 * @return execution context holding the resulting descriptor model
 */
@Override
ExecutionContext executeSelf() {
    final ModelLogger mLOG = new ModelLogger(MODULE_ID, LOG);
    final Model inputModel = executionContext.getDefaultModel();
    final VariablesBinding inputVB = executionContext.getVariablesBinding();

    LOG.debug("Retrieving relevant snapshots ...");
    // TODO it should be taken from parameter somehow
    Model relevantSnapshotsModel = retrieveRelevantSnapshots(inputVB);
    mLOG.trace("relevant-snapshots", relevantSnapshotsModel);

    LOG.debug("Loading marginals ...");
    Model marginalsModel = loadModelFromFile(marginalsFileUrl);
    mLOG.trace("marginals", marginalsModel);

    LOG.debug("Loading marginal definitions ...");
    Model marginalDefsModel = loadModelFromFile(marginalsDefsFileUrl);
    mLOG.trace("marginal-defs", marginalDefsModel);

    Model marginalsWithDefsModel = ModelFactory.createUnion(
        relevantSnapshotsModel,
        ModelFactory.createUnion(marginalsModel, marginalDefsModel)
    );

    String spoPatternDataQueryTemplate = loadQueryStringFromFile("/get-spo-pattern-data.rq");
    spoPatternDataQueryTemplate = QueryUtils.substituteMarkers("MARGINAL_CONSTRAINT", marginalConstraint, spoPatternDataQueryTemplate);

    ResultSet breakablePatternsRS = QueryUtils.execSelect(
        loadQueryFromFile("/get-breakable-patterns.rq"),
        inputModel,
        new QuerySolutionMap()
    );

    Model brokenPatternsOutputModel = ModelFactory.createDefaultModel();
    int i = 0;
    while (breakablePatternsRS.hasNext()) { // TODO !? bindings -> group of bindings
        i++;

        // Do not call breakablePatternsRS.next() here: getPatternData() passes the result set to
        // QueryUtils.nextResultsToValuesClause(rs, 1), which is expected to advance it by one row.
        // A former debug filter fetched a row here first, so that row was never processed and
        // every pattern except one hard-coded URI was skipped.

        // get pattern data
        Model patternDataModel = getPatternData(breakablePatternsRS, spoPatternDataQueryTemplate);
        mLOG.trace("pattern-data-" + i, patternDataModel);

        // extract appropriate marginals
        Model marginalTypesModel = computeMarginalTypesModel(patternDataModel, marginalsWithDefsModel);
        mLOG.trace("marginal-types-" + i, marginalTypesModel);

        Model spoPatternDataWithMarginalsModel = ModelFactory.createUnion(patternDataModel, marginalTypesModel);
        mLOG.trace("pattern-data-with-marginals-" + i, spoPatternDataWithMarginalsModel);

        Model spoWithWeight = computeSPOWithWeight(
            spoPatternDataWithMarginalsModel,
            restrict(inputVB, VAR_EXECUTION_ID)
        );
        mLOG.trace("spo-pattern-with-weight-" + i, spoWithWeight);

        Model spoWithSnapshots = computeSPOWithSnapshots(
            spoPatternDataWithMarginalsModel,
            restrict(inputVB, VAR_EXECUTION_ID)
        );
        mLOG.trace("spo-pattern-with-snapshosts-" + i, spoWithSnapshots);

        Model brokenPatternModel = ModelFactory.createUnion(spoWithWeight, spoWithSnapshots);
        mLOG.trace("broken-pattern-" + i, brokenPatternModel);

        brokenPatternsOutputModel.add(brokenPatternModel);
    }

    Model nonBreakablePatternsModel = getNonBreakablePatterns(inputModel);
    mLOG.trace("non-breakable-patterns", nonBreakablePatternsModel);

    Model mergedPatternsModel = mergePatterns(ModelFactory.createUnion(brokenPatternsOutputModel, nonBreakablePatternsModel));
    mLOG.trace("merged-patterns", mergedPatternsModel);

    Model dataSourcesModel = getDatasources(mergedPatternsModel);
    mLOG.trace("datasources", dataSourcesModel);

    Model outputModel = JenaUtils.createUnion(mergedPatternsModel, dataSourcesModel);
    return ExecutionContextFactory.createContext(outputModel);
}
/**
 * Runs the construct query from /get-relevant-snapshots.rq against an empty model.
 *
 * @param variablesBinding bindings passed to the query
 * @return model constructed by the query
 */
private Model retrieveRelevantSnapshots(VariablesBinding variablesBinding) {
    final Query snapshotsQuery = loadQueryFromFile("/get-relevant-snapshots.rq");
    final Model emptyModel = ModelFactory.createDefaultModel();
    return QueryUtils.execConstruct(snapshotsQuery, emptyModel, variablesBinding.asQuerySolution());
}
/**
 * Runs the construct query from /get-datasources.rq over the given model without any bindings.
 *
 * @param spoModel model the query is evaluated on
 * @return model constructed by the query
 */
private Model getDatasources(Model spoModel) {
    final Query datasourcesQuery = loadQueryFromFile("/get-datasources.rq");
    final QuerySolutionMap noBindings = new QuerySolutionMap();
    return QueryUtils.execConstruct(datasourcesQuery, spoModel, noBindings);
}
/**
 * Runs the construct query from /merge-spo-patterns.rq over the given model without any bindings.
 *
 * @param patternsModel model the query is evaluated on
 * @return model constructed by the query
 */
private Model mergePatterns(Model patternsModel) {
    final Query mergeQuery = loadQueryFromFile("/merge-spo-patterns.rq");
    final QuerySolutionMap noBindings = new QuerySolutionMap();
    return QueryUtils.execConstruct(mergeQuery, patternsModel, noBindings);
}
/**
 * Runs the construct query from /get-non-breakable-patterns.rq over the given model without any bindings.
 *
 * @param patternsModel model the query is evaluated on
 * @return model constructed by the query
 */
private Model getNonBreakablePatterns(Model patternsModel) {
    final Query nonBreakableQuery = loadQueryFromFile("/get-non-breakable-patterns.rq");
    final QuerySolutionMap noBindings = new QuerySolutionMap();
    return QueryUtils.execConstruct(nonBreakableQuery, patternsModel, noBindings);
}
/**
 * Downloads data of the next breakable pattern.
 * <p>
 * The VALUES marker of the query template is replaced by a clause built from the result set by
 * QueryUtils.nextResultsToValuesClause(rs, 1), and the resulting construct query is executed with
 * the variable "dataServiceUrl" bound to the configured data service URL. The result is written
 * into a model created by TDBTempFactory.
 *
 * @param breakablePatternsRS result set of breakable patterns; presumably advanced by one row -- verify against QueryUtils
 * @param spoPatternDataQueryTemplate query text containing the VALUES marker
 * @return model returned by the construct query
 */
private Model getPatternData(ResultSet breakablePatternsRS, String spoPatternDataQueryTemplate) {
    // substitute values
    String valuesStr = QueryUtils.nextResultsToValuesClause(breakablePatternsRS, 1);
    LOG.debug("Executing query to download patterns data with values: \n{}", valuesStr);
    String patternDataQueryStr = QueryUtils.substituteMarkers("VALUES", valuesStr, spoPatternDataQueryTemplate);
    Query patternDataQuery = parseQuery(patternDataQueryStr);

    VariablesBinding variablesBinding = new VariablesBinding("dataServiceUrl", ResourceFactory.createResource(dataServiceUrl));
    // parameterized logging: the messages are only rendered when trace level is enabled
    LOG.trace("Pattern data query:\n{}", patternDataQueryStr);
    LOG.trace("Variables bindings:{}", variablesBinding);

    Model outputModel = TDBTempFactory.createTDBModel();
    // TODO !? implement scrollable cursor
    return QueryUtils.execConstruct(
        patternDataQuery,
        ModelFactory.createDefaultModel(),
        variablesBinding.asQuerySolution(),
        outputModel);
}
/**
 * Runs the construct query from /compute-spo-with-weight.rq over pattern data with marginals.
 *
 * @param spoPatternDataWithMarginalsModel model the query is evaluated on
 * @param variablesBinding bindings passed to the query
 * @return model constructed by the query
 */
private Model computeSPOWithWeight(Model spoPatternDataWithMarginalsModel, VariablesBinding variablesBinding) {
    LOG.debug("Computing SPO with weight for pattern data with marginals ...");
    final Query weightQuery = loadQueryFromFile("/compute-spo-with-weight.rq");
    return QueryUtils.execConstruct(
        weightQuery,
        spoPatternDataWithMarginalsModel,
        variablesBinding.asQuerySolution());
}
/**
 * Runs the construct query from /compute-spo-with-snapshots.rq over pattern data with marginals.
 *
 * @param spoPatternDataWithMarginalsModel model the query is evaluated on
 * @param variablesBinding bindings passed to the query
 * @return model constructed by the query
 */
private Model computeSPOWithSnapshots(Model spoPatternDataWithMarginalsModel, VariablesBinding variablesBinding) {
    LOG.debug("Computing SPO with snapshots for pattern data with marginals ...");
    final Query snapshotsQuery = loadQueryFromFile("/compute-spo-with-snapshots.rq");
    return QueryUtils.execConstruct(
        snapshotsQuery,
        spoPatternDataWithMarginalsModel,
        variablesBinding.asQuerySolution());
}
/**
 * Runs the construct query from /get-marginal-types.rq over the union of the two given models.
 *
 * @param patternDataModel data of a single pattern
 * @param marginalWithDefsModel marginals together with their definitions
 * @return model constructed by the query
 */
private Model computeMarginalTypesModel(Model patternDataModel, Model marginalWithDefsModel) {
    LOG.debug("Executing query to get typed marginals ...");
    final Query marginalTypesQuery = loadQueryFromFile("/get-marginal-types.rq");
    final Model queriedModel = ModelFactory.createUnion(patternDataModel, marginalWithDefsModel);
    return QueryUtils.execConstruct(marginalTypesQuery, queriedModel, new QuerySolutionMap());
}
/**
 * Builds a map from resource URI to the set of URIs of its types, using the rows returned by the
 * select query from /get-marginal-types-simple.rq. Each row is read through its "resource" and
 * "resourceType" variables. Not called from within this class.
 *
 * @param marginalDefsModel model the query is evaluated on
 * @return map from resource URI to the URIs of its types
 */
private Map<String, Set<String>> buildMarginal2TypeMap(Model marginalDefsModel) {
    ResultSet rs = QueryUtils.execSelect(
        loadQueryFromFile("/get-marginal-types-simple.rq"),
        marginalDefsModel,
        null);

    Map<String, Set<String>> map = new HashMap<>();
    rs.forEachRemaining(
        // computeIfAbsent returns the set stored under the key, even when it was just created;
        // putIfAbsent would return null for a new key and the following add() would fail
        r -> map
            .computeIfAbsent(r.getResource("resource").getURI(), uri -> new HashSet<>())
            .add(r.getResource("resourceType").getURI())
    );
    return map;
}
/**
 * @return text substituted for the MARGINAL_CONSTRAINT marker of the pattern-data query
 */
public String getMarginalConstraint() {
return marginalConstraint;
}
/**
 * @param marginalConstraint text substituted for the MARGINAL_CONSTRAINT marker of the pattern-data query
 */
public void setMarginalConstraint(String marginalConstraint) {
this.marginalConstraint = marginalConstraint;
}
/**
 * @return URL of the file with marginal definitions
 */
public String getMarginalsDefsFileUrl() {
return marginalsDefsFileUrl;
}
/**
 * @param marginalsDefsFileUrl URL of the file with marginal definitions
 */
public void setMarginalsDefsFileUrl(String marginalsDefsFileUrl) {
this.marginalsDefsFileUrl = marginalsDefsFileUrl;
}
/**
 * @return URL bound to the "dataServiceUrl" variable of the pattern-data query
 */
public String getDataServiceUrl() {
return dataServiceUrl;
}
/**
 * @param dataServiceUrl URL bound to the "dataServiceUrl" variable of the pattern-data query
 */
public void setDataServiceUrl(String dataServiceUrl) {
this.dataServiceUrl = dataServiceUrl;
}
// TODO fix ad-hoc cache through static field
/**
 * Returns the model read from the given location, reusing a cached model when one is stored under
 * the same key (see computeFileContentHashKey). The whole cache is dropped once it holds more
 * than 10 entries.
 *
 * @param marginalsFilePath location of the file to read
 * @return cached or newly loaded model
 */
private Model loadModelFromFile(String marginalsFilePath) {
    // clear cache if too big
    if (marginalDefsModelCache.size() > 10) {
        marginalDefsModelCache.clear();
    }

    final String cacheKey = computeFileContentHashKey(marginalsFilePath);
    final Model hit = marginalDefsModelCache.get(cacheKey);
    if (hit != null) {
        LOG.debug("Using cached model of file {}", marginalsFilePath);
        return hit;
    }

    // cache miss: load the file into a fresh TDB-backed model and remember it
    final Model loaded = TDBTempFactory.createTDBModel();
    loaded.read(marginalsFilePath);
    marginalDefsModelCache.put(cacheKey, loaded);
    return loaded;
}
/**
 * Computes the cache key of a file. Despite the method name, the content is not hashed: the key
 * is the given path followed by the last-modified time of the file in milliseconds.
 *
 * @param filePath URI of the file, as a string
 * @return key of the form "path -- millis"
 * @throws IllegalArgumentException if the modification time cannot be read
 */
private String computeFileContentHashKey(String filePath) {
    final long lastModifiedMillis;
    try {
        lastModifiedMillis = Files.getLastModifiedTime(Paths.get(URI.create(filePath))).toMillis();
    } catch (IOException e) {
        LOG.warn("Could not access file from path " + filePath + ": " + e);
        throw new IllegalArgumentException("Could not access modification time of file from path " + filePath, e);
    }
    return filePath + " -- " + lastModifiedMillis;
}
/**
 * Reads a classpath resource as UTF-8 text.
 *
 * @param resourcePath path of the resource, resolved through this class
 * @return content of the resource
 * @throws IllegalStateException if the resource does not exist or cannot be read
 */
private String loadQueryStringFromFile(String resourcePath) {
    try (InputStream in = getClass().getResourceAsStream(resourcePath)) {
        // getResourceAsStream signals a missing resource by returning null
        if (in == null) {
            throw new IllegalStateException("Could not find query at resource path " + resourcePath);
        }
        return FileUtils.readWholeFileAsUTF8(in);
    } catch (IOException e) {
        throw new IllegalStateException("Could not load query from resource path " + resourcePath, e);
    }
}
/**
 * Loads a classpath resource and parses it as a SPARQL 1.1 query.
 *
 * @param resourcePath path of the resource holding the query text
 * @return the parsed query
 * @throws IllegalStateException if the resource cannot be loaded or its content cannot be parsed
 */
private Query loadQueryFromFile(String resourcePath) {
    final String queryText = loadQueryStringFromFile(resourcePath);
    try {
        return parseQuery(queryText);
    } catch (QueryParseException e) {
        throw new IllegalStateException("Could not parse query from resource path " + resourcePath, e);
    }
}
/**
 * Placeholder with an empty body; it is not called from within this class.
 * NOTE(review): presumably meant to restrict marginal definitions to the relevant ones -- confirm or remove.
 */
private void filterRelevantMarginalDefs() {
}
/**
 * Returns the path prefix of the temporary-file directory for the configured data service.
 * <p>
 * The directory name is derived from the data service URL: a leading "https://" becomes "s-", a
 * leading "http://" is dropped, and every character other than ASCII letters, digits and '-' is
 * replaced by '_'. The directory is created, together with missing parents, when it does not
 * exist yet.
 *
 * @return directory path ending with '/'
 */
private String getFilePrefix() {
    String id = dataServiceUrl.replaceAll("^https://", "s-").replaceAll("^http://", "").replaceAll("[^a-zA-Z0-9-]", "_");
    String directoryPath = "/home/blcha/projects/gacr/git/16gacr-model/descriptor/marginals/" + id;
    File directory = new File(directoryPath);
    // mkdirs() also creates missing parent directories; report a failure instead of ignoring it
    if (!directory.isDirectory() && !directory.mkdirs()) {
        LOG.warn("Could not create directory {}", directoryPath);
    }
    return directoryPath + '/';
}
/**
 * Writes the model in Turtle into a file located in the directory returned by getFilePrefix().
 * Saving is best-effort: an I/O failure is logged and not propagated. Not called from within
 * this class.
 *
 * @param model model to be written
 * @param fileName name of the target file
 */
private void saveModelToTemporaryFile(Model model, String fileName) {
    String filePath = getFilePrefix() + fileName;
    LOG.debug("Saving model to temporary file {} ...", filePath);
    // try-with-resources so the stream is flushed and closed even when writing fails
    try (FileOutputStream out = new FileOutputStream(filePath)) {
        model.write(out, FileUtils.langTurtle);
    } catch (IOException e) {
        LOG.error("Could not save model to temporary file {}", filePath, e);
    }
}
/**
 * @return type URI of this module, i.e. KBSS_MODULE.uri followed by the module id
 */
@Override
public String getTypeURI() {
return TYPE_URI;
}
/**
 * Loads the configuration of the superclass and then the four string parameters of this module.
 * Every parameter is mandatory: getEffectiveStringValue throws when a value is missing.
 */
@Override
public void loadConfiguration() {
    super.loadConfiguration();

    // parameters are read in this fixed order; the first missing one aborts the loading
    this.marginalConstraint = getEffectiveStringValue(TYPE_PREFIX + "marginal-constraint");
    this.marginalsDefsFileUrl = getEffectiveStringValue(TYPE_PREFIX + "marginals-defs-file-url");
    this.marginalsFileUrl = getEffectiveStringValue(TYPE_PREFIX + "marginals-file-url");
    this.dataServiceUrl = getEffectiveStringValue(TYPE_PREFIX + "data-service-url");
}
/**
 * Returns the effective value of the given property as a string.
 *
 * @param propertyUrl URL of the property to look up
 * @return string form of the literal value of the property
 * @throws RuntimeException if the property has no effective value
 */
private @NotNull
String getEffectiveStringValue(String propertyUrl) {
    RDFNode value = getEffectiveValue(ResourceFactory.createProperty(propertyUrl));
    if (value == null) {
        throw new RuntimeException(
            String.format("Module's parameter '%s' returned value 'null' which is not allowed.", propertyUrl)
        );
    }
    // reuse the value that was just null-checked instead of looking it up a second time
    return value.asLiteral().toString();
}
}