-
Notifications
You must be signed in to change notification settings - Fork 4
/
ImportE5XModule.java
193 lines (153 loc) · 8.32 KB
/
ImportE5XModule.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
package cz.cvut.spipes.modules;
import cz.cvut.kbss.commons.io.NamedStream;
import cz.cvut.kbss.eccairs.report.e5xml.E5XMLLoader;
import cz.cvut.kbss.eccairs.report.e5xml.e5x.E5XXMLParser;
import cz.cvut.kbss.eccairs.report.model.EccairsReport;
import cz.cvut.kbss.eccairs.report.model.dao.EccairsReportDao;
import cz.cvut.kbss.eccairs.schema.dao.SingeltonEccairsAccessFactory;
import cz.cvut.kbss.jopa.model.EntityManager;
import cz.cvut.kbss.jopa.model.EntityManagerFactory;
import cz.cvut.kbss.jopa.model.descriptors.Descriptor;
import cz.cvut.kbss.jopa.model.descriptors.EntityDescriptor;
import cz.cvut.kbss.ucl.MappingEccairsData2Aso;
import cz.cvut.spipes.constants.KBSS_MODULE;
import cz.cvut.spipes.engine.ExecutionContext;
import cz.cvut.spipes.engine.ExecutionContextFactory;
import cz.cvut.spipes.exception.ResourceNotFoundException;
import cz.cvut.spipes.modules.annotations.SPipesModule;
import cz.cvut.spipes.modules.eccairs.EccairsAccessFactory;
import cz.cvut.spipes.modules.eccairs.JopaPersistenceUtils;
import cz.cvut.spipes.modules.eccairs.SesameDataDao;
import cz.cvut.spipes.registry.StreamResource;
import cz.cvut.spipes.registry.StreamResourceRegistry;
import cz.cvut.spipes.util.JenaUtils;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.util.FileUtils;
import org.eclipse.rdf4j.repository.Repository;
import org.eclipse.rdf4j.repository.RepositoryException;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
/**
 * SPipes module that converts the content of an e5x resource (a plain XML document or a ZIP
 * archive) into an RDF model.
 *
 * <p>The resource is parsed into an {@link EccairsReport}, persisted through JOPA into a
 * repository created by {@link JopaPersistenceUtils#createEntityManagerFactoryWithMemoryStore()},
 * optionally mapped by {@link MappingEccairsData2Aso}, and finally the data of the report context
 * is read back as a Jena {@link Model} that becomes the output execution context.
 */
@SPipesModule(label = "import e5x", comment = "Convert e5x xml files to rdf.")
public class ImportE5XModule extends AbstractModule {

    private static final Logger LOG = LoggerFactory.getLogger(ImportE5XModule.class);

    // TODO - this parameter is defined with IRI <http://onto.fel.cvut.cz/ontologies/lib/module-param/has-resource-uri> in s-pipes-modules\module.sms.ttl
    // TODO - we should be able to annotate "StreamResource e5xResource" directly instead
    @Parameter(name = "has-resource-uri", comment = "Uri of a resource referencing content of an e5x file.")
    private String e5xResourceUriStr;

    /** Resolved stream resource holding the e5x content; set by configuration or by the setters. */
    StreamResource e5xResource;

    /** When {@code true} (default), the persisted report is additionally processed by {@link MappingEccairsData2Aso}. */
    private boolean computeEccairsToAviationSafetyOntologyMapping = true;

    /**
     * Parses the configured e5x resource and returns its RDF representation.
     *
     * @return a context wrapping the produced model, or an empty context when the content type is
     *         unsupported, no report could be parsed, or processing failed with an
     *         {@link IOException} / {@link RepositoryException} (which is logged)
     */
    @Override
    ExecutionContext executeSelf() {
        ExecutionContext outputExecutionContext = ExecutionContextFactory.createEmptyContext();

        // create eccairs schema factory
        final SingeltonEccairsAccessFactory eaf = EccairsAccessFactory.getInstance();

        final String contentType = e5xResource.getContentType();
        final boolean xml = isXmlContentType(contentType);
        if (!xml && !isZipContentType(contentType)) {
            LOG.debug("Unsupported Content Type {}", contentType);
            return outputExecutionContext;
        }

        final NamedStream e5xResourceStream =
                new NamedStream(e5xResource.getUri() + ".e5x", new ByteArrayInputStream(e5xResource.getContent()));

        Repository sesameRepo = null;
        try {
            final EccairsReport r = xml
                    ? parseXmlReport(eaf, e5xResourceStream)
                    : parseZipReport(eaf, e5xResourceStream);
            if (r == null) {
                LOG.debug("No report parsed, terminating.");
                return outputExecutionContext;
            }

            final String reportContext = EccairsReport.createContextURI(e5xResource.getUri());
            r.setUri(reportContext);
            final URI reportContextUri = URI.create(reportContext);
            final Descriptor d = new EntityDescriptor(reportContextUri);

            // The entity manager factory (backed by a memory store) is created only once there is
            // a report to persist, so the early returns above do not leave it behind unused.
            final EntityManagerFactory emf = JopaPersistenceUtils.createEntityManagerFactoryWithMemoryStore();
            final EntityManager em = emf.createEntityManager();
            final EccairsReportDao dao = new EccairsReportDao(em);

            // persisting the parsed report
            em.getTransaction().begin();
            dao.safePersist(r, d);
            // the transaction needs to be committed; the updates below operate on the persisted report
            em.getTransaction().commit();

            if (computeEccairsToAviationSafetyOntologyMapping) {
                // create the class for the mappings between eccairs and aso
                final MappingEccairsData2Aso mapping = new MappingEccairsData2Aso(eaf);
                em.getTransaction().begin();
                // NOTE(review): the descriptor's toString() is passed here, not reportContext —
                // confirm this is what mapReport expects.
                mapping.mapReport(r, em, d.toString());
                em.getTransaction().commit();
            }

            sesameRepo = JopaPersistenceUtils.getRepository(em);
            final String transformedModelText = SesameDataDao.getRepositoryData(sesameRepo, reportContextUri);
            final Model outputModel = JenaUtils.readModelFromString(transformedModelText, FileUtils.langXML);
            removeDefaultPrefix(outputModel);
            outputExecutionContext = ExecutionContextFactory.createContext(outputModel);
        } catch (IOException e) {
            LOG.warn("An exception occurred during report processing.", e);
        } catch (RepositoryException e) {
            LOG.warn("Failed to close sesame repository connection", e);
        } finally {
            // Runs on failure paths as well, so the repository is released once it was obtained.
            if (sesameRepo != null) {
                shutDownRepository(sesameRepo);
            }
        }
        return outputExecutionContext;
    }

    /** Returns {@code true} for the content types handled as a plain XML document. */
    private static boolean isXmlContentType(String contentType) {
        return "text/xml".equals(contentType) || "application/xml".equals(contentType);
    }

    /** Returns {@code true} for the content types handled as a ZIP archive; a missing or empty type counts as ZIP. */
    private static boolean isZipContentType(String contentType) {
        return contentType == null
                || contentType.isEmpty()
                || "application/zip".equals(contentType)
                || "application/octet-stream".equals(contentType);
    }

    /** Parses the resource as a single XML document and returns the parser's report. */
    private EccairsReport parseXmlReport(SingeltonEccairsAccessFactory eaf, NamedStream e5xResourceStream)
            throws IOException {
        LOG.debug("File considered XML (Content Type: {})", e5xResource.getContentType());
        if (LOG.isDebugEnabled()) {
            // Guarded: decoding the whole payload is wasted work when debug logging is off.
            final byte[] content = e5xResource.getContent();
            LOG.debug("- content length: {}, content (as string) : {}",
                    content.length, new String(content, StandardCharsets.UTF_8));
        }
        // create factory to parse eccairs values
        final E5XXMLParser e5xXMLParser = new E5XXMLParser(eaf);
        e5xXMLParser.parseDocument(e5xResourceStream);
        return e5xXMLParser.getReport();
    }

    /** Loads reports from the resource treated as a ZIP archive and returns the first one, or {@code null} if none was found. */
    private EccairsReport parseZipReport(SingeltonEccairsAccessFactory eaf, NamedStream e5xResourceStream)
            throws IOException {
        LOG.debug("File considered ZIP (Content Type: {})", e5xResource.getContentType());
        if (LOG.isDebugEnabled()) {
            // Guarded: rendering the whole byte array is wasted work when debug logging is off.
            final byte[] content = e5xResource.getContent();
            LOG.debug("- content length: {}, content (as byte array): {}", content.length, Arrays.toString(content));
        }
        // ZIP by default
        final E5XMLLoader loader = new E5XMLLoader(e5xResourceStream, eaf);
        LOG.debug("- loader created based on resource stream name:{}, email:{}, stream:{}, closed: {}",
                e5xResourceStream.getName(), e5xResourceStream.getEmailId(),
                e5xResourceStream.getContent(), e5xResourceStream.isCloased());
        final EccairsReport[] reports = loader.loadData().toArray(EccairsReport[]::new);
        LOG.debug("- found {} reports", reports.length);
        return reports.length > 0 ? reports[0] : null;
    }

    /** Shuts the repository down, logging (not propagating) a {@link RepositoryException}. */
    private static void shutDownRepository(Repository sesameRepo) {
        try {
            // NOTE(review): per the RDF4J docs getConnection() opens a new connection, so this
            // close is presumably a no-op kept from the original teardown — confirm before removing.
            sesameRepo.getConnection().close();
            sesameRepo.shutDown();
        } catch (RepositoryException e) {
            LOG.warn("Failed to close sesame repository connection", e);
        }
    }

    /** Removes the default (empty) namespace prefix from the given model. */
    private void removeDefaultPrefix(Model outputModel) {
        outputModel.removeNsPrefix("");
    }

    @Override
    public String getTypeURI() {
        return KBSS_MODULE.getURI() + "import-e5x";
    }

    /**
     * Reads the resource URI from the module configuration and resolves it to a stream resource.
     *
     * @throws ResourceNotFoundException if no stream resource is registered under the configured URI
     */
    @Override
    public void loadConfiguration() {
        // NOTE(review): Literal#toString() is used as the URI string — confirm it equals the
        // lexical form for the literals used in configurations.
        e5xResourceUriStr = getEffectiveValue(KBSS_MODULE.has_resource_uri).asLiteral().toString();
        e5xResource = getResourceByUri(e5xResourceUriStr);
    }

    /**
     * @return the URI of the resolved resource, or the configured URI string when no resource has
     *         been resolved yet (possibly {@code null})
     */
    public String getE5xResourceUri() {
        return e5xResource != null ? e5xResource.getUri() : e5xResourceUriStr;
    }

    public StreamResource getE5xResource() {
        return e5xResource;
    }

    /**
     * Resolves the given URI to a stream resource and uses it as the module input.
     *
     * @param e5xResourceUri URI under which the stream resource is registered
     * @throws ResourceNotFoundException if no such resource is registered; the module state is left unchanged
     */
    public void setE5xResourceUri(String e5xResourceUri) {
        // Resolve first so a failed lookup leaves both fields untouched, then keep them in sync
        // the same way loadConfiguration() does.
        final StreamResource resolved = getResourceByUri(e5xResourceUri);
        this.e5xResourceUriStr = e5xResourceUri;
        this.e5xResource = resolved;
    }

    public void setE5xResource(@NotNull StreamResource e5xResource) {
        this.e5xResource = e5xResource;
    }

    /**
     * Looks the resource up in the {@link StreamResourceRegistry}.
     *
     * @throws ResourceNotFoundException if the registry returns no resource for the URI
     */
    @NotNull
    private StreamResource getResourceByUri(@NotNull String e5xResourceUriStr) {
        StreamResource res = StreamResourceRegistry.getInstance().getResourceByUrl(e5xResourceUriStr);
        if (res == null) {
            throw new ResourceNotFoundException("Stream resource " + e5xResourceUriStr + " not found. ");
        }
        return res;
    }

    public boolean isComputeEccairsToAviationSafetyOntologyMapping() {
        return computeEccairsToAviationSafetyOntologyMapping;
    }

    public void setComputeEccairsToAviationSafetyOntologyMapping(boolean computeEccairsToAviationSafetyOntologyMapping) {
        this.computeEccairsToAviationSafetyOntologyMapping = computeEccairsToAviationSafetyOntologyMapping;
    }
}