diff --git a/owlsim-core/pom.xml b/owlsim-core/pom.xml
index dab2628..ad5b5ff 100644
--- a/owlsim-core/pom.xml
+++ b/owlsim-core/pom.xml
@@ -189,7 +189,7 @@
org.prefixcommons
curie-util
- 0.0.1
+ 0.0.2
diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OWLLoader.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OWLLoader.java
index 0c3fa36..c213399 100644
--- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OWLLoader.java
+++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OWLLoader.java
@@ -1,15 +1,6 @@
package org.monarchinitiative.owlsim.io;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.HashMap;
-import java.util.List;
-import java.util.zip.GZIPInputStream;
-
+import com.google.common.base.Preconditions;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.monarchinitiative.owlsim.kb.BMKnowledgeBase;
@@ -17,23 +8,22 @@
import org.prefixcommons.CurieUtil;
import org.semanticweb.elk.owlapi.ElkReasonerFactory;
import org.semanticweb.owlapi.apibinding.OWLManager;
-import org.semanticweb.owlapi.model.IRI;
-import org.semanticweb.owlapi.model.OWLClassAssertionAxiom;
-import org.semanticweb.owlapi.model.OWLDataFactory;
-import org.semanticweb.owlapi.model.OWLOntology;
-import org.semanticweb.owlapi.model.OWLOntologyCreationException;
-import org.semanticweb.owlapi.model.OWLOntologyManager;
+import org.semanticweb.owlapi.model.*;
import org.semanticweb.owlapi.reasoner.OWLReasoner;
import org.semanticweb.owlapi.reasoner.OWLReasonerFactory;
-import com.google.common.base.Preconditions;
+import java.io.*;
+import java.util.HashMap;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
/**
* Object for loading OWL ontologies into a {@link BMKnowledgeBase}
*
* Note that a KB consists of classes and individuals, both of which can be
* loaded from an ontology
- *
+ *
+ * @deprecated Use {@link OwlKnowledgeBase#loader()} instead.
* @author cjm
*
*/
@@ -52,7 +42,7 @@
* @return OWL Ontology
* @throws OWLOntologyCreationException
*/
- public OWLOntology loadOWL(IRI iri) throws OWLOntologyCreationException {
+ private OWLOntology loadOWL(IRI iri) throws OWLOntologyCreationException {
return getOWLOntologyManager().loadOntology(iri);
}
@@ -61,7 +51,7 @@ public OWLOntology loadOWL(IRI iri) throws OWLOntologyCreationException {
* @return OWL Ontology
* @throws OWLOntologyCreationException
*/
- public OWLOntology loadOWL(File file) throws OWLOntologyCreationException {
+ private OWLOntology loadOWL(File file) throws OWLOntologyCreationException {
IRI iri = IRI.create(file);
return getOWLOntologyManager().loadOntologyFromOntologyDocument(iri);
}
@@ -73,7 +63,7 @@ public OWLOntology loadOWL(File file) throws OWLOntologyCreationException {
* @return OWL Ontology
* @throws OWLOntologyCreationException
*/
- public OWLOntology loadOWL(String path) throws OWLOntologyCreationException {
+ private OWLOntology loadOWL(String path) throws OWLOntologyCreationException {
if (path.startsWith("http")) {
return loadOWL(IRI.create(path));
} else {
diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/Ontology.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/Ontology.java
new file mode 100644
index 0000000..35f6b72
--- /dev/null
+++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/Ontology.java
@@ -0,0 +1,279 @@
+package org.monarchinitiative.owlsim.io;
+
+import org.apache.commons.validator.routines.UrlValidator;
+import org.apache.log4j.Logger;
+import org.prefixcommons.CurieUtil;
+import org.semanticweb.owlapi.apibinding.OWLManager;
+import org.semanticweb.owlapi.model.*;
+import org.semanticweb.owlapi.model.parameters.ChangeApplied;
+import uk.ac.manchester.cs.owl.owlapi.concurrent.Concurrency;
+
+import java.io.*;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Collection;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * OWL API wrapper to facilitate building OWLOntology objects to load into the {@link org.monarchinitiative.owlsim.kb.impl.BMKnowledgeBaseOWLAPIImpl}
+ * <p>
+ * All sources listed in the {@link OntologySourceData} are merged into a single {@link OWLOntology} when the instance
+ * is created.
+ *
+ * @author Jules Jacobsen
+ */
+public class Ontology {
+
+    private static final Logger logger = Logger.getLogger(Ontology.class);
+
+    //OWLOntology is a mutable object
+    private final OntologySourceData sourceData;
+    private final CurieUtil curieUtil;
+    private final OWLOntology owlOntology;
+
+    private final OWLOntologyManager ontologyManager;
+
+    /**
+     * Builds the merged ontology eagerly; any load failure surfaces as an {@link OntologyLoadException}.
+     */
+    private Ontology(OntologySourceData sourceData, Concurrency concurrency) {
+        Objects.requireNonNull(sourceData, "Unable to create Ontology without data sources.");
+        this.sourceData = sourceData;
+        this.curieUtil = new CurieUtil(sourceData.getCuries());
+        this.ontologyManager = createOntologyManager(concurrency);
+        this.owlOntology = createEmptyOntology(ontologyManager);
+        loadOwlOntology();
+    }
+
+    /**
+     * Loads an ontology using a concurrent OWLOntologyManager.
+     *
+     * @param sourceData the ontologies, data files and curies to load; must not be null
+     * @return An Ontology created from the source data provided.
+     */
+    public static Ontology load(OntologySourceData sourceData) {
+        return new Ontology(sourceData, Concurrency.CONCURRENT);
+    }
+
+    /**
+     * Loads an ontology using an OWLOntologyManager using the concurrency type specified.
+     *
+     * @param sourceData  the ontologies, data files and curies to load; must not be null
+     * @param concurrency the ontology manager type; {@code null} is treated as {@link Concurrency#CONCURRENT}
+     * @return An Ontology created from the source data provided.
+     */
+    public static Ontology load(OntologySourceData sourceData, Concurrency concurrency) {
+        return new Ontology(sourceData, useConcurrentIfNull(concurrency));
+    }
+
+    private static Concurrency useConcurrentIfNull(Concurrency concurrency) {
+        return concurrency == null ? Concurrency.CONCURRENT : concurrency;
+    }
+
+    /**
+     * @return the merged (mutable) OWL ontology built from all the sources
+     */
+    public OWLOntology getOwlOntology() {
+        return owlOntology;
+    }
+
+    /**
+     * @return the source data this ontology was built from
+     */
+    public OntologySourceData getSourceData() {
+        return sourceData;
+    }
+
+    /**
+     * @return the CurieUtil created from the curies in the source data
+     */
+    public CurieUtil getCurieUtil() {
+        return curieUtil;
+    }
+
+    private void loadOwlOntology() {
+        //Order matters here - don't change it.
+        mergeOntologies(sourceData.getOntologies());
+        mergeOntologies(sourceData.getDataOntologies());
+        loadDataFromTsv(sourceData.getDataTsvs());
+        loadDataFromPairwiseMappings(sourceData.getPairwiseMappings());
+        logger.info("Ontology loaded");
+    }
+
+    private OWLOntologyManager createOntologyManager(Concurrency concurrencyType) {
+        if (concurrencyType == Concurrency.NON_CONCURRENT) {
+            logger.info("Using non-concurrent OWL ontology manager");
+            return OWLManager.createOWLOntologyManager();
+        }
+        logger.info("Using concurrent OWL ontology manager");
+        return OWLManager.createConcurrentOWLOntologyManager();
+    }
+
+    private OWLOntology createEmptyOntology(OWLOntologyManager ontologyManager) {
+        try {
+            return ontologyManager.createOntology();
+        } catch (OWLOntologyCreationException e) {
+            throw new OntologyLoadException(e);
+        }
+    }
+
+    /**
+     * Loads the ontology at the given location and copies all of its axioms into the merged ontology.
+     */
+    private OWLOntology mergeOntology(String uri) {
+        OWLOntology loadedOntology = loadOwlOntology(uri);
+        addAxioms(loadedOntology.getAxioms());
+        return owlOntology;
+    }
+
+    private OWLOntology mergeOntologies(Collection<String> uris) {
+        uris.forEach(this::mergeOntology);
+        return owlOntology;
+    }
+
+    private ChangeApplied addAxiom(OWLAxiom axiom) {
+        return ontologyManager.addAxiom(owlOntology, axiom);
+    }
+
+    private ChangeApplied addAxioms(Set<OWLAxiom> axioms) {
+        return ontologyManager.addAxioms(owlOntology, axioms);
+    }
+
+    /**
+     * Dispatches on the form of the location: a valid URL is loaded remotely, a path ending in ".gz" is unzipped
+     * on the fly and anything else is treated as a local ontology document.
+     */
+    private OWLOntology loadOwlOntology(String uri) {
+        UrlValidator urlValidator = UrlValidator.getInstance();
+        if (urlValidator.isValid(uri)) {
+            return loadRemoteOntology(IRI.create(uri));
+        } else if (uri.endsWith(".gz")) {
+            return loadGzippedOntology(Paths.get(uri));
+        } else {
+            return loadOwlOntologyFromDocument(Paths.get(uri));
+        }
+    }
+
+    private OWLOntology loadRemoteOntology(IRI iri) {
+        return loadOwlOntology(iri);
+    }
+
+    private OWLOntology loadGzippedOntology(Path path) {
+        logger.info("Loading gzipped ontology from " + path);
+        try (InputStream is = new GZIPInputStream(new FileInputStream(path.toFile()))) {
+            return loadOwlOntologyFromDocument(is);
+        } catch (IOException e) {
+            throw new OntologyLoadException(e);
+        }
+    }
+
+    private OWLOntology loadOwlOntology(IRI iri) {
+        try {
+            logger.info("Loading ontology from IRI " + iri.getShortForm());
+            return ontologyManager.loadOntology(iri);
+        } catch (OWLOntologyCreationException e) {
+            throw new OntologyLoadException(e);
+        }
+    }
+
+    private OWLOntology loadDataFromTsv(Collection<String> paths) {
+        paths.forEach(this::loadDataFromTsv);
+        return owlOntology;
+    }
+
+    /**
+     * Reads class assertions from a TSV file, delegating to {@link #loadDataFromTsvGzip(String)} for ".gz" paths.
+     */
+    private OWLOntology loadDataFromTsv(String path) {
+        if (path.endsWith(".gz")) {
+            return loadDataFromTsvGzip(path);
+        }
+        Path file = Paths.get(path);
+        logger.info("Reading tsv data from " + path);
+        //the reader is opened in try-with-resources so the file handle is released once all lines are consumed
+        try (BufferedReader reader = Files.newBufferedReader(file)) {
+            reader.lines().forEach(this::loadLineIntoDataOntology);
+        } catch (IOException | UncheckedIOException e) {
+            throw new OntologyLoadException(e);
+        }
+        return owlOntology;
+    }
+
+    private OWLOntology loadDataFromTsvGzip(String path) {
+        Path file = Paths.get(path);
+        logger.info("Reading gzipped tsv data from " + file);
+        try (GZIPInputStream gis = new GZIPInputStream(new FileInputStream(file.toFile()));
+             BufferedReader bf = new BufferedReader(new InputStreamReader(gis, Charset.forName("UTF-8")))
+        ) {
+            bf.lines().forEach(this::loadLineIntoDataOntology);
+        } catch (IOException | UncheckedIOException e) {
+            throw new OntologyLoadException(e);
+        }
+        return owlOntology;
+    }
+
+    private void loadDataFromPairwiseMappings(Map<String, String> pairwiseMappings) {
+        pairwiseMappings.forEach(this::addInstanceOf);
+    }
+
+    /**
+     * Parses one TSV line of the form {@code individual<TAB>class[;class...]} and asserts the individual to be an
+     * instance of each class listed. Blank lines and empty class entries are skipped.
+     *
+     * @throws OntologyLoadException if a non-blank line has no tab-separated class column
+     */
+    private void loadLineIntoDataOntology(String line) {
+        if (line.trim().isEmpty()) {
+            return;
+        }
+        String[] vals = line.split("\t", 2);
+        if (vals.length < 2) {
+            //previously this surfaced as an ArrayIndexOutOfBoundsException with no hint of the offending line
+            throw new OntologyLoadException("Malformed TSV line, expected <individual>\\t<class>[;<class>...]: " + line);
+        }
+        for (String term : vals[1].split(";")) {
+            if (!term.isEmpty()) {
+                addInstanceOf(vals[0], term);
+            }
+        }
+    }
+
+    private void addInstanceOf(String individual, String ontologyClass) {
+        OWLDataFactory owlDataFactory = ontologyManager.getOWLDataFactory();
+        OWLClass owlClass = owlDataFactory.getOWLClass(toIri(ontologyClass));
+        OWLNamedIndividual owlNamedIndividual = owlDataFactory.getOWLNamedIndividual(toIri(individual));
+        OWLClassAssertionAxiom axiom = owlDataFactory.getOWLClassAssertionAxiom(owlClass, owlNamedIndividual);
+        addAxiom(axiom);
+    }
+
+    /**
+     * Expands a curie to its full IRI using the configured curies; ids which cannot be expanded are used as-is.
+     */
+    private IRI toIri(String id) {
+        return IRI.create(curieUtil.getIri(id).orElse(id));
+    }
+
+    private OWLOntology loadOwlOntologyFromDocument(Path path) {
+        try {
+            logger.info("Loading ontology from document " + path);
+            return ontologyManager.loadOntologyFromOntologyDocument(path.toFile());
+        } catch (OWLOntologyCreationException e) {
+            throw new OntologyLoadException(e);
+        }
+    }
+
+    private OWLOntology loadOwlOntologyFromDocument(InputStream is) {
+        try {
+            return ontologyManager.loadOntologyFromOntologyDocument(is);
+        } catch (OWLOntologyCreationException e) {
+            logger.error("Unable to create ontology", e);
+            throw new OntologyLoadException(e);
+        }
+    }
+}
+
diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OntologyLoadException.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OntologyLoadException.java
new file mode 100644
index 0000000..54751a7
--- /dev/null
+++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OntologyLoadException.java
@@ -0,0 +1,28 @@
+package org.monarchinitiative.owlsim.io;
+
+/**
+ * Unchecked exception thrown when an ontology or one of its data sources cannot be loaded.
+ *
+ * @author Jules Jacobsen
+ */
+class OntologyLoadException extends RuntimeException {
+
+    /**
+     * Creates an exception carrying only a description of the failure.
+     *
+     * @param message description of what could not be loaded
+     */
+    OntologyLoadException(String message) {
+        super(message);
+    }
+
+    /**
+     * Wraps the exception that caused the load to fail.
+     *
+     * @param e the underlying failure; passed to the superclass so that {@link #getCause()} and the stack trace
+     *          expose it instead of it being silently discarded
+     */
+    OntologyLoadException(Exception e) {
+        super(e);
+    }
+}
diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OntologySourceData.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OntologySourceData.java
new file mode 100644
index 0000000..8e47937
--- /dev/null
+++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OntologySourceData.java
@@ -0,0 +1,222 @@
+package org.monarchinitiative.owlsim.io;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.monarchinitiative.owlsim.kb.BMKnowledgeBase;
+import org.semanticweb.owlapi.model.OWLOntology;
+
+import java.io.File;
+import java.util.*;
+
+/**
+ * Simple container for storing the original data sources used for constructing the {@link OWLOntology} and the
+ * {@link BMKnowledgeBase}.
+ *
+ * @author Jules Jacobsen
+ */
+public class OntologySourceData {
+
+    private final List<String> ontologies;
+    private final List<String> dataOntologies;
+
+    private final Map<String, String> curies;
+    private final List<String> dataTsvs;
+    //TODO: add these so people can programmatically add individual assertions
+    private final Map<String, String> pairwiseMappings;
+
+    private OntologySourceData(Builder builder) {
+        this.ontologies = distinctImmutableListOf(builder.ontologies);
+        this.dataOntologies = distinctImmutableListOf(builder.dataOntologies);
+        this.curies = ImmutableMap.copyOf(builder.curies);
+        this.dataTsvs = distinctImmutableListOf(builder.dataTsvs);
+        this.pairwiseMappings = ImmutableMap.copyOf(builder.pairwiseMappings);
+    }
+
+    private ImmutableList<String> distinctImmutableListOf(List<String> list) {
+        return list.stream().distinct().collect(ImmutableList.toImmutableList());
+    }
+
+    /**
+     * @return immutable list of the distinct ontology locations, in the order they were added
+     */
+    public List<String> getOntologies() {
+        return ontologies;
+    }
+
+    /**
+     * @return immutable list of the distinct data ontology locations, in the order they were added
+     */
+    public List<String> getDataOntologies() {
+        return dataOntologies;
+    }
+
+    /**
+     * @return immutable copy of the curie map supplied to the builder
+     */
+    public Map<String, String> getCuries() {
+        return curies;
+    }
+
+    /**
+     * @return immutable list of the distinct TSV data file paths, in the order they were added
+     */
+    public List<String> getDataTsvs() {
+        return dataTsvs;
+    }
+
+    /**
+     * @return immutable map of pairwise mappings; always empty at present as the builder has no way to set them
+     */
+    public Map<String, String> getPairwiseMappings() {
+        return pairwiseMappings;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        OntologySourceData that = (OntologySourceData) o;
+        return Objects.equals(ontologies, that.ontologies) &&
+                Objects.equals(dataOntologies, that.dataOntologies) &&
+                Objects.equals(curies, that.curies) &&
+                Objects.equals(dataTsvs, that.dataTsvs) &&
+                Objects.equals(pairwiseMappings, that.pairwiseMappings);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(ontologies, dataOntologies, curies, dataTsvs, pairwiseMappings);
+    }
+
+    @Override
+    public String toString() {
+        return "OntologySourceData{" +
+                "ontologies=" + ontologies +
+                ", dataOntologies=" + dataOntologies +
+                ", curies=" + curies +
+                ", dataTsvs=" + dataTsvs +
+                ", pairwiseMappings=" + pairwiseMappings +
+                '}';
+    }
+
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    public static class Builder {
+        private List<String> ontologies = new ArrayList<>();
+        private List<String> dataOntologies = new ArrayList<>();
+        //Curies need to be supplied if people are adding data using TSV files or pairwise mappings using curies.
+        private Map<String, String> curies = Collections.emptyMap();
+        private List<String> dataTsvs = new ArrayList<>();
+        private Map<String, String> pairwiseMappings = Collections.emptyMap();
+
+        private Builder(){
+            //use the static method.
+        }
+
+        /**
+         * Sets the curies. These must be supplied when TSV data sources or pairwise mappings are used.
+         *
+         * @param curies the curie map; must not be null
+         * @return this builder
+         * @throws NullPointerException if curies is null
+         */
+        public Builder curies(Map<String, String> curies) {
+            //fail here rather than with an unexplained NullPointerException later in build()
+            this.curies = Objects.requireNonNull(curies, "curies cannot be null");
+            return this;
+        }
+
+        /**
+         * Loads an OWL/OBO ontology from a file.
+         *
+         * @param file the ontology file; its absolute path is recorded
+         * @return this builder
+         */
+        public Builder ontology(File file) {
+            ontologies.add(file.getAbsolutePath());
+            return this;
+        }
+
+        /**
+         * Loads an OWL/OBO ontology from a path.
+         *
+         * @param path location of the ontology
+         * @return this builder
+         */
+        public Builder ontology(String path) {
+            this.ontologies.add(path);
+            return this;
+        }
+
+        public Builder ontologies(String... paths) {
+            this.ontologies.addAll(Arrays.asList(paths));
+            return this;
+        }
+
+        /**
+         * Loads, and merges the OWL/OBO ontologies from the paths given. These can be remote, local uncompressed or
+         * gzipped.
+         *
+         * @param paths locations of the ontologies
+         * @return this builder
+         */
+        public Builder ontologies(Collection<String> paths) {
+            this.ontologies.addAll(paths);
+            return this;
+        }
+
+
+        public Builder dataOntology(String path) {
+            this.dataOntologies.add(path);
+            return this;
+        }
+
+        public Builder dataOntologies(String... paths) {
+            this.dataOntologies.addAll(Arrays.asList(paths));
+            return this;
+        }
+
+        public Builder dataOntologies(Collection<String> paths) {
+            this.dataOntologies.addAll(paths);
+            return this;
+        }
+
+        public Builder dataTsv(String path) {
+            dataTsvs.add(path);
+            return this;
+        }
+
+        public Builder dataTsv(String... paths) {
+            dataTsvs.addAll(Arrays.asList(paths));
+            return this;
+        }
+
+        public Builder dataTsv(Collection<String> paths) {
+            dataTsvs.addAll(paths);
+            return this;
+        }
+
+        /**
+         * Validates the collected sources and creates the immutable {@link OntologySourceData}.
+         *
+         * @return the source data
+         * @throws OntologyLoadException if no ontology was added, or if TSV data or pairwise mappings were added
+         *                               without any curies
+         */
+        public OntologySourceData build() {
+            if(ontologies.isEmpty()) {
+                throw new OntologyLoadException("No ontology defined.");
+            }
+            if (curies.isEmpty() && hasNonOntologyData()) {
+                throw new OntologyLoadException("Cannot load TSV data sources or pairwise mappings when curies have not been defined.");
+            }
+            return new OntologySourceData(this);
+        }
+
+        private boolean hasNonOntologyData() {
+            return !dataTsvs.isEmpty() || !pairwiseMappings.isEmpty();
+        }
+    }
+}
diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OwlKnowledgeBase.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OwlKnowledgeBase.java
new file mode 100644
index 0000000..cf206de
--- /dev/null
+++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OwlKnowledgeBase.java
@@ -0,0 +1,168 @@
+package org.monarchinitiative.owlsim.io;
+
+import org.apache.log4j.Logger;
+import org.monarchinitiative.owlsim.kb.BMKnowledgeBase;
+import org.monarchinitiative.owlsim.kb.impl.BMKnowledgeBaseOWLAPIImpl;
+import org.semanticweb.elk.owlapi.ElkReasonerFactory;
+import org.semanticweb.owlapi.model.OWLOntologyManager;
+import org.semanticweb.owlapi.reasoner.OWLReasonerFactory;
+import uk.ac.manchester.cs.owl.owlapi.concurrent.Concurrency;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.Map;
+
+/**
+ * A convenience wrapper to enable easy loading of a {@link BMKnowledgeBase} from OWL ontologies and data files.
+ *
+ * @author Jules Jacobsen
+ */
+public final class OwlKnowledgeBase {
+
+    private static final Logger logger = Logger.getLogger(OwlKnowledgeBase.class);
+
+    private OwlKnowledgeBase() {
+        //class is uninstantiable
+    }
+
+    /**
+     * @return a new {@link Loader} with which to configure and create a {@link BMKnowledgeBase}
+     */
+    public static Loader loader() {
+        return new Loader();
+    }
+
+    public static class Loader {
+
+        private Concurrency concurrencyType = Concurrency.CONCURRENT;
+        private OWLReasonerFactory owlReasonerFactory = new ElkReasonerFactory();
+
+        //TODO: do we want to expose this or keep it here? Chris mentioned we might want a way of keeping track of what the original source data was.
+        //So this is where it is. It's so tightly coupled this is literally a conjoined twin at the moment.
+        private OntologySourceData.Builder sourceDataBuilder = OntologySourceData.builder();
+
+        private Loader() {
+            //use the static loader() method in the parent class
+        }
+
+        /**
+         * Sets the curies. These must be supplied when loading data from TSV files.
+         *
+         * @param curies the curie map
+         * @return this loader
+         */
+        public Loader loadCuries(Map<String, String> curies) {
+            sourceDataBuilder.curies(curies);
+            return this;
+        }
+
+        /**
+         * Loads an OWL/OBO ontology from a file.
+         *
+         * @param file the ontology file
+         * @return this loader
+         */
+        public Loader loadOntology(File file) {
+            sourceDataBuilder.ontology(file);
+            return this;
+        }
+
+        /**
+         * Loads an OWL/OBO ontology from a path.
+         *
+         * @param path location of the ontology
+         * @return this loader
+         */
+        public Loader loadOntology(String path) {
+            sourceDataBuilder.ontology(path);
+            return this;
+        }
+
+        public Loader loadOntologies(String... paths) {
+            sourceDataBuilder.ontologies(paths);
+            return this;
+        }
+
+        /**
+         * Loads, and merges the OWL/OBO ontologies from the paths given. These can be remote, local uncompressed or
+         * gzipped.
+         *
+         * @param paths locations of the ontologies
+         * @return this loader
+         */
+        public Loader loadOntologies(Collection<String> paths) {
+            sourceDataBuilder.ontologies(paths);
+            return this;
+        }
+
+        public Loader loadDataFromOntology(String path) {
+            sourceDataBuilder.dataOntology(path);
+            return this;
+        }
+
+        public Loader loadDataFromOntologies(String... paths) {
+            sourceDataBuilder.dataOntologies(paths);
+            return this;
+        }
+
+        public Loader loadDataFromOntologies(Collection<String> paths) {
+            sourceDataBuilder.dataOntologies(paths);
+            return this;
+        }
+
+        public Loader loadDataFromTsv(String path) {
+            sourceDataBuilder.dataTsv(path);
+            return this;
+        }
+
+        public Loader loadDataFromTsv(String... paths) {
+            sourceDataBuilder.dataTsv(paths);
+            return this;
+        }
+
+        public Loader loadDataFromTsv(Collection<String> paths) {
+            sourceDataBuilder.dataTsv(paths);
+            return this;
+        }
+
+        /**
+         * Creates an {@link OWLOntologyManager} that is configured with the standard parsers and storers and provides
+         * locking for concurrent access (default).
+         */
+        public Loader useConcurrentOntologyManager() {
+            concurrencyType = Concurrency.CONCURRENT;
+            return this;
+        }
+
+        /**
+         * Creates an {@link OWLOntologyManager} that is configured with standard parsers,
+         * storers etc.
+         */
+        public Loader useStandardOntologyManager() {
+            concurrencyType = Concurrency.NON_CONCURRENT;
+            return this;
+        }
+
+        /**
+         * Allows overriding of the default {@link ElkReasonerFactory}
+         *
+         * @param owlReasonerFactory a concrete implementation of the {@link OWLReasonerFactory}
+         */
+        public Loader useReasonerFactory(OWLReasonerFactory owlReasonerFactory) {
+            this.owlReasonerFactory = owlReasonerFactory;
+            return this;
+        }
+
+        /**
+         * Builds the source data, loads the merged ontology and creates the knowledge base from it.
+         *
+         * @return handle for a Bitmap-based Knowledge Base
+         * @throws RuntimeException if no ontology has been added or a source cannot be loaded
+         */
+        public BMKnowledgeBase createKnowledgeBase() {
+            OntologySourceData sourceData = sourceDataBuilder.build();
+            Ontology ontology = Ontology.load(sourceData, concurrencyType);
+            return BMKnowledgeBaseOWLAPIImpl.create(ontology, owlReasonerFactory);
+        }
+    }
+}
diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/filter/FilterEngine.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/filter/FilterEngine.java
index a13ea08..1bd5418 100644
--- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/filter/FilterEngine.java
+++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/filter/FilterEngine.java
@@ -1,13 +1,12 @@
package org.monarchinitiative.owlsim.kb.filter;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-
+import com.googlecode.javaewah.EWAHCompressedBitmap;
import org.apache.log4j.Logger;
import org.monarchinitiative.owlsim.kb.BMKnowledgeBase;
-import com.googlecode.javaewah.EWAHCompressedBitmap;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
/**
* Applies a {@link Filter} on a set of individuals.
@@ -92,7 +91,7 @@ else if (filter instanceof TypeFilter) {
else {
typesBM = knowledgeBase.getTypesBM(id);
}
- LOG.info("typeId = " + tf.getTypeId());
+// LOG.info("typeId = " + tf.getTypeId());
int ix = knowledgeBase.getClassIndex(tf.getTypeId());
return typesBM.getPositions().contains(ix) ^ tf.isNegated();
}
diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java
index aa471b5..7823293 100644
--- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java
+++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java
@@ -1,17 +1,13 @@
package org.monarchinitiative.owlsim.kb.impl;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.*;
import java.util.stream.Collectors;
import org.apache.log4j.Logger;
import org.monarchinitiative.owlsim.io.OWLLoader;
+import org.monarchinitiative.owlsim.io.Ontology;
import org.monarchinitiative.owlsim.kb.BMKnowledgeBase;
import org.monarchinitiative.owlsim.kb.CURIEMapper;
import org.monarchinitiative.owlsim.kb.LabelMapper;
@@ -51,7 +47,6 @@
import org.semanticweb.owlapi.reasoner.OWLReasoner;
import org.semanticweb.owlapi.reasoner.OWLReasonerFactory;
-import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.googlecode.javaewah.EWAHCompressedBitmap;
import com.hp.hpl.jena.query.Query;
@@ -80,460 +75,437 @@
*/
public class BMKnowledgeBaseOWLAPIImpl implements BMKnowledgeBase {
- private Logger LOG = Logger.getLogger(BMKnowledgeBaseOWLAPIImpl.class);
+ private Logger LOG = Logger.getLogger(BMKnowledgeBaseOWLAPIImpl.class);
+
+ private KBMetadata kbMetdata;
+
+ private EWAHKnowledgeBaseStore ontoEWAHStore;
+ private OWLOntology owlOntology;
+ private OWLOntology owlDataOntology;
+ private OWLReasoner owlReasoner;
+
+ private Map, Integer> classNodeToIntegerMap;
+ private Node[] classNodeArray;
+ private Map, Integer> individualNodeToIntegerMap;
+ private Node[] individualNodeArray;
+
+ private Set> classNodes;
+ private Set> individualNodes;
+
+ private Map> classToNodeMap;
+ private Map> individualToNodeMap;
+ // private Set classesInSignature;
+ private Set individualsInSignature;
+ private Map>> propertyValueMapMap;
+ private Map> opposingClassMap = new HashMap<>();
+
+ private Map> individualToWeightedDirectTypeMap = new HashMap<>();
+
+ private int[] individualCountPerClassArray;
+
+ private CURIEMapper curieMapper;
+ private LabelMapper labelMapper;
+ private CurieUtil curieUtil;
+
+ /**
+ * Builds the knowledge base eagerly: creates the reasoner over owlOntology, runs createMap() and
+ * storeInferences(), and populates the label mapper from the ontology labels.
+ *
+ * @deprecated - use the create() constructor method with the single owlOntology - this should contain all the required axioms.
+ * @param owlOntology the ontology the reasoner is created over - cannot be null
+ * @param owlDataOntology optional (may be null); when present it is passed through translateFromDataOntology()
+ * before reasoning and its labels are also added to the label mapper
+ * TODO - fix this
+ * @param reasonerFactory used to create the reasoner over owlOntology - cannot be null
+ * @param curieUtil used to shorten IRIs to curies - cannot be null
+ */
+ public BMKnowledgeBaseOWLAPIImpl(OWLOntology owlOntology, OWLOntology owlDataOntology, OWLReasonerFactory reasonerFactory, CurieUtil curieUtil) {
+ Objects.requireNonNull(owlOntology, "OWLOntology required - cannot be null.");
+ Objects.requireNonNull(reasonerFactory, "OWLReasonerFactory required, cannot be null.");
+ Objects.requireNonNull(curieUtil, "CurieUtil required - cannot be null");
+
+ Instant start = Instant.now();
+ curieMapper = new CURIEMapperImpl();
+ labelMapper = new LabelMapperImpl(curieMapper);
+
+ this.owlOntology = owlOntology;
+ this.owlDataOntology = owlDataOntology;
+ if (owlDataOntology != null) {
+ translateFromDataOntology();
+ }
+ LOG.info("Creating OWL reasoner");
+ this.owlReasoner = reasonerFactory.createReasoner(owlOntology);
+ this.curieUtil = curieUtil;
+ createMap();
+ ontoEWAHStore = new EWAHKnowledgeBaseStore(classNodes.size(), individualNodes.size());
+ storeInferences();
+ populateLabelsFromOntology(labelMapper, owlOntology);
+ if (owlDataOntology != null) {
+ LOG.info("Fetching labels from " + owlDataOntology);
+ // the data ontology may contain labels of data items
+ populateLabelsFromOntology(labelMapper, owlDataOntology);
+ }
+ Instant end = Instant.now();
+ LOG.info("Knowledgebase loaded in " + Duration.between(start, end).toMillis() + " ms");
+ }
+
+ public static BMKnowledgeBase create(Ontology ontology, OWLReasonerFactory owlReasonerFactory) {
+ return new BMKnowledgeBaseOWLAPIImpl(ontology.getOwlOntology(), null, owlReasonerFactory, ontology.getCurieUtil());
+ }
+
+ public static BMKnowledgeBase create(OWLOntology owlOntology, OWLReasonerFactory rf, CurieUtil curieUtil) {
+ return new BMKnowledgeBaseOWLAPIImpl(owlOntology, null, rf, curieUtil);
+ }
+
+ /**
+ * Creates a knowledge base from an ontology plus a separate, optional data ontology.
+ *
+ * @deprecated use one of the create() methods taking a single ontology which contains all the required axioms.
+ * @param owlOntology the ontology - cannot be null
+ * @param owlDataOntology the data ontology, may be null
+ * @param rf factory used to create the reasoner - cannot be null
+ * @param curieUtil used to shorten IRIs to curies - cannot be null
+ * @return the knowledge base
+ */
+ public static BMKnowledgeBase create(OWLOntology owlOntology, OWLOntology owlDataOntology, OWLReasonerFactory rf, CurieUtil curieUtil) {
+ return new BMKnowledgeBaseOWLAPIImpl(owlOntology, owlDataOntology, rf, curieUtil);
+ }
+
+ public KBMetadata getKbMetdata() {
+ return kbMetdata;
+ }
+
+ public void setKbMetdata(KBMetadata kbMetdata) {
+ this.kbMetdata = kbMetdata;
+ }
+
+    /**
+     * Shortens an IRI to its curie. The full IRI string is returned when no curies are configured or when the
+     * IRI does not match any of them.
+     */
+    private String getShortForm(IRI iri) {
+        String fullIri = iri.toString();
+        if (curieUtil.getCurieMap().isEmpty()) {
+            return fullIri;
+        }
+        return curieUtil.getCurie(fullIri).orElse(fullIri);
+    }
+
+    /**
+     * Registers labels for the objects in the ontology.
+     * <p>
+     * Label annotation assertions whose subject is an IRI and whose value is a literal are used when present. If
+     * the ontology contains none, the IRI fragment of every class and individual in the signature is used instead.
+     *
+     * @param labelMapper the mapper to add the labels to
+     * @param ontology    the ontology to read the labels from
+     */
+    private void populateLabelsFromOntology(LabelMapper labelMapper, OWLOntology ontology) {
+        LOG.info("Populating labels from " + ontology);
+        int n = 0;
+        for (OWLAnnotationAssertionAxiom aaa : ontology.getAxioms(AxiomType.ANNOTATION_ASSERTION)) {
+            if (aaa.getProperty().isLabel()) {
+                if (aaa.getSubject() instanceof IRI && aaa.getValue() instanceof OWLLiteral) {
+                    labelMapper.add(getShortForm((IRI) aaa.getSubject()), ((OWLLiteral) aaa.getValue()).getLiteral());
+                    n++;
+                }
+            }
+        }
+        if (n == 0) {
+            LOG.info("Setting labels from fragments");
+            //the element type must be declared for the loop below to type-check; a raw Set yields Object
+            Set<OWLNamedObject> objs = new HashSet<>();
+            objs.addAll(ontology.getClassesInSignature());
+            objs.addAll(ontology.getIndividualsInSignature());
+            for (OWLNamedObject obj : objs) {
+                labelMapper.add(getShortForm(obj.getIRI()), obj.getIRI().getFragment());
+                n++;
+            }
+        }
+        LOG.info("Label axioms mapped: " + n);
+    }
+
+ /**
+ * @return utility object to map labels to ids
+ */
+ public LabelMapper getLabelMapper() {
+ return labelMapper;
+ }
+
+ /**
+ * @return set of all classes
+ */
+ public Set getClassesInSignature() {
+ return classToNodeMap.keySet(); // TODO - consider optimizing
+ }
+
+ /**
+ * @return set of all class identifiers - the curie of each class IRI where one can be derived,
+ * otherwise the full IRI string
+ */
+ public Set getClassIdsInSignature() {
+ Set ids = new HashSet<>();
+ for (OWLClass i : getClassesInSignature()) {
+ ids.add(getShortForm(i.getIRI()));
+ }
+ return ids;
+ }
+
+ public Set getClassIdsByOntology(String ont) {
+ return getClassIdsInSignature().stream().filter(x -> isIn(x, ont)).collect(Collectors.toSet());
+ }
+
+ /**
+ * Tests, by string matching alone, whether an identifier looks like it belongs to the given ontology.
+ *
+ * @param id the identifier to test
+ * @param ont the ontology prefix to look for
+ * @return true if id starts with {@code ont + ":"} or contains {@code "/" + ont + "_"}
+ */
+ private boolean isIn(String id, String ont) {
+ // TODO - use curie util
+ return id.startsWith(ont + ":") || id.contains("/" + ont + "_");
+ }
+
+ public int getNumClassNodes() {
+ return classNodeArray.length;
+ }
+
+ /**
+ * @return set of all named individuals in the signature (the individuals themselves, not their identifiers)
+ */
+ protected Set getIndividualsInSignature() {
+ return individualsInSignature;
+ }
+
+ /**
+ * @return set of all individual identifiers - the curie of each individual IRI where one can be derived,
+ * otherwise the full IRI string
+ */
+ public Set getIndividualIdsInSignature() {
+ Set ids = new HashSet<>();
+ for (OWLNamedIndividual i : getIndividualsInSignature()) {
+ ids.add(getShortForm(i.getIRI()));
+ }
+ return ids;
+ }
+
+ /**
+ * @return OWLAPI representation of the ontology
+ */
+ protected OWLOntology getOwlOntology() {
+ return owlOntology;
+ }
+
+ // Assumption: data ontology includes ObjectPropertyAssertions
+ // TODO: make flexible
+ // TODO: extract associations
+ private void translateFromDataOntology() {
+ // TODO: allow other axiom types
+ for (OWLObjectPropertyAssertionAxiom opa : owlDataOntology.getAxioms(AxiomType.OBJECT_PROPERTY_ASSERTION)) {
+ OWLIndividual obj = opa.getObject();
+ if (obj instanceof OWLNamedIndividual) {
+ OWLClass type = getOWLDataFactory().getOWLClass(((OWLNamedIndividual) obj).getIRI());
+ OWLClassAssertionAxiom ca = getOWLDataFactory().getOWLClassAssertionAxiom(type, opa.getSubject());
+ owlOntology.getOWLOntologyManager().addAxiom(owlOntology, ca);
+ }
+ }
+ }
+
+ /**
+  * Builds the node sets, lookup maps and index arrays for the ontology.
+  * Each OWLClass and OWLIndividual is mapped to an Integer index.
+  *
+  * Classes are grouped into the equivalence nodes returned by the reasoner and
+  * individuals into same-individual nodes. Class nodes are then indexed in
+  * ascending order of their freqDepth score, so that LOW frequency (HIGH
+  * Information Content) nodes receive LOWER indices. Unsatisfiable classes
+  * (equivalent to owl:Nothing) are logged and left out of every map.
+  */
+ private void createMap() {
+     LOG.info("Creating mapping from ontology objects to integers");
+     classNodes = new HashSet<>();
+     individualNodes = new HashSet<>();
+     Set<OWLClass> classesInSignature = owlOntology.getClassesInSignature(true);
+     // logged before owl:Thing is added and owl:Nothing removed
+     LOG.info("|classes|=" + classesInSignature.size());
+     classesInSignature.add(getOWLThing());
+     classesInSignature.remove(getOWLNothing());
+     individualsInSignature = owlOntology.getIndividualsInSignature(true);
+     LOG.info("|individuals|=" + individualsInSignature.size());
+     classToNodeMap = new HashMap<>();
+     individualToNodeMap = new HashMap<>();
+     classNodeToIntegerMap = new HashMap<>();
+     individualNodeToIntegerMap = new HashMap<>();
+     propertyValueMapMap = new HashMap<>();
+     final Map<Node<OWLClass>, Integer> classNodeToFrequencyMap = new HashMap<>();
+     final Map<Node<OWLClass>, Double> classNodeToFreqDepthMap = new HashMap<>();
+     for (OWLClass c : classesInSignature) {
+         // TODO: deal with subclasses; classes without instances are currently kept, not skipped
+         Node<OWLClass> node = owlReasoner.getEquivalentClasses(c);
+         if (node.contains(getOWLNothing())) {
+             LOG.warn("Ignoring unsatisfiable class: " + c);
+             continue;
+         }
+         classNodes.add(node);
+         classToNodeMap.put(c, node);
+         int numAncNodes = owlReasoner.getSuperClasses(c, false).getNodes().size();
+         int freq = owlReasoner.getInstances(c, false).getNodes().size();
+         classNodeToFrequencyMap.put(node, freq);
+         // freq depth is inversely correlated with informativeness;
+         // frequency is primary measure (high freq = low informativeness);
+         // if frequency is tied, then tie is broken by number of ancestors
+         // (high ancestors = high informativeness)
+         // note that if frequency is not tied, then depth/ancestors should make
+         // no overall difference - we ensure this by taking the proportion of
+         // ancestor nodes divided by number of classes (there are always equal
+         // or more classes than nodes)
+         double freqDepth = freq + 1 - (numAncNodes / (double) classesInSignature.size());
+         classNodeToFreqDepthMap.put(node, freqDepth);
+     }
+     for (OWLNamedIndividual i : individualsInSignature) {
+         Node<OWLNamedIndividual> node = owlReasoner.getSameIndividuals(i);
+         individualNodes.add(node);
+         individualToNodeMap.put(i, node);
+         setPropertyValues(owlOntology, i);
+         if (owlDataOntology != null) {
+             setPropertyValues(owlDataOntology, i);
+         }
+     }
+     // Order class nodes such that LOW frequency (HIGH Information Content)
+     // nodes have LOWER indices; ties in frequency are broken by the
+     // ancestor-count term already folded into the freqDepth score
+     List<Node<OWLClass>> classNodesSorted = new ArrayList<>(classNodes);
+     classNodesSorted.sort((n1, n2) -> Double.compare(classNodeToFreqDepthMap.get(n1), classNodeToFreqDepthMap.get(n2)));
+     int numClassNodes = classNodesSorted.size();
+     classNodeArray = classNodesSorted.toArray(new Node[numClassNodes]);
+     individualCountPerClassArray = new int[numClassNodes];
+     for (int i = 0; i < numClassNodes; i++) {
+         classNodeToIntegerMap.put(classNodeArray[i], i);
+         individualCountPerClassArray[i] = classNodeToFrequencyMap.get(classNodeArray[i]);
+     }
+     // individual nodes are indexed in the iteration order of the node set
+     individualNodeArray = individualNodes.toArray(new Node[individualNodes.size()]);
+     for (int i = 0; i < individualNodeArray.length; i++) {
+         individualNodeToIntegerMap.put(individualNodeArray[i], i);
+     }
+ }
+
+ /**
+  * Collects the property assertions that {@code ont} makes about {@code i} and
+  * stores them in {@code propertyValueMapMap} under the short form of the
+  * individual's IRI.
+  *
+  * Only assertions on a named object property or data property are recorded.
+  * A literal object is stored by its lexical value and a named individual by
+  * the short form of its IRI; any other object is ignored.
+  *
+  * The method may be called for several ontologies with the same individual;
+  * values accumulate in one map instead of each call replacing the previous
+  * result.
+  *
+  * @param ont ontology whose axioms about {@code i} are read
+  * @param i individual to collect values for; must not be null
+  */
+ private void setPropertyValues(OWLOntology ont, OWLNamedIndividual i) {
+     Preconditions.checkNotNull(i);
+     String id = getShortForm(i.getIRI());
+     // reuse the map of an earlier call so its values are not thrown away
+     Map<String, Set<Object>> pvm = propertyValueMapMap.computeIfAbsent(id, k -> new HashMap<>());
+     for (OWLIndividualAxiom ax : ont.getAxioms(i)) {
+         if (!(ax instanceof OWLPropertyAssertionAxiom)) {
+             continue;
+         }
+         OWLPropertyAssertionAxiom<?, ?> paa = (OWLPropertyAssertionAxiom<?, ?>) ax;
+         OWLPropertyExpression p = paa.getProperty();
+         String pid;
+         if (p instanceof OWLObjectProperty) {
+             pid = getShortForm(((OWLObjectProperty) p).getIRI());
+         } else if (p instanceof OWLDataProperty) {
+             pid = getShortForm(((OWLDataProperty) p).getIRI());
+         } else {
+             // neither a named object property nor a data property: nothing to key the value on
+             continue;
+         }
+         OWLPropertyAssertionObject obj = paa.getObject();
+         if (obj instanceof OWLLiteral) {
+             addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral());
+         } else if (obj instanceof OWLNamedIndividual) {
+             addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI()));
+         }
+     }
+ }
+
+ /**
+  * Adds {@code v} to the set of values stored under {@code pid}, creating
+  * that set the first time the property is seen.
+  *
+  * @param pvm property-to-values map to update
+  * @param pid property identifier used as the key
+  * @param v value to add
+  */
+ private void addPropertyValue(Map<String, Set<Object>> pvm, String pid, String v) {
+     pvm.computeIfAbsent(pid, k -> new HashSet<>()).add(v);
+ }
+
+ /**
+  * Records {@code dc} as opposing {@code c}. The reverse entry, from
+  * {@code dc} to {@code c}, is recorded only when {@code dc} is not anonymous.
+  *
+  * @param c named class
+  * @param dc class expression that opposes {@code c}
+  */
+ private void addOpposingClassPair(OWLClass c, OWLClassExpression dc) {
+     addOpposingClassPairAsym(c, dc);
+     if (dc.isAnonymous()) {
+         // no reverse entry for an anonymous expression
+         return;
+     }
+     addOpposingClassPairAsym(dc.asOWLClass(), c);
+ }
+
+ /**
+  * Adds {@code d} to the set kept for {@code c} in {@code opposingClassMap},
+  * creating the set if {@code c} has no entry yet. Only this one direction
+  * is stored.
+  *
+  * @param c class used as the map key
+  * @param d class expression added to the set for {@code c}
+  */
+ private void addOpposingClassPairAsym(OWLClass c, OWLClassExpression d) {
+     opposingClassMap.computeIfAbsent(c, key -> new HashSet<>()).add(d);
+ }
+
+ private void storeInferences() {
+
+ // Note: if there are any nodes containing >1 class or individual, then
+ // the store method is called redundantly. This is unlikely to affect performance,
+ // and the semantics are unchanged
+ for (OWLClass c : getClassesInSignature()) {
+ int clsIndex = getIndex(c);
+ // LOG.info("Storing inferences for "+c+" --> " + clsIndex);
+ Set sups = getIntegersForClassSet(owlReasoner.getSuperClasses(c, false));
+ sups.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c)));
+
+ Set subs = getIntegersForClassSet(owlReasoner.getSubClasses(c, false));
+ subs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c)));
+
+ ontoEWAHStore.setDirectSuperClasses(clsIndex, getIntegersForClassSet(owlReasoner.getSuperClasses(c, true)));
+ ontoEWAHStore.setSuperClasses(clsIndex, sups);
+ ontoEWAHStore.setDirectSubClasses(clsIndex, getIntegersForClassSet(owlReasoner.getSubClasses(c, true)));
+ ontoEWAHStore.setSubClasses(clsIndex, subs);
+
+ // Find all disjoint pairs plus opposing pairs
+ for (OWLAnnotationAssertionAxiom aaa : owlOntology.getAnnotationAssertionAxioms(c.getIRI())) {
+ // RO_0002604 is-opposite-of. TODO - use a vocabulary object
+ if (aaa.getProperty().getIRI().toString().equals("http://purl.obolibrary.org/obo/RO_0002604")) {
+ OWLAnnotationValue v = aaa.getValue();
+ if (v instanceof IRI) {
+ IRI dciri = (IRI) v;
+ OWLClass dc = owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(dciri);
+ addOpposingClassPair(c, dc);
+
+ }
+ }
+ }
+
+ for (OWLDisjointClassesAxiom dca : owlOntology.getDisjointClassesAxioms(c)) {
+ for (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) {
+ addOpposingClassPair(c, dc);
+ }
+ }
+
+ // direct individuals are those asserted to be of type c or anything equivalent to c
+ Set individualInts = new HashSet<>();
+ for (OWLClass ec : owlReasoner.getEquivalentClasses(c).getEntities()) {
+ for (OWLClassAssertionAxiom ax : owlOntology.getClassAssertionAxioms(ec)) {
+ if (ax.getIndividual().isNamed()) {
+ individualInts.add(getIndex(ax.getIndividual().asOWLNamedIndividual()));
+ }
+ }
+ }
+ ontoEWAHStore.setDirectIndividuals(clsIndex, individualInts);
+
+ }
+
+ // populate frequency-awareness map
+ individualToWeightedDirectTypeMap = new HashMap<>();
+ for (OWLNamedIndividual i : individualsInSignature) {
+ int individualIndex = getIndex(i);
+ // LOG.info("String inferences for "+i+" --> " +individualIndex);
+ ontoEWAHStore.setDirectTypes(individualIndex, getIntegersForClassSet(owlReasoner.getTypes(i, true)));
+ ontoEWAHStore.setTypes(individualIndex, getIntegersForClassSet(owlReasoner.getTypes(i, false)));
- private KBMetadata kbMetdata;
-
- private EWAHKnowledgeBaseStore ontoEWAHStore;
- private OWLOntology owlOntology;
- private OWLOntology owlDataOntology;
- private OWLReasoner owlReasoner;
-
- private Map, Integer> classNodeToIntegerMap;
- private Node[] classNodeArray;
- private Map, Integer> individualNodeToIntegerMap;
- private Node[] individualNodeArray;
-
- private Set> classNodes;
- private Set> individualNodes;
-
- private Map> classToNodeMap;
- private Map> individualToNodeMap;
- // private Set classesInSignature;
- private Set individualsInSignature;
- private Map>> propertyValueMapMap;
- Map> opposingClassMap =
- new HashMap>();
-
- Map> individualToWeightedDirectTypeMap = new HashMap<>();
-
-
- private int[] individualCountPerClassArray;
-
- CURIEMapper curieMapper;
- LabelMapper labelMapper;
- CurieUtil curieUtil;
-
- /**
- * @param owlOntology
- * @param owlDataOntology TODO - fix this
- * @param rf
- */
- public BMKnowledgeBaseOWLAPIImpl(OWLOntology owlOntology,
- OWLOntology owlDataOntology, OWLReasonerFactory rf,
- CurieUtil curieUtil) {
- super();
- curieMapper = new CURIEMapperImpl();
- labelMapper = new LabelMapperImpl(curieMapper);
-
- this.owlOntology = owlOntology;
- this.owlDataOntology = owlDataOntology;
- if (owlDataOntology != null) {
- translateFromDataOntology();
- }
- this.owlReasoner = rf.createReasoner(owlOntology);
- this.curieUtil = curieUtil;
- createMap();
- ontoEWAHStore = new EWAHKnowledgeBaseStore(classNodes.size(), individualNodes.size());
- storeInferences();
- populateLabelsFromOntology(labelMapper, owlOntology);
- if (owlDataOntology != null) {
- LOG.info("Fetching labels from " + owlDataOntology);
- // the data ontology may contain labels of data items
- populateLabelsFromOntology(labelMapper, owlDataOntology);
- }
- }
-
- public static BMKnowledgeBase create(OWLOntology owlOntology, OWLReasonerFactory rf,
- CurieUtil curieUtil) {
- return new BMKnowledgeBaseOWLAPIImpl(owlOntology, null, rf, curieUtil);
- }
-
- /**
- * @param owlOntology
- * @param owlDataOntology
- * @param rf
- * @return
- */
- public static BMKnowledgeBase create(OWLOntology owlOntology, OWLOntology owlDataOntology,
- OWLReasonerFactory rf, CurieUtil curieUtil) {
- return new BMKnowledgeBaseOWLAPIImpl(owlOntology, owlDataOntology, rf, curieUtil);
- }
-
-
-
- public KBMetadata getKbMetdata() {
- return kbMetdata;
- }
-
-
-
- public void setKbMetdata(KBMetadata kbMetdata) {
- this.kbMetdata = kbMetdata;
- }
-
- private String getShortForm(IRI iri) {
- if (curieUtil.getCurieMap().isEmpty()) {
- return iri.toString();
- } else {
- Optional curie = curieUtil.getCurie(iri.toString());
- if (curie.isPresent()) {
- return curie.get();
- }
- else {
- return iri.toString();
- }
- }
- }
-
- private void populateLabelsFromOntology(LabelMapper labelMapper, OWLOntology ontology) {
- LOG.info("Populating labels from " + ontology);
- int n = 0;
- for (OWLAnnotationAssertionAxiom aaa : ontology.getAxioms(AxiomType.ANNOTATION_ASSERTION)) {
- if (aaa.getProperty().isLabel()) {
- if (aaa.getSubject() instanceof IRI && aaa.getValue() instanceof OWLLiteral) {
- labelMapper.add(getShortForm((IRI) aaa.getSubject()),
- ((OWLLiteral) aaa.getValue()).getLiteral());
- n++;
- }
- }
- }
- if (n == 0) {
- LOG.info("Setting labels from fragments");
- Set objs = new HashSet();
- objs.addAll(ontology.getClassesInSignature());
- objs.addAll(ontology.getIndividualsInSignature());
- for (OWLNamedObject obj : objs) {
- labelMapper.add(getShortForm(obj.getIRI()), obj.getIRI().getFragment());
- n++;
- }
- }
- LOG.info("Label axioms mapped: " + n);
- }
-
- /**
- * @return utility object to map labels to ids
- */
- public LabelMapper getLabelMapper() {
- return labelMapper;
- }
-
- /**
- * @return set of all classes
- */
- public Set getClassesInSignature() {
- return classToNodeMap.keySet(); // TODO - consider optimizing
- }
-
- /**
- * @return set of all class identifiers
- */
- public Set getClassIdsInSignature() {
- Set ids = new HashSet();
- for (OWLClass i : getClassesInSignature()) {
- ids.add(getShortForm(i.getIRI()));
- }
- return ids;
- }
-
- public Set getClassIdsByOntology(String ont) {
- return getClassIdsInSignature().stream().filter(x -> isIn(x, ont)).collect(Collectors.toSet());
- }
-
- /**
- * @param id
- * @param ont
- * @return true if id is in ontology
- */
- public boolean isIn(String id, String ont) {
- // TODO - use curie util
- return id.startsWith(ont+":") || id.contains("/"+ont+"_");
- }
-
- public int getNumClassNodes() {
- return classNodeArray.length;
- }
-
-
-
- /**
- * @return set of all individual identifiers
- */
- protected Set getIndividualsInSignature() {
- return individualsInSignature;
- }
-
- /**
- * @return ids
- */
- public Set getIndividualIdsInSignature() {
- Set ids = new HashSet();
- for (OWLNamedIndividual i : getIndividualsInSignature()) {
- ids.add(getShortForm(i.getIRI()));
- }
- return ids;
- }
-
-
-
- /**
- * @return OWLAPI representation of the ontology
- */
- protected OWLOntology getOwlOntology() {
- return owlOntology;
- }
-
- // Assumption: data ontology includes ObjectPropertyAssertions
- // TODO: make flexible
- // TODO: extract associations
- private void translateFromDataOntology() {
- // TODO: allow other axiom types
- for (OWLObjectPropertyAssertionAxiom opa : owlDataOntology
- .getAxioms(AxiomType.OBJECT_PROPERTY_ASSERTION)) {
- OWLIndividual obj = opa.getObject();
- if (obj instanceof OWLNamedIndividual) {
- OWLClass type = getOWLDataFactory().getOWLClass(((OWLNamedIndividual) obj).getIRI());
- OWLClassAssertionAxiom ca =
- getOWLDataFactory().getOWLClassAssertionAxiom(type, opa.getSubject());
- owlOntology.getOWLOntologyManager().addAxiom(owlOntology, ca);
- }
- }
- }
-
-
- // Each OWLClass and OWLIndividual is mapped to an Integer index
- private void createMap() {
- LOG.info("Creating mapping from ontology objects to integers");
- classNodes = new HashSet>();
- individualNodes = new HashSet>();
- Set classesInSignature;
- classesInSignature = owlOntology.getClassesInSignature(true);
- LOG.info("|classes|=" + classesInSignature.size());
- classesInSignature.add(getOWLThing());
- classesInSignature.remove(getOWLNothing());
- individualsInSignature = owlOntology.getIndividualsInSignature(true);
- LOG.info("|individuals|=" + individualsInSignature.size());
- classToNodeMap = new HashMap>();
- individualToNodeMap = new HashMap>();
- classNodeToIntegerMap = new HashMap, Integer>();
- individualNodeToIntegerMap = new HashMap, Integer>();
- propertyValueMapMap = new HashMap>>();
- final HashMap, Integer> classNodeToFrequencyMap =
- new HashMap, Integer>();
- final HashMap, Double> classNodeToFreqDepthMap =
- new HashMap, Double>();
- for (OWLClass c : classesInSignature) {
- if (owlReasoner.getInstances(c, false).isEmpty()) {
- // TODO: deal with subclasses
- // LOG.info("Skipping non-instantiated class: "+c);
- // continue;
- }
- Node node = owlReasoner.getEquivalentClasses(c);
- if (node.contains(getOWLNothing())) {
- LOG.warn("Ignoring unsatisfiable class: " + c);
- continue;
- }
- classNodes.add(node);
- classToNodeMap.put(c, node);
- int numAncNodes = owlReasoner.getSuperClasses(c, false).getNodes().size();
- int freq = owlReasoner.getInstances(c, false).getNodes().size();
- classNodeToFrequencyMap.put(node, freq);
-
- // freq depth is inversely correlated informativeness;
- // frequency is primary measure (high freq = low informativeness);
- // if frequency is tied, then tie is broken by number of ancestors
- // (high ancestors = high informativeness)
- // note that if frequency is not tied, then depth/ancestors should make
- // no overall difference - we ensure this by taking the proportion of
- // ancestor nodes divided by number of classes (there are always equal
- // or more classes than nodes)
- double freqDepth = freq + 1 - (numAncNodes / (double) classesInSignature.size());
- // LOG.info("freqDepth = "+freq+" "+freqDepth);
- classNodeToFreqDepthMap.put(node, freqDepth);
- }
-
- for (OWLNamedIndividual i : individualsInSignature) {
- Node node = owlReasoner.getSameIndividuals(i);
- individualNodes.add(node);
- individualToNodeMap.put(i, node);
- setPropertyValues(owlOntology, i);
- if (owlDataOntology != null)
- setPropertyValues(owlDataOntology, i);
- }
-
- // Order class nodes such that LOW frequencies (HIGH Information Content)
- // nodes are have LOWER indices
- // TODO: use depth as a tie breaker
- List> classNodesSorted = new ArrayList>(classNodes);
- Collections.sort(classNodesSorted, new Comparator>() {
- public int compare(Node n1, Node n2) {
- double f1 = classNodeToFreqDepthMap.get(n1);
- double f2 = classNodeToFreqDepthMap.get(n2);
- if (f1 < f2)
- return -1;
- if (f1 > f2)
- return 1;
- return 0;
- }
- });
- int numClassNodes = classNodesSorted.size();
- classNodeArray = classNodesSorted.toArray(new Node[numClassNodes]);
- individualCountPerClassArray = new int[numClassNodes];
- for (int i = 0; i < numClassNodes; i++) {
- classNodeToIntegerMap.put(classNodeArray[i], i);
- // LOG.info(classNodeArray[i] + " ix="+i + "
- // FREQ="+classNodeToFrequencyMap.get(classNodeArray[i]));
- // LOG.info(classNodeArray[i] + " ix="+i + "
- // IX_REV="+classNodeToIntegerMap.get(classNodeArray[i]));
- individualCountPerClassArray[i] = classNodeToFrequencyMap.get(classNodeArray[i]);
- }
- individualNodeArray = individualNodes.toArray(new Node[individualNodes.size()]);
- for (int i = 0; i < individualNodes.size(); i++) {
- individualNodeToIntegerMap.put(individualNodeArray[i], i);
- }
-
- }
-
-
- private void setPropertyValues(OWLOntology ont, OWLNamedIndividual i) {
- Preconditions.checkNotNull(i);
- Map> pvm = new HashMap>();
- String id = getShortForm(i.getIRI());
- propertyValueMapMap.put(id, pvm);
- for (OWLIndividualAxiom ax : ont.getAxioms(i)) {
- if (ax instanceof OWLPropertyAssertionAxiom) {
- OWLPropertyAssertionAxiom paa = (OWLPropertyAssertionAxiom) ax;
- OWLPropertyExpression p = paa.getProperty();
- if (p instanceof OWLObjectProperty || p instanceof OWLDataProperty) {
- String pid;
- if (p instanceof OWLObjectProperty)
- pid = getShortForm(((OWLObjectProperty) p).getIRI());
- else
- pid = getShortForm(((OWLDataProperty) p).getIRI());
- OWLPropertyAssertionObject obj = paa.getObject();
- if (obj instanceof OWLLiteral) {
- addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral());
- } else if (obj instanceof OWLNamedIndividual) {
- addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI()));
-
- }
-
- } else if (false) {
- String pid = getShortForm(((OWLDataProperty) p).getIRI());
- OWLLiteral obj = ((OWLDataPropertyAssertionAxiom) paa).getObject();
- if (obj instanceof OWLLiteral) {
- addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral());
- } else if (obj instanceof OWLNamedIndividual) {
- addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI()));
-
- }
-
- }
- }
- }
-
- }
-
-
- private void addPropertyValue(Map> pvm, String pid, String v) {
- // LOG.debug("PV="+pid+"="+v);
- if (!pvm.containsKey(pid))
- pvm.put(pid, new HashSet