diff --git a/owlsim-core/pom.xml b/owlsim-core/pom.xml index dab2628..ad5b5ff 100644 --- a/owlsim-core/pom.xml +++ b/owlsim-core/pom.xml @@ -189,7 +189,7 @@ org.prefixcommons curie-util - 0.0.1 + 0.0.2 diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OWLLoader.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OWLLoader.java index 0c3fa36..c213399 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OWLLoader.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OWLLoader.java @@ -1,15 +1,6 @@ package org.monarchinitiative.owlsim.io; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.HashMap; -import java.util.List; -import java.util.zip.GZIPInputStream; - +import com.google.common.base.Preconditions; import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -17,23 +8,22 @@ import org.prefixcommons.CurieUtil; import org.semanticweb.elk.owlapi.ElkReasonerFactory; import org.semanticweb.owlapi.apibinding.OWLManager; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLClassAssertionAxiom; -import org.semanticweb.owlapi.model.OWLDataFactory; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyCreationException; -import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.model.*; import org.semanticweb.owlapi.reasoner.OWLReasoner; import org.semanticweb.owlapi.reasoner.OWLReasonerFactory; -import com.google.common.base.Preconditions; +import java.io.*; +import java.util.HashMap; +import java.util.List; +import java.util.zip.GZIPInputStream; /** * Object for loading OWL ontologies into a {@link BMKnowledgeBase} * * Note that a KB consists of classes and individuals, both of 
which can be * loaded from an ontology - * + * + * @deprecated - Use the OwlKnowledgeBase.loader() instead. * @author cjm * */ @@ -52,7 +42,7 @@ * @return OWL Ontology * @throws OWLOntologyCreationException */ - public OWLOntology loadOWL(IRI iri) throws OWLOntologyCreationException { + private OWLOntology loadOWL(IRI iri) throws OWLOntologyCreationException { return getOWLOntologyManager().loadOntology(iri); } @@ -61,7 +51,7 @@ public OWLOntology loadOWL(IRI iri) throws OWLOntologyCreationException { * @return OWL Ontology * @throws OWLOntologyCreationException */ - public OWLOntology loadOWL(File file) throws OWLOntologyCreationException { + private OWLOntology loadOWL(File file) throws OWLOntologyCreationException { IRI iri = IRI.create(file); return getOWLOntologyManager().loadOntologyFromOntologyDocument(iri); } @@ -73,7 +63,7 @@ public OWLOntology loadOWL(File file) throws OWLOntologyCreationException { * @return OWL Ontology * @throws OWLOntologyCreationException */ - public OWLOntology loadOWL(String path) throws OWLOntologyCreationException { + private OWLOntology loadOWL(String path) throws OWLOntologyCreationException { if (path.startsWith("http")) { return loadOWL(IRI.create(path)); } else { diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/Ontology.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/Ontology.java new file mode 100644 index 0000000..35f6b72 --- /dev/null +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/Ontology.java @@ -0,0 +1,237 @@ +package org.monarchinitiative.owlsim.io; + +import org.apache.commons.validator.routines.UrlValidator; +import org.apache.log4j.Logger; +import org.prefixcommons.CurieUtil; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.*; +import org.semanticweb.owlapi.model.parameters.ChangeApplied; +import uk.ac.manchester.cs.owl.owlapi.concurrent.Concurrency; + +import java.io.*; +import java.nio.charset.Charset; +import 
java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collection; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.zip.GZIPInputStream; + +/** + * OWL API wrapper to facilitate building OWLOntology objects to load into the {@link org.monarchinitiative.owlsim.kb.impl.BMKnowledgeBaseOWLAPIImpl} + * + * @author Jules Jacobsen + */ +public class Ontology { + + private static final Logger logger = Logger.getLogger(Ontology.class); + + //OWLOntology is a mutable object + private final OntologySourceData sourceData; + private final CurieUtil curieUtil; + private final OWLOntology owlOntology; + + private final OWLOntologyManager ontologyManager; + + private Ontology(OntologySourceData sourceData, Concurrency concurrency) { + Objects.requireNonNull(sourceData, "Unable to create Ontology without data sources."); + this.sourceData = sourceData; + this.curieUtil = new CurieUtil(sourceData.getCuries()); + this.ontologyManager = createOntologyManager(concurrency); + this.owlOntology = createEmptyOntology(ontologyManager); + loadOwlOntology(); + } + + /** + * Loads an ontology using a concurrent OWLOntologyManager. + * + * @param sourceData + * @return An Ontology created from the source data provided. + */ + public static Ontology load(OntologySourceData sourceData) { + return new Ontology(sourceData, Concurrency.CONCURRENT); + } + + /** + * Loads an ontology using an OWLOntologyManager using the concurrency type specified. + * + * @param sourceData + * @param concurrency + * @return An Ontology created from the source data provided. + */ + public static Ontology load(OntologySourceData sourceData, Concurrency concurrency) { + return new Ontology(sourceData, useConcurrentIfNull(concurrency)); + } + + private static Concurrency useConcurrentIfNull(Concurrency concurrency) { + return concurrency == null ? 
Concurrency.CONCURRENT : concurrency; + } + + public OWLOntology getOwlOntology() { + return owlOntology; + } + + public OntologySourceData getSourceData() { + return sourceData; + } + + public CurieUtil getCurieUtil() { + return curieUtil; + } + + private void loadOwlOntology() { + //Order matters here - don't change it. + mergeOntologies(sourceData.getOntologies()); + mergeOntologies(sourceData.getDataOntologies()); + loadDataFromTsv(sourceData.getDataTsvs()); + loadDataFromPairwiseMappings(sourceData.getPairwiseMappings()); + logger.info("Ontology loaded"); + } + + private OWLOntologyManager createOntologyManager(Concurrency concurrencyType) { + if (concurrencyType == Concurrency.NON_CONCURRENT) { + logger.info("Using non-concurrent OWL ontology manager"); + return OWLManager.createOWLOntologyManager(); + } + logger.info("Using concurrent OWL ontology manager"); + return OWLManager.createConcurrentOWLOntologyManager(); + } + + private OWLOntology createEmptyOntology(OWLOntologyManager ontologyManager) { + try { + return ontologyManager.createOntology(); + } catch (OWLOntologyCreationException e) { + throw new OntologyLoadException(e); + } + } + + private OWLOntology mergeOntology(String uri) { + OWLOntology loadedOntology = loadOwlOntology(uri); + addAxioms(loadedOntology.getAxioms()); + return owlOntology; + } + + private OWLOntology mergeOntologies(Collection uris) { + uris.forEach(uri -> mergeOntology(uri)); + return owlOntology; + } + + private ChangeApplied addAxiom(OWLAxiom axiom) { + return ontologyManager.addAxiom(owlOntology, axiom); + } + + private ChangeApplied addAxioms(Set axioms) { + return ontologyManager.addAxioms(owlOntology, axioms); + } + + private OWLOntology loadOwlOntology(String uri) { + UrlValidator urlValidator = UrlValidator.getInstance(); + if (urlValidator.isValid(uri)) { + return loadRemoteOntology(IRI.create(uri)); + } else if (uri.endsWith(".gz")) { + return loadGzippedOntology(Paths.get(uri)); + } else { + return 
loadOwlOntologyFromDocument(Paths.get(uri)); + } + } + + private OWLOntology loadRemoteOntology(IRI iri) { + return loadOwlOntology(iri); + } + + private OWLOntology loadGzippedOntology(Path path) { + logger.info("Loading gzipped ontology from " + path); + try (InputStream is = new GZIPInputStream(new FileInputStream(path.toFile()))) { + return loadOwlOntologyFromDocument(is); + } catch (IOException e) { + throw new OntologyLoadException(e); + } + } + + private OWLOntology loadOwlOntology(IRI iri) { + try { + logger.info("Loading ontology from IRI " + iri.getShortForm()); + return ontologyManager.loadOntology(iri); + } catch (OWLOntologyCreationException e) { + throw new OntologyLoadException(e); + } + } + + private OWLOntology loadDataFromTsv(Collection paths) { + paths.forEach(this::loadDataFromTsv); + return owlOntology; + } + + private OWLOntology loadDataFromTsv(String path) { + if (path.endsWith(".gz")) { + return loadDataFromTsvGzip(path); + } + Path file = Paths.get(path); + logger.info("Reading tsv data from " + path); + try (BufferedReader reader = Files.newBufferedReader(file)) { // try-with-resources releases the file handle once every line has been consumed + reader.lines().forEach(line -> loadLineIntoDataOntology(line)); + } catch (IOException e) { + throw new OntologyLoadException(e); + } + return owlOntology; + } + + private OWLOntology loadDataFromTsvGzip(String path) { + Path file = Paths.get(path); + logger.info("Reading gzipped tsv data from " + file); + try (GZIPInputStream gis = new GZIPInputStream(new FileInputStream(file.toFile())); + BufferedReader bf = new BufferedReader(new InputStreamReader(gis, Charset.forName("UTF-8"))) + ) { + bf.lines().forEach(line -> loadLineIntoDataOntology(line)); + } catch (IOException e) { + throw new OntologyLoadException(e); + } + return owlOntology; + } + + private void loadDataFromPairwiseMappings(Map pairwiseMappings) { + pairwiseMappings.forEach(this::addInstanceOf); + } + + private void loadLineIntoDataOntology(String line) { + String[] vals = line.split("\t", 2); + String[] terms = vals[1].split(";"); + for (String t : terms) { + 
addInstanceOf(vals[0], t); + } + } + + private void addInstanceOf(String individual, String ontologyClass) { +// logger.info("Adding axiom " + individual + ": " + ontologyClass); + OWLDataFactory owlDataFactory = ontologyManager.getOWLDataFactory(); + OWLClass owlClass = owlDataFactory.getOWLClass(toIri(ontologyClass)); + OWLNamedIndividual owlNamedIndividual = owlDataFactory.getOWLNamedIndividual(toIri(individual)); + OWLClassAssertionAxiom axiom = owlDataFactory.getOWLClassAssertionAxiom(owlClass, owlNamedIndividual); + addAxiom(axiom); + } + + private IRI toIri(String id) { + return IRI.create(curieUtil.getIri(id).orElse(id)); + } + + private OWLOntology loadOwlOntologyFromDocument(Path path) { + try { + logger.info("Loading ontology from document " + path); + return ontologyManager.loadOntologyFromOntologyDocument(path.toFile()); + } catch (OWLOntologyCreationException e) { + throw new OntologyLoadException(e); + } + } + + private OWLOntology loadOwlOntologyFromDocument(InputStream is) { + try { + return ontologyManager.loadOntologyFromOntologyDocument(is); + } catch (OWLOntologyCreationException e) { + logger.error("Unable to create ontology" + e); + throw new OntologyLoadException(e); + } + } +} + diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OntologyLoadException.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OntologyLoadException.java new file mode 100644 index 0000000..54751a7 --- /dev/null +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OntologyLoadException.java @@ -0,0 +1,22 @@ +package org.monarchinitiative.owlsim.io; + +/** + * Unchecked exception raised when an ontology or one of its data sources cannot be loaded. + * + * @author Jules Jacobsen + */ +class OntologyLoadException extends RuntimeException { + + OntologyLoadException(String message) { + super(message); + } + + /** + * Wraps the underlying failure, preserving it as the cause so its stack trace is not lost. + * + * @param e the exception that made loading fail + */ + OntologyLoadException(Exception e) { + super(e); + } +} diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OntologySourceData.java 
b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OntologySourceData.java new file mode 100644 index 0000000..8e47937 --- /dev/null +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OntologySourceData.java @@ -0,0 +1,193 @@ +package org.monarchinitiative.owlsim.io; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; +import org.semanticweb.owlapi.model.OWLOntology; + +import java.io.File; +import java.util.*; + +/** + * Simple container for storing the original data sources used for constructing the {@link OWLOntology} and the + * {@link BMKnowledgeBase}. + * + * @author Jules Jacobsen + */ +public class OntologySourceData { + + private final List ontologies; + private final List dataOntologies; + + private final Map curies; + private final List dataTsvs; + //TODO: add these so people can programmatically add individual assertions + private final Map pairwiseMappings; + + private OntologySourceData(Builder builder) { + this.ontologies = distinctImmutableListOf(builder.ontologies); + this.dataOntologies = distinctImmutableListOf(builder.dataOntologies); + this.curies = ImmutableMap.copyOf(builder.curies); + this.dataTsvs = distinctImmutableListOf(builder.dataTsvs); + this.pairwiseMappings = ImmutableMap.copyOf(builder.pairwiseMappings); + } + + private ImmutableList distinctImmutableListOf(List list) { + return list.stream().distinct().collect(ImmutableList.toImmutableList()); + } + + public List getOntologies() { + return ontologies; + } + + public List getDataOntologies() { + return dataOntologies; + } + + public Map getCuries() { + return curies; + } + + public List getDataTsvs() { + return dataTsvs; + } + + public Map getPairwiseMappings() { + return pairwiseMappings; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + OntologySourceData that 
= (OntologySourceData) o; + return Objects.equals(ontologies, that.ontologies) && + Objects.equals(dataOntologies, that.dataOntologies) && + Objects.equals(curies, that.curies) && + Objects.equals(dataTsvs, that.dataTsvs) && + Objects.equals(pairwiseMappings, that.pairwiseMappings); + } + + @Override + public int hashCode() { + return Objects.hash(ontologies, dataOntologies, curies, dataTsvs, pairwiseMappings); + } + + @Override + public String toString() { + return "OntologySourceData{" + + "ontologies=" + ontologies + + ", dataOntologies=" + dataOntologies + + ", curies=" + curies + + ", dataTsvs=" + dataTsvs + + ", pairwiseMappings=" + pairwiseMappings + + '}'; + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + private List ontologies = new ArrayList<>(); + private List dataOntologies = new ArrayList<>(); + //Curies need to be supplied if people are adding data using TSV files or pairwise mappings using curies. + private Map curies = Collections.emptyMap(); + private List dataTsvs = new ArrayList<>(); + private Map pairwiseMappings = Collections.emptyMap(); + + private Builder(){ + //use the static method. + } + + /** + * @param curies + * @return + */ + public Builder curies(Map curies) { + this.curies = curies; + return this; + } + + /** + * Loads an OWL/OBO ontology from a file. + * + * @param file + */ + public Builder ontology(File file) { + ontologies.add(file.getAbsolutePath()); + return this; + } + + /** + * Loads an OWL/OBO ontology from a path. + * + * @param path + */ + public Builder ontology(String path) { + this.ontologies.add(path); + return this; + } + + public Builder ontologies(String... paths) { + this.ontologies.addAll(Arrays.asList(paths)); + return this; + } + + /** + * Loads, and merges the OWL/OBO ontologies from the paths given. These can be remote, local uncompressed or + * gzipped. 
+ * + * @param paths + */ + public Builder ontologies(Collection paths) { + this.ontologies.addAll(paths); + return this; + } + + + public Builder dataOntology(String path) { + this.dataOntologies.add(path); + return this; + } + + public Builder dataOntologies(String... paths) { + this.dataOntologies.addAll(Arrays.asList(paths)); + return this; + } + + public Builder dataOntologies(Collection paths) { + this.dataOntologies.addAll(paths); + return this; + } + + public Builder dataTsv(String path) { + dataTsvs.add(path); + return this; + } + + public Builder dataTsv(String... paths) { + dataTsvs.addAll(Arrays.asList(paths)); + return this; + } + + public Builder dataTsv(Collection paths) { + dataTsvs.addAll(paths); + return this; + } + + public OntologySourceData build() { + if(ontologies.isEmpty()) { + throw new OntologyLoadException("No ontology defined."); + } + if (curies.isEmpty() && hasNonOntologyData()) { + throw new OntologyLoadException("Cannot load TSV data sources or pairwise mappings when curies have not been defined."); + } + return new OntologySourceData(this); + } + + private boolean hasNonOntologyData() { + return !dataTsvs.isEmpty() || !pairwiseMappings.isEmpty(); + } + } +} diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OwlKnowledgeBase.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OwlKnowledgeBase.java new file mode 100644 index 0000000..cf206de --- /dev/null +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/io/OwlKnowledgeBase.java @@ -0,0 +1,163 @@ +package org.monarchinitiative.owlsim.io; + +import org.apache.log4j.Logger; +import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; +import org.monarchinitiative.owlsim.kb.impl.BMKnowledgeBaseOWLAPIImpl; +import org.semanticweb.elk.owlapi.ElkReasonerFactory; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.reasoner.OWLReasonerFactory; +import uk.ac.manchester.cs.owl.owlapi.concurrent.Concurrency; + +import 
java.io.File; +import java.util.Collection; +import java.util.Map; + +/** + * A convenience wrapper to enable easy loading of a {@link BMKnowledgeBase} from OWL ontologies and data files. + * + * @author Jules Jacobsen + */ +public final class OwlKnowledgeBase { + + private static final Logger logger = Logger.getLogger(OwlKnowledgeBase.class); + + private OwlKnowledgeBase() { + //class is uninstantiable + } + + public static Loader loader() { + return new Loader(); + } + + public static class Loader { + + private Concurrency concurrencyType = Concurrency.CONCURRENT; + private OWLReasonerFactory owlReasonerFactory = new ElkReasonerFactory(); + + //TODO: do we want to expose this or keep it here? Chris mentioned we might want a way of keeping track of what the original source data was. + //So this is where it is. It's so tightly coupled this is literally a conjoined twin at the moment. + private OntologySourceData.Builder sourceDataBuilder = OntologySourceData.builder(); + + private Loader() { + //uses the static load() method in the parent class + } + + /** + * @param curies + * @return + */ + public Loader loadCuries(Map curies) { + sourceDataBuilder.curies(curies); + return this; + } + + /** + * Loads an OWL/OBO ontology from a file. + * + * @param file the OWL/OBO ontology file to register as a source + */ + public Loader loadOntology(File file) { + // Only the file's absolute path is recorded here; nothing is read until createKnowledgeBase() runs. + sourceDataBuilder.ontology(file); + return this; + } + + /** + * Loads an OWL/OBO ontology from a path. + * + * @param path + */ + public Loader loadOntology(String path) { + sourceDataBuilder.ontology(path); + return this; + } + + public Loader loadOntologies(String... paths) { + sourceDataBuilder.ontologies(paths); + return this; + } + + /** + * Loads, and merges the OWL/OBO ontologies from the paths given. These can be remote, local uncompressed or + * gzipped. 
+ * + * @param paths + */ + public Loader loadOntologies(Collection paths) { + sourceDataBuilder.ontologies(paths); + return this; + } + + public Loader loadDataFromOntology(String path) { + sourceDataBuilder.dataOntology(path); + return this; + } + + public Loader loadDataFromOntologies(String... paths) { + sourceDataBuilder.dataOntologies(paths); + return this; + } + + public Loader loadDataFromOntologies(Collection paths) { + sourceDataBuilder.dataOntologies(paths); + return this; + } + + public Loader loadDataFromTsv(String path) { + sourceDataBuilder.dataTsv(path); + return this; + } + + public Loader loadDataFromTsv(String... paths) { + sourceDataBuilder.dataTsv(paths); + return this; + } + + public Loader loadDataFromTsv(Collection paths) { + sourceDataBuilder.dataTsv(paths); + return this; + } + + /** + * Creates an {@link OWLOntologyManager} that is configured with the standard parsers and storers and provides + * locking for concurrent access (default). + */ + public Loader useConcurrentOntologyManager() { + concurrencyType = Concurrency.CONCURRENT; + return this; + } + + /** + * Creates an {@link OWLOntologyManager} that is configured with standard parsers, + * storers etc. 
+ */ + public Loader useStandardOntologyManager() { + concurrencyType = Concurrency.NON_CONCURRENT; + return this; + } + + /** + * Allows overriding of the default {@link ElkReasonerFactory} + * + * @param owlReasonerFactory a concrete implementation of the {@link OWLReasonerFactory} + */ + public Loader useReasonerFactory(OWLReasonerFactory owlReasonerFactory) { + this.owlReasonerFactory = owlReasonerFactory; + return this; + } + + /** + * @return handle for a Bitmap-based Knowledge Base + */ + public BMKnowledgeBase createKnowledgeBase() { + + OntologySourceData sourceData = sourceDataBuilder.build(); + Ontology ontology = Ontology.load(sourceData, concurrencyType); + + return BMKnowledgeBaseOWLAPIImpl.create(ontology, owlReasonerFactory); + } + + } + + +} diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/filter/FilterEngine.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/filter/FilterEngine.java index a13ea08..1bd5418 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/filter/FilterEngine.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/filter/FilterEngine.java @@ -1,13 +1,12 @@ package org.monarchinitiative.owlsim.kb.filter; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - +import com.googlecode.javaewah.EWAHCompressedBitmap; import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; -import com.googlecode.javaewah.EWAHCompressedBitmap; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; /** * Applies a {@link Filter} on a set of individuals. 
@@ -92,7 +91,7 @@ else if (filter instanceof TypeFilter) { else { typesBM = knowledgeBase.getTypesBM(id); } - LOG.info("typeId = " + tf.getTypeId()); +// LOG.info("typeId = " + tf.getTypeId()); int ix = knowledgeBase.getClassIndex(tf.getTypeId()); return typesBM.getPositions().contains(ix) ^ tf.isNegated(); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java index aa471b5..7823293 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java @@ -1,17 +1,13 @@ package org.monarchinitiative.owlsim.kb.impl; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.time.Duration; +import java.time.Instant; +import java.util.*; import java.util.stream.Collectors; import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.io.OWLLoader; +import org.monarchinitiative.owlsim.io.Ontology; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.kb.CURIEMapper; import org.monarchinitiative.owlsim.kb.LabelMapper; @@ -51,7 +47,6 @@ import org.semanticweb.owlapi.reasoner.OWLReasoner; import org.semanticweb.owlapi.reasoner.OWLReasonerFactory; -import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.googlecode.javaewah.EWAHCompressedBitmap; import com.hp.hpl.jena.query.Query; @@ -80,460 +75,437 @@ */ public class BMKnowledgeBaseOWLAPIImpl implements BMKnowledgeBase { - private Logger LOG = Logger.getLogger(BMKnowledgeBaseOWLAPIImpl.class); + private Logger LOG = Logger.getLogger(BMKnowledgeBaseOWLAPIImpl.class); + + private KBMetadata 
kbMetdata; + + private EWAHKnowledgeBaseStore ontoEWAHStore; + private OWLOntology owlOntology; + private OWLOntology owlDataOntology; + private OWLReasoner owlReasoner; + + private Map, Integer> classNodeToIntegerMap; + private Node[] classNodeArray; + private Map, Integer> individualNodeToIntegerMap; + private Node[] individualNodeArray; + + private Set> classNodes; + private Set> individualNodes; + + private Map> classToNodeMap; + private Map> individualToNodeMap; + // private Set classesInSignature; + private Set individualsInSignature; + private Map>> propertyValueMapMap; + private Map> opposingClassMap = new HashMap<>(); + + private Map> individualToWeightedDirectTypeMap = new HashMap<>(); + + private int[] individualCountPerClassArray; + + private CURIEMapper curieMapper; + private LabelMapper labelMapper; + private CurieUtil curieUtil; + + /** + * @deprecated - use the create() constructor method with the single owlOntology - this should contain all the required axioms. + * @param owlOntology + * @param owlDataOntology + * TODO - fix this + * @param reasonerFactory + */ + public BMKnowledgeBaseOWLAPIImpl(OWLOntology owlOntology, OWLOntology owlDataOntology, OWLReasonerFactory reasonerFactory, CurieUtil curieUtil) { + Objects.requireNonNull(owlOntology, "OWLOntology required - cannot be null."); + Objects.requireNonNull(reasonerFactory, "OWLReasonerFactory required, cannot be null."); + Objects.requireNonNull(curieUtil, "CurieUtil required - cannot be null"); + + Instant start = Instant.now(); + curieMapper = new CURIEMapperImpl(); + labelMapper = new LabelMapperImpl(curieMapper); + + this.owlOntology = owlOntology; + this.owlDataOntology = owlDataOntology; + if (owlDataOntology != null) { + translateFromDataOntology(); + } + LOG.info("Creating OWL reasoner"); + this.owlReasoner = reasonerFactory.createReasoner(owlOntology); + this.curieUtil = curieUtil; + createMap(); + ontoEWAHStore = new EWAHKnowledgeBaseStore(classNodes.size(), individualNodes.size()); + 
storeInferences(); + populateLabelsFromOntology(labelMapper, owlOntology); + if (owlDataOntology != null) { + LOG.info("Fetching labels from " + owlDataOntology); + // the data ontology may contain labels of data items + populateLabelsFromOntology(labelMapper, owlDataOntology); + } + Instant end = Instant.now(); + LOG.info("Knowledgebase loaded in " + Duration.between(start, end).toMillis() + " ms"); + } + + public static BMKnowledgeBase create(Ontology ontology, OWLReasonerFactory owlReasonerFactory) { + return new BMKnowledgeBaseOWLAPIImpl(ontology.getOwlOntology(), null, owlReasonerFactory, ontology.getCurieUtil()); + } + + public static BMKnowledgeBase create(OWLOntology owlOntology, OWLReasonerFactory rf, CurieUtil curieUtil) { + return new BMKnowledgeBaseOWLAPIImpl(owlOntology, null, rf, curieUtil); + } + + /** + * @deprecated + * @param owlOntology + * @param owlDataOntology + * @param rf + * @return + */ + public static BMKnowledgeBase create(OWLOntology owlOntology, OWLOntology owlDataOntology, OWLReasonerFactory rf, CurieUtil curieUtil) { + return new BMKnowledgeBaseOWLAPIImpl(owlOntology, owlDataOntology, rf, curieUtil); + } + + public KBMetadata getKbMetdata() { + return kbMetdata; + } + + public void setKbMetdata(KBMetadata kbMetdata) { + this.kbMetdata = kbMetdata; + } + + private String getShortForm(IRI iri) { + if (curieUtil.getCurieMap().isEmpty()) { + return iri.toString(); + } else { + String iriString = iri.toString(); + return curieUtil.getCurie(iriString).orElse(iriString); + } + } + + private void populateLabelsFromOntology(LabelMapper labelMapper, OWLOntology ontology) { + LOG.info("Populating labels from " + ontology); + int n = 0; + for (OWLAnnotationAssertionAxiom aaa : ontology.getAxioms(AxiomType.ANNOTATION_ASSERTION)) { + if (aaa.getProperty().isLabel()) { + if (aaa.getSubject() instanceof IRI && aaa.getValue() instanceof OWLLiteral) { + labelMapper.add(getShortForm((IRI) aaa.getSubject()), ((OWLLiteral) aaa.getValue()).getLiteral()); 
+ n++; + } + } + } + if (n == 0) { + LOG.info("Setting labels from fragments"); + Set objs = new HashSet<>(); + objs.addAll(ontology.getClassesInSignature()); + objs.addAll(ontology.getIndividualsInSignature()); + for (OWLNamedObject obj : objs) { + labelMapper.add(getShortForm(obj.getIRI()), obj.getIRI().getFragment()); + n++; + } + } + LOG.info("Label axioms mapped: " + n); + } + + /** + * @return utility object to map labels to ids + */ + public LabelMapper getLabelMapper() { + return labelMapper; + } + + /** + * @return set of all classes + */ + public Set getClassesInSignature() { + return classToNodeMap.keySet(); // TODO - consider optimizing + } + + /** + * @return set of all class identifiers + */ + public Set getClassIdsInSignature() { + Set ids = new HashSet<>(); + for (OWLClass i : getClassesInSignature()) { + ids.add(getShortForm(i.getIRI())); + } + return ids; + } + + public Set getClassIdsByOntology(String ont) { + return getClassIdsInSignature().stream().filter(x -> isIn(x, ont)).collect(Collectors.toSet()); + } + + /** + * @param id + * @param ont + * @return true if id is in ontology + */ + private boolean isIn(String id, String ont) { + // TODO - use curie util + return id.startsWith(ont + ":") || id.contains("/" + ont + "_"); + } + + public int getNumClassNodes() { + return classNodeArray.length; + } + + /** + * @return set of all individual identifiers + */ + protected Set getIndividualsInSignature() { + return individualsInSignature; + } + + /** + * @return ids + */ + public Set getIndividualIdsInSignature() { + Set ids = new HashSet<>(); + for (OWLNamedIndividual i : getIndividualsInSignature()) { + ids.add(getShortForm(i.getIRI())); + } + return ids; + } + + /** + * @return OWLAPI representation of the ontology + */ + protected OWLOntology getOwlOntology() { + return owlOntology; + } + + // Assumption: data ontology includes ObjectPropertyAssertions + // TODO: make flexible + // TODO: extract associations + private void 
translateFromDataOntology() { + // TODO: allow other axiom types + for (OWLObjectPropertyAssertionAxiom opa : owlDataOntology.getAxioms(AxiomType.OBJECT_PROPERTY_ASSERTION)) { + OWLIndividual obj = opa.getObject(); + if (obj instanceof OWLNamedIndividual) { + OWLClass type = getOWLDataFactory().getOWLClass(((OWLNamedIndividual) obj).getIRI()); + OWLClassAssertionAxiom ca = getOWLDataFactory().getOWLClassAssertionAxiom(type, opa.getSubject()); + owlOntology.getOWLOntologyManager().addAxiom(owlOntology, ca); + } + } + } + + // Each OWLClass and OWLIndividual is mapped to an Integer index + private void createMap() { + LOG.info("Creating mapping from ontology objects to integers"); + classNodes = new HashSet<>(); + individualNodes = new HashSet<>(); + Set classesInSignature; + classesInSignature = owlOntology.getClassesInSignature(true); + LOG.info("|classes|=" + classesInSignature.size()); + classesInSignature.add(getOWLThing()); + classesInSignature.remove(getOWLNothing()); + individualsInSignature = owlOntology.getIndividualsInSignature(true); + LOG.info("|individuals|=" + individualsInSignature.size()); + classToNodeMap = new HashMap<>(); + individualToNodeMap = new HashMap<>(); + classNodeToIntegerMap = new HashMap<>(); + individualNodeToIntegerMap = new HashMap<>(); + propertyValueMapMap = new HashMap<>(); + final HashMap, Integer> classNodeToFrequencyMap = new HashMap<>(); + final HashMap, Double> classNodeToFreqDepthMap = new HashMap<>(); + for (OWLClass c : classesInSignature) { + if (owlReasoner.getInstances(c, false).isEmpty()) { + // TODO: deal with subclasses + // LOG.info("Skipping non-instantiated class: "+c); + // continue; + } + Node node = owlReasoner.getEquivalentClasses(c); + if (node.contains(getOWLNothing())) { + LOG.warn("Ignoring unsatisfiable class: " + c); + continue; + } + classNodes.add(node); + classToNodeMap.put(c, node); + int numAncNodes = owlReasoner.getSuperClasses(c, false).getNodes().size(); + int freq = owlReasoner.getInstances(c, 
false).getNodes().size(); + classNodeToFrequencyMap.put(node, freq); + + // freq depth is inversely correlated informativeness; + // frequency is primary measure (high freq = low informativeness); + // if frequency is tied, then tie is broken by number of ancestors + // (high ancestors = high informativeness) + // note that if frequency is not tied, then depth/ancestors should make + // no overall difference - we ensure this by taking the proportion of + // ancestor nodes divided by number of classes (there are always equal + // or more classes than nodes) + double freqDepth = freq + 1 - (numAncNodes / (double) classesInSignature.size()); + // LOG.info("freqDepth = "+freq+" "+freqDepth); + classNodeToFreqDepthMap.put(node, freqDepth); + } + + for (OWLNamedIndividual i : individualsInSignature) { + Node node = owlReasoner.getSameIndividuals(i); + individualNodes.add(node); + individualToNodeMap.put(i, node); + setPropertyValues(owlOntology, i); + if (owlDataOntology != null) + setPropertyValues(owlDataOntology, i); + } + + // Order class nodes such that LOW frequencies (HIGH Information Content) + // nodes are have LOWER indices + // TODO: use depth as a tie breaker + List> classNodesSorted = new ArrayList<>(classNodes); + classNodesSorted.sort((n1, n2) -> { + double f1 = classNodeToFreqDepthMap.get(n1); + double f2 = classNodeToFreqDepthMap.get(n2); + if (f1 < f2) + return -1; + if (f1 > f2) + return 1; + return 0; + }); + int numClassNodes = classNodesSorted.size(); + classNodeArray = classNodesSorted.toArray(new Node[numClassNodes]); + individualCountPerClassArray = new int[numClassNodes]; + for (int i = 0; i < numClassNodes; i++) { + classNodeToIntegerMap.put(classNodeArray[i], i); + // LOG.info(classNodeArray[i] + " ix="+i + " + // FREQ="+classNodeToFrequencyMap.get(classNodeArray[i])); + // LOG.info(classNodeArray[i] + " ix="+i + " + // IX_REV="+classNodeToIntegerMap.get(classNodeArray[i])); + individualCountPerClassArray[i] = 
classNodeToFrequencyMap.get(classNodeArray[i]); + } + individualNodeArray = individualNodes.toArray(new Node[individualNodes.size()]); + for (int i = 0; i < individualNodes.size(); i++) { + individualNodeToIntegerMap.put(individualNodeArray[i], i); + } + + } + + private void setPropertyValues(OWLOntology ont, OWLNamedIndividual i) { + Preconditions.checkNotNull(i); + Map> pvm = new HashMap<>(); + String id = getShortForm(i.getIRI()); + propertyValueMapMap.put(id, pvm); + for (OWLIndividualAxiom ax : ont.getAxioms(i)) { + if (ax instanceof OWLPropertyAssertionAxiom) { + OWLPropertyAssertionAxiom paa = (OWLPropertyAssertionAxiom) ax; + OWLPropertyExpression p = paa.getProperty(); + if (p instanceof OWLObjectProperty || p instanceof OWLDataProperty) { + String pid; + if (p instanceof OWLObjectProperty) + pid = getShortForm(((OWLObjectProperty) p).getIRI()); + else + pid = getShortForm(((OWLDataProperty) p).getIRI()); + OWLPropertyAssertionObject obj = paa.getObject(); + if (obj instanceof OWLLiteral) { + addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral()); + } else if (obj instanceof OWLNamedIndividual) { + addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI())); + + } + + } else if (false) { + String pid = getShortForm(((OWLDataProperty) p).getIRI()); + OWLLiteral obj = ((OWLDataPropertyAssertionAxiom) paa).getObject(); + if (obj instanceof OWLLiteral) { + addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral()); + } else if (obj instanceof OWLNamedIndividual) { + addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI())); + + } + + } + } + } + + } + + private void addPropertyValue(Map> pvm, String pid, String v) { + // LOG.debug("PV="+pid+"="+v); + if (!pvm.containsKey(pid)) + pvm.put(pid, new HashSet<>()); + pvm.get(pid).add(v); + } + + private void addOpposingClassPair(OWLClass c, OWLClassExpression dc) { + addOpposingClassPairAsym(c, dc); + if (!dc.isAnonymous()) + addOpposingClassPairAsym(dc.asOWLClass(), c); + 
} + + private void addOpposingClassPairAsym(OWLClass c, OWLClassExpression d) { + if (!opposingClassMap.containsKey(c)) + opposingClassMap.put(c, new HashSet<>()); + opposingClassMap.get(c).add(d); + } + + private void storeInferences() { + + // Note: if there are any nodes containing >1 class or individual, then + // the store method is called redundantly. This is unlikely to affect performance, + // and the semantics are unchanged + for (OWLClass c : getClassesInSignature()) { + int clsIndex = getIndex(c); + // LOG.info("Storing inferences for "+c+" --> " + clsIndex); + Set sups = getIntegersForClassSet(owlReasoner.getSuperClasses(c, false)); + sups.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c))); + + Set subs = getIntegersForClassSet(owlReasoner.getSubClasses(c, false)); + subs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c))); + + ontoEWAHStore.setDirectSuperClasses(clsIndex, getIntegersForClassSet(owlReasoner.getSuperClasses(c, true))); + ontoEWAHStore.setSuperClasses(clsIndex, sups); + ontoEWAHStore.setDirectSubClasses(clsIndex, getIntegersForClassSet(owlReasoner.getSubClasses(c, true))); + ontoEWAHStore.setSubClasses(clsIndex, subs); + + // Find all disjoint pairs plus opposing pairs + for (OWLAnnotationAssertionAxiom aaa : owlOntology.getAnnotationAssertionAxioms(c.getIRI())) { + // RO_0002604 is-opposite-of. 
TODO - use a vocabulary object + if (aaa.getProperty().getIRI().toString().equals("http://purl.obolibrary.org/obo/RO_0002604")) { + OWLAnnotationValue v = aaa.getValue(); + if (v instanceof IRI) { + IRI dciri = (IRI) v; + OWLClass dc = owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(dciri); + addOpposingClassPair(c, dc); + + } + } + } + + for (OWLDisjointClassesAxiom dca : owlOntology.getDisjointClassesAxioms(c)) { + for (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) { + addOpposingClassPair(c, dc); + } + } + + // direct individuals are those asserted to be of type c or anything equivalent to c + Set individualInts = new HashSet<>(); + for (OWLClass ec : owlReasoner.getEquivalentClasses(c).getEntities()) { + for (OWLClassAssertionAxiom ax : owlOntology.getClassAssertionAxioms(ec)) { + if (ax.getIndividual().isNamed()) { + individualInts.add(getIndex(ax.getIndividual().asOWLNamedIndividual())); + } + } + } + ontoEWAHStore.setDirectIndividuals(clsIndex, individualInts); + + } + + // populate frequency-awareness map + individualToWeightedDirectTypeMap = new HashMap<>(); + for (OWLNamedIndividual i : individualsInSignature) { + int individualIndex = getIndex(i); + // LOG.info("String inferences for "+i+" --> " +individualIndex); + ontoEWAHStore.setDirectTypes(individualIndex, getIntegersForClassSet(owlReasoner.getTypes(i, true))); + ontoEWAHStore.setTypes(individualIndex, getIntegersForClassSet(owlReasoner.getTypes(i, false))); - private KBMetadata kbMetdata; - - private EWAHKnowledgeBaseStore ontoEWAHStore; - private OWLOntology owlOntology; - private OWLOntology owlDataOntology; - private OWLReasoner owlReasoner; - - private Map, Integer> classNodeToIntegerMap; - private Node[] classNodeArray; - private Map, Integer> individualNodeToIntegerMap; - private Node[] individualNodeArray; - - private Set> classNodes; - private Set> individualNodes; - - private Map> classToNodeMap; - private Map> individualToNodeMap; - // private Set 
classesInSignature; - private Set individualsInSignature; - private Map>> propertyValueMapMap; - Map> opposingClassMap = - new HashMap>(); - - Map> individualToWeightedDirectTypeMap = new HashMap<>(); - - - private int[] individualCountPerClassArray; - - CURIEMapper curieMapper; - LabelMapper labelMapper; - CurieUtil curieUtil; - - /** - * @param owlOntology - * @param owlDataOntology TODO - fix this - * @param rf - */ - public BMKnowledgeBaseOWLAPIImpl(OWLOntology owlOntology, - OWLOntology owlDataOntology, OWLReasonerFactory rf, - CurieUtil curieUtil) { - super(); - curieMapper = new CURIEMapperImpl(); - labelMapper = new LabelMapperImpl(curieMapper); - - this.owlOntology = owlOntology; - this.owlDataOntology = owlDataOntology; - if (owlDataOntology != null) { - translateFromDataOntology(); - } - this.owlReasoner = rf.createReasoner(owlOntology); - this.curieUtil = curieUtil; - createMap(); - ontoEWAHStore = new EWAHKnowledgeBaseStore(classNodes.size(), individualNodes.size()); - storeInferences(); - populateLabelsFromOntology(labelMapper, owlOntology); - if (owlDataOntology != null) { - LOG.info("Fetching labels from " + owlDataOntology); - // the data ontology may contain labels of data items - populateLabelsFromOntology(labelMapper, owlDataOntology); - } - } - - public static BMKnowledgeBase create(OWLOntology owlOntology, OWLReasonerFactory rf, - CurieUtil curieUtil) { - return new BMKnowledgeBaseOWLAPIImpl(owlOntology, null, rf, curieUtil); - } - - /** - * @param owlOntology - * @param owlDataOntology - * @param rf - * @return - */ - public static BMKnowledgeBase create(OWLOntology owlOntology, OWLOntology owlDataOntology, - OWLReasonerFactory rf, CurieUtil curieUtil) { - return new BMKnowledgeBaseOWLAPIImpl(owlOntology, owlDataOntology, rf, curieUtil); - } - - - - public KBMetadata getKbMetdata() { - return kbMetdata; - } - - - - public void setKbMetdata(KBMetadata kbMetdata) { - this.kbMetdata = kbMetdata; - } - - private String getShortForm(IRI iri) { - 
if (curieUtil.getCurieMap().isEmpty()) { - return iri.toString(); - } else { - Optional curie = curieUtil.getCurie(iri.toString()); - if (curie.isPresent()) { - return curie.get(); - } - else { - return iri.toString(); - } - } - } - - private void populateLabelsFromOntology(LabelMapper labelMapper, OWLOntology ontology) { - LOG.info("Populating labels from " + ontology); - int n = 0; - for (OWLAnnotationAssertionAxiom aaa : ontology.getAxioms(AxiomType.ANNOTATION_ASSERTION)) { - if (aaa.getProperty().isLabel()) { - if (aaa.getSubject() instanceof IRI && aaa.getValue() instanceof OWLLiteral) { - labelMapper.add(getShortForm((IRI) aaa.getSubject()), - ((OWLLiteral) aaa.getValue()).getLiteral()); - n++; - } - } - } - if (n == 0) { - LOG.info("Setting labels from fragments"); - Set objs = new HashSet(); - objs.addAll(ontology.getClassesInSignature()); - objs.addAll(ontology.getIndividualsInSignature()); - for (OWLNamedObject obj : objs) { - labelMapper.add(getShortForm(obj.getIRI()), obj.getIRI().getFragment()); - n++; - } - } - LOG.info("Label axioms mapped: " + n); - } - - /** - * @return utility object to map labels to ids - */ - public LabelMapper getLabelMapper() { - return labelMapper; - } - - /** - * @return set of all classes - */ - public Set getClassesInSignature() { - return classToNodeMap.keySet(); // TODO - consider optimizing - } - - /** - * @return set of all class identifiers - */ - public Set getClassIdsInSignature() { - Set ids = new HashSet(); - for (OWLClass i : getClassesInSignature()) { - ids.add(getShortForm(i.getIRI())); - } - return ids; - } - - public Set getClassIdsByOntology(String ont) { - return getClassIdsInSignature().stream().filter(x -> isIn(x, ont)).collect(Collectors.toSet()); - } - - /** - * @param id - * @param ont - * @return true if id is in ontology - */ - public boolean isIn(String id, String ont) { - // TODO - use curie util - return id.startsWith(ont+":") || id.contains("/"+ont+"_"); - } - - public int getNumClassNodes() { - 
return classNodeArray.length; - } - - - - /** - * @return set of all individual identifiers - */ - protected Set getIndividualsInSignature() { - return individualsInSignature; - } - - /** - * @return ids - */ - public Set getIndividualIdsInSignature() { - Set ids = new HashSet(); - for (OWLNamedIndividual i : getIndividualsInSignature()) { - ids.add(getShortForm(i.getIRI())); - } - return ids; - } - - - - /** - * @return OWLAPI representation of the ontology - */ - protected OWLOntology getOwlOntology() { - return owlOntology; - } - - // Assumption: data ontology includes ObjectPropertyAssertions - // TODO: make flexible - // TODO: extract associations - private void translateFromDataOntology() { - // TODO: allow other axiom types - for (OWLObjectPropertyAssertionAxiom opa : owlDataOntology - .getAxioms(AxiomType.OBJECT_PROPERTY_ASSERTION)) { - OWLIndividual obj = opa.getObject(); - if (obj instanceof OWLNamedIndividual) { - OWLClass type = getOWLDataFactory().getOWLClass(((OWLNamedIndividual) obj).getIRI()); - OWLClassAssertionAxiom ca = - getOWLDataFactory().getOWLClassAssertionAxiom(type, opa.getSubject()); - owlOntology.getOWLOntologyManager().addAxiom(owlOntology, ca); - } - } - } - - - // Each OWLClass and OWLIndividual is mapped to an Integer index - private void createMap() { - LOG.info("Creating mapping from ontology objects to integers"); - classNodes = new HashSet>(); - individualNodes = new HashSet>(); - Set classesInSignature; - classesInSignature = owlOntology.getClassesInSignature(true); - LOG.info("|classes|=" + classesInSignature.size()); - classesInSignature.add(getOWLThing()); - classesInSignature.remove(getOWLNothing()); - individualsInSignature = owlOntology.getIndividualsInSignature(true); - LOG.info("|individuals|=" + individualsInSignature.size()); - classToNodeMap = new HashMap>(); - individualToNodeMap = new HashMap>(); - classNodeToIntegerMap = new HashMap, Integer>(); - individualNodeToIntegerMap = new HashMap, Integer>(); - 
propertyValueMapMap = new HashMap>>(); - final HashMap, Integer> classNodeToFrequencyMap = - new HashMap, Integer>(); - final HashMap, Double> classNodeToFreqDepthMap = - new HashMap, Double>(); - for (OWLClass c : classesInSignature) { - if (owlReasoner.getInstances(c, false).isEmpty()) { - // TODO: deal with subclasses - // LOG.info("Skipping non-instantiated class: "+c); - // continue; - } - Node node = owlReasoner.getEquivalentClasses(c); - if (node.contains(getOWLNothing())) { - LOG.warn("Ignoring unsatisfiable class: " + c); - continue; - } - classNodes.add(node); - classToNodeMap.put(c, node); - int numAncNodes = owlReasoner.getSuperClasses(c, false).getNodes().size(); - int freq = owlReasoner.getInstances(c, false).getNodes().size(); - classNodeToFrequencyMap.put(node, freq); - - // freq depth is inversely correlated informativeness; - // frequency is primary measure (high freq = low informativeness); - // if frequency is tied, then tie is broken by number of ancestors - // (high ancestors = high informativeness) - // note that if frequency is not tied, then depth/ancestors should make - // no overall difference - we ensure this by taking the proportion of - // ancestor nodes divided by number of classes (there are always equal - // or more classes than nodes) - double freqDepth = freq + 1 - (numAncNodes / (double) classesInSignature.size()); - // LOG.info("freqDepth = "+freq+" "+freqDepth); - classNodeToFreqDepthMap.put(node, freqDepth); - } - - for (OWLNamedIndividual i : individualsInSignature) { - Node node = owlReasoner.getSameIndividuals(i); - individualNodes.add(node); - individualToNodeMap.put(i, node); - setPropertyValues(owlOntology, i); - if (owlDataOntology != null) - setPropertyValues(owlDataOntology, i); - } - - // Order class nodes such that LOW frequencies (HIGH Information Content) - // nodes are have LOWER indices - // TODO: use depth as a tie breaker - List> classNodesSorted = new ArrayList>(classNodes); - 
Collections.sort(classNodesSorted, new Comparator>() { - public int compare(Node n1, Node n2) { - double f1 = classNodeToFreqDepthMap.get(n1); - double f2 = classNodeToFreqDepthMap.get(n2); - if (f1 < f2) - return -1; - if (f1 > f2) - return 1; - return 0; - } - }); - int numClassNodes = classNodesSorted.size(); - classNodeArray = classNodesSorted.toArray(new Node[numClassNodes]); - individualCountPerClassArray = new int[numClassNodes]; - for (int i = 0; i < numClassNodes; i++) { - classNodeToIntegerMap.put(classNodeArray[i], i); - // LOG.info(classNodeArray[i] + " ix="+i + " - // FREQ="+classNodeToFrequencyMap.get(classNodeArray[i])); - // LOG.info(classNodeArray[i] + " ix="+i + " - // IX_REV="+classNodeToIntegerMap.get(classNodeArray[i])); - individualCountPerClassArray[i] = classNodeToFrequencyMap.get(classNodeArray[i]); - } - individualNodeArray = individualNodes.toArray(new Node[individualNodes.size()]); - for (int i = 0; i < individualNodes.size(); i++) { - individualNodeToIntegerMap.put(individualNodeArray[i], i); - } - - } - - - private void setPropertyValues(OWLOntology ont, OWLNamedIndividual i) { - Preconditions.checkNotNull(i); - Map> pvm = new HashMap>(); - String id = getShortForm(i.getIRI()); - propertyValueMapMap.put(id, pvm); - for (OWLIndividualAxiom ax : ont.getAxioms(i)) { - if (ax instanceof OWLPropertyAssertionAxiom) { - OWLPropertyAssertionAxiom paa = (OWLPropertyAssertionAxiom) ax; - OWLPropertyExpression p = paa.getProperty(); - if (p instanceof OWLObjectProperty || p instanceof OWLDataProperty) { - String pid; - if (p instanceof OWLObjectProperty) - pid = getShortForm(((OWLObjectProperty) p).getIRI()); - else - pid = getShortForm(((OWLDataProperty) p).getIRI()); - OWLPropertyAssertionObject obj = paa.getObject(); - if (obj instanceof OWLLiteral) { - addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral()); - } else if (obj instanceof OWLNamedIndividual) { - addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI())); - 
- } - - } else if (false) { - String pid = getShortForm(((OWLDataProperty) p).getIRI()); - OWLLiteral obj = ((OWLDataPropertyAssertionAxiom) paa).getObject(); - if (obj instanceof OWLLiteral) { - addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral()); - } else if (obj instanceof OWLNamedIndividual) { - addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI())); - - } - - } - } - } - - } - - - private void addPropertyValue(Map> pvm, String pid, String v) { - // LOG.debug("PV="+pid+"="+v); - if (!pvm.containsKey(pid)) - pvm.put(pid, new HashSet()); - pvm.get(pid).add(v); - } - - private void addOpposingClassPair(OWLClass c, OWLClassExpression dc) { - addOpposingClassPairAsym(c, dc); - if (!dc.isAnonymous()) - addOpposingClassPairAsym(dc.asOWLClass(), c); - } - - private void addOpposingClassPairAsym(OWLClass c, OWLClassExpression d) { - if (!opposingClassMap.containsKey(c)) - opposingClassMap.put(c, new HashSet()); - opposingClassMap.get(c).add(d); - } - - private void storeInferences() { - - - // Note: if there are any nodes containing >1 class or individual, then - // the store method is called redundantly. 
This is unlikely to affect performance, - // and the semantics are unchanged - for (OWLClass c : getClassesInSignature()) { - int clsIndex = getIndex(c); - // LOG.info("Storing inferences for "+c+" --> " + clsIndex); - Set sups = getIntegersForClassSet(owlReasoner.getSuperClasses(c, false)); - sups.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c))); - - Set subs = getIntegersForClassSet(owlReasoner.getSubClasses(c, false)); - subs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c))); - - ontoEWAHStore.setDirectSuperClasses(clsIndex, - getIntegersForClassSet(owlReasoner.getSuperClasses(c, true))); - ontoEWAHStore.setSuperClasses(clsIndex, sups); - ontoEWAHStore.setDirectSubClasses(clsIndex, - getIntegersForClassSet(owlReasoner.getSubClasses(c, true))); - ontoEWAHStore.setSubClasses(clsIndex, subs); - - // Find all disjoint pairs plus opposing pairs - for (OWLAnnotationAssertionAxiom aaa : owlOntology.getAnnotationAssertionAxioms(c.getIRI())) { - // RO_0002604 is-opposite-of. 
TODO - use a vocabulary object - if (aaa.getProperty().getIRI().toString() - .equals("http://purl.obolibrary.org/obo/RO_0002604")) { - OWLAnnotationValue v = aaa.getValue(); - if (v instanceof IRI) { - IRI dciri = (IRI) v; - OWLClass dc = - owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(dciri); - addOpposingClassPair(c, dc); - - } - } - } - - for (OWLDisjointClassesAxiom dca : owlOntology.getDisjointClassesAxioms(c)) { - for (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) { - addOpposingClassPair(c, dc); - } - } - - - // direct individuals are those asserted to be of type c or anything equivalent to c - Set individualInts = new HashSet(); - for (OWLClass ec : owlReasoner.getEquivalentClasses(c).getEntities()) { - for (OWLClassAssertionAxiom ax : owlOntology.getClassAssertionAxioms(ec)) { - if (ax.getIndividual().isNamed()) { - individualInts.add(getIndex(ax.getIndividual().asOWLNamedIndividual())); - } - } - } - ontoEWAHStore.setDirectIndividuals(clsIndex, individualInts); - - } - - // populate frequency-awareness map - individualToWeightedDirectTypeMap = new HashMap<>(); - for (OWLNamedIndividual i : individualsInSignature) { - int individualIndex = getIndex(i); - // LOG.info("String inferences for "+i+" --> " +individualIndex); - ontoEWAHStore.setDirectTypes(individualIndex, - getIntegersForClassSet(owlReasoner.getTypes(i, true))); - ontoEWAHStore.setTypes(individualIndex, - getIntegersForClassSet(owlReasoner.getTypes(i, false))); - // TODO - ensure robust for equivalent individuals Map wmap = new HashMap<>(); individualToWeightedDirectTypeMap.put(individualIndex, wmap); for (OWLClassAssertionAxiom caax : owlOntology.getClassAssertionAxioms(i)) { int cix; - + // only associations to named classes if (caax.getClassExpression().isAnonymous()) { continue; } cix = getIndex(caax.getClassExpression().asOWLClass()); - + // we use reification to store probability for (OWLAnnotation ann : caax.getAnnotations()) { OWLAnnotationProperty prop = 
ann.getProperty(); @@ -557,7 +529,7 @@ private void storeInferences() { if (prop.getIRI().toString().contains("frequency")) { wmap.put(cix, lv.parseInteger()); } - + } } } @@ -566,442 +538,432 @@ private void storeInferences() { // Treat ClassAssertion( ComplementOf(c) i) as a negative assertion Set ncs = new HashSet(); Set ncsDirect = new HashSet(); - for (OWLClassAssertionAxiom cx : owlOntology.getClassAssertionAxioms(i)) { - // TODO: investigate efficiency - number of items set may be high - if (cx.getClassExpression() instanceof OWLObjectComplementOf) { - OWLObjectComplementOf nx = (OWLObjectComplementOf) (cx.getClassExpression()); - OWLClassExpression nc = nx.getOperand(); - ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(nc, false))); - ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(nc))); - ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(nc))); - } - } - - // Populate negative assertions from DisjointClasses axioms - for (OWLClass c : owlReasoner.getTypes(i, false).getFlattened()) { - LOG.debug("TESTING FOR DCs: " + c); - if (opposingClassMap.containsKey(c)) { - for (OWLClassExpression dc : opposingClassMap.get(c)) { - LOG.info(i + " Type: " + c + " DisjointWith: " + dc); - ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); - ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); - ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); - } - } - /* - * for (OWLDisjointClassesAxiom dca : owlOntology.getDisjointClassesAxioms(c)) { for - * (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) { - * LOG.info(i+" Type: "+c+" DisjointWith: "+dc); - * ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); - * ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); - * ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); } } for - * (OWLAnnotationAssertionAxiom aaa : 
owlOntology.getAnnotationAssertionAxioms(c.getIRI())) - * { // RO_0002604 is-opposite-of. TODO - use a vocabulary object if - * (aaa.getProperty().getIRI().toString().equals("http://purl.obolibrary.org/obo/RO_0002604" - * )) { OWLAnnotationValue v = aaa.getValue(); if (v instanceof IRI) { IRI dciri = (IRI)v; - * OWLClass dc = owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(dciri); - * ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); - * ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); - * ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); - * - * } } } - */ - } - - ontoEWAHStore.setNegatedTypes(individualIndex, ncs); // TODO - determine if storing all - // inferred negated types is too - // inefficient - ontoEWAHStore.setDirectNegatedTypes(individualIndex, ncsDirect); - } - - } - - // TODO - private void storeIndividualProperties() { - for (OWLNamedIndividual i : individualsInSignature) { - for (OWLIndividualAxiom ax : owlOntology.getAxioms(i)) { - if (ax instanceof OWLObjectPropertyAssertionAxiom) { - OWLObjectPropertyExpression p = ((OWLObjectPropertyAssertionAxiom) ax).getProperty(); - } - } - } - } - - // TODO - complete this - // TODO - separate this out as it is not an OWLAPI model. Maybe sparql is overkill here? - // use sparql to query the memory model - private void storeIndividualToClassFrequencies() { - String sparql = ""; - Query query = QueryFactory.create(sparql); - Model model = null; - QueryExecution qexec = QueryExecutionFactory.create(query, model); - ResultSet results = qexec.execSelect(); - for (; results.hasNext();) { - QuerySolution soln = results.nextSolution(); - RDFNode x = soln.get("varName"); // Get a result variable by name. 
- Resource r = soln.getResource("VarR"); // Get a result variable - must be a resource - Literal l = soln.getLiteral("VarL"); // Get a result variable - must be a literal - } - } - - - - private Set getIntegersForClassSet(NodeSet nodeset) { - Set bits = new HashSet(); - for (Node n : nodeset.getNodes()) { - if (n.contains(getOWLNothing())) - continue; - bits.add(getIndexForClassNode(n)); - } - return bits; - } - - - private Set getIntegersForIndividualSet(NodeSet nodeset) { - Set bits = new HashSet(); - for (Node n : nodeset.getNodes()) { - bits.add(getIndexForIndividualNode(n)); - } - return bits; - } - - /** - * Each class is mapped to an integer - * - * Note that equivalent classes will be mapped to the same integer - * - * @param c - * @return integer representation of class - */ - protected int getIndex(OWLClass c) { - Preconditions.checkNotNull(c); - return getIndexForClassNode(classToNodeMap.get(c)); - } - - /** - * @param id - * @return integer representation of class with id - */ - public int getClassIndex(String id) { - Preconditions.checkNotNull(id); - return getIndex(getOWLClass(id)); - } - - /** - * @param index - * @return OWLClass Node that corresponds to this index - */ - public Node getClassNode(int index) { - return classNodeArray[index]; - } - - /** - * @param index - * @return OWLClass Node that corresponds to this index - */ - public Node getIndividualNode(int index) { - return individualNodeArray[index]; - } - - /** - * @param cix - * @return bitmap - */ - public EWAHCompressedBitmap getDirectIndividualsBM(int cix) { - return ontoEWAHStore.getDirectIndividuals(cix); - } - - @Override - public EWAHCompressedBitmap getIndividualsBM(String classId) { - return getIndividualsBM(getClassIndex(classId)); - } - - @Override - public EWAHCompressedBitmap getIndividualsBM(int classIndex) { - if (classIndex == getRootIndex()) { - EWAHCompressedBitmap indsBM = new EWAHCompressedBitmap(); - indsBM.setSizeInBits(getIndividualIdsInSignature().size(), true); - 
return indsBM; - } - EWAHCompressedBitmap subsBM = getSubClasses(classIndex); - EWAHCompressedBitmap indsBM = null; - // Note this implementation iterates through all subclasses - // combining individuals; it is too expensive to store all inferred inds by class - for (int subcix : subsBM.getPositions()) { - EWAHCompressedBitmap bm = getDirectIndividualsBM(subcix); - if (indsBM == null) { - indsBM = bm; - } else { - indsBM = indsBM.or(bm); - } - } - return indsBM; - } - - - /** - * Note: each index can correspond to multiple classes c1...cn if this set is an equivalence set. - * In this case the representative classId is returned - * - * @param index - * @return classId - */ - public String getClassId(int index) { - Node n = getClassNode(index); - OWLClass c = n.getRepresentativeElement(); - return getShortForm(c.getIRI()); - } - - public Set getClassIds(int index) { - Node n = getClassNode(index); - Set cids = new HashSet(); - for (OWLClass c : n.getEntities()) { - cids.add(getShortForm(c.getIRI())); - } - return cids; - } - - public Set getClassIds(EWAHCompressedBitmap bm) { - Set cids = new HashSet(); - for (int x : bm) { - Node n = getClassNode(x); - for (OWLClass c : n.getEntities()) { - cids.add(getShortForm(c.getIRI())); - } - } - return cids; - } - - - /** - * @param id - * @return integer representation of class with id - */ - public int getIndividualIndex(String id) { - Preconditions.checkNotNull(id); - return getIndex(getOWLNamedIndividual(id)); - } - - /** - * Each set of equivalent classes (a class node) is mapped to a unique integer - * - * @param n - * @return integer representation of class node - */ - protected int getIndexForClassNode(Node n) { - Preconditions.checkNotNull(n); - if (!classNodeToIntegerMap.containsKey(n)) - LOG.error("No such node: " + n); - return classNodeToIntegerMap.get(n); - } - - /** - * Each individual is mapped to an integer - * - * Note that individuals that stand in a SameAs relationship to one another will be mapped to 
the - * same integer - * - * @param i - * @return integer representation of individual - */ - protected int getIndex(OWLNamedIndividual i) { - return getIndexForIndividualNode(individualToNodeMap.get(i)); - } - - /** - * Each set of same individuals (an individual node) is mapped to a unique integer - * - * @param n - * @return integer representation of class node - */ - protected int getIndexForIndividualNode(Node n) { - return individualNodeToIntegerMap.get(n); - } - - - - /** - * @param c - * @return Bitmap representation of set of superclasses of c (direct and indirect) - */ - protected EWAHCompressedBitmap getSuperClassesBM(OWLClass c) { - return ontoEWAHStore.getSuperClasses(getIndex(c)); - } - - /** - * @param c - * @return Bitmap representation of set of direct superclasses of c - */ - protected EWAHCompressedBitmap getDirectSuperClassesBM(OWLClass c) { - return ontoEWAHStore.getDirectSuperClasses(getIndex(c)); - } - - /** - * @param c - * @param isDirect - * @return Bitmap representation of set ofsuperclasses of c - */ - protected EWAHCompressedBitmap getSuperClassesBM(OWLClass c, boolean isDirect) { - return ontoEWAHStore.getSuperClasses(getIndex(c), isDirect); - } - - /** - * @param clsSet - * @return union of all superClasses (direct and indirect) of any input class - */ - protected EWAHCompressedBitmap getSuperClassesBMByOWLClassSet(Set clsSet) { - Set classIndices = new HashSet(); - for (OWLClass c : clsSet) { - classIndices.add(getIndex(c)); - } - return ontoEWAHStore.getSuperClasses(classIndices); - } + for (OWLClassAssertionAxiom cx : owlOntology.getClassAssertionAxioms(i)) { + // TODO: investigate efficiency - number of items set may be high + if (cx.getClassExpression() instanceof OWLObjectComplementOf) { + OWLObjectComplementOf nx = (OWLObjectComplementOf) (cx.getClassExpression()); + OWLClassExpression nc = nx.getOperand(); + ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(nc, false))); + 
ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(nc))); + ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(nc))); + } + } + + // Populate negative assertions from DisjointClasses axioms + for (OWLClass c : owlReasoner.getTypes(i, false).getFlattened()) { + LOG.debug("TESTING FOR DCs: " + c); + if (opposingClassMap.containsKey(c)) { + for (OWLClassExpression dc : opposingClassMap.get(c)) { + LOG.info(i + " Type: " + c + " DisjointWith: " + dc); + ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); + ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); + ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); + } + } + /* + * for (OWLDisjointClassesAxiom dca : owlOntology.getDisjointClassesAxioms(c)) { + * for (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) { + * LOG.info(i+" Type: "+c+" DisjointWith: "+dc); + * ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); + * ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); + * ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); } } + * for (OWLAnnotationAssertionAxiom aaa : owlOntology.getAnnotationAssertionAxioms(c.getIRI())){ + * // RO_0002604 is-opposite-of. TODO - use a vocabulary object if + * (aaa.getProperty().getIRI().toString().equals("http://purl.obolibrary.org/obo/RO_0002604" )) { + * OWLAnnotationValue v = aaa.getValue(); if (v instanceof IRI){ + * IRI dciri = (IRI)v; OWLClass dc = owlOntology.getOWLOntologyManager().getOWLDataFactory(). 
getOWLClass(dciri); + * ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); + * ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); + * ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); + * + * } } } + */ + } + + ontoEWAHStore.setNegatedTypes(individualIndex, ncs); // TODO - determine if storing all + // inferred negated types is too + // inefficient + ontoEWAHStore.setDirectNegatedTypes(individualIndex, ncsDirect); + } + + } + + // TODO + private void storeIndividualProperties() { + for (OWLNamedIndividual i : individualsInSignature) { + for (OWLIndividualAxiom ax : owlOntology.getAxioms(i)) { + if (ax instanceof OWLObjectPropertyAssertionAxiom) { + OWLObjectPropertyExpression p = ((OWLObjectPropertyAssertionAxiom) ax).getProperty(); + } + } + } + } + + // TODO - complete this + // TODO - separate this out as it is not an OWLAPI model. Maybe sparql is overkill here? + // use sparql to query the memory model + private void storeIndividualToClassFrequencies() { + String sparql = ""; + Query query = QueryFactory.create(sparql); + Model model = null; + QueryExecution qexec = QueryExecutionFactory.create(query, model); + ResultSet results = qexec.execSelect(); + for (; results.hasNext();) { + QuerySolution soln = results.nextSolution(); + RDFNode x = soln.get("varName"); // Get a result variable by name. 
+ Resource r = soln.getResource("VarR"); // Get a result variable - must be a resource + Literal l = soln.getLiteral("VarL"); // Get a result variable - must be a literal + } + } + + private Set getIntegersForClassSet(NodeSet nodeset) { + Set bits = new HashSet<>(); + for (Node n : nodeset.getNodes()) { + if (n.contains(getOWLNothing())) + continue; + bits.add(getIndexForClassNode(n)); + } + return bits; + } + + private Set getIntegersForIndividualSet(NodeSet nodeset) { + Set bits = new HashSet<>(); + for (Node n : nodeset.getNodes()) { + bits.add(getIndexForIndividualNode(n)); + } + return bits; + } + + /** + * Each class is mapped to an integer + * + * Note that equivalent classes will be mapped to the same integer + * + * @param c + * @return integer representation of class + */ + private int getIndex(OWLClass c) { + return getIndexForClassNode(classToNodeMap.get(c)); + } + + /** + * @param id + * @return integer representation of class with id + */ + public int getClassIndex(String id) { + Preconditions.checkNotNull(id); + return getIndex(getOWLClass(id)); + } + + /** + * @param index + * @return OWLClass Node that corresponds to this index + */ + public Node getClassNode(int index) { + return classNodeArray[index]; + } + + /** + * @param index + * @return OWLClass Node that corresponds to this index + */ + public Node getIndividualNode(int index) { + return individualNodeArray[index]; + } + + /** + * @param cix + * @return bitmap + */ + public EWAHCompressedBitmap getDirectIndividualsBM(int cix) { + return ontoEWAHStore.getDirectIndividuals(cix); + } + + @Override + public EWAHCompressedBitmap getIndividualsBM(String classId) { + return getIndividualsBM(getClassIndex(classId)); + } + + @Override + public EWAHCompressedBitmap getIndividualsBM(int classIndex) { + if (classIndex == getRootIndex()) { + EWAHCompressedBitmap indsBM = new EWAHCompressedBitmap(); + indsBM.setSizeInBits(getIndividualIdsInSignature().size(), true); + return indsBM; + } + 
EWAHCompressedBitmap subsBM = getSubClasses(classIndex); + EWAHCompressedBitmap indsBM = null; + // Note this implementation iterates through all subclasses + // combining individuals; it is too expensive to store all inferred inds by class + for (int subcix : subsBM.getPositions()) { + EWAHCompressedBitmap bm = getDirectIndividualsBM(subcix); + if (indsBM == null) { + indsBM = bm; + } else { + indsBM = indsBM.or(bm); + } + } + return indsBM; + } + + /** + * Note: each index can correspond to multiple classes c1...cn if this set is an equivalence set. + * In this case the representative classId is returned + * + * @param index + * @return classId + */ + public String getClassId(int index) { + Node n = getClassNode(index); + OWLClass c = n.getRepresentativeElement(); + return getShortForm(c.getIRI()); + } + + public Set getClassIds(int index) { + Node n = getClassNode(index); + Set cids = new HashSet<>(); + for (OWLClass c : n.getEntities()) { + cids.add(getShortForm(c.getIRI())); + } + return cids; + } + + public Set getClassIds(EWAHCompressedBitmap bm) { + Set cids = new HashSet<>(); + for (int x : bm) { + Node n = getClassNode(x); + for (OWLClass c : n.getEntities()) { + cids.add(getShortForm(c.getIRI())); + } + } + return cids; + } + + /** + * @param id + * @return integer representation of class with id + */ + public int getIndividualIndex(String id) { + Preconditions.checkNotNull(id); + return getIndex(getOWLNamedIndividual(id)); + } + + /** + * Each set of equivalent classes (a class node) is mapped to a unique integer + * + * @param n + * @return integer representation of class node + */ + private Integer getIndexForClassNode(Node n) { + Preconditions.checkNotNull(n); + if (!classNodeToIntegerMap.containsKey(n)) + LOG.error("No such node: " + n); + return classNodeToIntegerMap.get(n); + } + + /** + * Each individual is mapped to an integer + * + * Note that individuals that stand in a SameAs relationship to one another will be mapped to the + * same integer 
+ * + * @param i + * @return integer representation of individual + */ + private int getIndex(OWLNamedIndividual i) { + return getIndexForIndividualNode(individualToNodeMap.get(i)); + } + + /** + * Each set of same individuals (an individual node) is mapped to a unique integer + * + * @param n + * @return integer representation of class node + */ + private int getIndexForIndividualNode(Node n) { + return individualNodeToIntegerMap.get(n); + } + + /** + * @param c + * @return Bitmap representation of set of superclasses of c (direct and indirect) + */ + protected EWAHCompressedBitmap getSuperClassesBM(OWLClass c) { + return ontoEWAHStore.getSuperClasses(getIndex(c)); + } + + /** + * @param c + * @return Bitmap representation of set of direct superclasses of c + */ + protected EWAHCompressedBitmap getDirectSuperClassesBM(OWLClass c) { + return ontoEWAHStore.getDirectSuperClasses(getIndex(c)); + } + + /** + * @param c + * @param isDirect + * @return Bitmap representation of set ofsuperclasses of c + */ + protected EWAHCompressedBitmap getSuperClassesBM(OWLClass c, boolean isDirect) { + return ontoEWAHStore.getSuperClasses(getIndex(c), isDirect); + } + + /** + * @param clsSet + * @return union of all superClasses (direct and indirect) of any input class + */ + protected EWAHCompressedBitmap getSuperClassesBMByOWLClassSet(Set clsSet) { + Set classIndices = new HashSet<>(); + for (OWLClass c : clsSet) { + classIndices.add(getIndex(c)); + } + return ontoEWAHStore.getSuperClasses(classIndices); + } /* (non-Javadoc) * @see org.monarchinitiative.owlsim.kb.BMKnowledgeBase#getSuperClassesBM(com.googlecode.javaewah.EWAHCompressedBitmap) */ public EWAHCompressedBitmap getSuperClassesBM(EWAHCompressedBitmap classesBM) { - return ontoEWAHStore.getSuperClasses(new HashSet<>(classesBM.getPositions())); - } - - public EWAHCompressedBitmap getSuperClassesBM(String cid) { - return ontoEWAHStore.getSuperClasses(getClassIndex(cid)); - } - - public EWAHCompressedBitmap 
getDirectSuperClassesBM(String cid) { - return ontoEWAHStore.getDirectSuperClasses(getClassIndex(cid)); - } - - public EWAHCompressedBitmap getSuperClassesBM(int classIndex) { - return ontoEWAHStore.getSuperClasses(classIndex); - } - - public EWAHCompressedBitmap getClassesBM(Set classIds) { - EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); - for (String id : classIds) { - bm.set(getClassIndex(id)); - } - return bm; - } - - - public EWAHCompressedBitmap getDirectSuperClassesBM(int classIndex) { - return ontoEWAHStore.getDirectSuperClasses(classIndex); - } - - public EWAHCompressedBitmap getSubClasses(int classIndex) { - return ontoEWAHStore.getSubClasses(classIndex); - } - - public EWAHCompressedBitmap getDirectSubClassesBM(String cid) { - return ontoEWAHStore.getDirectSubClasses(getClassIndex(cid)); - } - - public EWAHCompressedBitmap getDirectSubClassesBM(int classIndex) { - return ontoEWAHStore.getDirectSubClasses(classIndex); - } - - /** - * @param clsIds - * @return union of all subClasses (direct and indirect) of any input class - */ - public EWAHCompressedBitmap getSubClassesBM(Set clsIds) { - Set classIndices = new HashSet(); - for (String id : clsIds) { - classIndices.add(getClassIndex(id)); - } - return ontoEWAHStore.getSubClasses(classIndices); - } - - /** - * @param clsIds - * @return union of all direct subClasses of all input classes - */ - public EWAHCompressedBitmap getDirectSubClassesBM(Set clsIds) { - Set classIndices = new HashSet(); - for (String id : clsIds) { - classIndices.add(getClassIndex(id)); - } - return ontoEWAHStore.getDirectSubClasses(classIndices); - } - - - /** - * @param clsIds - * @return union of all superClasses (direct and indirect) of any input class - */ - public EWAHCompressedBitmap getSuperClassesBM(Set clsIds) { - Set classIndices = new HashSet(); - for (String id : clsIds) { - classIndices.add(getClassIndex(id)); - } - return ontoEWAHStore.getSuperClasses(classIndices); - } - - /** - * @param clsIds - * @return 
union of all direct superClasses of all input classes - */ - public EWAHCompressedBitmap getDirectSuperClassesBM(Set clsIds) { - Set classIndices = new HashSet(); - for (String id : clsIds) { - classIndices.add(getClassIndex(id)); - } - return ontoEWAHStore.getDirectSuperClasses(classIndices); - } - - /** - * @param i - * @return Bitmap representation of set of (direct or indirect) types of i - */ - protected EWAHCompressedBitmap getTypesBM(OWLNamedIndividual i) { - return ontoEWAHStore.getTypes(getIndex(i)); - } - - /** - * @param i - * @return Bitmap representation of set of direct types of i - */ - protected EWAHCompressedBitmap getDirectTypesBM(OWLNamedIndividual i) { - return ontoEWAHStore.getDirectTypes(getIndex(i)); - } - - /** - * @param i - * @param classFilter - * @return Bitmap representation of the subset of direct types of i, which are descendants of - * classFilter - */ - protected EWAHCompressedBitmap getFilteredDirectTypesBM(OWLNamedIndividual i, OWLClass c) { - return ontoEWAHStore.getDirectTypes(getIndex(i), this.getIndex(c)); - } - - /** - * @param i - * @param isDirect - * @return Bitmap representation of set of (direct or indirect) types of i - */ - protected EWAHCompressedBitmap getTypesBM(OWLNamedIndividual i, boolean isDirect) { - return ontoEWAHStore.getTypes(getIndex(i), isDirect); - } - - /** - * @param id - * @return bitmap representation of all (direct and indirect) instantiated classes - */ - public EWAHCompressedBitmap getTypesBM(String id) { - Preconditions.checkNotNull(id); - return ontoEWAHStore.getTypes(getIndividualIndex(id)); - } + return ontoEWAHStore.getSuperClasses(new HashSet<>(classesBM.getPositions())); + } + + public EWAHCompressedBitmap getSuperClassesBM(String cid) { + return ontoEWAHStore.getSuperClasses(getClassIndex(cid)); + } + + public EWAHCompressedBitmap getDirectSuperClassesBM(String cid) { + return ontoEWAHStore.getDirectSuperClasses(getClassIndex(cid)); + } + + public EWAHCompressedBitmap getSuperClassesBM(int 
classIndex) { + return ontoEWAHStore.getSuperClasses(classIndex); + } + + public EWAHCompressedBitmap getClassesBM(Set classIds) { + EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); + for (String id : classIds) { + bm.set(getClassIndex(id)); + } + return bm; + } + + public EWAHCompressedBitmap getDirectSuperClassesBM(int classIndex) { + return ontoEWAHStore.getDirectSuperClasses(classIndex); + } + + public EWAHCompressedBitmap getSubClasses(int classIndex) { + return ontoEWAHStore.getSubClasses(classIndex); + } + + public EWAHCompressedBitmap getDirectSubClassesBM(String cid) { + return ontoEWAHStore.getDirectSubClasses(getClassIndex(cid)); + } + + public EWAHCompressedBitmap getDirectSubClassesBM(int classIndex) { + return ontoEWAHStore.getDirectSubClasses(classIndex); + } + + /** + * @param clsIds + * @return union of all subClasses (direct and indirect) of any input class + */ + public EWAHCompressedBitmap getSubClassesBM(Set clsIds) { + Set classIndices = new HashSet<>(); + for (String id : clsIds) { + classIndices.add(getClassIndex(id)); + } + return ontoEWAHStore.getSubClasses(classIndices); + } + + /** + * @param clsIds + * @return union of all direct subClasses of all input classes + */ + public EWAHCompressedBitmap getDirectSubClassesBM(Set clsIds) { + Set classIndices = new HashSet<>(); + for (String id : clsIds) { + classIndices.add(getClassIndex(id)); + } + return ontoEWAHStore.getDirectSubClasses(classIndices); + } + + /** + * @param clsIds + * @return union of all superClasses (direct and indirect) of any input class + */ + public EWAHCompressedBitmap getSuperClassesBM(Set clsIds) { + Set classIndices = new HashSet<>(); + for (String id : clsIds) { + classIndices.add(getClassIndex(id)); + } + return ontoEWAHStore.getSuperClasses(classIndices); + } + + /** + * @param clsIds + * @return union of all direct superClasses of all input classes + */ + public EWAHCompressedBitmap getDirectSuperClassesBM(Set clsIds) { + Set classIndices = new HashSet(); + 
for (String id : clsIds) { + classIndices.add(getClassIndex(id)); + } + return ontoEWAHStore.getDirectSuperClasses(classIndices); + } + + /** + * @param i + * @return Bitmap representation of set of (direct or indirect) types of i + */ + protected EWAHCompressedBitmap getTypesBM(OWLNamedIndividual i) { + return ontoEWAHStore.getTypes(getIndex(i)); + } + + /** + * @param i + * @return Bitmap representation of set of direct types of i + */ + protected EWAHCompressedBitmap getDirectTypesBM(OWLNamedIndividual i) { + return ontoEWAHStore.getDirectTypes(getIndex(i)); + } + + /** + * @param i + * @param classFilter + * @return Bitmap representation of the subset of direct types of i, which + * are descendants of classFilter + */ + protected EWAHCompressedBitmap getFilteredDirectTypesBM(OWLNamedIndividual i, OWLClass c) { + return ontoEWAHStore.getDirectTypes(getIndex(i), this.getIndex(c)); + } + + /** + * @param i + * @param isDirect + * @return Bitmap representation of set of (direct or indirect) types of i + */ + protected EWAHCompressedBitmap getTypesBM(OWLNamedIndividual i, boolean isDirect) { + return ontoEWAHStore.getTypes(getIndex(i), isDirect); + } + + /** + * @param id + * @return bitmap representation of all (direct and indirect) instantiated classes + */ + public EWAHCompressedBitmap getTypesBM(String id) { + Preconditions.checkNotNull(id); + return ontoEWAHStore.getTypes(getIndividualIndex(id)); + } + + /** + * @param individualIndex + * @return bitmap representation of all (direct and indirect) instantiated classes + */ + public EWAHCompressedBitmap getTypesBM(int individualIndex) { + return ontoEWAHStore.getTypes(individualIndex); + } - /** - * @param individualIndex - * @return bitmap representation of all (direct and indirect) instantiated classes - */ - public EWAHCompressedBitmap getTypesBM(int individualIndex) { - return ontoEWAHStore.getTypes(individualIndex); - } - /* (non-Javadoc) * @see 
org.monarchinitiative.owlsim.kb.BMKnowledgeBase#getDirectWeightedTypes(java.lang.String) */ @@ -1011,186 +973,175 @@ public EWAHCompressedBitmap getTypesBM(int individualIndex) { } - /** - * @param id - * @return bitmap representation of all (direct and indirect) classes known to be NOT instantiated - */ - public EWAHCompressedBitmap getNegatedTypesBM(String id) { - Preconditions.checkNotNull(id); - return ontoEWAHStore.getNegatedTypes(getIndividualIndex(id)); - } - - /** - * @param id - * @return bitmap representation of all (direct and indirect) classes known to be NOT instantiated - */ - public EWAHCompressedBitmap getDirectNegatedTypesBM(String id) { - Preconditions.checkNotNull(id); - return ontoEWAHStore.getDirectNegatedTypes(getIndividualIndex(id)); - } - - - /** - * @param id - * @return bitmap representation of all (direct and indirect) instantiated classes - */ - public EWAHCompressedBitmap getDirectTypesBM(String id) { - Preconditions.checkNotNull(id); - return ontoEWAHStore.getDirectTypes(getIndividualIndex(id)); - } - - /** - * @param id - * @return bitmap representation of all (direct and indirect) instantiated classes that are - * subclasses of classId - */ - public EWAHCompressedBitmap getFilteredDirectTypesBM(String id, String classId) { - Preconditions.checkNotNull(id); - Preconditions.checkNotNull(classId); - return ontoEWAHStore.getDirectTypes(getIndividualIndex(id), getClassIndex(classId)); - } - - - - private OWLClass getOWLThing() { - return getOWLDataFactory().getOWLThing(); - } - - private OWLClass getOWLNothing() { - return getOWLDataFactory().getOWLNothing(); - } - - private OWLDataFactory getOWLDataFactory() { - return owlOntology.getOWLOntologyManager().getOWLDataFactory(); - } - - - /** - * @param obj - * @return CURIE-style identifier - */ - protected String getIdentifier(OWLNamedObject obj) { - return obj.getIRI().toString(); - } - - /** - * @param id CURIE-style - * @return OWLAPI Class object - */ - protected OWLClass 
getOWLClass(String id) { - Preconditions.checkNotNull(id); - if (curieUtil.getCurieMap().isEmpty()) { - return getOWLClass(IRI.create(id)); - } else { - return getOWLClass(IRI.create(curieUtil.getIri(id).or(id))); - } - } - - /** - * @param iri - * @return OWLAPI Class object - */ - protected OWLClass getOWLClass(IRI iri) { - return owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(iri); - } - - /** - * @param iri - * @return OWLAPI Class object - */ - protected OWLNamedIndividual getOWLNamedIndividual(IRI iri) { - return owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLNamedIndividual(iri); - } - - /** - * @param id CURIE-style - * @return OWLAPI Class object - */ - public OWLNamedIndividual getOWLNamedIndividual(String id) { - Preconditions.checkNotNull(id); - if (curieUtil.getCurieMap().isEmpty()) { - return getOWLNamedIndividual(IRI.create(id)); - } else { - return getOWLNamedIndividual(IRI.create(curieUtil.getIri(id).or(id))); - } - } - - public Attribute getAttribute(String id) { - Preconditions.checkNotNull(id); - String label = labelMapper.getArbitraryLabel(id); - return new Attribute(id, label); - } - - public Entity getEntity(String id) { - Preconditions.checkNotNull(id); - String label = labelMapper.getArbitraryLabel(id); - return new Entity(id, label); - } - - public int[] getIndividualCountPerClassArray() { - return individualCountPerClassArray; - } - - - - @Override - public Map> getPropertyValueMap(String individualId) { - return propertyValueMapMap.get(individualId); - } - - @Override - public Set getPropertyValues(String individualId, String property) { - Map> m = getPropertyValueMap(individualId); - if (m.containsKey(property)) - return new HashSet(m.get(property)); - else - return Collections.emptySet(); - } - - public EWAHCompressedBitmap[] getStoredDirectSubClassIndex() { - return ontoEWAHStore.getStoredDirectSubClasses(); - } - - @Override - public int getRootIndex() { - return getIndex(getOWLThing()); - } - - - 
- @Override - public String getIndividualId(int index) { - Node n = getIndividualNode(index); - OWLNamedIndividual ind = n.getRepresentativeElement(); - return getShortForm(ind.getIRI()); - } - - - - @Override - public EWAHCompressedBitmap getFilteredTypesBM(Set ids, String classId) { - - Set classBits = new HashSet(); - for (String id : ids) { - classBits.add(this.getClassIndex(id)); - } - - return ontoEWAHStore.getTypes(classBits, getClassIndex(classId)); - - } - - - public EWAHCompressedBitmap getFilteredDirectTypesBM(Set classIds, String classId) { - - Set classBits = new HashSet(); - for (String id : classIds) { - classBits.add(this.getClassIndex(id)); - } - - return ontoEWAHStore.getDirectTypes(classBits, getClassIndex(classId)); - - } - - + /** + * @param id + * @return bitmap representation of all (direct and indirect) classes known to be NOT instantiated + */ + public EWAHCompressedBitmap getNegatedTypesBM(String id) { + Preconditions.checkNotNull(id); + return ontoEWAHStore.getNegatedTypes(getIndividualIndex(id)); + } + + /** + * @param id + * @return bitmap representation of all (direct and indirect) classes known to be NOT instantiated + */ + public EWAHCompressedBitmap getDirectNegatedTypesBM(String id) { + Preconditions.checkNotNull(id); + return ontoEWAHStore.getDirectNegatedTypes(getIndividualIndex(id)); + } + + /** + * @param id + * @return bitmap representation of all (direct and indirect) instantiated classes + */ + public EWAHCompressedBitmap getDirectTypesBM(String id) { + Preconditions.checkNotNull(id); + return ontoEWAHStore.getDirectTypes(getIndividualIndex(id)); + } + + /** + * @param id + * @return bitmap representation of all (direct and indirect) instantiated classes that are + * subclasses of classId + */ + public EWAHCompressedBitmap getFilteredDirectTypesBM(String id, String classId) { + Preconditions.checkNotNull(id); + Preconditions.checkNotNull(classId); + return ontoEWAHStore.getDirectTypes(getIndividualIndex(id), 
getClassIndex(classId)); + } + + private OWLClass getOWLThing() { + return getOWLDataFactory().getOWLThing(); + } + + private OWLClass getOWLNothing() { + return getOWLDataFactory().getOWLNothing(); + } + + private OWLDataFactory getOWLDataFactory() { + return owlOntology.getOWLOntologyManager().getOWLDataFactory(); + } + + /** + * @param obj + * @return CURIE-style identifier + */ + protected String getIdentifier(OWLNamedObject obj) { + return obj.getIRI().toString(); + } + + /** + * @param id CURIE-style + * @return OWLAPI Class object + */ + private OWLClass getOWLClass(String id) { + Preconditions.checkNotNull(id); + if (curieUtil.getCurieMap().isEmpty()) { + return getOWLClass(IRI.create(id)); + } else { + return getOWLClass(IRI.create(curieUtil.getIri(id).orElse(id))); + } + } + + /** + * @param iri + * @return OWLAPI Class object + */ + private OWLClass getOWLClass(IRI iri) { + return owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(iri); + } + + /** + * @param iri + * @return OWLAPI Class object + */ + private OWLNamedIndividual getOWLNamedIndividual(IRI iri) { + return owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLNamedIndividual(iri); + } + + /** + * @param id + * CURIE-style + * @return OWLAPI Class object + */ + public OWLNamedIndividual getOWLNamedIndividual(String id) { + Preconditions.checkNotNull(id); + //TODO: check - this is redundant code simply return getOWLNamedIndividual(IRI.create(curieUtil.getIri(id).orElse(id))); will suffice + if (curieUtil.getCurieMap().isEmpty()) { + return getOWLNamedIndividual(IRI.create(id)); + } else { + return getOWLNamedIndividual(IRI.create(curieUtil.getIri(id).orElse(id))); + } + } + + public Attribute getAttribute(String id) { + Preconditions.checkNotNull(id); + String label = labelMapper.getArbitraryLabel(id); + return new Attribute(id, label); + } + + public Entity getEntity(String id) { + Preconditions.checkNotNull(id); + String label = labelMapper.getArbitraryLabel(id); + 
return new Entity(id, label); + } + + public int[] getIndividualCountPerClassArray() { + return individualCountPerClassArray; + } + + @Override + public Map> getPropertyValueMap(String individualId) { + return propertyValueMapMap.get(individualId); + } + + @Override + public Set getPropertyValues(String individualId, String property) { + Map> m = getPropertyValueMap(individualId); + if (m.containsKey(property)) + return new HashSet<>(m.get(property)); + else + return Collections.emptySet(); + } + + public EWAHCompressedBitmap[] getStoredDirectSubClassIndex() { + return ontoEWAHStore.getStoredDirectSubClasses(); + } + + @Override + public int getRootIndex() { + return getIndex(getOWLThing()); + } + + @Override + public String getIndividualId(int index) { + Node n = getIndividualNode(index); + OWLNamedIndividual ind = n.getRepresentativeElement(); + return getShortForm(ind.getIRI()); + } + + @Override + public EWAHCompressedBitmap getFilteredTypesBM(Set ids, String classId) { + + Set classBits = new HashSet<>(); + for (String id : ids) { + classBits.add(this.getClassIndex(id)); + } + + return ontoEWAHStore.getTypes(classBits, getClassIndex(classId)); + + } + + public EWAHCompressedBitmap getFilteredDirectTypesBM(Set classIds, String classId) { + + Set classBits = new HashSet<>(); + for (String id : classIds) { + classBits.add(this.getClassIndex(id)); + } + + return ontoEWAHStore.getDirectTypes(classBits, getClassIndex(classId)); + + } } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcherTest.java index cd8b00f..19ed604 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcherTest.java @@ -1,25 +1,20 @@ package org.monarchinitiative.owlsim.compute.classmatch; -import static 
org.junit.Assert.*; - -import java.net.URISyntaxException; -import java.net.URL; -import java.util.List; - import org.apache.log4j.Logger; import org.junit.Test; -import org.monarchinitiative.owlsim.compute.mica.AbstractMICAStoreTest; -import org.monarchinitiative.owlsim.compute.mica.MICAStore; -import org.monarchinitiative.owlsim.compute.mica.impl.MICAStoreImpl; import org.monarchinitiative.owlsim.compute.mica.impl.NoRootException; -import org.monarchinitiative.owlsim.compute.stats.KBStatsCalculator; -import org.monarchinitiative.owlsim.io.OWLLoader; +import org.monarchinitiative.owlsim.io.OwlKnowledgeBase; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.kb.LabelMapper; -import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.model.OWLOntologyCreationException; -import com.google.monitoring.runtime.instrumentation.common.com.google.common.io.Resources; +import java.net.URISyntaxException; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; public class ClassMatcherTest { @@ -28,18 +23,26 @@ private Logger LOG = Logger.getLogger(ClassMatcherTest.class); protected void load(String fn, String... 
ontfns) throws OWLOntologyCreationException, URISyntaxException, NoRootException { - OWLLoader loader = new OWLLoader(); - LOG.info("Loading: "+fn); - loader.load(IRI.create(Resources.getResource(fn))); - for (String ontfn : ontfns) { - URL res = getClass().getResource(ontfn); - LOG.info("RES="+res); - loader.loadOntologies(res.getFile()); - } - kb = loader.createKnowledgeBaseInterface(); +// OWLLoader loader = new OWLLoader(); +// LOG.info("Loading: "+fn); +// loader.load(IRI.create(Resources.getResource(fn))); +// for (String ontfn : ontfns) { +// URL res = getClass().getResource(ontfn); +// LOG.info("RES="+res); +// loader.ontologies(res.getFile()); +// } +// kb = loader.createKnowledgeBaseInterface(); + kb = OwlKnowledgeBase.loader() + .loadOntology(filePath(fn)) + .loadOntologies(Arrays.stream(ontfns).map(ontfn -> filePath(ontfn)).collect(Collectors.toList())) + .createKnowledgeBase(); classMatcher = new ClassMatcher(kb); } - + + private String filePath(String filename) { + return Paths.get("src/test/resources/", filename).toString(); + } + @Test public void selfTest() throws OWLOntologyCreationException, URISyntaxException, NoRootException { load("mp-subset.ttl"); diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/kb/impl/AbstractOwlTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/kb/impl/AbstractOwlTest.java index c048cf8..ef9fe9b 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/kb/impl/AbstractOwlTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/kb/impl/AbstractOwlTest.java @@ -1,18 +1,16 @@ package org.monarchinitiative.owlsim.compute.kb.impl; -import java.net.URISyntaxException; -import java.util.Set; - +import com.googlecode.javaewah.EWAHCompressedBitmap; import org.apache.log4j.Logger; import org.junit.Assert; -import org.monarchinitiative.owlsim.io.OWLLoader; +import org.monarchinitiative.owlsim.io.OwlKnowledgeBase; import 
org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.kb.NonUniqueLabelException; -import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.model.OWLOntologyCreationException; -import com.google.monitoring.runtime.instrumentation.common.com.google.common.io.Resources; -import com.googlecode.javaewah.EWAHCompressedBitmap; +import java.net.URISyntaxException; +import java.nio.file.Paths; +import java.util.Set; /** * Tests a OWLAPI implementation of a KB @@ -27,10 +25,12 @@ private Logger LOG = Logger.getLogger(AbstractOwlTest.class); protected BMKnowledgeBase kb; + protected void load(String fn) throws OWLOntologyCreationException, URISyntaxException { - OWLLoader loader = new OWLLoader(); - loader.load(IRI.create(Resources.getResource(fn))); - kb = loader.createKnowledgeBaseInterface(); +// OWLLoader loader = new OWLLoader(); +// loader.load(IRI.create(Resources.getResource(fn))); +// kb = loader.createKnowledgeBaseInterface(); + kb = OwlKnowledgeBase.loader().loadOntology(Paths.get("src/test/resources", fn).toString()).createKnowledgeBase(); } protected void checkContains(EWAHCompressedBitmap bm, diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/mp/AbstractProfileMatcherMPTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/mp/AbstractProfileMatcherMPTest.java index f75fde6..1a2ebab 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/mp/AbstractProfileMatcherMPTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/mp/AbstractProfileMatcherMPTest.java @@ -1,14 +1,12 @@ package org.monarchinitiative.owlsim.compute.matcher.mp; -import java.util.List; -import java.util.Set; - +import com.google.common.collect.Sets; import org.apache.log4j.Logger; import org.junit.Assert; import org.junit.Test; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import 
org.monarchinitiative.owlsim.io.JSONWriter; -import org.monarchinitiative.owlsim.io.OWLLoader; +import org.monarchinitiative.owlsim.io.OwlKnowledgeBase; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.model.match.Match; import org.monarchinitiative.owlsim.model.match.MatchSet; @@ -16,7 +14,8 @@ import org.monarchinitiative.owlsim.model.match.impl.ProfileQueryImpl; import org.semanticweb.owlapi.model.OWLOntologyCreationException; -import com.google.common.collect.Sets; +import java.util.List; +import java.util.Set; /** * Tests a ProfileMatcher using the sample mp-subset.ttl ontology @@ -229,13 +228,11 @@ private void testQuery(Set queryClassLabels, } private void load(String fn) throws OWLOntologyCreationException { - //Injector injector = Guice.createInjector(new ConfigModule()); - OWLLoader loader = new OWLLoader(); - loader.load("src/test/resources/"+fn); - ontology = loader.createKnowledgeBaseInterface(); - //profileMatcher = - // injector.getInstance(ProfileMatcher.class); - //profileMatcher = new MaximumInformationContentSimilarityProfileMatcher(ontology); +// OWLLoader loader = new OWLLoader(); +// loader.load("src/test/resources/"+fn); +// ontology = loader.createKnowledgeBaseInterface(); + + ontology = OwlKnowledgeBase.loader().loadOntology("src/test/resources/" + fn).createKnowledgeBase(); profileMatcher = createProfileMatcher(ontology); } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/perf/HomologyPhenoPerfIT.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/perf/HomologyPhenoPerfIT.java index 2e39f74..e8e4270 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/perf/HomologyPhenoPerfIT.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/perf/HomologyPhenoPerfIT.java @@ -1,28 +1,21 @@ package org.monarchinitiative.owlsim.compute.matcher.perf; -import java.io.IOException; -import 
java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.junit.Test; import org.monarchinitiative.owlsim.compute.matcher.AbstractProfileMatcherTest; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; -import org.monarchinitiative.owlsim.compute.matcher.impl.BayesianNetworkProfileMatcher; -import org.monarchinitiative.owlsim.compute.matcher.impl.JaccardSimilarityProfileMatcher; -import org.monarchinitiative.owlsim.compute.matcher.impl.MaximumInformationContentSimilarityProfileMatcher; -import org.monarchinitiative.owlsim.compute.matcher.impl.NaiveBayesFixedWeightTwoStateProfileMatcher; -import org.monarchinitiative.owlsim.compute.matcher.impl.PhenodigmICProfileMatcher; +import org.monarchinitiative.owlsim.compute.matcher.impl.*; import org.monarchinitiative.owlsim.eval.ProfileMatchEvaluator; -import org.monarchinitiative.owlsim.io.OWLLoader; +import org.monarchinitiative.owlsim.io.OwlKnowledgeBase; import org.monarchinitiative.owlsim.io.ReadMappingsUtil; import org.monarchinitiative.owlsim.kb.filter.Filter; import org.monarchinitiative.owlsim.kb.filter.TypeFilter; import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import java.io.IOException; +import java.util.*; + /** * Tests phenotype matcher by finding rank of each homolog when using that matcher * @@ -126,12 +119,19 @@ public void estimateAccuracy() throws Exception { } private void load() throws OWLOntologyCreationException, IOException { - OWLLoader loader = new OWLLoader(); - loader.loadGzippdOntology(getClass().getResource("/ontologies/mammal.obo.gz").getFile()); - loader.loadDataFromTsvGzip(getClass().getResource("/data/gene2taxon.tsv.gz").getFile()); - loader.loadDataFromTsvGzip(getClass().getResource("/data/mouse-pheno.assocs.gz").getFile()); - loader.loadDataFromTsvGzip(getClass().getResource("/data/human-pheno.assocs.gz").getFile()); - kb = 
loader.createKnowledgeBaseInterface(); + Map curies = new LinkedHashMap<>(); + curies.put("HP", "http://purl.obolibrary.org/obo/HP_"); + curies.put("MP", "http://purl.obolibrary.org/obo/MP_"); + curies.put("NCBITaxon", "http://purl.obolibrary.org/obo/NCBITaxon_"); + + kb = OwlKnowledgeBase.loader() + .loadCuries(curies) + .loadOntology("src/test/resources/ontologies/mammal.obo.gz") + .loadDataFromTsv( + "src/test/resources/data/gene2taxon.tsv.gz", + "src/test/resources/data/mouse-pheno.assocs.gz", + "src/test/resources/data/human-pheno.assocs.gz") + .createKnowledgeBase(); } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/io/OntologySourceDataTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/io/OntologySourceDataTest.java new file mode 100644 index 0000000..c01b275 --- /dev/null +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/io/OntologySourceDataTest.java @@ -0,0 +1,42 @@ +package org.monarchinitiative.owlsim.io; + +import org.junit.Test; + +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * @author Jules Jacobsen + */ +public class OntologySourceDataTest { + + @Test + public void testItAll() { + + Map curies = new LinkedHashMap<>(); + curies.put("HP", "http://purl.obolibrary.org/obo/HP_"); + curies.put("MP", "http://purl.obolibrary.org/obo/MP_"); + curies.put("NCBITaxon", "http://purl.obolibrary.org/obo/NCBITaxon_"); + + OntologySourceData sourceData = OntologySourceData.builder() + .curies(curies) + .ontology("src/test/resources/ontologies/mammal.obo.gz") + .dataTsv("src/test/resources/data/gene2taxon.tsv.gz") + .dataTsv("src/test/resources/data/mouse-pheno.assocs.gz") + .dataTsv("src/test/resources/data/human-pheno.assocs.gz") + .build(); + + System.out.println(sourceData); + } + + @Test(expected = Exception.class) + public void testThrowsExceptionWhenCuriesEmptyAndDataIncludedFromTsv() { + + OntologySourceData sourceData = OntologySourceData.builder() + 
.dataTsv("src/test/resources/data/gene2taxon.tsv.gz") + .build(); + + System.out.println(sourceData); + } + +} \ No newline at end of file diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/io/OwlKnowledgeBaseTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/io/OwlKnowledgeBaseTest.java new file mode 100644 index 0000000..57fa1ec --- /dev/null +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/io/OwlKnowledgeBaseTest.java @@ -0,0 +1,169 @@ +package org.monarchinitiative.owlsim.io; + +import org.junit.Test; +import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; +import uk.ac.manchester.cs.jfact.JFactFactory; + +import java.io.File; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +/** + * Utility class for creating a {@link BMKnowledgeBase} from input ontologies, curies and data. Ontologies can be in OWL + * or OBO format, gzipped or uncompressed. + * @author Jules Jacobsen + */ +public class OwlKnowledgeBaseTest { + + private static final String SPECIES_OWL = "src/test/resources/species.owl"; + + private Map curies() { + Map curies = new LinkedHashMap<>(); + curies.put("HP", "http://purl.obolibrary.org/obo/HP_"); + curies.put("MP", "http://purl.obolibrary.org/obo/MP_"); + curies.put("NCBITaxon", "http://purl.obolibrary.org/obo/NCBITaxon_"); + return curies; + } + + @Test + public void testLoadOwlFromFile() { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .loadOntology(new File(SPECIES_OWL)) + .createKnowledgeBase(); + } + + @Test + public void testLoadOwlWithIndividualsFromFilePath() { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .loadOntology(SPECIES_OWL) + .createKnowledgeBase(); + assertEquals(24, bmKnowledgeBase.getIndividualIdsInSignature().size()); + //why -1? This is because http://www.w3.org/2002/07/owl#Thing is also reported as class. 
+ assertEquals(77, bmKnowledgeBase.getClassIdsInSignature().size() - 1); + } + + @Test + public void testLoadGzippedOboOntology() { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .loadOntology("src/test/resources/ontologies/mammal.obo.gz") + .createKnowledgeBase(); + } + + @Test + public void testLoadOntologiesFromMultipleSources() { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .loadOntologies( + Arrays.asList("src/test/resources/species.owl", + "http://purl.obolibrary.org/obo/aeo.owl", + "src/test/resources/ontologies/mammal.obo.gz") + ) + .createKnowledgeBase(); + } + + /** + * Ignored so as not to use network - this is a slow test + */ +// @Ignore + @Test + public void testLoadRemoteOntology() { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .loadOntology("http://purl.obolibrary.org/obo/aeo.owl") + .createKnowledgeBase(); + } + + @Test(expected = OntologyLoadException.class) + public void testLoadGzippedDataFileNoOntology() { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .loadDataFromTsv("src/test/resources/data/human-pheno.assocs.gz") + .createKnowledgeBase(); + } + + @Test + public void loadDataFromOntology() { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .loadOntology("src/test/resources/species.owl") + .loadCuries(curies()) + .loadDataFromOntology("src/test/resources/species.owl") + .createKnowledgeBase(); + } + + @Test + public void loadDataFromOntologies() { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .loadOntology("src/test/resources/ontologies/mammal.obo.gz") + .loadCuries(curies()) + .loadDataFromOntologies("src/test/resources/mp-subset.ttl", "src/test/resources/mp-subset.ttl") + .createKnowledgeBase(); + } + + @Test + public void loadDataFromTsv() { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .loadCuries(curies()) + .loadOntology("src/test/resources/ontologies/mammal.obo.gz") + 
.loadDataFromTsv("src/test/resources/data/human-pheno.assocs.gz") + .createKnowledgeBase(); + } + + @Test + public void loadDataFromGzippedTsv() { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .loadCuries(curies()) + .loadOntology("src/test/resources/ontologies/mammal.obo.gz") + .loadDataFromTsv("src/test/resources/data/human-pheno.assocs.gz") + .createKnowledgeBase(); + } + + @Test + public void loadDataFromTsvCollection() { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .loadCuries(curies()) + .loadOntology("src/test/resources/ontologies/mammal.obo.gz") + .loadDataFromTsv(Arrays.asList( + "src/test/resources/data/gene2taxon.tsv.gz", + "src/test/resources/data/mouse-pheno.assocs.gz", + "src/test/resources/data/human-pheno.assocs.gz")) + .createKnowledgeBase(); + + //|classes|=38627 + //|individuals|=14200 + //What should this be? It's different from the OWLLoader version. + //Turns out that without the correct curies the classes are not properly resolved so there will be 53630 classes + // without any curies. About 40,000-odd without the MP curie and 38629 without the NCBITaxon curie (1 mouse, 1 + // human class). + //So remember folks, curies are good, especially with poppadums and beer. + + //lastly, why -1? This is because http://www.w3.org/2002/07/owl#Thing is also reported as class. 
+ assertEquals(38627, bmKnowledgeBase.getClassIdsInSignature().size() - 1); + assertEquals(14200, bmKnowledgeBase.getIndividualIdsInSignature().size()); + } + + @Test + public void testLoadOwlFromFileLocationWithStandardOntologyManager() throws Exception { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .useStandardOntologyManager() + .loadOntology(SPECIES_OWL) + .createKnowledgeBase(); + } + + @Test + public void canSpecifyConcurrentOntologyManager() { + OwlKnowledgeBase.loader().useConcurrentOntologyManager(); + } + + @Test + public void canSpecifyStandardOntologyManager() { + OwlKnowledgeBase.loader().useStandardOntologyManager(); + } + + @Test + public void testUseOtherOwlReasonerFactory() throws Exception { + BMKnowledgeBase bmKnowledgeBase = OwlKnowledgeBase.loader() + .useReasonerFactory(new JFactFactory()) + .loadOntology(SPECIES_OWL) + .createKnowledgeBase(); + } +} \ No newline at end of file diff --git a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/OwlSimServiceApplication.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/OwlSimServiceApplication.java index 9e8da2f..47f3bfd 100644 --- a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/OwlSimServiceApplication.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/OwlSimServiceApplication.java @@ -15,37 +15,34 @@ */ package org.monarchinitiative.owlsim.services; -import java.util.EnumSet; -import java.util.Set; - -import javax.servlet.DispatcherType; -import javax.servlet.FilterRegistration; - -import org.apache.log4j.Logger; -import org.eclipse.jetty.servlets.CrossOriginFilter; -import org.monarchinitiative.owlsim.services.configuration.ApplicationConfiguration; -import org.monarchinitiative.owlsim.services.modules.EnrichmentMapModule; -import org.monarchinitiative.owlsim.services.modules.KnowledgeBaseModule; -import org.monarchinitiative.owlsim.services.modules.MatcherMapModule; -import 
org.semanticweb.owlapi.OWLAPIParsersModule; -import org.semanticweb.owlapi.OWLAPIServiceLoaderModule; - import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.databind.SerializationFeature; import com.google.common.reflect.ClassPath; import com.google.common.reflect.ClassPath.ClassInfo; import com.google.inject.Guice; import com.google.inject.Injector; - import io.dropwizard.Application; import io.dropwizard.assets.AssetsBundle; import io.dropwizard.setup.Bootstrap; import io.dropwizard.setup.Environment; import io.swagger.jaxrs.config.BeanConfig; import io.swagger.jaxrs.listing.ApiListingResource; +import org.apache.log4j.Logger; +import org.eclipse.jetty.servlets.CrossOriginFilter; +import org.monarchinitiative.owlsim.services.configuration.ApplicationConfiguration; +import org.monarchinitiative.owlsim.services.modules.EnrichmentMapModule; +import org.monarchinitiative.owlsim.services.modules.KnowledgeBaseModule; +import org.monarchinitiative.owlsim.services.modules.MatcherMapModule; +import org.semanticweb.owlapi.OWLAPIParsersModule; +import org.semanticweb.owlapi.OWLAPIServiceLoaderModule; import uk.ac.manchester.cs.owl.owlapi.OWLAPIImplModule; import uk.ac.manchester.cs.owl.owlapi.concurrent.Concurrency; +import javax.servlet.DispatcherType; +import javax.servlet.FilterRegistration; +import java.util.EnumSet; +import java.util.Set; + public class OwlSimServiceApplication extends Application { private Logger LOG = Logger.getLogger(OwlSimServiceApplication.class); @@ -146,7 +143,7 @@ public void run(ApplicationConfiguration configuration, Environment environment) new KnowledgeBaseModule(configuration.getOntologyUris(), configuration.getOntologyDataUris(), configuration.getDataTsvs(), configuration.getCuries()), new EnrichmentMapModule(), new MatcherMapModule()); - LOG.info("BINDINGS =" + i.getAllBindings()); + //removed binding info as this caused things to explode. Wasn't helpful. 
// Add resources Set resourceClasses = ClassPath.from(getClass().getClassLoader()) .getTopLevelClasses("org.monarchinitiative.owlsim.services.resources"); diff --git a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java index e6b76c7..8a19ef4 100644 --- a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java @@ -1,135 +1,60 @@ package org.monarchinitiative.owlsim.services.modules; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.Collection; -import java.util.Map; -import java.util.Set; -import java.util.zip.GZIPInputStream; - -import javax.inject.Singleton; - -import org.apache.commons.validator.routines.UrlValidator; +import com.google.inject.AbstractModule; +import com.google.inject.Provides; +import com.google.inject.Singleton; +import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.classmatch.ClassMatcher; import org.monarchinitiative.owlsim.compute.enrich.impl.HypergeometricEnrichmentEngine; import org.monarchinitiative.owlsim.compute.matcher.impl.BayesianNetworkProfileMatcher; import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator; import org.monarchinitiative.owlsim.compute.mica.impl.MostInformativeCommonAncestorCalculatorImpl; +import org.monarchinitiative.owlsim.io.OwlKnowledgeBase; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; -import org.monarchinitiative.owlsim.kb.impl.BMKnowledgeBaseOWLAPIImpl; -import org.monarchinitiative.owlsim.services.modules.bindings.IndicatesDataTsvs; -import 
org.monarchinitiative.owlsim.services.modules.bindings.IndicatesOwlDataOntologies; -import org.monarchinitiative.owlsim.services.modules.bindings.IndicatesOwlOntologies; -import org.prefixcommons.CurieUtil; -import org.semanticweb.elk.owlapi.ElkReasonerFactory; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyCreationException; -import org.semanticweb.owlapi.model.OWLOntologyManager; -import org.semanticweb.owlapi.reasoner.OWLReasonerFactory; -import com.google.common.collect.ImmutableCollection; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.inject.AbstractModule; -import com.google.inject.Provides; +import java.util.Collection; +import java.util.Map; +import java.util.Set; -/** - * TODO - rewrite this - * - * Reduce duplication of code with OWLLoader - * - */ public class KnowledgeBaseModule extends AbstractModule { - private final ImmutableCollection ontologyUris; - private final ImmutableCollection ontologyDataUris; - private final ImmutableCollection dataTsvs; - private final ImmutableMap curies; - private final UrlValidator urlValdiator = UrlValidator.getInstance(); + Logger logger = Logger.getLogger(KnowledgeBaseModule.class); - public KnowledgeBaseModule(Collection ontologyUris, Collection ontologyDataUris, - Set dataTsvs, Map curies) { - this.ontologyUris = new ImmutableSet.Builder().addAll(ontologyUris).build(); - this.ontologyDataUris = new ImmutableSet.Builder().addAll(ontologyDataUris).build(); - this.dataTsvs = new ImmutableSet.Builder().addAll(dataTsvs).build(); - this.curies = new ImmutableMap.Builder().putAll(curies).build(); - } + private final BMKnowledgeBase bmKnowledgeBase; - @Override - protected void configure() { - bind(BMKnowledgeBase.class).to(BMKnowledgeBaseOWLAPIImpl.class).in(Singleton.class); - bind(OWLReasonerFactory.class).to(ElkReasonerFactory.class); - 
bind(CurieUtil.class).toInstance(new CurieUtil(curies)); - // bind(OWLOntologyManager.class).to(OWLOntologyManagerImpl.class); - // bind(ReadWriteLock.class).to(NoOpReadWriteLock.class); - // bind(OWLDataFactory.class).to(OWLDataFactoryImpl.class); - // bind(OWLOntologyManager.class).toInstance(OWLManager.createOWLOntologyManager()); - } + public KnowledgeBaseModule(Collection ontologyUris, Collection ontologyDataUris, Set dataTsvs, Map curies) { - @Provides - BMKnowledgeBaseOWLAPIImpl provideBMKnowledgeBaseOWLAPIImpl(@IndicatesOwlOntologies OWLOntology owlOntology, - @IndicatesOwlDataOntologies OWLOntology owlDataOntology, OWLReasonerFactory rf, CurieUtil curieUtil) { - BMKnowledgeBaseOWLAPIImpl bMKnowledgeBaseOWLAPIImpl = new BMKnowledgeBaseOWLAPIImpl(owlOntology, - owlDataOntology, rf, curieUtil); - return bMKnowledgeBaseOWLAPIImpl; - } + logger.info("Loading ontologyUris:"); + ontologyUris.forEach(logger::info); + logger.info("Loading ontologyDataUris:"); + ontologyDataUris.forEach(logger::info); + logger.info("Loading dataTsvs:"); + dataTsvs.forEach(logger::info); + logger.info("Loading curies:"); + curies.entrySet().forEach(logger::info); - OWLOntology loadOntology(OWLOntologyManager manager, String uri) throws OWLOntologyCreationException { - if (urlValdiator.isValid(uri)) { - return manager.loadOntology(IRI.create(uri)); - } else { - File file = new File(uri); - return manager.loadOntologyFromOntologyDocument(file); - } - } + //The OwlKnowledgeBase.Loader uses the ELKReasonerFactory and Concurrency.CONCURRENT as defaults. 
+ this.bmKnowledgeBase = OwlKnowledgeBase.loader() + .loadOntologies(ontologyUris) + .loadDataFromOntologies(ontologyDataUris) + .loadDataFromTsv(dataTsvs) + .loadCuries(curies) + .createKnowledgeBase(); - OWLOntology mergeOntologies(OWLOntologyManager manager, Collection uris) - throws OWLOntologyCreationException, FileNotFoundException, IOException { - OWLOntology ontology = manager.createOntology(); - for (String uri : uris) { - OWLOntology loadedOntology; - if (uri.endsWith(".gz")) { - GZIPInputStream gis = new GZIPInputStream(new FileInputStream(uri)); - BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8")); - loadedOntology = manager.loadOntologyFromOntologyDocument(gis); - } else { - loadedOntology = loadOntology(manager, uri); - } - manager.addAxioms(ontology, loadedOntology.getAxioms()); - } - return ontology; + logger.info("Created BMKnowledgebase"); } - @Provides - @IndicatesOwlOntologies - @Singleton - OWLOntology getOwlOntologies(OWLOntologyManager manager) - throws OWLOntologyCreationException, FileNotFoundException, IOException { - return mergeOntologies(manager, ontologyUris); + @Override + protected void configure() { } @Provides - @IndicatesOwlDataOntologies @Singleton - OWLOntology getOwlDataOntologies(OWLOntologyManager manager) - throws OWLOntologyCreationException, FileNotFoundException, IOException { - return mergeOntologies(manager, ontologyDataUris); + BMKnowledgeBase provideBMKnowledgeBaseOWLAPIImpl() { + return bmKnowledgeBase; } @Provides - @IndicatesDataTsvs - @Singleton - OWLOntology getDataTsvs(OWLOntologyManager manager) - throws OWLOntologyCreationException, FileNotFoundException, IOException { - return mergeOntologies(manager, dataTsvs); - } - - @Provides MostInformativeCommonAncestorCalculator getMostInformativeCommonAncestorCalculator(BMKnowledgeBase knowledgeBase) { return new MostInformativeCommonAncestorCalculatorImpl(knowledgeBase); } diff --git a/pom.xml b/pom.xml index 0517d25..6c0128f 100644 --- 
a/pom.xml +++ b/pom.xml @@ -127,7 +127,7 @@ com.google.guava guava - 18.0 + 21.0 org.mockito