From 58e533b595596aa68ea51d3d4460a5f8f54ba471 Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Thu, 2 Mar 2017 00:19:43 -0500 Subject: [PATCH 1/7] first pass at frequency-awareness, issue #56 --- ...aiveBayesFixedWeightTwoStateProfileMatcher.java | 223 +++++++++++++--- .../owlsim/kb/BMKnowledgeBase.java | 37 ++- .../owlsim/kb/ewah/EWAHKnowledgeBaseStore.java | 3 +- .../owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java | 67 ++++- .../matcher/AbstractProfileMatcherTest.java | 23 +- ...BayesFixedWeightTwoStateProfileMatcherTest.java | 53 ++++ .../src/test/resources/simple-pheno-with-freqs.owl | 282 +++++++++++++++++++++ 7 files changed, 628 insertions(+), 60 deletions(-) create mode 100644 owlsim-core/src/test/resources/simple-pheno-with-freqs.owl diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java index 9e5d464..31194ed 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java @@ -1,7 +1,10 @@ package org.monarchinitiative.owlsim.compute.matcher.impl; +import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import javax.inject.Inject; @@ -36,18 +39,44 @@ public class NaiveBayesFixedWeightTwoStateProfileMatcher extends AbstractProfileMatcher implements ProfileMatcher { private Logger LOG = Logger.getLogger(NaiveBayesFixedWeightTwoStateProfileMatcher.class); + + // set this to more than 1 for frequency-aware; + // a value of 0 defaults to frequency-unaware + private int kLeastFrequent = 0; + @Deprecated private double defaultFalsePositiveRate = 0.002; // alpha 
@Deprecated private double defaultFalseNegativeRate = 0.10; // beta + + /** + * A tuple of (weight, Classes) + * + */ + private class WeightedTypesBM { + // bitmap representing a set of classes assumed to be on + final EWAHCompressedBitmap typesBM; + + // probability of the state in which all such classes are on + final double weight; + + public WeightedTypesBM(EWAHCompressedBitmap typesBM, Double weight) { + super(); + this.typesBM = typesBM; + this.weight = weight; + } + } // TODO - replace when tetsing is over //private double[] defaultFalsePositiveRateArr = new double[]{0.002}; //private double[] defaultFalseNegativeRateArr = new double[] {0.10}; private double[] defaultFalsePositiveRateArr = new double[]{1e-10,0.0005,0.001,0.005,0.01}; private double[] defaultFalseNegativeRateArr = new double[] {1e-10,0.005,0.01,0.05,0.1,0.2,0.4,0.8,0.9}; + + // for maps a pair of (Individual, InterpretationIndex) to a set of inferred (self, direct, indirect) types + private Map> individualToInterpretationToTypesBM = new HashMap<>(); @Inject protected NaiveBayesFixedWeightTwoStateProfileMatcher(BMKnowledgeBase kb) { @@ -70,8 +99,31 @@ public boolean isUseBlanket() { public String getShortName() { return "naive-bayes-fixed-weight-two-state"; } + + /** + * @return the kLeastFrequent + */ + public int getkLeastFrequent() { + return kLeastFrequent; + } + + /** + * The default for this should be 0. When 0, the behavior is as for frequency unaware + * (i.e. 
every instance-class association with frequency info will be treated as normal instance-class) + * + * When k>1, will make use of the k least frequent annotations in probabilistic calculation + * + * @param kLeastFrequent the kLeastFrequent to set + */ + public void setkLeastFrequent(int kLeastFrequent) { + // reset cache + individualToInterpretationToTypesBM = new HashMap<>(); + this.kLeastFrequent = kLeastFrequent; + } + + /** * Extends the query profile - for every node c, all the direct parents of c are in * the query profile, then add c to the query profile. * @@ -132,50 +184,82 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { double pvector[] = new double[indIds.size()]; String indArr[] = new String[indIds.size()]; int n=0; + + for (String itemId : indIds) { - EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); - // any node which has an off query parent is discounted - targetProfileBM = targetProfileBM.and(queryBlanketProfileBM); - LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); - - - // two state model. 
- // mapping to Bauer et al: these correspond to mxy1, x=Q, y=H/T - int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); - int numInQueryAndNOTInTarget = queryProfileBM.andNotCardinality(targetProfileBM); - int numNOTInQueryAndInTarget = targetProfileBM.andNotCardinality(queryProfileBM); - int numNOTInQueryAndNOTInTarget = - numClassesConsidered - (numInQueryAndInTarget + numInQueryAndNOTInTarget + numNOTInQueryAndInTarget); - - double p = 0.0; - // TODO: optimize this - // integrate over a Dirichlet prior for alpha & beta, rather than gridsearch - // this can be done closed-form - for (double fnr : defaultFalseNegativeRateArr) { - for (double fpr : defaultFalsePositiveRateArr) { - - double pQ1T1 = Math.pow(1-fnr, numInQueryAndInTarget); - double pQ0T1 = Math.pow(fnr, numNOTInQueryAndInTarget); - double pQ1T0 = Math.pow(fpr, numInQueryAndNOTInTarget); - double pQ0T0 = Math.pow(1-fpr, numNOTInQueryAndNOTInTarget); - - - - //LOG.debug("pQ1T1 = "+(1-fnr)+" ^ "+ numInQueryAndInTarget+" = "+pQ1T1); - //LOG.debug("pQ0T1 = "+(fnr)+" ^ "+ numNOTInQueryAndInTarget+" = "+pQ0T1); - //LOG.debug("pQ1T0 = "+(fpr)+" ^ "+ numInQueryAndNOTInTarget+" = "+pQ1T0); - //LOG.debug("pQ0T0 = "+(1-fpr)+" ^ "+ numNOTInQueryAndNOTInTarget+" = "+pQ0T0); - //TODO: optimization. 
We can precalculate the logs for different integers - p += - Math.exp(Math.log(pQ1T1) + Math.log(pQ0T1) + Math.log(pQ1T0) + Math.log(pQ0T0)); - - } - } - pvector[n] = p; - indArr[n] = itemId; - sumOfProbs += p; + + int effectiveK = kLeastFrequent; + int twoToTheK = (int) Math.pow(2, kLeastFrequent); + int numWeightedTypes = knowledgeBase.getDirectWeightedTypes(itemId).size(); + if (numWeightedTypes < kLeastFrequent) { + twoToTheK = (int) Math.pow(2, numWeightedTypes); + effectiveK = numWeightedTypes; + } + + double cumulativePr = 0; + for (int comboIndex = 0; comboIndex < twoToTheK; comboIndex++) { + + Double comboPr = null; + EWAHCompressedBitmap targetProfileBM; + if (kLeastFrequent == 0) { + targetProfileBM = knowledgeBase.getTypesBM(itemId); + } + else { + WeightedTypesBM wtbm = getTypesFrequencyAware(itemId, comboIndex, effectiveK); + comboPr = wtbm.weight; + targetProfileBM = wtbm.typesBM; + } + + // any node which has an off query parent is discounted + targetProfileBM = targetProfileBM.and(queryBlanketProfileBM); + LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); + + + // two state model. 
+ // mapping to Bauer et al: these correspond to mxy1, x=Q, y=H/T + int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); + int numInQueryAndNOTInTarget = queryProfileBM.andNotCardinality(targetProfileBM); + int numNOTInQueryAndInTarget = targetProfileBM.andNotCardinality(queryProfileBM); + int numNOTInQueryAndNOTInTarget = + numClassesConsidered - (numInQueryAndInTarget + numInQueryAndNOTInTarget + numNOTInQueryAndInTarget); + + double p = 0.0; + // TODO: optimize this + // integrate over a Dirichlet prior for alpha & beta, rather than gridsearch + // this can be done closed-form + for (double fnr : defaultFalseNegativeRateArr) { + for (double fpr : defaultFalsePositiveRateArr) { + + double pQ1T1 = Math.pow(1-fnr, numInQueryAndInTarget); + double pQ0T1 = Math.pow(fnr, numNOTInQueryAndInTarget); + double pQ1T0 = Math.pow(fpr, numInQueryAndNOTInTarget); + double pQ0T0 = Math.pow(1-fpr, numNOTInQueryAndNOTInTarget); + + + + //LOG.debug("pQ1T1 = "+(1-fnr)+" ^ "+ numInQueryAndInTarget+" = "+pQ1T1); + //LOG.debug("pQ0T1 = "+(fnr)+" ^ "+ numNOTInQueryAndInTarget+" = "+pQ0T1); + //LOG.debug("pQ1T0 = "+(fpr)+" ^ "+ numInQueryAndNOTInTarget+" = "+pQ1T0); + //LOG.debug("pQ0T0 = "+(1-fpr)+" ^ "+ numNOTInQueryAndNOTInTarget+" = "+pQ0T0); + //TODO: optimization. We can precalculate the logs for different integers + p += + Math.exp(Math.log(pQ1T1) + Math.log(pQ0T1) + Math.log(pQ1T0) + Math.log(pQ0T0)); + + } + } + + if (comboPr != null) { + p *= comboPr; + } + cumulativePr += p; + } + pvector[n] = cumulativePr; + indArr[n] = itemId; + + sumOfProbs += cumulativePr; n++; - LOG.debug("p for "+itemId+" = "+p); + LOG.debug("p for "+itemId+" = "+cumulativePr); + } for (n = 0; n()); + } + Map m = individualToInterpretationToTypesBM.get(iix); + if (m.containsKey(n)) { + // use cached value + return m.get(n); + } + + // default direct type map. 
+ // note that associations with frequency annotations are includes here alongside + // normal associations + EWAHCompressedBitmap dtmap = knowledgeBase.getDirectTypesBM(itemId); + + // associations with frequency info + // map is from ClassIndex -> Weight + Map wmap = knowledgeBase.getDirectWeightedTypes(itemId); + + // sort with least frequent first + List sortedTypeIndices = new ArrayList<>(wmap.keySet()); + sortedTypeIndices.sort( (Integer i, Integer j) -> wmap.get(i) - wmap.get(j)); + + EWAHCompressedBitmap mask = new EWAHCompressedBitmap(); + double pr = 1.0; + for (int i=0; i< effectiveK; i++) { + Integer iClassIx = sortedTypeIndices.get(i); + Double w = wmap.get(iClassIx) / 100.0; + //LOG.info("Class "+iClassIx +" which is "+i+"-least frequent has weight "+w+" for individual "+itemId+" in combo "+n); + if ( (n >> i) % 2 == 0) { + mask.set(iClassIx); + pr *= 1-w; + } + else { + pr *= w; + } + } + //LOG.info("Instance "+itemId+" in combo "+n+" has Pr = "+pr); + + EWAHCompressedBitmap dtmapMasked = dtmap.xor(mask); + EWAHCompressedBitmap inferredTypesBM = knowledgeBase.getSuperClassesBM(dtmapMasked); + WeightedTypesBM wtbm = new WeightedTypesBM(inferredTypesBM, pr); + m.put(n, wtbm); + return wtbm; + } /** * @return probability a query class is a false positive diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java index 650f0bf..65bbfbd 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java @@ -144,12 +144,13 @@ public EWAHCompressedBitmap getDirectSubClassesBM(String classId); - /** - * @param classIds - * @return union of all superclasses (direct and indirect and equivalent) as a bitmap - */ - public EWAHCompressedBitmap getSubClassesBM(Set classIds); + /** + * @param classIds + * @return union of all superclasses (direct 
and indirect and equivalent) as a bitmap + */ + public EWAHCompressedBitmap getSubClassesBM(Set classIds); + /** * @param classIds * @return union of all direct subclasses as a bitmap @@ -179,7 +180,13 @@ * @return union of all superclasses as a bitmap */ public EWAHCompressedBitmap getSuperClassesBM(Set classIds); - + + /** + * @param classIds + * @return union of all superclasses (direct and indirect and equivalent) as a bitmap + */ + public EWAHCompressedBitmap getSuperClassesBM(EWAHCompressedBitmap classesBM); + /** * @param classIndex * @return superclasses (direct and indirect and equivalent) of classId as bitmap @@ -199,12 +206,18 @@ */ public EWAHCompressedBitmap getTypesBM(String id); - /** - * @param id - an individual - * @return direct types as bitmap - */ - public EWAHCompressedBitmap getDirectTypesBM(String id); - + /** + * @param id - an individual + * @return direct types as bitmap + */ + public EWAHCompressedBitmap getDirectTypesBM(String id); + + /** + * @param id - an individual + * @return map between Type class index and 0 getDirectWeightedTypes(String id); + /** * @param itemId * @return bitmap representation of all (direct and indirect) classes known to be NOT instantiated diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/ewah/EWAHKnowledgeBaseStore.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/ewah/EWAHKnowledgeBaseStore.java index 81c0a2c..2de3213 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/ewah/EWAHKnowledgeBaseStore.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/ewah/EWAHKnowledgeBaseStore.java @@ -1,5 +1,6 @@ package org.monarchinitiative.owlsim.kb.ewah; +import java.util.Collection; import java.util.Set; import com.googlecode.javaewah.EWAHCompressedBitmap; @@ -63,7 +64,7 @@ public EWAHCompressedBitmap getSuperClasses(int clsIndex) { return storedSuperClasses[clsIndex]; } - public EWAHCompressedBitmap getClasses(Set clsIndices) { + public 
EWAHCompressedBitmap getClasses(Collection clsIndices) { EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); for (int i : clsIndices) { bm.set(i); diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java index 42e2ba8..b3b9fb4 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java @@ -26,7 +26,9 @@ import org.prefixcommons.CurieUtil; import org.semanticweb.owlapi.model.AxiomType; import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotation; import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; import org.semanticweb.owlapi.model.OWLAnnotationValue; import org.semanticweb.owlapi.model.OWLClass; import org.semanticweb.owlapi.model.OWLClassAssertionAxiom; @@ -106,6 +108,9 @@ private Map>> propertyValueMapMap; Map> opposingClassMap = new HashMap>(); + + Map> individualToWeightedDirectTypeMap = new HashMap<>(); + private int[] individualCountPerClassArray; @@ -511,6 +516,9 @@ private void storeInferences() { ontoEWAHStore.setDirectIndividuals(clsIndex, individualInts); } + + // populate frequency-awareness map + individualToWeightedDirectTypeMap = new HashMap<>(); for (OWLNamedIndividual i : individualsInSignature) { int individualIndex = getIndex(i); // LOG.info("String inferences for "+i+" --> " +individualIndex); @@ -518,8 +526,49 @@ private void storeInferences() { getIntegersForClassSet(owlReasoner.getTypes(i, true))); ontoEWAHStore.setTypes(individualIndex, getIntegersForClassSet(owlReasoner.getTypes(i, false))); + + // TODO - ensure robust for equivalent individuals + Map wmap = new HashMap<>(); + individualToWeightedDirectTypeMap.put(individualIndex, wmap); + for 
(OWLClassAssertionAxiom caax : owlOntology.getClassAssertionAxioms(i)) { + int cix; + + // only associations to named classes + if (caax.getClassExpression().isAnonymous()) { + continue; + } + cix = getIndex(caax.getClassExpression().asOWLClass()); + + // we use reification to store probability + for (OWLAnnotation ann : caax.getAnnotations()) { + OWLAnnotationProperty prop = ann.getProperty(); + OWLAnnotationValue v = ann.getValue(); + if (v instanceof OWLLiteral) { + OWLLiteral lv = v.asLiteral().get(); + Double pr = null; + if (lv.isDouble()) { + pr = lv.parseDouble(); + } + if (lv.isFloat()) { + pr = (double) lv.parseFloat(); + } + if (pr != null) { + // TODO : decide on a vocabulary + if (prop.getIRI().toString().contains("probability")) { + wmap.put(cix, (int) (pr * 100)); + } + } + if (lv.isInteger()) { + if (prop.getIRI().toString().contains("frequenct")) { + wmap.put(cix, lv.parseInteger()); + } + + } + } + } + } - // Treat CLassAssertion( ComplementOf(c) i) as a negative assertion + // Treat ClassAssertion( ComplementOf(c) i) as a negative assertion Set ncs = new HashSet(); Set ncsDirect = new HashSet(); for (OWLClassAssertionAxiom cx : owlOntology.getClassAssertionAxioms(i)) { @@ -813,6 +862,13 @@ protected EWAHCompressedBitmap getSuperClassesBMByOWLClassSet(Set clsS return ontoEWAHStore.getSuperClasses(classIndices); } + /* (non-Javadoc) + * @see org.monarchinitiative.owlsim.kb.BMKnowledgeBase#getSuperClassesBM(com.googlecode.javaewah.EWAHCompressedBitmap) + */ + public EWAHCompressedBitmap getSuperClassesBM(EWAHCompressedBitmap classesBM) { + return ontoEWAHStore.getSuperClasses(new HashSet<>(classesBM.getPositions())); + } + public EWAHCompressedBitmap getSuperClassesBM(String cid) { return ontoEWAHStore.getSuperClasses(getClassIndex(cid)); } @@ -950,6 +1006,15 @@ public EWAHCompressedBitmap getTypesBM(String id) { public EWAHCompressedBitmap getTypesBM(int individualIndex) { return ontoEWAHStore.getTypes(individualIndex); } + + /* (non-Javadoc) + * 
@see org.monarchinitiative.owlsim.kb.BMKnowledgeBase#getDirectWeightedTypes(java.lang.String) + */ + public Map getDirectWeightedTypes(String id) { + int iix = getIndividualIndex(id); + return individualToWeightedDirectTypeMap.get(iix); + } + /** * @param id diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java index 9318ede..041b20a 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java @@ -109,10 +109,13 @@ protected void load(String fn, String... ontfns) throws OWLOntologyCreationExcep kb = loader.createKnowledgeBaseInterface(); } - protected void loadSimplePhenoWithNegation() throws OWLOntologyCreationException { - load("simple-pheno-with-negation.owl"); - - } + protected void loadSimplePhenoWithNegation() throws OWLOntologyCreationException { + load("simple-pheno-with-negation.owl"); + } + + protected void loadSimplePhenoWithFrequency() throws OWLOntologyCreationException { + load("simple-pheno-with-freqs.owl"); + } @Deprecated protected void search(ProfileMatcher profileMatcher, @@ -188,5 +191,17 @@ protected boolean isRankedLast(String matchId, MatchSet matchSet) { LOG.info("Rank of match "+matchId+" is "+matchRank+" which is last or joint last"); return true; } + + protected boolean isRankedAt(String matchId, MatchSet matchSet, int expectedRank) { + int matchRank = 0; + for (Match m : matchSet.getMatches()) { + int rank = m.getRank(); + + if (m.getMatchId().equals(matchId)) { + return (rank == expectedRank); + } + } + return false; + } } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java 
b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java index b83ab08..d40c7d3 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java @@ -80,6 +80,59 @@ public void testExamplePositiveOnly() throws Exception { } + @Test + public void testFrequencyWare() throws Exception { + loadSimplePhenoWithFrequency(); + //LOG.info("INDS="+kb.getIndividualIdsInSignature()); + ProfileMatcher profileMatcher = createProfileMatcher(kb); + ((NaiveBayesFixedWeightTwoStateProfileMatcher) profileMatcher).setkLeastFrequent(3); + + Assert.assertTrue(kb.getIndividualIdsInSignature().size() > 0); + + int nOk = 0; + for (String i : kb.getIndividualIdsInSignature()) { + + ProfileQuery pq = profileMatcher.createPositiveProfileQuery(i); + TestQuery tq = new TestQuery(pq, i, 4); // self should always be ranked first + String fn = i.replaceAll(".*/", ""); + eval.writeJsonTo("target/naivebfreq-test-results-"+fn+".json"); + Assert.assertTrue(eval.evaluateTestQuery(profileMatcher, tq)); + + if (i.equals("http://x.org/ind-dec-all")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-no-brain-phenotype", tq.matchSet)); + nOk++; + } + if (i.equals("http://x.org/ind-big-heart-small-brain")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-big-femur", tq.matchSet)); + + // targets with frequency + Assert.assertTrue(isRankedAt("http://x.org/fplus-big-heart-small-brain", tq.matchSet, 2)); + Assert.assertTrue(isRankedAt("http://x.org/f0-big-heart-small-brain", tq.matchSet, 3)); + Assert.assertTrue(isRankedAt("http://x.org/fminus-big-heart-small-brain", tq.matchSet, 4)); + nOk++; + } + if (i.equals("http://x.org/ind-small-heart-big-brain")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-big-femur", tq.matchSet)); + + // 
targets with frequency + Assert.assertTrue(isRankedAt("http://x.org/fminus-big-heart-small-brain", tq.matchSet, 2)); + Assert.assertTrue(isRankedAt("http://x.org/f0-big-heart-small-brain", tq.matchSet, 3)); + Assert.assertTrue(isRankedAt("http://x.org/fplus-big-heart-small-brain", tq.matchSet, 4)); + nOk++; + } + if (i.equals("http://x.org/ind-unstated-phenotype")) { + //Assert.assertTrue(isRankedLast("http://x.org/ind-no-phenotype", tq.matchSet)); + //temporarily removed the no-phenotype individual from test; auto-pass this for now + nOk++; + } + if (i.equals("http://x.org/ind-no-brain-phenotype")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-inc-all", tq.matchSet)); + nOk++; + } + + } + Assert.assertEquals(5, nOk); + } } diff --git a/owlsim-core/src/test/resources/simple-pheno-with-freqs.owl b/owlsim-core/src/test/resources/simple-pheno-with-freqs.owl new file mode 100644 index 0000000..2b5850a --- /dev/null +++ b/owlsim-core/src/test/resources/simple-pheno-with-freqs.owl @@ -0,0 +1,282 @@ +Prefix: : +Prefix: dc: +Prefix: owl: +Prefix: rdf: +Prefix: rdfs: +Prefix: xml: +Prefix: xsd: +Prefix: x: + + +Ontology: + + +AnnotationProperty: x:probability + + +Datatype: xsd:double + + +Class: absent-heart + + SubClassOf: + hypoplastic-heart + + +Class: bone-length + + SubClassOf: + bone-morphology + + +Class: bone-morphology + + SubClassOf: + skeletal-phenotype + + +Class: bone-shape + + SubClassOf: + bone-morphology + + +Class: brain-morphology + + SubClassOf: + neuro-phenotype + + +Class: brain-shape + + SubClassOf: + brain-morphology + + +Class: brain-size + + SubClassOf: + brain-morphology + + +Class: circulatory-phenotype + + SubClassOf: + phenotype + + +Class: dec-bone-length + + SubClassOf: + bone-length + + + +Class: dec-brain-size + + SubClassOf: + brain-size + + + +Class: dec-femur-length + + SubClassOf: + dec-bone-length + + + +Class: heart-morphology + + SubClassOf: + circulatory-phenotype + + +Class: heart-shape + + SubClassOf: + heart-morphology + + 
+Class: heart-size + + SubClassOf: + heart-morphology + + +Class: hyperplastic-heart + + SubClassOf: + heart-size + + + +Class: hypoplastic-heart + + SubClassOf: + heart-size + + + +Class: inc-bone-length + + SubClassOf: + bone-length + + + +Class: inc-brain-size + + SubClassOf: + brain-size + + + +Class: inc-femur-length + + SubClassOf: + inc-bone-length + + + +Class: neuro-phenotype + + SubClassOf: + phenotype + + +Class: phenotype + + +Class: skeletal-phenotype + + SubClassOf: + phenotype + + +Individual: ind-big-femur + + Types: + inc-femur-length + + +Individual: ind-big-heart-big-brain + + Types: + hyperplastic-heart, + inc-brain-size + +Individual: fplus-big-heart-small-brain + + Types: + Annotations: x:probability "0.75"^^xsd:double dec-brain-size, + Annotations: x:probability "0.25"^^xsd:double inc-brain-size, + Annotations: x:probability "0.75"^^xsd:double hyperplastic-heart, + Annotations: x:probability "0.25"^^xsd:double hypoplastic-heart + +Individual: f0-big-heart-small-brain + + Types: + Annotations: x:probability "0.5"^^xsd:double dec-brain-size, + Annotations: x:probability "0.5"^^xsd:double inc-brain-size, + Annotations: x:probability "0.5"^^xsd:double hyperplastic-heart, + Annotations: x:probability "0.5"^^xsd:double hypoplastic-heart + +Individual: fminus-big-heart-small-brain + + Types: + Annotations: x:probability "0.25"^^xsd:double dec-brain-size, + Annotations: x:probability "0.75"^^xsd:double inc-brain-size, + Annotations: x:probability "0.25"^^xsd:double hyperplastic-heart, + Annotations: x:probability "0.75"^^xsd:double hypoplastic-heart + + +Individual: ind-big-heart-small-brain + + Types: dec-brain-size, + hyperplastic-heart + + +Individual: ind-bone + + Types: + bone-morphology + + +Individual: ind-brain + + Types: + brain-morphology + + +Individual: ind-dec-all + + Types: + dec-bone-length, + dec-brain-size, + hypoplastic-heart + + +Individual: ind-heart-bone + + Types: + bone-morphology, + heart-morphology + + +Individual: 
ind-heart-brain + + Types: + brain-morphology, + heart-morphology + + +Individual: ind-heart-brain-bone + + Types: + bone-morphology, + brain-morphology, + heart-morphology + + +Individual: ind-inc-all + + Types: + hyperplastic-heart, + inc-bone-length, + inc-brain-size + + +Individual: ind-no-brain-phenotype + + Types: + phenotype, + not (brain-morphology) + + +Individual: ind-small-femur + + Types: + dec-femur-length + + +Individual: ind-small-heart-big-brain + + Types: + hypoplastic-heart, + inc-brain-size + + +Individual: ind-small-heart-small-brain + + Types: + dec-brain-size, + hypoplastic-heart + + +Individual: ind-unstated-phenotype + + Types: + phenotype + + From b9c84c66db4a08b507cea21268d1d8ac6d04ab1d Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Thu, 2 Mar 2017 12:12:20 -0800 Subject: [PATCH 2/7] fixing typo in property name --- .../org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java index b3b9fb4..cd3f3ef 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java @@ -559,7 +559,7 @@ private void storeInferences() { } } if (lv.isInteger()) { - if (prop.getIRI().toString().contains("frequenct")) { + if (prop.getIRI().toString().contains("frequency")) { wmap.put(cix, lv.parseInteger()); } From 2c57dc76d27c09eb07029872fac1e4905a309ce3 Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Thu, 2 Mar 2017 12:12:30 -0800 Subject: [PATCH 3/7] reducing logging --- .../matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java | 4 ++-- .../compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java | 2 +- 2 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java index afcf28c..7f13bc0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java @@ -213,7 +213,7 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { // any node which has an off query parent is discounted //EWAHCompressedBitmap maskedTargetProfileBM = nodesHtBM.and(queryBlanketProfileBM); - LOG.info("TARGET PROFILE for "+itemId+" "+nodesHtBM); + //LOG.info("TARGET PROFILE for "+itemId+" "+nodesHtBM); // cumulative log-probability double logp = 0.0; @@ -398,7 +398,7 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { indArr[n] = itemId; sumOfProbs += p; n++; - LOG.info("logp for "+itemId+" = "+logp+" sumOfLogProbs="+sumOfProbs); + //LOG.info("logp for "+itemId+" = "+logp+" sumOfLogProbs="+sumOfProbs); } for (n = 0; n Date: Fri, 3 Mar 2017 20:45:49 -0800 Subject: [PATCH 5/7] [#62] moved guice modules away from core --- .../owlsim/compute/classmatch/ClassMatcher.java | 109 +- .../owlsim/compute/classmatch/package-info.java | 3 - .../SimplePairwiseConditionalProbabilityIndex.java | 2 - .../owlsim/compute/enrich/EnrichmentConfig.java | 2 - .../impl/HypergeometricEnrichmentEngine.java | 5 +- .../matcher/impl/AbstractProfileMatcher.java | 463 +++-- .../AbstractSemanticSimilarityProfileMatcher.java | 23 +- .../impl/BayesianNetworkProfileMatcher.java | 214 +- .../matcher/impl/GridNegatedProfileMatcher.java | 3 - .../compute/matcher/impl/GridProfileMatcher.java | 46 +- .../impl/JaccardSimilarityProfileMatcher.java | 27 +- 
...InformationContentSimilarityProfileMatcher.java | 42 +- ...veBayesFixedWeightThreeStateProfileMatcher.java | 763 ++++--- ...FixedWeightTwoStateNoBlanketProfileMatcher.java | 9 +- ...aiveBayesFixedWeightTwoStateProfileMatcher.java | 118 +- .../NaiveBayesVariableWeightProfileMatcher.java | 79 +- .../matcher/impl/PhenodigmICProfileMatcher.java | 74 +- .../ThreeStateBayesianNetworkProfileMatcher.java | 171 +- .../CosineNegativeSimilarityProfileMatcher.java | 4 - .../cosine/CosineSimilarityProfileMatcher.java | 3 - .../CosineWeightedSimilarityProfileMatcher.java | 4 - .../MostInformativeCommonAncestorCalculator.java | 53 +- ...ostInformativeCommonAncestorCalculatorImpl.java | 62 +- .../org/monarchinitiative/owlsim/io/OWLLoader.java | 385 ++-- .../owlsim/kb/BMKnowledgeBase.java | 186 +- .../owlsim/kb/KnowledgeBaseModule.java | 118 -- .../owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java | 2098 ++++++++++---------- .../owlsim/model/match/ProfileQuery.java | 27 +- .../matcher/perf/AbstractProfileMatcherPerfIT.java | 2 - .../owlsim/compute/mica/MICAStoreBench.java | 2 - .../owlsim/services/OwlSimServiceApplication.java | 6 +- .../services/modules}/EnrichmentMapModule.java | 11 +- .../services/modules/KnowledgeBaseModule.java | 127 ++ .../owlsim/services/modules}/MatcherMapModule.java | 33 +- .../owlsim/services/modules}/MatcherModule.java | 26 +- .../modules}/bindings/IndicatesDataTsvs.java | 9 +- .../bindings/IndicatesOwlDataOntologies.java | 9 +- .../modules}/bindings/IndicatesOwlOntologies.java | 9 +- 38 files changed, 2596 insertions(+), 2731 deletions(-) delete mode 100644 owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/KnowledgeBaseModule.java rename {owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich => owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules}/EnrichmentMapModule.java (76%) create mode 100644 owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java rename 
{owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher => owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules}/MatcherMapModule.java (51%) rename {owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/runner => owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules}/MatcherModule.java (62%) rename {owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb => owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules}/bindings/IndicatesDataTsvs.java (65%) rename {owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb => owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules}/bindings/IndicatesOwlDataOntologies.java (64%) rename {owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb => owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules}/bindings/IndicatesOwlOntologies.java (65%) diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcher.java index e3331c0..f3c784a 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcher.java @@ -3,9 +3,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; - -import javax.inject.Inject; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.kb.LabelMapper; @@ -20,65 +17,57 @@ */ public class ClassMatcher { - BMKnowledgeBase kb; - - - @Inject - public ClassMatcher(BMKnowledgeBase kb) { - super(); - this.kb = kb; - } + BMKnowledgeBase kb; + + public ClassMatcher(BMKnowledgeBase kb) { + super(); + this.kb = kb; + } + + /** + * Find best match for every class in ont1, where the best match is in ont2 + * + * @param qOnt + * @param 
tOnt + * @return list of matches + */ + public List matchOntologies(String qOnt, String tOnt) { + Set qids = kb.getClassIdsByOntology(qOnt); + Set tids = kb.getClassIdsByOntology(tOnt); + return matchClassSets(qids, tids); + } + + public List matchClassSets(Set qids, Set tids) { + ArrayList matches = new ArrayList<>(); + for (String q : qids) { + matches.add(getBestMatch(q, tids)); + } + return matches; + } - /** - * Find best match for every class in ont1, where the best - * match is in ont2 - * - * @param qOnt - * @param tOnt - * @return list of matches - */ - public List matchOntologies(String qOnt, String tOnt) { - Set qids = kb.getClassIdsByOntology(qOnt); - Set tids = kb.getClassIdsByOntology(tOnt); - return matchClassSets(qids, tids); - } + private SimpleClassMatch getBestMatch(String q, Set tids) { + EWAHCompressedBitmap qbm = kb.getSuperClassesBM(q); + double bestEqScore = 0.0; + String best = null; + for (String t : tids) { + EWAHCompressedBitmap tbm = kb.getSuperClassesBM(t); + int numInQueryAndInTarget = qbm.andCardinality(tbm); + int numInQueryOrInTarget = qbm.orCardinality(tbm); + double eqScore = numInQueryAndInTarget / (double) numInQueryOrInTarget; + if (eqScore > bestEqScore) { + bestEqScore = eqScore; + best = t; + } + } - public List matchClassSets(Set qids, - Set tids) { - ArrayList matches = new ArrayList<>(); - for (String q : qids) { - matches.add(getBestMatch(q, tids)); - } - return matches; - } + EWAHCompressedBitmap tbm = kb.getSuperClassesBM(best); + int numInQueryAndInTarget = qbm.andCardinality(tbm); + double subClassScore = numInQueryAndInTarget / (double) qbm.cardinality(); + double superClassScore = numInQueryAndInTarget / (double) tbm.cardinality(); - private SimpleClassMatch getBestMatch(String q, Set tids) { - EWAHCompressedBitmap qbm = kb.getSuperClassesBM(q); - double bestEqScore = 0.0; - String best = null; - for (String t : tids) { - EWAHCompressedBitmap tbm = kb.getSuperClassesBM(t); - int numInQueryAndInTarget = 
qbm.andCardinality(tbm); - int numInQueryOrInTarget = qbm.orCardinality(tbm); - double eqScore = numInQueryAndInTarget / (double) numInQueryOrInTarget; - if (eqScore > bestEqScore) { - bestEqScore = eqScore; - best = t; - } - } - - EWAHCompressedBitmap tbm = kb.getSuperClassesBM(best); - int numInQueryAndInTarget = qbm.andCardinality(tbm); - double subClassScore = numInQueryAndInTarget / (double) qbm.cardinality(); - double superClassScore = numInQueryAndInTarget / (double) tbm.cardinality(); - - LabelMapper lm = kb.getLabelMapper(); - return new SimpleClassMatch(q, best, - lm.getArbitraryLabel(q), - lm.getArbitraryLabel(best), - bestEqScore, - subClassScore, - superClassScore); - } + LabelMapper lm = kb.getLabelMapper(); + return new SimpleClassMatch(q, best, lm.getArbitraryLabel(q), lm.getArbitraryLabel(best), bestEqScore, + subClassScore, superClassScore); + } } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/package-info.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/package-info.java index a1b2a88..2a10b9a 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/package-info.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/package-info.java @@ -1,7 +1,4 @@ /** - * - */ -/** * @author cjm * */ diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/cpt/SimplePairwiseConditionalProbabilityIndex.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/cpt/SimplePairwiseConditionalProbabilityIndex.java index a40717c..dc49f02 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/cpt/SimplePairwiseConditionalProbabilityIndex.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/cpt/SimplePairwiseConditionalProbabilityIndex.java @@ -1,7 +1,5 @@ package org.monarchinitiative.owlsim.compute.cpt; -import java.util.Map; - import 
org.monarchinitiative.owlsim.kb.BMKnowledgeBase; /** diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentConfig.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentConfig.java index 3d02f07..276d490 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentConfig.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentConfig.java @@ -1,7 +1,5 @@ package org.monarchinitiative.owlsim.compute.enrich; -import javax.inject.Inject; - public class EnrichmentConfig { public enum AnalysisType {OVER, UNDER, BOTH}; diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/impl/HypergeometricEnrichmentEngine.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/impl/HypergeometricEnrichmentEngine.java index 5478031..b8fcc19 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/impl/HypergeometricEnrichmentEngine.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/impl/HypergeometricEnrichmentEngine.java @@ -6,16 +6,14 @@ import java.util.Set; import java.util.stream.Collectors; -import javax.inject.Inject; - import org.apache.commons.math3.distribution.HypergeometricDistribution; import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.enrich.EnrichmentConfig; +import org.monarchinitiative.owlsim.compute.enrich.EnrichmentConfig.AnalysisType; import org.monarchinitiative.owlsim.compute.enrich.EnrichmentEngine; import org.monarchinitiative.owlsim.compute.enrich.EnrichmentQuery; import org.monarchinitiative.owlsim.compute.enrich.EnrichmentResult; import org.monarchinitiative.owlsim.compute.enrich.EnrichmentResultSet; -import org.monarchinitiative.owlsim.compute.enrich.EnrichmentConfig.AnalysisType; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.kb.ewah.EWAHUtils; import 
org.monarchinitiative.owlsim.kb.filter.Filter; @@ -44,7 +42,6 @@ protected EnrichmentConfig enrichmentConfig = new EnrichmentConfig(); - @Inject public HypergeometricEnrichmentEngine(BMKnowledgeBase kb) { super(); this.kb = kb; diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractProfileMatcher.java index beabc70..d0b15e9 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractProfileMatcher.java @@ -4,8 +4,6 @@ import java.util.List; import java.util.Set; -import javax.inject.Inject; - import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; import org.apache.commons.math3.stat.inference.TestUtils; import org.apache.log4j.Logger; @@ -32,246 +30,235 @@ import com.googlecode.javaewah.EWAHCompressedBitmap; /** - * common methods and variables for all ProfileMatchers + * common methods and variables for all ProfileMatchers * * @author cjm * */ public abstract class AbstractProfileMatcher implements ProfileMatcher { - private Logger LOG = Logger.getLogger(AbstractProfileMatcher.class); - - protected BMKnowledgeBase knowledgeBase; - private FilterEngine filterEngine; - - - /** - * @param knowledgeBase - */ - @Inject - public AbstractProfileMatcher(BMKnowledgeBase knowledgeBase) { - super(); - this.knowledgeBase = knowledgeBase; - this.filterEngine = FilterEngine.create(knowledgeBase); - } - - /** - * @return ontology interface - */ - public BMKnowledgeBase getKnowledgeBase() { - return knowledgeBase; - } - - - - @Inject - private void setKnowledgeBase(BMKnowledgeBase knowledgeBase) { - this.knowledgeBase = knowledgeBase; - } - - public void precompute() { - } - - /** - * all positive nodes in query plus their ancestors - * - * @param q - * @return - */ - 
protected EWAHCompressedBitmap getProfileBM(ProfileQuery q) { - return knowledgeBase.getSuperClassesBM(q.getQueryClassIds()); - } - protected EWAHCompressedBitmap getDirectProfileBM(ProfileQuery q) { - Set positions = new HashSet(); - for (String cid : q.getQueryClassIds()) { - positions.add(knowledgeBase.getClassIndex(cid)); - } - return EWAHUtils.convertIndexSetToBitmap(positions); - } - - // given an array of class IDs c1...cn, return an array S1...Sn, - // where Si is the set of superclasses (direct and indirect) of ci, - // stored as a bitmap - protected EWAHCompressedBitmap[] getProfileSetBM(String[] qcids) { - EWAHCompressedBitmap[] bms = new EWAHCompressedBitmap[qcids.length]; - for (int i=0; i bits = new HashSet(); - for (String id : nq.getQueryNegatedClassIds()) { - int ci = knowledgeBase.getClassIndex(id); - bits.addAll( knowledgeBase.getSubClasses(ci).getPositions() ); - } - return EWAHUtils.convertIndexSetToBitmap(bits); - } - - protected EWAHCompressedBitmap getDirectNegatedProfileBM(QueryWithNegation q) { - Set bits = new HashSet(); - // TODO: less dumb implementation... 
- for (String id : q.getQueryNegatedClassIds()) { - int ci = knowledgeBase.getClassIndex(id); - bits.add(ci); - } - return EWAHUtils.convertIndexSetToBitmap(bits); - } - - protected Match createMatch(String matchId, String matchLabel, double s) { - return MatchImpl.create(matchId, matchLabel, s); - } - - /** - * @param filter - * @return list of individuals that satisfy filter - * @throws UnknownFilterException - */ - protected List getFilteredIndividualIds(Filter filter) throws UnknownFilterException { - return filterEngine.applyFilter(filter); - } - - /* (non-Javadoc) - * @see org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher#createProfileQuery(java.lang.String) - */ - public ProfileQuery createProfileQuery(String individualId) { - return createProfileQuery(individualId, null); - } - - public ProfileQuery createPositiveProfileQuery(String individualId) { - return createProfileQuery(individualId, false); - } - - public ProfileQuery createProfileQueryWithNegation(String individualId) { - return createProfileQuery(individualId, true); - } - - public ProfileQuery createProfileQuery(String individualId, Boolean isUseNegation) { - Preconditions.checkNotNull(individualId); - EWAHCompressedBitmap bmi = knowledgeBase.getDirectTypesBM(individualId); - EWAHCompressedBitmap nbmi = knowledgeBase.getDirectNegatedTypesBM(individualId); - Set qcids = knowledgeBase.getClassIds(bmi); - Set nqcids = knowledgeBase.getClassIds(nbmi); - ProfileQuery q; - if (isUseNegation == null) { - if (nqcids.size() == 0) { - q = ProfileQueryImpl.create(qcids); - } - else { - q = QueryWithNegationImpl.create(qcids, nqcids); - } - } - else { - if (isUseNegation) { - q = QueryWithNegationImpl.create(qcids, nqcids); - } - else { - q = ProfileQueryImpl.create(qcids); - } - } - return q; - } - - public ProfileQuery createProfileQueryFromClasses( - Set qcids, Set nqcids) { - ProfileQuery q; - if (nqcids != null && nqcids.size() == 0) { - q = ProfileQueryImpl.create(qcids); - } - else { - q = 
QueryWithNegationImpl.create(qcids, nqcids); - } - return q; - } - - public MatchSet findMatchProfile(String individualId) throws IncoherentStateException { - ProfileQuery q = createProfileQuery(individualId); - return findMatchProfile(q); - } - - - public MatchSet findMatchProfile(ProfileQuery q) throws IncoherentStateException { - MatchSet ms = findMatchProfileAll(q); - int limit = q.getLimit() == null ? 200 : q.getLimit(); - if (limit > -1) { - ms.truncate(limit); - } - return ms; - } - - public MatchSet findMatchProfile(ProfileQuery q, double alpha) throws IncoherentStateException { - MatchSet ms = findMatchProfileAll(q); - - //use all matches as "background" - //TODO this is a naive assumption, needs refactor - DescriptiveStatistics ds = ms.getScores(); - MatchSet significantMatchingSet = MatchSetImpl.create(q); - - for (Match m : ms.getMatches()) { - double p = TestUtils.tTest(m.getScore(), ds); - if (p < alpha) { - m.setSignificance(p); - significantMatchingSet.add(m); - } - } - return ms; - } - - // additional layer of indirection above Impl, adds standard metadata - private MatchSet findMatchProfileAll(ProfileQuery q) throws IncoherentStateException { - long t1 = System.currentTimeMillis(); - MatchSet ms = findMatchProfileImpl(q); // implementing class - long t2 = System.currentTimeMillis(); - ms.setExecutionMetadata(ExecutionMetadataImpl.create(t1, t2)); - LOG.info("t(ms)="+ms.getExecutionMetadata().getDuration()); - MethodMetadata mmd = new MethodMetadata(); - mmd.methodName = getShortName(); - ms.setMethodMetadata(mmd); - return ms; - } - - public Match compareProfilePair(ProfileQuery q, ProfileQuery t) throws UnknownFilterException, IncoherentStateException { - AnonIndividualFilter filter = new AnonIndividualFilter(t); - q.setFilter(filter); - MatchSet matchSet = findMatchProfile(q); - return matchSet.getMatches().get(0); - } - - - // handling of anonymous individuals - - private boolean isAnonymousIndividual(String individualId) { - return 
individualId.startsWith(AnonIndividualFilter.PREFIX); - } - - - protected EWAHCompressedBitmap getDirectTypesBM(String individualId) { - if (isAnonymousIndividual(individualId)) { - Set cids = - AnonIndividualFilter.getClassIdsFromExpression(individualId); - return knowledgeBase.getClassesBM(cids); - } - else - return knowledgeBase.getDirectTypesBM(individualId); - } - protected EWAHCompressedBitmap getTypesBM(String individualId) { - if (isAnonymousIndividual(individualId)) { - Set cids = - AnonIndividualFilter.getClassIdsFromExpression(individualId); - return knowledgeBase.getSuperClassesBM(cids); - } - else - return knowledgeBase.getTypesBM(individualId); - } - - protected abstract MatchSet findMatchProfileImpl(ProfileQuery q) throws IncoherentStateException; + private Logger LOG = Logger.getLogger(AbstractProfileMatcher.class); + + protected BMKnowledgeBase knowledgeBase; + private FilterEngine filterEngine; + + /** + * @param knowledgeBase + */ + public AbstractProfileMatcher(BMKnowledgeBase knowledgeBase) { + super(); + this.knowledgeBase = knowledgeBase; + this.filterEngine = FilterEngine.create(knowledgeBase); + } + + /** + * @return ontology interface + */ + public BMKnowledgeBase getKnowledgeBase() { + return knowledgeBase; + } + + private void setKnowledgeBase(BMKnowledgeBase knowledgeBase) { + this.knowledgeBase = knowledgeBase; + } + + public void precompute() { + } + + /** + * all positive nodes in query plus their ancestors + * + * @param q + * @return + */ + protected EWAHCompressedBitmap getProfileBM(ProfileQuery q) { + return knowledgeBase.getSuperClassesBM(q.getQueryClassIds()); + } + + protected EWAHCompressedBitmap getDirectProfileBM(ProfileQuery q) { + Set positions = new HashSet(); + for (String cid : q.getQueryClassIds()) { + positions.add(knowledgeBase.getClassIndex(cid)); + } + return EWAHUtils.convertIndexSetToBitmap(positions); + } + + // given an array of class IDs c1...cn, return an array S1...Sn, + // where Si is the set of 
superclasses (direct and indirect) of ci, + // stored as a bitmap + protected EWAHCompressedBitmap[] getProfileSetBM(String[] qcids) { + EWAHCompressedBitmap[] bms = new EWAHCompressedBitmap[qcids.length]; + for (int i = 0; i < qcids.length; i++) { + String qc = qcids[i]; + Preconditions.checkNotNull(qc); + Preconditions.checkNotNull(knowledgeBase.getClassIndex(qc)); + bms[i] = knowledgeBase.getSuperClassesBM(qc); + } + return bms; + } + + // a negated profile implicitly includes subclasses + protected EWAHCompressedBitmap getNegatedProfileBM(ProfileQuery q) { + if (!(q instanceof QueryWithNegation)) { + return new EWAHCompressedBitmap(); + } + QueryWithNegation nq = (QueryWithNegation) q; + Set bits = new HashSet(); + for (String id : nq.getQueryNegatedClassIds()) { + int ci = knowledgeBase.getClassIndex(id); + bits.addAll(knowledgeBase.getSubClasses(ci).getPositions()); + } + return EWAHUtils.convertIndexSetToBitmap(bits); + } + + protected EWAHCompressedBitmap getDirectNegatedProfileBM(QueryWithNegation q) { + Set bits = new HashSet(); + // TODO: less dumb implementation... 
+ for (String id : q.getQueryNegatedClassIds()) { + int ci = knowledgeBase.getClassIndex(id); + bits.add(ci); + } + return EWAHUtils.convertIndexSetToBitmap(bits); + } + + protected Match createMatch(String matchId, String matchLabel, double s) { + return MatchImpl.create(matchId, matchLabel, s); + } + + /** + * @param filter + * @return list of individuals that satisfy filter + * @throws UnknownFilterException + */ + protected List getFilteredIndividualIds(Filter filter) throws UnknownFilterException { + return filterEngine.applyFilter(filter); + } + + /* + * (non-Javadoc) + * + * @see org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher# + * createProfileQuery(java.lang.String) + */ + public ProfileQuery createProfileQuery(String individualId) { + return createProfileQuery(individualId, null); + } + + public ProfileQuery createPositiveProfileQuery(String individualId) { + return createProfileQuery(individualId, false); + } + + public ProfileQuery createProfileQueryWithNegation(String individualId) { + return createProfileQuery(individualId, true); + } + + public ProfileQuery createProfileQuery(String individualId, Boolean isUseNegation) { + Preconditions.checkNotNull(individualId); + EWAHCompressedBitmap bmi = knowledgeBase.getDirectTypesBM(individualId); + EWAHCompressedBitmap nbmi = knowledgeBase.getDirectNegatedTypesBM(individualId); + Set qcids = knowledgeBase.getClassIds(bmi); + Set nqcids = knowledgeBase.getClassIds(nbmi); + ProfileQuery q; + if (isUseNegation == null) { + if (nqcids.size() == 0) { + q = ProfileQueryImpl.create(qcids); + } else { + q = QueryWithNegationImpl.create(qcids, nqcids); + } + } else { + if (isUseNegation) { + q = QueryWithNegationImpl.create(qcids, nqcids); + } else { + q = ProfileQueryImpl.create(qcids); + } + } + return q; + } + + public ProfileQuery createProfileQueryFromClasses(Set qcids, Set nqcids) { + ProfileQuery q; + if (nqcids != null && nqcids.size() == 0) { + q = ProfileQueryImpl.create(qcids); + } else { + q = 
QueryWithNegationImpl.create(qcids, nqcids); + } + return q; + } + + public MatchSet findMatchProfile(String individualId) throws IncoherentStateException { + ProfileQuery q = createProfileQuery(individualId); + return findMatchProfile(q); + } + + public MatchSet findMatchProfile(ProfileQuery q) throws IncoherentStateException { + MatchSet ms = findMatchProfileAll(q); + int limit = q.getLimit() == null ? 200 : q.getLimit(); + if (limit > -1) { + ms.truncate(limit); + } + return ms; + } + + public MatchSet findMatchProfile(ProfileQuery q, double alpha) throws IncoherentStateException { + MatchSet ms = findMatchProfileAll(q); + + // use all matches as "background" + // TODO this is a naive assumption, needs refactor + DescriptiveStatistics ds = ms.getScores(); + MatchSet significantMatchingSet = MatchSetImpl.create(q); + + for (Match m : ms.getMatches()) { + double p = TestUtils.tTest(m.getScore(), ds); + if (p < alpha) { + m.setSignificance(p); + significantMatchingSet.add(m); + } + } + return ms; + } + + // additional layer of indirection above Impl, adds standard metadata + private MatchSet findMatchProfileAll(ProfileQuery q) throws IncoherentStateException { + long t1 = System.currentTimeMillis(); + MatchSet ms = findMatchProfileImpl(q); // implementing class + long t2 = System.currentTimeMillis(); + ms.setExecutionMetadata(ExecutionMetadataImpl.create(t1, t2)); + LOG.info("t(ms)=" + ms.getExecutionMetadata().getDuration()); + MethodMetadata mmd = new MethodMetadata(); + mmd.methodName = getShortName(); + ms.setMethodMetadata(mmd); + return ms; + } + + public Match compareProfilePair(ProfileQuery q, ProfileQuery t) + throws UnknownFilterException, IncoherentStateException { + AnonIndividualFilter filter = new AnonIndividualFilter(t); + q.setFilter(filter); + MatchSet matchSet = findMatchProfile(q); + return matchSet.getMatches().get(0); + } + + // handling of anonymous individuals + + private boolean isAnonymousIndividual(String individualId) { + return 
individualId.startsWith(AnonIndividualFilter.PREFIX); + } + + protected EWAHCompressedBitmap getDirectTypesBM(String individualId) { + if (isAnonymousIndividual(individualId)) { + Set cids = AnonIndividualFilter.getClassIdsFromExpression(individualId); + return knowledgeBase.getClassesBM(cids); + } else + return knowledgeBase.getDirectTypesBM(individualId); + } + + protected EWAHCompressedBitmap getTypesBM(String individualId) { + if (isAnonymousIndividual(individualId)) { + Set cids = AnonIndividualFilter.getClassIdsFromExpression(individualId); + return knowledgeBase.getSuperClassesBM(cids); + } else + return knowledgeBase.getTypesBM(individualId); + } + + protected abstract MatchSet findMatchProfileImpl(ProfileQuery q) throws IncoherentStateException; } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java index 6adf28a..eb042fa 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java @@ -1,38 +1,31 @@ package org.monarchinitiative.owlsim.compute.matcher.impl; -import javax.inject.Inject; - import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator; import org.monarchinitiative.owlsim.compute.mica.impl.MostInformativeCommonAncestorCalculatorImpl; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; /** - * common methods and variables for all ProfileMatcher that - * implement semantic similarity techniques, i.e. those involving a MRCA + * common methods and variables for all ProfileMatcher that implement semantic + * similarity techniques, i.e. 
those involving a MRCA * * @author cjm * */ public abstract class AbstractSemanticSimilarityProfileMatcher extends AbstractProfileMatcher { - - //private Logger LOG = Logger.getLogger(AbstractSemanticSimilarityProfileMatcher.class); - private MostInformativeCommonAncestorCalculator micaCalculator; - + // private Logger LOG = + // Logger.getLogger(AbstractSemanticSimilarityProfileMatcher.class); + private MostInformativeCommonAncestorCalculator micaCalculator; /** * @param knowledgeBase */ - @Inject // TODO - public AbstractSemanticSimilarityProfileMatcher( - BMKnowledgeBase knowledgeBase) { + public AbstractSemanticSimilarityProfileMatcher(BMKnowledgeBase knowledgeBase) { super(knowledgeBase); micaCalculator = new MostInformativeCommonAncestorCalculatorImpl(knowledgeBase); } - - /** * @return object used for calculation of most informative common ancestors */ @@ -43,10 +36,8 @@ public MostInformativeCommonAncestorCalculator getMicaCalculator() { /** * @param micaCalculator */ - private void setMicaCalculator( - MostInformativeCommonAncestorCalculator micaCalculator) { + private void setMicaCalculator(MostInformativeCommonAncestorCalculator micaCalculator) { this.micaCalculator = micaCalculator; } - } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java index 4aee08b..ee22a60 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java @@ -5,8 +5,6 @@ import java.util.Map; import java.util.Set; -import javax.inject.Inject; - import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.cpt.ConditionalProbabilityIndex; import org.monarchinitiative.owlsim.compute.cpt.IncoherentStateException; @@ 
-21,28 +19,31 @@ import com.googlecode.javaewah.EWAHCompressedBitmap; /** - * Calculate probability of observing query (e.g. patient profile) given target as evidence. + * Calculate probability of observing query (e.g. patient profile) given target + * as evidence. * - * This implementation does not explicitly model NOTs, it - * uses a {@link TwoStateConditionalProbabilityIndex}. - * The two states are ON (true/observed) and OFF (unknown/not observed) - * - note the open world assumptions: that the off state means there is no - * information about the truth of the node, it does not mean the node is false. + * This implementation does not explicitly model NOTs, it uses a + * {@link TwoStateConditionalProbabilityIndex}. The two states are ON + * (true/observed) and OFF (unknown/not observed) - note the open world + * assumptions: that the off state means there is no information about the truth + * of the node, it does not mean the node is false. * - * Although we do not model negation as a 3rd state, we still compute on negation, post-hoc, see below. + * Although we do not model negation as a 3rd state, we still compute on + * negation, post-hoc, see below. * *

Calculating probabilities

*

Calculating probabilities for a single query node

* - * Using a {@link TwoStateConditionalProbabilityIndex}, - * probabilities propagate TO a child FROM its parents. + * Using a {@link TwoStateConditionalProbabilityIndex}, probabilities propagate + * TO a child FROM its parents. * * If the query node is ON, and the node is ON in the target, then Pr = 1-fnr; - * otherwise the probability is calculated based on the probability of the parents. + * otherwise the probability is calculated based on the probability of the + * parents. * * - * The probability of a child node being on C=on is dependent on the state of its - * parents; we sum over 2N states + * The probability of a child node being on C=on is dependent on the state of + * its parents; we sum over 2N states * * * @@ -56,28 +57,31 @@ * * * - * For any given query Q=Q1,...Qm, we assume independent probabilities - * and calculate Pr(Q) = Pq(Q1=on,...,Qm=on) + * For any given query Q=Q1,...Qm, we assume independent probabilities and + * calculate Pr(Q) = Pq(Q1=on,...,Qm=on) * *

Negation

* - * Each node can only have two states in this model; the off state can be thought of - * as being the 'unknown' state. We assume an open world assumption. The absence of - * a node in the query should be thought of as 'not observed' rather than 'not'. + * Each node can only have two states in this model; the off state can be + * thought of as being the 'unknown' state. We assume an open world assumption. + * The absence of a node in the query should be thought of as 'not observed' + * rather than 'not'. * - * We still include negation in the calculation; for any negated query node i, we - * calculate Pr(i) = ON, and assign a final probability of 1-fnr (this is the only circumstance - * a fnr can have an effect, since we have the open world model). + * We still include negation in the calculation; for any negated query node i, + * we calculate Pr(i) = ON, and assign a final probability of 1-fnr (this is the + * only circumstance a fnr can have an effect, since we have the open world + * model). * - * Similarly, for any negated target node j, the Pr of any query under this will be 1-fpr + * Similarly, for any negated target node j, the Pr of any query under this will + * be 1-fpr * *

TODOs

* - * Currently this method is too slow to be used for dynamic queries, taking 1-5s per query. - * Some efficiency could be gained by calculating with log-probs. + * Currently this method is too slow to be used for dynamic queries, taking 1-5s + * per query. Some efficiency could be gained by calculating with log-probs. * - * If we cache probabilities per-node for every target, we would gain a lot of speed, - * space = NumClasses x NumTargets + * If we cache probabilities per-node for every target, we would gain a lot of + * speed, space = NumClasses x NumTargets * * * @author cjm @@ -90,18 +94,18 @@ double falseNegativeRate = 0.01; // TODO - do not hardcode double falsePositiveRate = 0.01; // TODO - do not hardcode - ConditionalProbabilityIndex cpi = null; // index of Pr(Node={on,off}|ParentsState) + ConditionalProbabilityIndex cpi = null; // index of + // Pr(Node={on,off}|ParentsState) @Deprecated private Calculator[] calculatorCache; private Double[][] targetClassProbabilityCache; - @Inject private BayesianNetworkProfileMatcher(BMKnowledgeBase kb) { super(kb); int N = kb.getIndividualIdsInSignature().size(); calculatorCache = new Calculator[N]; - for (int i=0; i negatedQueryClassIds = null; if (isUseNegation) { LOG.info("Using QueryWithNegation"); - QueryWithNegation nq = (QueryWithNegation)q; + QueryWithNegation nq = (QueryWithNegation) q; negatedQueryProfileBM = getDirectNegatedProfileBM(nq); negatedQueryClassIds = knowledgeBase.getClassIds(negatedQueryProfileBM); - LOG.info("nqp=" + negatedQueryProfileBM+" // "+negatedQueryClassIds); - } - else { + LOG.info("nqp=" + negatedQueryProfileBM + " // " + negatedQueryClassIds); + } else { LOG.info("Not using QueryWithNegation"); } @@ -170,22 +173,22 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { double pvector[] = new double[indIds.size()]; String indArr[] = new String[indIds.size()]; - int n=0; + int n = 0; // TODO - FOR DEBUGGING ONLY - // int nc=0; - // for (String itemId : indIds) { - // int indIx = 
knowledgeBase.getIndividualIndex(itemId); - // if (targetClassProbabilityCache[indIx] != null) { - // Double[] a = targetClassProbabilityCache[indIx]; - // for (int i=0; i debugMaxP) { debugMaxP = p; } - + if (Double.isNaN(p)) { - LOG.error("NaN for tgt "+itemId); + LOG.error("NaN for tgt " + itemId); } // NEGATION if (negatedQueryProfileBM != null) { double np = 1 - calc.calculateProbability(negatedQueryClassIds); - //LOG.info("Combined Probability = (POS) "+p+" * (NEG) "+np); - p = p*np; + // LOG.info("Combined Probability = (POS) "+p+" * (NEG) "+np); + p = p * np; } pvector[n] = p; indArr[n] = itemId; sumOfProbs += p; n++; - //LOG.info("p for "+itemId+" = "+p); + // LOG.info("p for "+itemId+" = "+p); } if (sumOfProbs == 0.0) { LOG.error("sumOfProds=0.0"); @@ -236,7 +238,7 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { } int tempNumNans = 0; - for (n = 0; n 0) { - LOG.error("#NaNs "+tempNumNans+" / "+pvector.length); - LOG.error("maxPr = "+debugMaxP); + LOG.error("#NaNs " + tempNumNans + " / " + pvector.length); + LOG.error("maxPr = " + debugMaxP); } mp.sortMatches(); return mp; } /** - * We wrap calculation within a class to allow for cacheing relative to - * a particular targetProfile + * We wrap calculation within a class to allow for cacheing relative to a + * particular targetProfile * * @author cjm * @@ -276,17 +278,16 @@ public Calculator(EWAHCompressedBitmap targetProfileBM, EWAHCompressedBitmap neg /** * Top-level call * - * Calculate the probability of all queryClasses being on, - * given the nodes in the target profile are not + * Calculate the probability of all queryClasses being on, given the + * nodes in the target profile are not * - * Note: currently this is asymmetric; ie we do not calculate - * the probability of the target given the query nodes are on; - * this has the effect of penalizing large queries; for a fixed - * query this is not an issue. However, it also does *not* penalize - * broad-spectrum targets. 
+ * Note: currently this is asymmetric; ie we do not calculate the + * probability of the target given the query nodes are on; this has the + * effect of penalizing large queries; for a fixed query this is not an + * issue. However, it also does *not* penalize broad-spectrum targets. * - * This also means the FNR is meaningless, - * unless negation is explicitly used + * This also means the FNR is meaningless, unless negation is explicitly + * used * * @param queryClassIds * @param targetProfileBM @@ -298,23 +299,25 @@ public double calculateProbability(Set queryClassIds) { // treat set of query class Ids as a leaf node that is the // class intersection of all members; ie q1^...^qn // for a class intersection, the CPT is always such that - // Pr=1.0, if all parents=1 - // Pr=0.0 otherwise + // Pr=1.0, if all parents=1 + // Pr=0.0 otherwise for (String queryClassId : queryClassIds) { double p = calculateProbability(queryClassId); if (Double.isNaN(p)) { - LOG.error("NaN for qc="+queryClassId); + LOG.error("NaN for qc=" + queryClassId); } // NEGATION - // the FNR only comes into play if negation is explicitly specified. - // If the query is on but a superclass in the target has been negated, + // the FNR only comes into play if negation is explicitly + // specified. 
+ // If the query is on but a superclass in the target has been + // negated, // we assume the query is a false positive if (negatedTargetProfileBM != null) { if (knowledgeBase.getSuperClassesBM(queryClassId).andCardinality(negatedTargetProfileBM) > 0) { - LOG.info("NEGATIVE EVIDENCE for "+queryClassId); - p *= falsePositiveRate; + LOG.info("NEGATIVE EVIDENCE for " + queryClassId); + p *= falsePositiveRate; } } cump *= p; @@ -323,8 +326,8 @@ public double calculateProbability(Set queryClassIds) { } /** - * probability of queryClass being true, given that all - * nodes in target profile are on + * probability of queryClass being true, given that all nodes in target + * profile are on * * @param queryClassId * @param targetProfileBM @@ -339,53 +342,55 @@ private double calculateProbability(String queryClassId) { /** * Calculate the probability that a node qc is ON. * - * - If this is specified in the query, then a set value is returned (1-FP); - * - If not specified, equal to sum of probabilities of all states of parents - * - * Side effects: caches probability + * - If this is specified in the query, then a set value is returned + * (1-FP); - If not specified, equal to sum of probabilities of all + * states of parents + * + * Side effects: caches probability * * @param qcix * @return Pr(Qi=on|T) */ private double calculateProbability(int qcix) { if (probCache[qcix] != null) { - LOG.debug("Using cached for "+qcix); + LOG.debug("Using cached for " + qcix); return probCache[qcix]; } BMKnowledgeBase kb = getKnowledgeBase(); - LOG.debug("Calculating probability for "+qcix+" ie "+kb.getClassId(qcix)); + LOG.debug("Calculating probability for " + qcix + " ie " + kb.getClassId(qcix)); double probQiGivenT; - // TODO - optimization: determine efficiency of using get(ix) vs other methods + // TODO - optimization: determine efficiency of using get(ix) vs + // other methods if (targetProfileBM.get(qcix)) { LOG.debug("Qi is in target profile"); - probQiGivenT = 
1-falsePositiveRate; - } - else { + probQiGivenT = 1 - falsePositiveRate; + } else { // Qi is NOT in target profile; - // Pr(Qi=on | T) = Pr(QiP1=on, QiP2=on, ..|T)Pr(on on...) + Pr(QiP1=off, ...) + // Pr(Qi=on | T) = Pr(QiP1=on, QiP2=on, ..|T)Pr(on on...) + + // Pr(QiP1=off, ...) List pixs = kb.getDirectSuperClassesBM(qcix).getPositions(); double[] parentProbs = new double[pixs.size()]; LOG.debug("calculating probabilities for parents"); - for (int i=0; i qClassIds = q.getQueryClassIds(); int qsize = qClassIds.size(); queryClassArray = qClassIds.toArray(new String[qsize]); EWAHCompressedBitmap queryProfileBMArr[] = getProfileSetBM(queryClassArray); - - MatchSet mp = MatchSetImpl.create(q); - + + MatchSet mp = MatchSetImpl.create(q); + List indIds = getFilteredIndividualIds(q.getFilter()); for (String itemId : indIds) { EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); - //LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); - + // LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); + double score = 0; ClassInformationContentPair[] qmatchArr = new ClassInformationContentPair[qsize]; - for (int j = 0; j indIds = getFilteredIndividualIds(q.getFilter()); for (String itemId : indIds) { EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); - - //LOG.info("TARGET PROFILE for "+itemId+" "+targetProfileBM); + + // LOG.info("TARGET PROFILE for "+itemId+" "+targetProfileBM); int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); int numInQueryOrInTarget = queryProfileBM.orCardinality(targetProfileBM); double j = numInQueryAndInTarget / (double) numInQueryOrInTarget; @@ -75,8 +70,4 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) throws UnknownFilterExcepti return mp; } - - - - } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java 
b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java index f2cd581..c14bec9 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java @@ -2,8 +2,6 @@ import java.util.List; -import javax.inject.Inject; - import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator.ClassInformationContentPair; @@ -15,27 +13,25 @@ import com.googlecode.javaewah.EWAHCompressedBitmap; /** - * Given a query profile (a set of classes c1, .., cn) return a match profile, - * where each candidate individual is assigned a maximum Information Content score + * Given a query profile (a set of classes c1, .., cn) return a match profile, + * where each candidate individual is assigned a maximum Information Content + * score * * @author cjm * */ -public class MaximumInformationContentSimilarityProfileMatcher - extends AbstractSemanticSimilarityProfileMatcher - implements ProfileMatcher { - +public class MaximumInformationContentSimilarityProfileMatcher extends AbstractSemanticSimilarityProfileMatcher + implements ProfileMatcher { + private Logger LOG = Logger.getLogger(MaximumInformationContentSimilarityProfileMatcher.class); - /** * @param kb */ - @Inject private MaximumInformationContentSimilarityProfileMatcher(BMKnowledgeBase kb) { super(kb); } - + /** * @param kb * @return new instance @@ -43,7 +39,7 @@ private MaximumInformationContentSimilarityProfileMatcher(BMKnowledgeBase kb) { public static MaximumInformationContentSimilarityProfileMatcher create(BMKnowledgeBase kb) { return new MaximumInformationContentSimilarityProfileMatcher(kb); } - + @Override public String 
getShortName() { return "max-information"; @@ -54,21 +50,20 @@ public String getShortName() { * @return match profile containing probabilities of each individual */ public MatchSet findMatchProfileImpl(ProfileQuery q) { - + EWAHCompressedBitmap queryProfileBM = getProfileBM(q); - //LOG.info("QUERY PROFILE for "+q+" "+queryProfileBM.getPositions()); - - MatchSet mp = MatchSetImpl.create(q); - + // LOG.info("QUERY PROFILE for "+q+" "+queryProfileBM.getPositions()); + + MatchSet mp = MatchSetImpl.create(q); + List indIds = getFilteredIndividualIds(q.getFilter()); for (String itemId : indIds) { EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); - - //LOG.info("TARGET PROFILE for "+itemId+" "+targetProfileBM); - ClassInformationContentPair mica = - getMicaCalculator().getMostInformativeCommonAncestorWithIC(queryProfileBM, - targetProfileBM); - //LOG.info("mica="+mica); + + // LOG.info("TARGET PROFILE for "+itemId+" "+targetProfileBM); + ClassInformationContentPair mica = getMicaCalculator() + .getMostInformativeCommonAncestorWithIC(queryProfileBM, targetProfileBM); + // LOG.info("mica="+mica); String label = knowledgeBase.getLabelMapper().getArbitraryLabel(itemId); mp.add(createMatch(itemId, label, mica.ic)); } @@ -76,5 +71,4 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { return mp; } - } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java index afcf28c..1c7bcd6 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java @@ -4,8 +4,6 @@ import java.util.List; import java.util.Set; -import javax.inject.Inject; - 
import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.NegationAwareProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -26,392 +24,379 @@ * @author cjm * */ -public class NaiveBayesFixedWeightThreeStateProfileMatcher extends AbstractProfileMatcher implements NegationAwareProfileMatcher { - - private Logger LOG = Logger.getLogger(NaiveBayesFixedWeightThreeStateProfileMatcher.class); - - - @Inject - private NaiveBayesFixedWeightThreeStateProfileMatcher(BMKnowledgeBase kb) { - super(kb); - } - - /** - * @param kb - * @return new instance - */ - public static NaiveBayesFixedWeightThreeStateProfileMatcher create(BMKnowledgeBase kb) { - return new NaiveBayesFixedWeightThreeStateProfileMatcher(kb); - } - - @Override - public String getShortName() { - return "naive-bayes-fixed-weight-three-state"; - } - - private EWAHCompressedBitmap getQueryBlanketBM(ProfileQuery q) { - EWAHCompressedBitmap onQueryNodesBM = getProfileBM(q); - Set nodesWithOnParents = new HashSet(); - - // there may be more efficient ways of doing this, but this is - // called once at the start of the search... - for (String cid : knowledgeBase.getClassIdsInSignature()) { - int cix = knowledgeBase.getClassIndex(cid); - EWAHCompressedBitmap supsBM = knowledgeBase.getDirectSuperClassesBM(cid); - int nParents = supsBM.cardinality(); - if (supsBM.andCardinality(onQueryNodesBM) == nParents) { - nodesWithOnParents.add(cix); - } - } - - return onQueryNodesBM.or(EWAHUtils.convertIndexSetToBitmap(nodesWithOnParents)); - } - - // any negated query node that has at least one negated parent; - // these are counted as no-transition - private EWAHCompressedBitmap getQueryNegatedNoTransition(EWAHCompressedBitmap negatedQueryProfileBM) { - Set nodes = new HashSet(); - - // there may be more efficient ways of doing this, but this is - // called once at the start of the search... 
- for (int cix : negatedQueryProfileBM.getPositions()) { - EWAHCompressedBitmap supsBM = knowledgeBase.getDirectSuperClassesBM(cix); - int nParents = supsBM.cardinality(); - if (supsBM.andCardinality(negatedQueryProfileBM) > 0) { - nodes.add(cix); - } - } - - return EWAHUtils.convertIndexSetToBitmap(nodes); - } - - - /** - * @param q - * @return match profile containing probabilities of each individual - */ - public MatchSet findMatchProfileImpl(ProfileQuery q) { - - - //double fpr = getFalsePositiveRate(); - //double fnr = getFalseNegativeRate(); - double sumOfProbs = 0.0; - - EWAHCompressedBitmap nodesQtBM = getProfileBM(q); - EWAHCompressedBitmap nodesQfBM = getNegatedProfileBM(q); - - // first, given a query (on and off states), - // group all nodes according to transitions from parent node - - // nomenclature: QUERY {unk,true,false} PARENTS {unk,true,false}+ - // multiple values taken as union - - // uncommitted nodes with a true parent (trans) - Set nodesQuPt = new HashSet(); - - // uncommitted nodes with an uncommitted parent (no trans) - Set nodesQuPu = new HashSet(); - - // off nodes with on or uncommitted parent (trans) - Set nodesQfPtu = new HashSet(); - - // off nodes with uncommitted parent - Set nodesQfPu = new HashSet(); - - // calculate transitions for all query nodes - /* - if Q=t, then ALL parents MUST be t (NO transitions) - if Q=u, then EITHER - ALL parents ARE t : TRANSITION T->U - AT LEAST ONE parent is=u : NO TRANSITION U->U - NO PARENT is f - if Q=f, then EITHER - ONE parent IS f : NO TRANSITION F->F - ALL parents ARE t : TRANSITION T->F - ELSE : TRANSITION U->F - */ - for (String cid : knowledgeBase.getClassIdsInSignature()) { - int cix = knowledgeBase.getClassIndex(cid); - if (nodesQtBM.getPositions().contains(cix)) { - // state T, transition must be T->T - continue; - } - EWAHCompressedBitmap parentsBM = knowledgeBase.getDirectSuperClassesBM(cix); - if (nodesQfBM.getPositions().contains(cix)) { - // state = F - if 
(parentsBM.andCardinality(nodesQfBM) == 0) { - // transition T,U -> F - nodesQfPtu.add(cix); - } - else { - // F->F - } - } - else { - // state = U - if (parentsBM.andCardinality(nodesQtBM) < parentsBM.cardinality()) { - // transition T -> U ( F->U is impossible ) - nodesQuPt.add(cix); - } - else { - // U->U - nodesQuPu.add(cix); - } - } - - } - EWAHCompressedBitmap nodesQuPtBM = EWAHUtils.convertIndexSetToBitmap(nodesQuPt); - EWAHCompressedBitmap nodesQuPuBM = EWAHUtils.convertIndexSetToBitmap(nodesQuPu); - EWAHCompressedBitmap nodesQfPtBM = EWAHUtils.convertIndexSetToBitmap(nodesQfPtu); - EWAHCompressedBitmap nodesQfPuBM = EWAHUtils.convertIndexSetToBitmap(nodesQfPu); - - - // include subclasses - - - EWAHCompressedBitmap queryNegatedNoTransitionBM = - getQueryNegatedNoTransition(nodesQfBM); - EWAHCompressedBitmap queryNegatedWithTransitionBM = - nodesQfBM.andNot(queryNegatedNoTransitionBM); - - MatchSet mp = MatchSetImpl.create(q); - - List indIds = getFilteredIndividualIds(q.getFilter()); - - double pvector[] = new double[indIds.size()]; - String indArr[] = new String[indIds.size()]; - int n=0; - - // pr(Q=f | H=t) - double prFalseNegative = 0.000001; - - // pr(Q=t | H=f) - double prFalsePositive = 0.00001; - - // pr(Q=u | H=t) -- like a weaker false negative - double prFalseMiss = 0.01; - - // pr(Q=u | H=f) -- failure to make a call when hidden is false - double prTrueMiss = 0.85; - - //double prWeakFalsePositive = prFalsePositive * 100; - //double prWeakFalsePositive = Math.exp(Math.log(prFalsePositive) /4 ); - double prWeakFalsePositive = 0.1; - - double pprQtHtPt = 1 - (prFalseNegative + prFalseMiss); - double pprQfHfPt = 1 - (prFalsePositive + prTrueMiss); - - //double prWeakTrueMiss = prTrueMiss * 2; // failure to make a call when hidden is non-obvious false - double prWeakTrueMiss = 0.85; - for (String itemId : indIds) { - EWAHCompressedBitmap nodesHtBM = knowledgeBase.getTypesBM(itemId); - - //EWAHCompressedBitmap nodesHfBM = 
knowledgeBase.getNegatedTypesBM(itemId); - // TODO: consider propagating down - EWAHCompressedBitmap nodesHfBM = knowledgeBase.getDirectNegatedTypesBM(itemId); - - // any node which has an off query parent is discounted - //EWAHCompressedBitmap maskedTargetProfileBM = nodesHtBM.and(queryBlanketProfileBM); - - LOG.info("TARGET PROFILE for "+itemId+" "+nodesHtBM); - - // cumulative log-probability - double logp = 0.0; - // 3^3=27 combos for q (query), h (hidden) and p (parents) - // with states t, f and u - - // --- - // *** Hidden/Target=TRUE - // --- - - // ** T,T - - // 1. P(qi=TRUE | hi=TRUE, p(qi)=TRUE) = 1-(FN + FALSEMISS) - // note that if Q=t and H=t then it's impossible for P=u OR P=f; - // hence we use QtBM - int nQtHtPt = nodesQtBM.andCardinality(nodesHtBM); - if (nQtHtPt > 0) { - double cprQtHtPt = Math.pow(pprQtHtPt, nQtHtPt); - LOG.info(" nQtHtPt="+nQtHtPt+" pr= "+cprQtHtPt); - logp += Math.log(cprQtHtPt); - } - - // P(qi=FALSE | hi=TRUE, p(qi)=TRUE) = FN - // P(qi=FALSE | hi=TRUE, p(qi)=UNK) = FN - // note we can combine P(qi=FALSE | hi=TRUE) for any non-false parent setting - // hence we use QfBM - int nQfHtPt = nodesQfBM.andCardinality(nodesHtBM); - if (nQfHtPt > 0) { - double cprQfHtPt = Math.pow(prFalseNegative, nQfHtPt); - LOG.info(" nQfHtPt="+nQfHtPt+" pr= "+cprQfHtPt); - logp += Math.log(cprQfHtPt); - } - - // P(qi=UNK | hi=TRUE, p(qi)=TRUE) = FALSEMISS - int nQuHtPt = nodesQuPtBM.andCardinality(nodesHtBM); - if (nQuHtPt > 0) { - double cprQuHtPt = Math.pow(prFalseMiss, nQuHtPt); - LOG.info(" nQuHtPt="+nQuHtPt+" pr= "+cprQuHtPt); - logp += Math.log(cprQuHtPt); - } - - - // ** T,F - // none of these contribute to the score - - // P(qi=TRUE | hi=TRUE, p(qi)=FALSE) = 0 - // P(qi=FALSE | hi=TRUE, p(qi)=FALSE) = 1 - // P(qi=UNK | hi=TRUE, p(qi)=FALSE) = 0 - - // T,U - - // P(qi=TRUE | hi=TRUE, p(qi)=UNK) = 0 - // * ALREADY COVERED IN ABOVE: P(qi=FALSE | hi=TRUE, p(qi)=UNK) = FN - - // P(qi=UNK | hi=TRUE, p(qi)=UNK) = 1-FN - int nQuHtPu = 
nodesQuPuBM.andCardinality(nodesHtBM); - if (nQuHtPu > 0) { - double cprQuHtPu = Math.pow(1-prFalseNegative, nQuHtPu); - LOG.info(" nQuHtPu="+nQuHtPu+" pr= "+cprQuHtPu); - logp += Math.log(cprQuHtPu); - } - - - // --- - // *** Hidden/Target is FALSE - // --- - - // F,T - - // P(qi=TRUE | hi=FALSE, p(qi)=TRUE) = FP // e.g. 0.001 - // TODO: should check c(qi), and negation flows in the other direction - // note that if Q=t, then P=t, hence we use Qt - int nQtHfPt = nodesQtBM.andCardinality(nodesHfBM); - if (nQtHfPt > 0) { - double cprQtHfPt = Math.pow(prFalsePositive, nQtHfPt); - LOG.info(" nQtHfPt="+nQtHfPt+" pr= "+cprQtHfPt); - logp += Math.log(cprQtHfPt); - } - // P(qi=FALSE | hi=FALSE, p(qi)=TRUE) = 1-(FP+TRUEMISS) // keep this high - int nQfHfPt = nodesQfPtBM.andCardinality(nodesHfBM); - if (nQfHfPt > 0) { - double cprQfHfPt = Math.pow(pprQfHfPt, nQfHfPt); - LOG.info(" nQfHfPt="+nQfHfPt+" pr= "+cprQfHfPt); - logp += Math.log(cprQfHfPt); - } - - - // P(qi=UNK | hi=FALSE, p(qi)=TRUE) = TRUEMISS // e.g. 0.05 - int nQuHfPt = nodesQuPtBM.andCardinality(nodesHfBM); - if (nQuHfPt > 0) { - double cprQuHfPt = Math.pow(prTrueMiss, nQuHfPt); - LOG.info(" nQuHfPt="+nQuHfPt+" pr= "+cprQuHfPt); - logp += Math.log(cprQuHfPt); - } - - // F,F - - // P(qi=TRUE | hi=FALSE, p(qi)=FALSE) = 0 - // P(qi=FALSE | hi=FALSE, p(qi)=FALSE) = 1 - // P(qi=UNK | hi=FALSE, p(qi)=FALSE) = 0 - - // F,U - - // P(qi=TRUE | hi=FALSE, p(qi)=UNK) = 0 - // P(qi=FALSE | hi=FALSE, p(qi)=UNK) = 1-TRUEMISS - int nQfHfPu = nodesQfPuBM.andCardinality(nodesHfBM); - if (nQfHfPu > 0) { - double cprQfHfPu = Math.pow(1-prTrueMiss, nQfHfPu); - LOG.info(" nQfHfPu="+nQfHfPu+" pr= "+cprQfHfPu); - logp += Math.log(cprQfHfPu); - } - - - // P(qi=UNK | hi=FALSE, p(qi)=UNK) = TRUEMISS // e.g. 
- int nQuHfPu = nodesQuPuBM.andCardinality(nodesHfBM); - if (nQuHfPu > 0) { - double cprQuHfPu = Math.pow(prTrueMiss, nQuHfPu); - LOG.info(" nQuHfPu="+nQuHfPu+" pr= "+cprQuHfPu); - logp += Math.log(cprQuHfPu); - } - - - // --- - // Hidden/Target is UNKNOWN (aka FALSE') - // --- - // 'unknown' for a hidden state makes no sense; also it would introduce combinatorial explosions. - // here we interpret the 3rd state as being logically FALSE, but as being false in a non-obvious way, with lower penalties for - // not observing the falseness - - // ** U,T - - // P(qi=TRUE | hi=UNK, p(qi)=TRUE) = FP' // > FP (it's more likely to make a false call if it's non-obvious) - // note that if Q=t, then P=t, hence we use Qt - int nQtHuPt = nodesQtBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); - if (nQtHuPt > 0) { - double cprQtHuPt = Math.pow(prWeakFalsePositive, nQtHuPt); - LOG.info(" nQtHuPt="+nQtHuPt+" pr= "+cprQtHuPt); - logp += Math.log(cprQtHuPt); - } - // P(qi=FALSE | hi=UNK, p(qi)=TRUE) = 1-(FP' + TRUEMISS') - int nQfHuPt = nodesQfPtBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); - if (nQfHuPt > 0) { - double cprQfHuPt = Math.pow(1-(prWeakFalsePositive + prWeakTrueMiss), nQfHuPt); - LOG.info(" nQfHuPt="+nQfHuPt+" pr= "+cprQfHuPt); - logp += Math.log(cprQfHuPt); - } - // P(qi=UNK | hi=UNK, p(qi)=TRUE) = TRUEMISS' // > TRUEMISS (it's more likely to miss a non-obvious absence than an obvious absence) - int nQuHuPt = nodesQuPtBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); - if (nQuHuPt > 0) { - double cprQuHuPt = Math.pow(prWeakTrueMiss, nQuHuPt); - LOG.info(" nQuHuPt="+nQuHuPt+" pr= "+cprQuHuPt); - logp += Math.log(cprQuHuPt); - } - - // ** U,F - - // P(qi=TRUE | hi=UNK, p(qi)=FALSE) = 0 - // P(qi=FALSE | hi=UNK, p(qi)=FALSE) = 1 - // P(qi=UNK | hi=UNK, p(qi)=FALSE) = 0 - - // ** U,U - - // P(qi=TRUE | hi=UNK, p(qi)=UNK) = 0 - // P(qi=FALSE | hi=UNK, p(qi)=UNK) = 1 - TRUEMISS' - int nQfHuPu = nodesQfPuBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); - if (nQfHuPu 
> 0) { - double cprQfHuPu = Math.pow(1- prWeakTrueMiss, nQfHuPu); - LOG.info(" nQfHuPu="+nQfHuPu+" pr= "+cprQfHuPu); - logp += Math.log(cprQfHuPu); - } - // P(qi=UNK | hi=UNK, p(qi)=UNK) = TRUEMISS' - int nQuHuPu = nodesQuPuBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); - if (nQuHuPu > 0) { - double cprQuHuPu = Math.pow(prWeakTrueMiss, nQuHuPu); - LOG.info(" nQuHuPu="+nQuHuPu+" pr= "+cprQuHuPu); - logp += Math.log(cprQuHuPu); - } - - - double p = Math.exp(logp); - pvector[n] = p; - indArr[n] = itemId; - sumOfProbs += p; - n++; - LOG.info("logp for "+itemId+" = "+logp+" sumOfLogProbs="+sumOfProbs); - } - for (n = 0; n nodesWithOnParents = new HashSet(); + + // there may be more efficient ways of doing this, but this is + // called once at the start of the search... + for (String cid : knowledgeBase.getClassIdsInSignature()) { + int cix = knowledgeBase.getClassIndex(cid); + EWAHCompressedBitmap supsBM = knowledgeBase.getDirectSuperClassesBM(cid); + int nParents = supsBM.cardinality(); + if (supsBM.andCardinality(onQueryNodesBM) == nParents) { + nodesWithOnParents.add(cix); + } + } + + return onQueryNodesBM.or(EWAHUtils.convertIndexSetToBitmap(nodesWithOnParents)); + } + + // any negated query node that has at least one negated parent; + // these are counted as no-transition + private EWAHCompressedBitmap getQueryNegatedNoTransition(EWAHCompressedBitmap negatedQueryProfileBM) { + Set nodes = new HashSet(); + + // there may be more efficient ways of doing this, but this is + // called once at the start of the search... 
+ for (int cix : negatedQueryProfileBM.getPositions()) { + EWAHCompressedBitmap supsBM = knowledgeBase.getDirectSuperClassesBM(cix); + int nParents = supsBM.cardinality(); + if (supsBM.andCardinality(negatedQueryProfileBM) > 0) { + nodes.add(cix); + } + } + + return EWAHUtils.convertIndexSetToBitmap(nodes); + } + + /** + * @param q + * @return match profile containing probabilities of each individual + */ + public MatchSet findMatchProfileImpl(ProfileQuery q) { + + // double fpr = getFalsePositiveRate(); + // double fnr = getFalseNegativeRate(); + double sumOfProbs = 0.0; + + EWAHCompressedBitmap nodesQtBM = getProfileBM(q); + EWAHCompressedBitmap nodesQfBM = getNegatedProfileBM(q); + + // first, given a query (on and off states), + // group all nodes according to transitions from parent node + + // nomenclature: QUERY {unk,true,false} PARENTS {unk,true,false}+ + // multiple values taken as union + + // uncommitted nodes with a true parent (trans) + Set nodesQuPt = new HashSet(); + + // uncommitted nodes with an uncommitted parent (no trans) + Set nodesQuPu = new HashSet(); + + // off nodes with on or uncommitted parent (trans) + Set nodesQfPtu = new HashSet(); + + // off nodes with uncommitted parent + Set nodesQfPu = new HashSet(); + + // calculate transitions for all query nodes + /* + * if Q=t, then ALL parents MUST be t (NO transitions) if Q=u, then + * EITHER ALL parents ARE t : TRANSITION T->U AT LEAST ONE parent is=u : + * NO TRANSITION U->U NO PARENT is f if Q=f, then EITHER ONE parent IS f + * : NO TRANSITION F->F ALL parents ARE t : TRANSITION T->F ELSE : + * TRANSITION U->F + */ + for (String cid : knowledgeBase.getClassIdsInSignature()) { + int cix = knowledgeBase.getClassIndex(cid); + if (nodesQtBM.getPositions().contains(cix)) { + // state T, transition must be T->T + continue; + } + EWAHCompressedBitmap parentsBM = knowledgeBase.getDirectSuperClassesBM(cix); + if (nodesQfBM.getPositions().contains(cix)) { + // state = F + if 
(parentsBM.andCardinality(nodesQfBM) == 0) { + // transition T,U -> F + nodesQfPtu.add(cix); + } else { + // F->F + } + } else { + // state = U + if (parentsBM.andCardinality(nodesQtBM) < parentsBM.cardinality()) { + // transition T -> U ( F->U is impossible ) + nodesQuPt.add(cix); + } else { + // U->U + nodesQuPu.add(cix); + } + } + + } + EWAHCompressedBitmap nodesQuPtBM = EWAHUtils.convertIndexSetToBitmap(nodesQuPt); + EWAHCompressedBitmap nodesQuPuBM = EWAHUtils.convertIndexSetToBitmap(nodesQuPu); + EWAHCompressedBitmap nodesQfPtBM = EWAHUtils.convertIndexSetToBitmap(nodesQfPtu); + EWAHCompressedBitmap nodesQfPuBM = EWAHUtils.convertIndexSetToBitmap(nodesQfPu); + + // include subclasses + + EWAHCompressedBitmap queryNegatedNoTransitionBM = getQueryNegatedNoTransition(nodesQfBM); + EWAHCompressedBitmap queryNegatedWithTransitionBM = nodesQfBM.andNot(queryNegatedNoTransitionBM); + + MatchSet mp = MatchSetImpl.create(q); + + List indIds = getFilteredIndividualIds(q.getFilter()); + + double pvector[] = new double[indIds.size()]; + String indArr[] = new String[indIds.size()]; + int n = 0; + + // pr(Q=f | H=t) + double prFalseNegative = 0.000001; + + // pr(Q=t | H=f) + double prFalsePositive = 0.00001; + + // pr(Q=u | H=t) -- like a weaker false negative + double prFalseMiss = 0.01; + + // pr(Q=u | H=f) -- failure to make a call when hidden is false + double prTrueMiss = 0.85; + + // double prWeakFalsePositive = prFalsePositive * 100; + // double prWeakFalsePositive = Math.exp(Math.log(prFalsePositive) /4 ); + double prWeakFalsePositive = 0.1; + + double pprQtHtPt = 1 - (prFalseNegative + prFalseMiss); + double pprQfHfPt = 1 - (prFalsePositive + prTrueMiss); + + // double prWeakTrueMiss = prTrueMiss * 2; // failure to make a call + // when hidden is non-obvious false + double prWeakTrueMiss = 0.85; + for (String itemId : indIds) { + EWAHCompressedBitmap nodesHtBM = knowledgeBase.getTypesBM(itemId); + + // EWAHCompressedBitmap nodesHfBM = + // 
knowledgeBase.getNegatedTypesBM(itemId); + // TODO: consider propagating down + EWAHCompressedBitmap nodesHfBM = knowledgeBase.getDirectNegatedTypesBM(itemId); + + // any node which has an off query parent is discounted + // EWAHCompressedBitmap maskedTargetProfileBM = + // nodesHtBM.and(queryBlanketProfileBM); + + LOG.info("TARGET PROFILE for " + itemId + " " + nodesHtBM); + + // cumulative log-probability + double logp = 0.0; + // 3^3=27 combos for q (query), h (hidden) and p (parents) + // with states t, f and u + + // --- + // *** Hidden/Target=TRUE + // --- + + // ** T,T + + // 1. P(qi=TRUE | hi=TRUE, p(qi)=TRUE) = 1-(FN + FALSEMISS) + // note that if Q=t and H=t then it's impossible for P=u OR P=f; + // hence we use QtBM + int nQtHtPt = nodesQtBM.andCardinality(nodesHtBM); + if (nQtHtPt > 0) { + double cprQtHtPt = Math.pow(pprQtHtPt, nQtHtPt); + LOG.info(" nQtHtPt=" + nQtHtPt + " pr= " + cprQtHtPt); + logp += Math.log(cprQtHtPt); + } + + // P(qi=FALSE | hi=TRUE, p(qi)=TRUE) = FN + // P(qi=FALSE | hi=TRUE, p(qi)=UNK) = FN + // note we can combine P(qi=FALSE | hi=TRUE) for any non-false + // parent setting + // hence we use QfBM + int nQfHtPt = nodesQfBM.andCardinality(nodesHtBM); + if (nQfHtPt > 0) { + double cprQfHtPt = Math.pow(prFalseNegative, nQfHtPt); + LOG.info(" nQfHtPt=" + nQfHtPt + " pr= " + cprQfHtPt); + logp += Math.log(cprQfHtPt); + } + + // P(qi=UNK | hi=TRUE, p(qi)=TRUE) = FALSEMISS + int nQuHtPt = nodesQuPtBM.andCardinality(nodesHtBM); + if (nQuHtPt > 0) { + double cprQuHtPt = Math.pow(prFalseMiss, nQuHtPt); + LOG.info(" nQuHtPt=" + nQuHtPt + " pr= " + cprQuHtPt); + logp += Math.log(cprQuHtPt); + } + + // ** T,F + // none of these contribute to the score + + // P(qi=TRUE | hi=TRUE, p(qi)=FALSE) = 0 + // P(qi=FALSE | hi=TRUE, p(qi)=FALSE) = 1 + // P(qi=UNK | hi=TRUE, p(qi)=FALSE) = 0 + + // T,U + + // P(qi=TRUE | hi=TRUE, p(qi)=UNK) = 0 + // * ALREADY COVERED IN ABOVE: P(qi=FALSE | hi=TRUE, p(qi)=UNK) = FN + + // P(qi=UNK | hi=TRUE, p(qi)=UNK) = 
1-FN + int nQuHtPu = nodesQuPuBM.andCardinality(nodesHtBM); + if (nQuHtPu > 0) { + double cprQuHtPu = Math.pow(1 - prFalseNegative, nQuHtPu); + LOG.info(" nQuHtPu=" + nQuHtPu + " pr= " + cprQuHtPu); + logp += Math.log(cprQuHtPu); + } + + // --- + // *** Hidden/Target is FALSE + // --- + + // F,T + + // P(qi=TRUE | hi=FALSE, p(qi)=TRUE) = FP // e.g. 0.001 + // TODO: should check c(qi), and negation flows in the other + // direction + // note that if Q=t, then P=t, hence we use Qt + int nQtHfPt = nodesQtBM.andCardinality(nodesHfBM); + if (nQtHfPt > 0) { + double cprQtHfPt = Math.pow(prFalsePositive, nQtHfPt); + LOG.info(" nQtHfPt=" + nQtHfPt + " pr= " + cprQtHfPt); + logp += Math.log(cprQtHfPt); + } + // P(qi=FALSE | hi=FALSE, p(qi)=TRUE) = 1-(FP+TRUEMISS) // keep this + // high + int nQfHfPt = nodesQfPtBM.andCardinality(nodesHfBM); + if (nQfHfPt > 0) { + double cprQfHfPt = Math.pow(pprQfHfPt, nQfHfPt); + LOG.info(" nQfHfPt=" + nQfHfPt + " pr= " + cprQfHfPt); + logp += Math.log(cprQfHfPt); + } + + // P(qi=UNK | hi=FALSE, p(qi)=TRUE) = TRUEMISS // e.g. 0.05 + int nQuHfPt = nodesQuPtBM.andCardinality(nodesHfBM); + if (nQuHfPt > 0) { + double cprQuHfPt = Math.pow(prTrueMiss, nQuHfPt); + LOG.info(" nQuHfPt=" + nQuHfPt + " pr= " + cprQuHfPt); + logp += Math.log(cprQuHfPt); + } + + // F,F + + // P(qi=TRUE | hi=FALSE, p(qi)=FALSE) = 0 + // P(qi=FALSE | hi=FALSE, p(qi)=FALSE) = 1 + // P(qi=UNK | hi=FALSE, p(qi)=FALSE) = 0 + + // F,U + + // P(qi=TRUE | hi=FALSE, p(qi)=UNK) = 0 + // P(qi=FALSE | hi=FALSE, p(qi)=UNK) = 1-TRUEMISS + int nQfHfPu = nodesQfPuBM.andCardinality(nodesHfBM); + if (nQfHfPu > 0) { + double cprQfHfPu = Math.pow(1 - prTrueMiss, nQfHfPu); + LOG.info(" nQfHfPu=" + nQfHfPu + " pr= " + cprQfHfPu); + logp += Math.log(cprQfHfPu); + } + + // P(qi=UNK | hi=FALSE, p(qi)=UNK) = TRUEMISS // e.g. 
+ int nQuHfPu = nodesQuPuBM.andCardinality(nodesHfBM); + if (nQuHfPu > 0) { + double cprQuHfPu = Math.pow(prTrueMiss, nQuHfPu); + LOG.info(" nQuHfPu=" + nQuHfPu + " pr= " + cprQuHfPu); + logp += Math.log(cprQuHfPu); + } + + // --- + // Hidden/Target is UNKNOWN (aka FALSE') + // --- + // 'unknown' for a hidden state makes no sense; also it would + // introduce combinatorial explosions. + // here we interpret the 3rd state as being logically FALSE, but as + // being false in a non-obvious way, with lower penalties for + // not observing the falseness + + // ** U,T + + // P(qi=TRUE | hi=UNK, p(qi)=TRUE) = FP' // > FP (it's more likely + // to make a false call if it's non-obvious) + // note that if Q=t, then P=t, hence we use Qt + int nQtHuPt = nodesQtBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); + if (nQtHuPt > 0) { + double cprQtHuPt = Math.pow(prWeakFalsePositive, nQtHuPt); + LOG.info(" nQtHuPt=" + nQtHuPt + " pr= " + cprQtHuPt); + logp += Math.log(cprQtHuPt); + } + // P(qi=FALSE | hi=UNK, p(qi)=TRUE) = 1-(FP' + TRUEMISS') + int nQfHuPt = nodesQfPtBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); + if (nQfHuPt > 0) { + double cprQfHuPt = Math.pow(1 - (prWeakFalsePositive + prWeakTrueMiss), nQfHuPt); + LOG.info(" nQfHuPt=" + nQfHuPt + " pr= " + cprQfHuPt); + logp += Math.log(cprQfHuPt); + } + // P(qi=UNK | hi=UNK, p(qi)=TRUE) = TRUEMISS' // > TRUEMISS (it's + // more likely to miss a non-obvious absence than an obvious + // absence) + int nQuHuPt = nodesQuPtBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); + if (nQuHuPt > 0) { + double cprQuHuPt = Math.pow(prWeakTrueMiss, nQuHuPt); + LOG.info(" nQuHuPt=" + nQuHuPt + " pr= " + cprQuHuPt); + logp += Math.log(cprQuHuPt); + } + + // ** U,F + + // P(qi=TRUE | hi=UNK, p(qi)=FALSE) = 0 + // P(qi=FALSE | hi=UNK, p(qi)=FALSE) = 1 + // P(qi=UNK | hi=UNK, p(qi)=FALSE) = 0 + + // ** U,U + + // P(qi=TRUE | hi=UNK, p(qi)=UNK) = 0 + // P(qi=FALSE | hi=UNK, p(qi)=UNK) = 1 - TRUEMISS' + int nQfHuPu = 
nodesQfPuBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); + if (nQfHuPu > 0) { + double cprQfHuPu = Math.pow(1 - prWeakTrueMiss, nQfHuPu); + LOG.info(" nQfHuPu=" + nQfHuPu + " pr= " + cprQfHuPu); + logp += Math.log(cprQfHuPu); + } + // P(qi=UNK | hi=UNK, p(qi)=UNK) = TRUEMISS' + int nQuHuPu = nodesQuPuBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); + if (nQuHuPu > 0) { + double cprQuHuPu = Math.pow(prWeakTrueMiss, nQuHuPu); + LOG.info(" nQuHuPu=" + nQuHuPu + " pr= " + cprQuHuPu); + logp += Math.log(cprQuHuPu); + } + + double p = Math.exp(logp); + pvector[n] = p; + indArr[n] = itemId; + sumOfProbs += p; + n++; + LOG.info("logp for " + itemId + " = " + logp + " sumOfLogProbs=" + sumOfProbs); + } + for (n = 0; n < pvector.length; n++) { + double p = pvector[n] / sumOfProbs; + String id = indArr[n]; + String label = knowledgeBase.getLabelMapper().getArbitraryLabel(id); + mp.add(createMatch(id, label, p)); + } + mp.sortMatches(); + return mp; + } } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher.java index 57d51ae..009b47c 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher.java @@ -1,7 +1,5 @@ package org.monarchinitiative.owlsim.compute.matcher.impl; -import javax.inject.Inject; - import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; /** @@ -10,11 +8,10 @@ */ public class NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher extends NaiveBayesFixedWeightTwoStateProfileMatcher { - @Inject private NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher(BMKnowledgeBase kb) { super(kb); } - + /** * @param kb * @return 
new instance @@ -23,15 +20,13 @@ public static NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher create(BMKnow return new NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher(kb); } - public boolean isUseBlanket() { return false; } - + @Override public String getShortName() { return "naive-bayes-fixed-weight-two-state-NOBLANKET"; } - } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java index 9e5d464..15b8c06 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java @@ -4,8 +4,6 @@ import java.util.List; import java.util.Set; -import javax.inject.Inject; - import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -17,14 +15,14 @@ import com.googlecode.javaewah.EWAHCompressedBitmap; /** - * Calculate likelihood of query 'mutating' into target, assuming each - * node in the ontology is independent (after pre-computing ancestor nodes - * for query and target), using chain rule + * Calculate likelihood of query 'mutating' into target, assuming each node in + * the ontology is independent (after pre-computing ancestor nodes for query and + * target), using chain rule * * p(C1=c1) * p(C2=c2) * ... p(Cn=cn) * - * Where p(Ci=ci) takes on one of 4 possibilities, depending on state - * of query and state of target, corresponding to probability of misclassification. + * Where p(Ci=ci) takes on one of 4 possibilities, depending on state of query + * and state of target, corresponding to probability of misclassification. 
* * * @@ -44,12 +42,11 @@ private double defaultFalseNegativeRate = 0.10; // beta // TODO - replace when tetsing is over - //private double[] defaultFalsePositiveRateArr = new double[]{0.002}; - //private double[] defaultFalseNegativeRateArr = new double[] {0.10}; - private double[] defaultFalsePositiveRateArr = new double[]{1e-10,0.0005,0.001,0.005,0.01}; - private double[] defaultFalseNegativeRateArr = new double[] {1e-10,0.005,0.01,0.05,0.1,0.2,0.4,0.8,0.9}; + // private double[] defaultFalsePositiveRateArr = new double[]{0.002}; + // private double[] defaultFalseNegativeRateArr = new double[] {0.10}; + private double[] defaultFalsePositiveRateArr = new double[] { 1e-10, 0.0005, 0.001, 0.005, 0.01 }; + private double[] defaultFalseNegativeRateArr = new double[] { 1e-10, 0.005, 0.01, 0.05, 0.1, 0.2, 0.4, 0.8, 0.9 }; - @Inject protected NaiveBayesFixedWeightTwoStateProfileMatcher(BMKnowledgeBase kb) { super(kb); } @@ -72,10 +69,11 @@ public String getShortName() { } /** - * Extends the query profile - for every node c, all the direct parents of c are in - * the query profile, then add c to the query profile. + * Extends the query profile - for every node c, all the direct parents of c + * are in the query profile, then add c to the query profile. 
* - * We use this to reduce the size of the network when testing for probabilities + * We use this to reduce the size of the network when testing for + * probabilities * * TODO: fully evaluate the consequences of using this method * @@ -106,22 +104,21 @@ private EWAHCompressedBitmap getQueryBlanketBM(ProfileQuery q) { */ public MatchSet findMatchProfileImpl(ProfileQuery q) { - //double fpr = getFalsePositiveRate(); - //double fnr = getFalseNegativeRate(); + // double fpr = getFalsePositiveRate(); + // double fnr = getFalseNegativeRate(); double sumOfProbs = 0.0; EWAHCompressedBitmap queryProfileBM = getProfileBM(q); EWAHCompressedBitmap queryBlanketProfileBM = getQueryBlanketBM(q); - LOG.info("|OnQueryNodes|="+queryProfileBM.cardinality()); - LOG.info("|QueryNodesWithOnParents|="+queryBlanketProfileBM.cardinality()); + LOG.info("|OnQueryNodes|=" + queryProfileBM.cardinality()); + LOG.info("|QueryNodesWithOnParents|=" + queryBlanketProfileBM.cardinality()); - - //int numClassesConsidered = knowledgeBase.getClassIdsInSignature().size(); + // int numClassesConsidered = + // knowledgeBase.getClassIdsInSignature().size(); int numClassesConsidered; if (isUseBlanket()) { numClassesConsidered = queryBlanketProfileBM.cardinality(); - } - else { + } else { numClassesConsidered = knowledgeBase.getClassIdsInSignature().size(); } @@ -131,43 +128,45 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { double pvector[] = new double[indIds.size()]; String indArr[] = new String[indIds.size()]; - int n=0; + int n = 0; for (String itemId : indIds) { EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); // any node which has an off query parent is discounted targetProfileBM = targetProfileBM.and(queryBlanketProfileBM); - LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); - + LOG.debug("TARGET PROFILE for " + itemId + " " + targetProfileBM); // two state model. 
// mapping to Bauer et al: these correspond to mxy1, x=Q, y=H/T int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); int numInQueryAndNOTInTarget = queryProfileBM.andNotCardinality(targetProfileBM); int numNOTInQueryAndInTarget = targetProfileBM.andNotCardinality(queryProfileBM); - int numNOTInQueryAndNOTInTarget = - numClassesConsidered - (numInQueryAndInTarget + numInQueryAndNOTInTarget + numNOTInQueryAndInTarget); + int numNOTInQueryAndNOTInTarget = numClassesConsidered + - (numInQueryAndInTarget + numInQueryAndNOTInTarget + numNOTInQueryAndInTarget); double p = 0.0; // TODO: optimize this - // integrate over a Dirichlet prior for alpha & beta, rather than gridsearch + // integrate over a Dirichlet prior for alpha & beta, rather than + // gridsearch // this can be done closed-form for (double fnr : defaultFalseNegativeRateArr) { for (double fpr : defaultFalsePositiveRateArr) { - double pQ1T1 = Math.pow(1-fnr, numInQueryAndInTarget); - double pQ0T1 = Math.pow(fnr, numNOTInQueryAndInTarget); - double pQ1T0 = Math.pow(fpr, numInQueryAndNOTInTarget); - double pQ0T0 = Math.pow(1-fpr, numNOTInQueryAndNOTInTarget); - - - - //LOG.debug("pQ1T1 = "+(1-fnr)+" ^ "+ numInQueryAndInTarget+" = "+pQ1T1); - //LOG.debug("pQ0T1 = "+(fnr)+" ^ "+ numNOTInQueryAndInTarget+" = "+pQ0T1); - //LOG.debug("pQ1T0 = "+(fpr)+" ^ "+ numInQueryAndNOTInTarget+" = "+pQ1T0); - //LOG.debug("pQ0T0 = "+(1-fpr)+" ^ "+ numNOTInQueryAndNOTInTarget+" = "+pQ0T0); - //TODO: optimization. 
We can precalculate the logs for different integers - p += - Math.exp(Math.log(pQ1T1) + Math.log(pQ0T1) + Math.log(pQ1T0) + Math.log(pQ0T0)); + double pQ1T1 = Math.pow(1 - fnr, numInQueryAndInTarget); + double pQ0T1 = Math.pow(fnr, numNOTInQueryAndInTarget); + double pQ1T0 = Math.pow(fpr, numInQueryAndNOTInTarget); + double pQ0T0 = Math.pow(1 - fpr, numNOTInQueryAndNOTInTarget); + + // LOG.debug("pQ1T1 = "+(1-fnr)+" ^ "+ + // numInQueryAndInTarget+" = "+pQ1T1); + // LOG.debug("pQ0T1 = "+(fnr)+" ^ "+ + // numNOTInQueryAndInTarget+" = "+pQ0T1); + // LOG.debug("pQ1T0 = "+(fpr)+" ^ "+ + // numInQueryAndNOTInTarget+" = "+pQ1T0); + // LOG.debug("pQ0T0 = "+(1-fpr)+" ^ "+ + // numNOTInQueryAndNOTInTarget+" = "+pQ0T0); + // TODO: optimization. We can precalculate the logs for + // different integers + p += Math.exp(Math.log(pQ1T1) + Math.log(pQ0T1) + Math.log(pQ1T0) + Math.log(pQ0T0)); } } @@ -175,9 +174,9 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { indArr[n] = itemId; sumOfProbs += p; n++; - LOG.debug("p for "+itemId+" = "+p); + LOG.debug("p for " + itemId + " = " + p); } - for (n = 0; n indIds = getFilteredIndividualIds(q.getFilter()); double sumOfProbs = 0; double[] pvector = new double[indIds.size()]; - String[] indIdsVector = new String[indIds.size()]; + String[] indIdsVector = new String[indIds.size()]; int localItemIndex = 0; for (String itemId : indIds) { LOG.info(itemId); EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); - // TODO - should not need this; tvector already used to calculate likelihoods + // TODO - should not need this; tvector already used to calculate + // likelihoods int[] tvector = bmToVector(targetProfileBM, knowledgeBase.getNumClassNodes()); int j = knowledgeBase.getIndividualIndex(itemId); double[] likelihoodsForItem = likelihoods[j]; double logpsum = 0; - for (int i=0; i qClassIds = q.getQueryClassIds(); int qsize = qClassIds.size(); queryClassArray = qClassIds.toArray(new String[qsize]); - + // array (in 
same order as queryClassArray) in which each element // is the set of superclasses of the indexed class EWAHCompressedBitmap queryProfileBMArr[] = getProfileSetBM(queryClassArray); EWAHCompressedBitmap queryProfileBM = getProfileBM(q); - MatchSet mp = MatchSetImpl.create(q); - + MatchSet mp = MatchSetImpl.create(q); + // --- // calculate optimal match, based on matching of profile to itself; - // has two components, maxIC and average of each phenotype in profile to itself + // has two components, maxIC and average of each phenotype in profile to + // itself double maxScoreOfOptimalTarget = getScore(queryProfileBM, queryProfileBM); double avgScoreOfOptimalTarget = 0; - - for (int j = 0; j indIds = getFilteredIndividualIds(q.getFilter()); for (String itemId : indIds) { EWAHCompressedBitmap targetProfileBM = getTypesBM(itemId); - + // calculate maximum IC double maxScore = getScore(queryProfileBM, targetProfileBM); - + EWAHCompressedBitmap targetProfileDirectBM = getDirectTypesBM(itemId); int tsize = targetProfileDirectBM.cardinality(); - + // note: this is an experimental implementation that // does not make use of a MICA cache; it may be replaced by // a version that uses a cache later. double score = 0; // find best match for every class j in query profile - for (int j = 0; j targetToQueryCache; + private Map targetToQueryCache; - @Inject private ThreeStateBayesianNetworkProfileMatcher(BMKnowledgeBase kb) { super(kb); } @@ -66,16 +63,17 @@ public void precompute() { } targetToQueryCache = new HashMap(); } - + public class BitMapPair { public final EWAHCompressedBitmap bm1; public final EWAHCompressedBitmap bm2; - + public BitMapPair(EWAHCompressedBitmap bm1, EWAHCompressedBitmap bm2) { super(); this.bm1 = bm1; this.bm2 = bm2; } + @Override public int hashCode() { final int prime = 31; @@ -85,6 +83,7 @@ public int hashCode() { result = prime * result + ((bm2 == null) ? 
0 : bm2.hashCode()); return result; } + @Override public boolean equals(Object obj) { if (this == obj) @@ -108,15 +107,13 @@ public boolean equals(Object obj) { return false; return true; } + private ThreeStateBayesianNetworkProfileMatcher getOuterType() { return ThreeStateBayesianNetworkProfileMatcher.this; } - - + } - - - + /** * note that this is exposed primarily for debugging purposes * @@ -140,11 +137,11 @@ public void calculateConditionalProbabilities(BMKnowledgeBase kb) throws Incoher /** * @param q * @return match profile containing probabilities of each individual - * @throws IncoherentStateException + * @throws IncoherentStateException */ public MatchSet findMatchProfileImpl(ProfileQuery q) throws IncoherentStateException { precompute(); - + boolean isUseNegation = q instanceof QueryWithNegation; if (!isUseNegation) { LOG.error("Consider using TwoState BN, this will be inefficient"); @@ -152,20 +149,19 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) throws IncoherentStateExcep EWAHCompressedBitmap negatedQueryProfileBM = null; Set negatedQueryClassIds = null; - //double fpr = getFalsePositiveRate(); - //double fnr = getFalseNegativeRate(); + // double fpr = getFalsePositiveRate(); + // double fnr = getFalseNegativeRate(); double sumOfProbs = 0.0; - //int numClasses = knowledgeBase.getClassIdsInSignature().size(); - //EWAHCompressedBitmap queryProfileBM = getProfileBM(q); - // EWAHCompressedBitmap negatedQueryProfileBM = null; + // int numClasses = knowledgeBase.getClassIdsInSignature().size(); + // EWAHCompressedBitmap queryProfileBM = getProfileBM(q); + // EWAHCompressedBitmap negatedQueryProfileBM = null; if (isUseNegation) { LOG.info("Using QueryWithNegation"); - QueryWithNegation nq = (QueryWithNegation)q; + QueryWithNegation nq = (QueryWithNegation) q; negatedQueryProfileBM = getDirectNegatedProfileBM(nq); negatedQueryClassIds = knowledgeBase.getClassIds(negatedQueryProfileBM); LOG.info("nqp=" + negatedQueryProfileBM); - } - else { + } else 
{ LOG.info("Not using QueryWithNegation"); } @@ -176,24 +172,24 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) throws IncoherentStateExcep double pvector[] = new double[indIds.size()]; String indArr[] = new String[indIds.size()]; - int n=0; + int n = 0; for (String itemId : indIds) { EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); EWAHCompressedBitmap negatedTargetProfileBM = knowledgeBase.getNegatedTypesBM(itemId); - LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); + LOG.debug("TARGET PROFILE for " + itemId + " " + targetProfileBM); Calculator calc = new Calculator(targetProfileBM, negatedTargetProfileBM); - //double p = calculateProbability(queryClassIds, targetProfileBM); + // double p = calculateProbability(queryClassIds, targetProfileBM); double p = calc.calculateProbability(queryClassIds, negatedQueryClassIds); pvector[n] = p; indArr[n] = itemId; sumOfProbs += p; n++; - LOG.info("p for "+itemId+" = "+p); + LOG.info("p for " + itemId + " = " + p); } - for (n = 0; n queryClassIds, - Set negatedQueryClassIds) throws IncoherentStateException { + public double calculateProbability(Set queryClassIds, Set negatedQueryClassIds) + throws IncoherentStateException { double cump = 1.0; // treat set of query class Ids as a leaf node that is the // class intersection of all members; ie q1^...^qn // for a class intersection, the CPT is always such that - // Pr=1.0, if all parents=1 - // Pr=0.0 otherwise + // Pr=1.0, if all parents=1 + // Pr=0.0 otherwise for (String queryClassId : queryClassIds) { - //LOG.info("+Q"+queryClassId); + // LOG.info("+Q"+queryClassId); double p = calculateProbability(queryClassId).prOn; cump *= p; } if (negatedQueryClassIds != null) { // TODO: prOff=0 for (String negatedQueryClassId : negatedQueryClassIds) { - LOG.info("-Q"+negatedQueryClassId); + LOG.info("-Q" + negatedQueryClassId); double p = calculateProbability(negatedQueryClassId).prOff; - LOG.info(" prOff="+p); + LOG.info(" prOff=" + p); cump 
*= p; } } @@ -279,54 +273,51 @@ public double calculateProbability(Set queryClassIds, * @param queryClassId * @param targetProfileBM * @return probability - * @throws IncoherentStateException + * @throws IncoherentStateException */ private NodeProbabilities calculateProbability(String queryClassId) throws IncoherentStateException { BMKnowledgeBase kb = getKnowledgeBase(); int qcix = kb.getClassIndex(queryClassId); - return calculateProbability(qcix); + return calculateProbability(qcix); } - /** - * Pr(Child = on | P) = SUM[0 pixs = kb.getDirectSuperClassesBM(qcix).getPositions(); NodeProbabilities[] parentOnProbs = new NodeProbabilities[pixs.size()]; LOG.debug("calculating for parents"); - for (int i=0; i lines = FileUtils.readLines(f); - for (String line : lines) { - String[] vals = line.split("\t", 2); - String[] terms = vals[1].split(";"); - for (String t : terms) { - addInstanceOf(vals[0], t); - } - } - Preconditions.checkNotNull(owlDataOntology); - } - - public void loadDataFromTsvGzip(String path) throws OWLOntologyCreationException, IOException { - GZIPInputStream gis = new GZIPInputStream(new FileInputStream(path)); - BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8")); - String line; - while ((line = bf.readLine()) != null) { - String[] vals = line.split("\t", 2); - String[] terms = vals[1].split(";"); - for (String t : terms) { - addInstanceOf(vals[0], t); - } - } - Preconditions.checkNotNull(owlDataOntology); - } - - - - private IRI getIRI(String id) { - // TODO - use json-ld - if (id.contains(":")) { - return IRI.create("http://purl.obolibrary.org/obo/" + id.replace(":", "_")); - } else { - return IRI.create(id); - } - } - - private void mergeOntology(OWLOntology o) { - if (owlOntology == null) { - LOG.info("Ont ontology=" + o); - owlOntology = o; - } else { - LOG.info("Merging ont axioms from=" + o); - owlOntology.getOWLOntologyManager().addAxioms(owlOntology, o.getAxioms()); - } - } - - private void addInstanceOf(String i, 
String c) { - if (owlDataOntology == null) { - owlDataOntology = owlOntology; - } - OWLDataFactory f = manager.getOWLDataFactory(); - OWLClassAssertionAxiom ax = - f.getOWLClassAssertionAxiom(f.getOWLClass(getIRI(c)), f.getOWLNamedIndividual(getIRI(i))); - manager.addAxiom(owlOntology, ax); - } - - - private void mergeData(OWLOntology o) { - if (owlDataOntology == null) { - LOG.info("Data ontology=" + o); - owlDataOntology = o; - } else { - LOG.info("Merging data axioms from=" + o); - owlDataOntology.getOWLOntologyManager().addAxioms(owlDataOntology, o.getAxioms()); - } - } - - private OWLOntologyManager getOWLOntologyManager() { - if (manager == null) - manager = OWLManager.createOWLOntologyManager(); - return manager; - } - - /** - * @return handle for a Bitmap-based Knowledge Base - */ - public BMKnowledgeBase createKnowledgeBaseInterface() { - // TODO: use factories, or injection - return BMKnowledgeBaseOWLAPIImpl.create(owlOntology, owlDataOntology, owlReasonerFactory, - curieUtil); - } - + private Logger LOG = Logger.getLogger(OWLLoader.class); + + OWLOntologyManager manager; + OWLOntology owlOntology; + OWLOntology owlDataOntology; + OWLReasoner owlReasoner; + OWLReasonerFactory owlReasonerFactory = new ElkReasonerFactory(); + CurieUtil curieUtil = new CurieUtil(new HashMap()); + + /** + * @param iri + * @return OWL Ontology + * @throws OWLOntologyCreationException + */ + public OWLOntology loadOWL(IRI iri) throws OWLOntologyCreationException { + return getOWLOntologyManager().loadOntology(iri); + } + + /** + * @param file + * @return OWL Ontology + * @throws OWLOntologyCreationException + */ + public OWLOntology loadOWL(File file) throws OWLOntologyCreationException { + IRI iri = IRI.create(file); + return getOWLOntologyManager().loadOntologyFromOntologyDocument(iri); + } + + /** + * Loads an OWL ontology from a URI or file + * + * @param path + * @return OWL Ontology + * @throws OWLOntologyCreationException + */ + public OWLOntology loadOWL(String path) 
throws OWLOntologyCreationException { + if (path.startsWith("http")) { + return loadOWL(IRI.create(path)); + } else { + File file = new File(path); + return loadOWL(file); + } + } + + /** + * @param iri + * @throws OWLOntologyCreationException + */ + public void load(IRI iri) throws OWLOntologyCreationException { + owlOntology = getOWLOntologyManager().loadOntology(iri); + Preconditions.checkNotNull(owlOntology); + } + + /** + * @param file + * @throws OWLOntologyCreationException + */ + public void load(File file) throws OWLOntologyCreationException { + owlOntology = loadOWL(file); + Preconditions.checkNotNull(owlOntology); + } + + public void loadGzippdOntology(String path) + throws FileNotFoundException, IOException, OWLOntologyCreationException { + GZIPInputStream gis = new GZIPInputStream(new FileInputStream(path)); + BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8")); + owlOntology = getOWLOntologyManager().loadOntologyFromOntologyDocument(gis); + Preconditions.checkNotNull(owlOntology); + } + + /** + * Loads an OWL ontology from a URI or file + * + * @param path + * @throws OWLOntologyCreationException + */ + public void load(String path) throws OWLOntologyCreationException { + owlOntology = loadOWL(path); + Preconditions.checkNotNull(owlOntology); + } + + /** + * Loads OWL ontologies from a URI or file + * + * @param path + * @throws OWLOntologyCreationException + */ + public void loadOntologies(String... paths) throws OWLOntologyCreationException { + for (String path : paths) + mergeOntology(loadOWL(path)); + Preconditions.checkNotNull(owlOntology); + } + + /** + * Loads an OWL ontology from a URI or file + * + * @param path + * @throws OWLOntologyCreationException + */ + public void loadData(String... 
paths) throws OWLOntologyCreationException { + for (String path : paths) + mergeData(loadOWL(path)); + Preconditions.checkNotNull(owlDataOntology); + } + + public void loadDataFromTsv(String path) throws OWLOntologyCreationException, IOException { + File f = new File(path); + // Files.readLines(f, Charset.defaultCharset()); + List lines = FileUtils.readLines(f); + for (String line : lines) { + String[] vals = line.split("\t", 2); + String[] terms = vals[1].split(";"); + for (String t : terms) { + addInstanceOf(vals[0], t); + } + } + Preconditions.checkNotNull(owlDataOntology); + } + + public void loadDataFromTsvGzip(String path) throws OWLOntologyCreationException, IOException { + GZIPInputStream gis = new GZIPInputStream(new FileInputStream(path)); + BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8")); + String line; + while ((line = bf.readLine()) != null) { + String[] vals = line.split("\t", 2); + String[] terms = vals[1].split(";"); + for (String t : terms) { + addInstanceOf(vals[0], t); + } + } + Preconditions.checkNotNull(owlDataOntology); + } + + private IRI getIRI(String id) { + // TODO - use json-ld + if (id.contains(":")) { + return IRI.create("http://purl.obolibrary.org/obo/" + id.replace(":", "_")); + } else { + return IRI.create(id); + } + } + + private void mergeOntology(OWLOntology o) { + if (owlOntology == null) { + LOG.info("Ont ontology=" + o); + owlOntology = o; + } else { + LOG.info("Merging ont axioms from=" + o); + owlOntology.getOWLOntologyManager().addAxioms(owlOntology, o.getAxioms()); + } + } + + private void addInstanceOf(String i, String c) { + if (owlDataOntology == null) { + owlDataOntology = owlOntology; + } + OWLDataFactory f = manager.getOWLDataFactory(); + OWLClassAssertionAxiom ax = f.getOWLClassAssertionAxiom(f.getOWLClass(getIRI(c)), + f.getOWLNamedIndividual(getIRI(i))); + manager.addAxiom(owlOntology, ax); + } + + private void mergeData(OWLOntology o) { + if (owlDataOntology == null) { + LOG.info("Data 
ontology=" + o); + owlDataOntology = o; + } else { + LOG.info("Merging data axioms from=" + o); + owlDataOntology.getOWLOntologyManager().addAxioms(owlDataOntology, o.getAxioms()); + } + } + + private OWLOntologyManager getOWLOntologyManager() { + if (manager == null) + manager = OWLManager.createOWLOntologyManager(); + return manager; + } + + /** + * @return handle for a Bitmap-based Knowledge Base + */ + public BMKnowledgeBase createKnowledgeBaseInterface() { + // TODO: use factories, or injection + return BMKnowledgeBaseOWLAPIImpl.create(owlOntology, owlDataOntology, owlReasonerFactory, curieUtil); + } } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java index 650f0bf..373390f 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java @@ -7,51 +7,53 @@ import org.monarchinitiative.owlsim.model.kb.Attribute; import org.monarchinitiative.owlsim.model.kb.Entity; -import com.google.inject.ImplementedBy; import com.googlecode.javaewah.EWAHCompressedBitmap; /** - * An interface to an ontology in which the fundamental unit of representation of - * a set of classes or a set of individuals (elements, items) is a Bitmap vector. + * An interface to an ontology in which the fundamental unit of representation + * of a set of classes or a set of individuals (elements, items) is a Bitmap + * vector. * *

KB formalism

*
    *
  • A KB is a collection of classes (features) and individuals (entities) *
  • Features are connected in a Directed Acyclic Graph (DAG) *
  • Each DAG has a single root, called owl:Thing - this is always included - *
  • Individuals can be described with one or more features. If an individual I is described using C, it is implicitly described by ancestors of C - *
  • Individuals can be also be described by negating one or more features, i.e. I not(C). If an individual is described using not(C), the this propagates to descendants of C + *
  • Individuals can be described with one or more features. If an individual + * I is described using C, it is implicitly described by ancestors of C + *
  • Individuals can be also be described by negating one or more features, + * i.e. I not(C). If an individual is described using not(C), the this + * propagates to descendants of C *
  • TODO: individuals can have one or more features associated by frequency *
* - * Note that OWLAPI terminology is used (e.g. superclasses), but this may be refactored in future to use neutral DAG terminology (e.g. ancestors) + * Note that OWLAPI terminology is used (e.g. superclasses), but this may be + * refactored in future to use neutral DAG terminology (e.g. ancestors) * - *

Mapping to Bitmap positions

- * Bitmap vectors are used for fast set-wise operations. - *
+ *

Mapping to Bitmap positions

Bitmap vectors are used for fast + * set-wise operations.
* - * Each class or individual is identified by a String identifier (e.g a CURIE), but behind the scenes, - * this is mapped to an integer denoting a position in the bitmap. A set of classes or - * a set of individuals can then be represented by a {@link EWAHCompressedBitmap}. - *
+ * Each class or individual is identified by a String identifier (e.g a CURIE), + * but behind the scenes, this is mapped to an integer denoting a position in + * the bitmap. A set of classes or a set of individuals can then be represented + * by a {@link EWAHCompressedBitmap}.
* - * Note that classes and individuals are mutually exclusive, so the ID to Index mapping - * is dependent on the datatype. - *
+ * Note that classes and individuals are mutually exclusive, so the ID to Index + * mapping is dependent on the datatype.
+ * + * Guarantee:Note that the index assigned to a class node is ordered + * according to informativeness. if Ix(C1) < Ix(C2), then + * Informativeness(C1) =< Informativeness(C2). Here Informativeness + * is the IC (with ties broken according to number of ancestors, with more + * ancestors being more informative). This means that an iterator starts with + * the classes with the lowest probability (and highest information content), + * allowing for fast MICA implementations. * - * Guarantee:Note that the index assigned to a class node is ordered according to - * informativeness. if Ix(C1) < Ix(C2), then Informativeness(C1) =< Informativeness(C2). - * Here Informativeness is the IC (with ties broken according to number of ancestors, - * with more ancestors being more informative). - * This means that an iterator starts with the classes - * with the lowest probability (and highest information content), allowing for - * fast MICA implementations. - * * The JavaEWAH library is used for fast bitmap operations. - *

Usage notes

- * Note that it is assumed that the ontology is static - most information is - * cached in-memory. If the underlying ontology changes, it is currently necessary to - * create a new KB object - there is no incremental change model + *

Usage notes

Note that it is assumed that the ontology is static - + * most information is cached in-memory. If the underlying ontology changes, it + * is currently necessary to create a new KB object - there is no incremental + * change model * *

Implementations

* @@ -60,19 +62,18 @@ * *

Labels and IDs

* - * A KB uses an internal integer to refer to all objects. For convenience, these can also - * be referred to by an optional String id, which follows whatever form the input source provides. + * A KB uses an internal integer to refer to all objects. For convenience, these + * can also be referred to by an optional String id, which follows whatever form + * the input source provides. * - * A separate {@link LabelMapper} is used to retrieve labels given an ID, or conversely to look - * up an ID given a label. + * A separate {@link LabelMapper} is used to retrieve labels given an ID, or + * conversely to look up an ID given a label. * * * @author cjm */ -@ImplementedBy(BMKnowledgeBaseOWLAPIImpl.class) public interface BMKnowledgeBase { - - + /** * Note: there can be >1 class in a node * @@ -93,16 +94,15 @@ * @return set of all individual identifiers */ public Set getIndividualIdsInSignature(); - + /** * @param individualId * @return */ public int getIndividualIndex(String individualId); - public String getIndividualId(int index); - + /** * TODO * @@ -124,7 +124,7 @@ * @return direct superclasses of classId as bitmap */ public EWAHCompressedBitmap getDirectSuperClassesBM(String classId); - + /** * @param classIds * @return union of all direct superclasses as a bitmap @@ -136,17 +136,17 @@ * @return subclasses (direct and indirect and equivalent) */ public EWAHCompressedBitmap getSubClasses(int classIndex); - + /** * @param classId * @return direct subclasses of classId as bitmap */ public EWAHCompressedBitmap getDirectSubClassesBM(String classId); - /** * @param classIds - * @return union of all superclasses (direct and indirect and equivalent) as a bitmap + * @return union of all superclasses (direct and indirect and equivalent) as + * a bitmap */ public EWAHCompressedBitmap getSubClassesBM(Set classIds); @@ -170,7 +170,8 @@ /** * @param classId - * @return superclasses (direct, indirect and equivalent) of classId as bitmap + * @return superclasses (direct, indirect 
and equivalent) of classId as + * bitmap */ public EWAHCompressedBitmap getSuperClassesBM(String classId); @@ -179,35 +180,38 @@ * @return union of all superclasses as a bitmap */ public EWAHCompressedBitmap getSuperClassesBM(Set classIds); - + /** * @param classIndex - * @return superclasses (direct and indirect and equivalent) of classId as bitmap + * @return superclasses (direct and indirect and equivalent) of classId as + * bitmap */ public EWAHCompressedBitmap getSuperClassesBM(int classIndex); /** - * @param classIds - * @return direct translation of a classId list to a bitmap - */ - public EWAHCompressedBitmap getClassesBM(Set classIds); + * @param classIds + * @return direct translation of a classId list to a bitmap + */ + public EWAHCompressedBitmap getClassesBM(Set classIds); - /** - * @param id - an individual + * @param id + * - an individual * @return types (direct and indirect) as bitmap */ public EWAHCompressedBitmap getTypesBM(String id); /** - * @param id - an individual + * @param id + * - an individual * @return direct types as bitmap */ public EWAHCompressedBitmap getDirectTypesBM(String id); - + /** * @param itemId - * @return bitmap representation of all (direct and indirect) classes known to be NOT instantiated + * @return bitmap representation of all (direct and indirect) classes known + * to be NOT instantiated */ public EWAHCompressedBitmap getNegatedTypesBM(String itemId); @@ -218,36 +222,44 @@ public EWAHCompressedBitmap getDirectNegatedTypesBM(String itemId); /** - * @param id - individual ID - * @param classId - the class with which to filter the classes mapped to the individual ID + * @param id + * - individual ID + * @param classId + * - the class with which to filter the classes mapped to the + * individual ID * @return */ public EWAHCompressedBitmap getFilteredDirectTypesBM(String id, String classId); /** - * @param ids - a set of class ids - * @param classId - the class with which to filter the class set - * @return a bitmap 
representation of only the original ids tha are subclasses of classId + * @param ids + * - a set of class ids + * @param classId + * - the class with which to filter the class set + * @return a bitmap representation of only the original ids tha are + * subclasses of classId */ public EWAHCompressedBitmap getFilteredDirectTypesBM(Set ids, String classId); - /** - * @param ids - a set of class ids - * @param classId - the class with which to filter the class set - * @return a bitmap representation of only the original ids tha are subclasses of classId + * @param ids + * - a set of class ids + * @param classId + * - the class with which to filter the class set + * @return a bitmap representation of only the original ids tha are + * subclasses of classId */ public EWAHCompressedBitmap getFilteredTypesBM(Set ids, String classId); - /** * @return utility object to map labels to ids */ public LabelMapper getLabelMapper(); - + /** - * Note: each index can correspond to multiple classes c1...cn if this set is an equivalence set. - * In this case the *representative* classId is returned + * Note: each index can correspond to multiple classes c1...cn if this set + * is an equivalence set. In this case the *representative* classId is + * returned * * @param index * @return classId @@ -263,25 +275,25 @@ /** * Return all classIds corresponding to a single index. * - * Note each index corresponds to a single equivalence set. This returns - * all members of the equivalence set + * Note each index corresponds to a single equivalence set. This returns all + * members of the equivalence set * * @param index * @return classIds */ public Set getClassIds(int index); - + /** * Return all classIds corresponding to a bitmap. * - * Note each index corresponds to a single equivalence set. This returns - * all members of the equivalence set + * Note each index corresponds to a single equivalence set. 
This returns all + * members of the equivalence set * * @param bm * @return classIds */ public Set getClassIds(EWAHCompressedBitmap bm); - + /** * Returns class ids in the specified ontology. * @@ -292,23 +304,25 @@ */ public Set getClassIdsByOntology(String ont); - /** - * @return array indexed by classIndex yielding the number of individuals per class + * @return array indexed by classIndex yielding the number of individuals + * per class */ public int[] getIndividualCountPerClassArray(); /** - * @param classId - an identifier for a class - * @return a bitmap representation of only the individuals that (directly or indirectly) - * instantiate classId + * @param classId + * - an identifier for a class + * @return a bitmap representation of only the individuals that (directly or + * indirectly) instantiate classId */ public EWAHCompressedBitmap getIndividualsBM(String classId); /** - * @param classIndex - index for a class - * @return a bitmap representation of only the individuals that (directly or indirectly) - * instantiate classId + * @param classIndex + * - index for a class + * @return a bitmap representation of only the individuals that (directly or + * indirectly) instantiate classId */ public EWAHCompressedBitmap getIndividualsBM(int classIndex); @@ -316,8 +330,8 @@ * @param individualId * @return property-value map */ - public Map> getPropertyValueMap(String individualId); - + public Map> getPropertyValueMap(String individualId); + /** * @param individualId * @param property @@ -339,10 +353,4 @@ */ public int getRootIndex(); - - - - - - } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/KnowledgeBaseModule.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/KnowledgeBaseModule.java deleted file mode 100644 index c2047ba..0000000 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/KnowledgeBaseModule.java +++ /dev/null @@ -1,118 +0,0 @@ -package org.monarchinitiative.owlsim.kb; - -import 
java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.Collection; -import java.util.Map; -import java.util.Set; -import java.util.zip.GZIPInputStream; - -import javax.inject.Singleton; - -import org.apache.commons.validator.routines.UrlValidator; -import org.monarchinitiative.owlsim.kb.bindings.IndicatesDataTsvs; -import org.monarchinitiative.owlsim.kb.bindings.IndicatesOwlDataOntologies; -import org.monarchinitiative.owlsim.kb.bindings.IndicatesOwlOntologies; -import org.monarchinitiative.owlsim.kb.impl.BMKnowledgeBaseOWLAPIImpl; -import org.prefixcommons.CurieUtil; -import org.semanticweb.elk.owlapi.ElkReasonerFactory; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyCreationException; -import org.semanticweb.owlapi.model.OWLOntologyManager; -import org.semanticweb.owlapi.reasoner.OWLReasonerFactory; - -import com.google.common.collect.ImmutableCollection; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.inject.AbstractModule; -import com.google.inject.Provides; - -/** - * TODO - rewrite this - * - * Reduce duplication of code with OWLLoader - * - */ -public class KnowledgeBaseModule extends AbstractModule { - - private final ImmutableCollection ontologyUris; - private final ImmutableCollection ontologyDataUris; - private final ImmutableCollection dataTsvs; - private final ImmutableMap curies; - private final UrlValidator urlValdiator = UrlValidator.getInstance(); - - public KnowledgeBaseModule(Collection ontologyUris, - Collection ontologyDataUris, - Set dataTsvs, - Map curies) { - this.ontologyUris = new ImmutableSet.Builder().addAll(ontologyUris).build(); - this.ontologyDataUris = new ImmutableSet.Builder().addAll(ontologyDataUris).build(); - this.dataTsvs = new 
ImmutableSet.Builder().addAll(dataTsvs).build(); - this.curies = new ImmutableMap.Builder().putAll(curies).build(); - } - - @Override - protected void configure() { - - bind(BMKnowledgeBase.class).to(BMKnowledgeBaseOWLAPIImpl.class).in(Singleton.class); - bind(OWLReasonerFactory.class).to(ElkReasonerFactory.class); - bind(CurieUtil.class).toInstance(new CurieUtil(curies)); -// bind(OWLOntologyManager.class).to(OWLOntologyManagerImpl.class); -// bind(ReadWriteLock.class).to(NoOpReadWriteLock.class); -// bind(OWLDataFactory.class).to(OWLDataFactoryImpl.class); - //bind(OWLOntologyManager.class).toInstance(OWLManager.createOWLOntologyManager()); - } - - OWLOntology loadOntology(OWLOntologyManager manager, String uri) throws OWLOntologyCreationException { - if (urlValdiator.isValid(uri)) { - return manager.loadOntology(IRI.create(uri)); - } else { - File file = new File(uri); - return manager.loadOntologyFromOntologyDocument(file); - } - } - - OWLOntology mergeOntologies(OWLOntologyManager manager, Collection uris) throws OWLOntologyCreationException, FileNotFoundException, IOException { - OWLOntology ontology = manager.createOntology(); - for (String uri: uris) { - OWLOntology loadedOntology; - if (uri.endsWith(".gz")) { - GZIPInputStream gis = new GZIPInputStream(new FileInputStream(uri)); - BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8")); - loadedOntology = manager.loadOntologyFromOntologyDocument(gis); - } - else { - loadedOntology = loadOntology(manager, uri); - } - manager.addAxioms(ontology, loadedOntology.getAxioms()); - } - return ontology; - } - - @Provides - @IndicatesOwlOntologies - @Singleton - OWLOntology getOwlOntologies(OWLOntologyManager manager) throws OWLOntologyCreationException, FileNotFoundException, IOException { - return mergeOntologies(manager, ontologyUris); - } - - @Provides - @IndicatesOwlDataOntologies - @Singleton - OWLOntology getOwlDataOntologies(OWLOntologyManager manager) throws OWLOntologyCreationException, 
FileNotFoundException, IOException { - return mergeOntologies(manager, ontologyDataUris); - } - - @Provides - @IndicatesDataTsvs - @Singleton - OWLOntology getDataTsvs(OWLOntologyManager manager) throws OWLOntologyCreationException, FileNotFoundException, IOException { - return mergeOntologies(manager, dataTsvs); - } - -} diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java index 42e2ba8..7c22da3 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java @@ -10,15 +10,11 @@ import java.util.Set; import java.util.stream.Collectors; -import javax.inject.Inject; - import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.io.OWLLoader; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.kb.CURIEMapper; import org.monarchinitiative.owlsim.kb.LabelMapper; -import org.monarchinitiative.owlsim.kb.bindings.IndicatesOwlDataOntologies; -import org.monarchinitiative.owlsim.kb.bindings.IndicatesOwlOntologies; import org.monarchinitiative.owlsim.kb.ewah.EWAHKnowledgeBaseStore; import org.monarchinitiative.owlsim.model.kb.Attribute; import org.monarchinitiative.owlsim.model.kb.Entity; @@ -70,8 +66,8 @@ /** * Implementation of {@link BMKnowledgeBase} that uses the OWLAPI. * - * An OWL reasoner is used. This guarantees the graphs is a DAG. (equivalence sets are mapped to the - * same node. subclass is mapped to DAG edges). + * An OWL reasoner is used. This guarantees the graphs is a DAG. (equivalence + * sets are mapped to the same node. subclass is mapped to DAG edges). 
* * See also: {@link OWLLoader} * @@ -82,1055 +78,1045 @@ */ public class BMKnowledgeBaseOWLAPIImpl implements BMKnowledgeBase { - private Logger LOG = Logger.getLogger(BMKnowledgeBaseOWLAPIImpl.class); - - private KBMetadata kbMetdata; - - private EWAHKnowledgeBaseStore ontoEWAHStore; - private OWLOntology owlOntology; - private OWLOntology owlDataOntology; - private OWLReasoner owlReasoner; - - private Map, Integer> classNodeToIntegerMap; - private Node[] classNodeArray; - private Map, Integer> individualNodeToIntegerMap; - private Node[] individualNodeArray; - - private Set> classNodes; - private Set> individualNodes; - - private Map> classToNodeMap; - private Map> individualToNodeMap; - // private Set classesInSignature; - private Set individualsInSignature; - private Map>> propertyValueMapMap; - Map> opposingClassMap = - new HashMap>(); - - private int[] individualCountPerClassArray; - - CURIEMapper curieMapper; - LabelMapper labelMapper; - CurieUtil curieUtil; - - /** - * @param owlOntology - * @param owlDataOntology TODO - fix this - * @param rf - */ - @Inject - public BMKnowledgeBaseOWLAPIImpl(@IndicatesOwlOntologies OWLOntology owlOntology, - @IndicatesOwlDataOntologies OWLOntology owlDataOntology, OWLReasonerFactory rf, - CurieUtil curieUtil) { - super(); - curieMapper = new CURIEMapperImpl(); - labelMapper = new LabelMapperImpl(curieMapper); - - this.owlOntology = owlOntology; - this.owlDataOntology = owlDataOntology; - if (owlDataOntology != null) { - translateFromDataOntology(); - } - this.owlReasoner = rf.createReasoner(owlOntology); - this.curieUtil = curieUtil; - createMap(); - ontoEWAHStore = new EWAHKnowledgeBaseStore(classNodes.size(), individualNodes.size()); - storeInferences(); - populateLabelsFromOntology(labelMapper, owlOntology); - if (owlDataOntology != null) { - LOG.info("Fetching labels from " + owlDataOntology); - // the data ontology may contain labels of data items - populateLabelsFromOntology(labelMapper, owlDataOntology); - } - } - 
- public static BMKnowledgeBase create(OWLOntology owlOntology, OWLReasonerFactory rf, - CurieUtil curieUtil) { - return new BMKnowledgeBaseOWLAPIImpl(owlOntology, null, rf, curieUtil); - } - - /** - * @param owlOntology - * @param owlDataOntology - * @param rf - * @return - */ - public static BMKnowledgeBase create(OWLOntology owlOntology, OWLOntology owlDataOntology, - OWLReasonerFactory rf, CurieUtil curieUtil) { - return new BMKnowledgeBaseOWLAPIImpl(owlOntology, owlDataOntology, rf, curieUtil); - } - - - - public KBMetadata getKbMetdata() { - return kbMetdata; - } - - - - public void setKbMetdata(KBMetadata kbMetdata) { - this.kbMetdata = kbMetdata; - } - - private String getShortForm(IRI iri) { - if (curieUtil.getCurieMap().isEmpty()) { - return iri.toString(); - } else { - Optional curie = curieUtil.getCurie(iri.toString()); - if (curie.isPresent()) { - return curie.get(); - } - else { - return iri.toString(); - } - } - } - - private void populateLabelsFromOntology(LabelMapper labelMapper, OWLOntology ontology) { - LOG.info("Populating labels from " + ontology); - int n = 0; - for (OWLAnnotationAssertionAxiom aaa : ontology.getAxioms(AxiomType.ANNOTATION_ASSERTION)) { - if (aaa.getProperty().isLabel()) { - if (aaa.getSubject() instanceof IRI && aaa.getValue() instanceof OWLLiteral) { - labelMapper.add(getShortForm((IRI) aaa.getSubject()), - ((OWLLiteral) aaa.getValue()).getLiteral()); - n++; - } - } - } - if (n == 0) { - LOG.info("Setting labels from fragments"); - Set objs = new HashSet(); - objs.addAll(ontology.getClassesInSignature()); - objs.addAll(ontology.getIndividualsInSignature()); - for (OWLNamedObject obj : objs) { - labelMapper.add(getShortForm(obj.getIRI()), obj.getIRI().getFragment()); - n++; - } - } - LOG.info("Label axioms mapped: " + n); - } - - /** - * @return utility object to map labels to ids - */ - public LabelMapper getLabelMapper() { - return labelMapper; - } - - /** - * @return set of all classes - */ - public Set 
getClassesInSignature() { - return classToNodeMap.keySet(); // TODO - consider optimizing - } - - /** - * @return set of all class identifiers - */ - public Set getClassIdsInSignature() { - Set ids = new HashSet(); - for (OWLClass i : getClassesInSignature()) { - ids.add(getShortForm(i.getIRI())); - } - return ids; - } - - public Set getClassIdsByOntology(String ont) { - return getClassIdsInSignature().stream().filter(x -> isIn(x, ont)).collect(Collectors.toSet()); - } - - /** - * @param id - * @param ont - * @return true if id is in ontology - */ - public boolean isIn(String id, String ont) { - // TODO - use curie util - return id.startsWith(ont+":") || id.contains("/"+ont+"_"); - } - - public int getNumClassNodes() { - return classNodeArray.length; - } - - - - /** - * @return set of all individual identifiers - */ - protected Set getIndividualsInSignature() { - return individualsInSignature; - } - - /** - * @return ids - */ - public Set getIndividualIdsInSignature() { - Set ids = new HashSet(); - for (OWLNamedIndividual i : getIndividualsInSignature()) { - ids.add(getShortForm(i.getIRI())); - } - return ids; - } - - - - /** - * @return OWLAPI representation of the ontology - */ - protected OWLOntology getOwlOntology() { - return owlOntology; - } - - // Assumption: data ontology includes ObjectPropertyAssertions - // TODO: make flexible - // TODO: extract associations - private void translateFromDataOntology() { - // TODO: allow other axiom types - for (OWLObjectPropertyAssertionAxiom opa : owlDataOntology - .getAxioms(AxiomType.OBJECT_PROPERTY_ASSERTION)) { - OWLIndividual obj = opa.getObject(); - if (obj instanceof OWLNamedIndividual) { - OWLClass type = getOWLDataFactory().getOWLClass(((OWLNamedIndividual) obj).getIRI()); - OWLClassAssertionAxiom ca = - getOWLDataFactory().getOWLClassAssertionAxiom(type, opa.getSubject()); - owlOntology.getOWLOntologyManager().addAxiom(owlOntology, ca); - } - } - } - - - // Each OWLClass and OWLIndividual is mapped to an 
Integer index - private void createMap() { - LOG.info("Creating mapping from ontology objects to integers"); - classNodes = new HashSet>(); - individualNodes = new HashSet>(); - Set classesInSignature; - classesInSignature = owlOntology.getClassesInSignature(true); - LOG.info("|classes|=" + classesInSignature.size()); - classesInSignature.add(getOWLThing()); - classesInSignature.remove(getOWLNothing()); - individualsInSignature = owlOntology.getIndividualsInSignature(true); - LOG.info("|individuals|=" + individualsInSignature.size()); - classToNodeMap = new HashMap>(); - individualToNodeMap = new HashMap>(); - classNodeToIntegerMap = new HashMap, Integer>(); - individualNodeToIntegerMap = new HashMap, Integer>(); - propertyValueMapMap = new HashMap>>(); - final HashMap, Integer> classNodeToFrequencyMap = - new HashMap, Integer>(); - final HashMap, Double> classNodeToFreqDepthMap = - new HashMap, Double>(); - for (OWLClass c : classesInSignature) { - if (owlReasoner.getInstances(c, false).isEmpty()) { - // TODO: deal with subclasses - // LOG.info("Skipping non-instantiated class: "+c); - // continue; - } - Node node = owlReasoner.getEquivalentClasses(c); - if (node.contains(getOWLNothing())) { - LOG.warn("Ignoring unsatisfiable class: " + c); - continue; - } - classNodes.add(node); - classToNodeMap.put(c, node); - int numAncNodes = owlReasoner.getSuperClasses(c, false).getNodes().size(); - int freq = owlReasoner.getInstances(c, false).getNodes().size(); - classNodeToFrequencyMap.put(node, freq); - - // freq depth is inversely correlated informativeness; - // frequency is primary measure (high freq = low informativeness); - // if frequency is tied, then tie is broken by number of ancestors - // (high ancestors = high informativeness) - // note that if frequency is not tied, then depth/ancestors should make - // no overall difference - we ensure this by taking the proportion of - // ancestor nodes divided by number of classes (there are always equal - // or more 
classes than nodes) - double freqDepth = freq + 1 - (numAncNodes / (double) classesInSignature.size()); - // LOG.info("freqDepth = "+freq+" "+freqDepth); - classNodeToFreqDepthMap.put(node, freqDepth); - } - - for (OWLNamedIndividual i : individualsInSignature) { - Node node = owlReasoner.getSameIndividuals(i); - individualNodes.add(node); - individualToNodeMap.put(i, node); - setPropertyValues(owlOntology, i); - if (owlDataOntology != null) - setPropertyValues(owlDataOntology, i); - } - - // Order class nodes such that LOW frequencies (HIGH Information Content) - // nodes are have LOWER indices - // TODO: use depth as a tie breaker - List> classNodesSorted = new ArrayList>(classNodes); - Collections.sort(classNodesSorted, new Comparator>() { - public int compare(Node n1, Node n2) { - double f1 = classNodeToFreqDepthMap.get(n1); - double f2 = classNodeToFreqDepthMap.get(n2); - if (f1 < f2) - return -1; - if (f1 > f2) - return 1; - return 0; - } - }); - int numClassNodes = classNodesSorted.size(); - classNodeArray = classNodesSorted.toArray(new Node[numClassNodes]); - individualCountPerClassArray = new int[numClassNodes]; - for (int i = 0; i < numClassNodes; i++) { - classNodeToIntegerMap.put(classNodeArray[i], i); - // LOG.info(classNodeArray[i] + " ix="+i + " - // FREQ="+classNodeToFrequencyMap.get(classNodeArray[i])); - // LOG.info(classNodeArray[i] + " ix="+i + " - // IX_REV="+classNodeToIntegerMap.get(classNodeArray[i])); - individualCountPerClassArray[i] = classNodeToFrequencyMap.get(classNodeArray[i]); - } - individualNodeArray = individualNodes.toArray(new Node[individualNodes.size()]); - for (int i = 0; i < individualNodes.size(); i++) { - individualNodeToIntegerMap.put(individualNodeArray[i], i); - } - - } - - - private void setPropertyValues(OWLOntology ont, OWLNamedIndividual i) { - Preconditions.checkNotNull(i); - Map> pvm = new HashMap>(); - String id = getShortForm(i.getIRI()); - propertyValueMapMap.put(id, pvm); - for (OWLIndividualAxiom ax : 
ont.getAxioms(i)) { - if (ax instanceof OWLPropertyAssertionAxiom) { - OWLPropertyAssertionAxiom paa = (OWLPropertyAssertionAxiom) ax; - OWLPropertyExpression p = paa.getProperty(); - if (p instanceof OWLObjectProperty || p instanceof OWLDataProperty) { - String pid; - if (p instanceof OWLObjectProperty) - pid = getShortForm(((OWLObjectProperty) p).getIRI()); - else - pid = getShortForm(((OWLDataProperty) p).getIRI()); - OWLPropertyAssertionObject obj = paa.getObject(); - if (obj instanceof OWLLiteral) { - addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral()); - } else if (obj instanceof OWLNamedIndividual) { - addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI())); - - } - - } else if (false) { - String pid = getShortForm(((OWLDataProperty) p).getIRI()); - OWLLiteral obj = ((OWLDataPropertyAssertionAxiom) paa).getObject(); - if (obj instanceof OWLLiteral) { - addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral()); - } else if (obj instanceof OWLNamedIndividual) { - addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI())); - - } - - } - } - } - - } - - - private void addPropertyValue(Map> pvm, String pid, String v) { - // LOG.debug("PV="+pid+"="+v); - if (!pvm.containsKey(pid)) - pvm.put(pid, new HashSet()); - pvm.get(pid).add(v); - } - - private void addOpposingClassPair(OWLClass c, OWLClassExpression dc) { - addOpposingClassPairAsym(c, dc); - if (!dc.isAnonymous()) - addOpposingClassPairAsym(dc.asOWLClass(), c); - } - - private void addOpposingClassPairAsym(OWLClass c, OWLClassExpression d) { - if (!opposingClassMap.containsKey(c)) - opposingClassMap.put(c, new HashSet()); - opposingClassMap.get(c).add(d); - } - - private void storeInferences() { - - - // Note: if there are any nodes containing >1 class or individual, then - // the store method is called redundantly. 
This is unlikely to affect performance, - // and the semantics are unchanged - for (OWLClass c : getClassesInSignature()) { - int clsIndex = getIndex(c); - // LOG.info("Storing inferences for "+c+" --> " + clsIndex); - Set sups = getIntegersForClassSet(owlReasoner.getSuperClasses(c, false)); - sups.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c))); - - Set subs = getIntegersForClassSet(owlReasoner.getSubClasses(c, false)); - subs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c))); - - ontoEWAHStore.setDirectSuperClasses(clsIndex, - getIntegersForClassSet(owlReasoner.getSuperClasses(c, true))); - ontoEWAHStore.setSuperClasses(clsIndex, sups); - ontoEWAHStore.setDirectSubClasses(clsIndex, - getIntegersForClassSet(owlReasoner.getSubClasses(c, true))); - ontoEWAHStore.setSubClasses(clsIndex, subs); - - // Find all disjoint pairs plus opposing pairs - for (OWLAnnotationAssertionAxiom aaa : owlOntology.getAnnotationAssertionAxioms(c.getIRI())) { - // RO_0002604 is-opposite-of. 
TODO - use a vocabulary object - if (aaa.getProperty().getIRI().toString() - .equals("http://purl.obolibrary.org/obo/RO_0002604")) { - OWLAnnotationValue v = aaa.getValue(); - if (v instanceof IRI) { - IRI dciri = (IRI) v; - OWLClass dc = - owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(dciri); - addOpposingClassPair(c, dc); - - } - } - } - - for (OWLDisjointClassesAxiom dca : owlOntology.getDisjointClassesAxioms(c)) { - for (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) { - addOpposingClassPair(c, dc); - } - } - - - // direct individuals are those asserted to be of type c or anything equivalent to c - Set individualInts = new HashSet(); - for (OWLClass ec : owlReasoner.getEquivalentClasses(c).getEntities()) { - for (OWLClassAssertionAxiom ax : owlOntology.getClassAssertionAxioms(ec)) { - if (ax.getIndividual().isNamed()) { - individualInts.add(getIndex(ax.getIndividual().asOWLNamedIndividual())); - } - } - } - ontoEWAHStore.setDirectIndividuals(clsIndex, individualInts); - - } - for (OWLNamedIndividual i : individualsInSignature) { - int individualIndex = getIndex(i); - // LOG.info("String inferences for "+i+" --> " +individualIndex); - ontoEWAHStore.setDirectTypes(individualIndex, - getIntegersForClassSet(owlReasoner.getTypes(i, true))); - ontoEWAHStore.setTypes(individualIndex, - getIntegersForClassSet(owlReasoner.getTypes(i, false))); - - // Treat CLassAssertion( ComplementOf(c) i) as a negative assertion - Set ncs = new HashSet(); - Set ncsDirect = new HashSet(); - for (OWLClassAssertionAxiom cx : owlOntology.getClassAssertionAxioms(i)) { - // TODO: investigate efficiency - number of items set may be high - if (cx.getClassExpression() instanceof OWLObjectComplementOf) { - OWLObjectComplementOf nx = (OWLObjectComplementOf) (cx.getClassExpression()); - OWLClassExpression nc = nx.getOperand(); - ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(nc, false))); - 
ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(nc))); - ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(nc))); - } - } - - // Populate negative assertions from DisjointClasses axioms - for (OWLClass c : owlReasoner.getTypes(i, false).getFlattened()) { - LOG.debug("TESTING FOR DCs: " + c); - if (opposingClassMap.containsKey(c)) { - for (OWLClassExpression dc : opposingClassMap.get(c)) { - LOG.info(i + " Type: " + c + " DisjointWith: " + dc); - ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); - ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); - ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); - } - } - /* - * for (OWLDisjointClassesAxiom dca : owlOntology.getDisjointClassesAxioms(c)) { for - * (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) { - * LOG.info(i+" Type: "+c+" DisjointWith: "+dc); - * ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); - * ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); - * ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); } } for - * (OWLAnnotationAssertionAxiom aaa : owlOntology.getAnnotationAssertionAxioms(c.getIRI())) - * { // RO_0002604 is-opposite-of. 
TODO - use a vocabulary object if - * (aaa.getProperty().getIRI().toString().equals("http://purl.obolibrary.org/obo/RO_0002604" - * )) { OWLAnnotationValue v = aaa.getValue(); if (v instanceof IRI) { IRI dciri = (IRI)v; - * OWLClass dc = owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(dciri); - * ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); - * ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); - * ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); - * - * } } } - */ - } - - ontoEWAHStore.setNegatedTypes(individualIndex, ncs); // TODO - determine if storing all - // inferred negated types is too - // inefficient - ontoEWAHStore.setDirectNegatedTypes(individualIndex, ncsDirect); - } - - } - - // TODO - private void storeIndividualProperties() { - for (OWLNamedIndividual i : individualsInSignature) { - for (OWLIndividualAxiom ax : owlOntology.getAxioms(i)) { - if (ax instanceof OWLObjectPropertyAssertionAxiom) { - OWLObjectPropertyExpression p = ((OWLObjectPropertyAssertionAxiom) ax).getProperty(); - } - } - } - } - - // TODO - complete this - // TODO - separate this out as it is not an OWLAPI model. Maybe sparql is overkill here? - // use sparql to query the memory model - private void storeIndividualToClassFrequencies() { - String sparql = ""; - Query query = QueryFactory.create(sparql); - Model model = null; - QueryExecution qexec = QueryExecutionFactory.create(query, model); - ResultSet results = qexec.execSelect(); - for (; results.hasNext();) { - QuerySolution soln = results.nextSolution(); - RDFNode x = soln.get("varName"); // Get a result variable by name. 
- Resource r = soln.getResource("VarR"); // Get a result variable - must be a resource - Literal l = soln.getLiteral("VarL"); // Get a result variable - must be a literal - } - } - - - - private Set getIntegersForClassSet(NodeSet nodeset) { - Set bits = new HashSet(); - for (Node n : nodeset.getNodes()) { - if (n.contains(getOWLNothing())) - continue; - bits.add(getIndexForClassNode(n)); - } - return bits; - } - - - private Set getIntegersForIndividualSet(NodeSet nodeset) { - Set bits = new HashSet(); - for (Node n : nodeset.getNodes()) { - bits.add(getIndexForIndividualNode(n)); - } - return bits; - } - - /** - * Each class is mapped to an integer - * - * Note that equivalent classes will be mapped to the same integer - * - * @param c - * @return integer representation of class - */ - protected int getIndex(OWLClass c) { - Preconditions.checkNotNull(c); - return getIndexForClassNode(classToNodeMap.get(c)); - } - - /** - * @param id - * @return integer representation of class with id - */ - public int getClassIndex(String id) { - Preconditions.checkNotNull(id); - return getIndex(getOWLClass(id)); - } - - /** - * @param index - * @return OWLClass Node that corresponds to this index - */ - public Node getClassNode(int index) { - return classNodeArray[index]; - } - - /** - * @param index - * @return OWLClass Node that corresponds to this index - */ - public Node getIndividualNode(int index) { - return individualNodeArray[index]; - } - - /** - * @param cix - * @return bitmap - */ - public EWAHCompressedBitmap getDirectIndividualsBM(int cix) { - return ontoEWAHStore.getDirectIndividuals(cix); - } - - @Override - public EWAHCompressedBitmap getIndividualsBM(String classId) { - return getIndividualsBM(getClassIndex(classId)); - } - - @Override - public EWAHCompressedBitmap getIndividualsBM(int classIndex) { - if (classIndex == getRootIndex()) { - EWAHCompressedBitmap indsBM = new EWAHCompressedBitmap(); - indsBM.setSizeInBits(getIndividualIdsInSignature().size(), true); - 
return indsBM; - } - EWAHCompressedBitmap subsBM = getSubClasses(classIndex); - EWAHCompressedBitmap indsBM = null; - // Note this implementation iterates through all subclasses - // combining individuals; it is too expensive to store all inferred inds by class - for (int subcix : subsBM.getPositions()) { - EWAHCompressedBitmap bm = getDirectIndividualsBM(subcix); - if (indsBM == null) { - indsBM = bm; - } else { - indsBM = indsBM.or(bm); - } - } - return indsBM; - } - - - /** - * Note: each index can correspond to multiple classes c1...cn if this set is an equivalence set. - * In this case the representative classId is returned - * - * @param index - * @return classId - */ - public String getClassId(int index) { - Node n = getClassNode(index); - OWLClass c = n.getRepresentativeElement(); - return getShortForm(c.getIRI()); - } - - public Set getClassIds(int index) { - Node n = getClassNode(index); - Set cids = new HashSet(); - for (OWLClass c : n.getEntities()) { - cids.add(getShortForm(c.getIRI())); - } - return cids; - } - - public Set getClassIds(EWAHCompressedBitmap bm) { - Set cids = new HashSet(); - for (int x : bm) { - Node n = getClassNode(x); - for (OWLClass c : n.getEntities()) { - cids.add(getShortForm(c.getIRI())); - } - } - return cids; - } - - - /** - * @param id - * @return integer representation of class with id - */ - public int getIndividualIndex(String id) { - Preconditions.checkNotNull(id); - return getIndex(getOWLNamedIndividual(id)); - } - - /** - * Each set of equivalent classes (a class node) is mapped to a unique integer - * - * @param n - * @return integer representation of class node - */ - protected int getIndexForClassNode(Node n) { - Preconditions.checkNotNull(n); - if (!classNodeToIntegerMap.containsKey(n)) - LOG.error("No such node: " + n); - return classNodeToIntegerMap.get(n); - } - - /** - * Each individual is mapped to an integer - * - * Note that individuals that stand in a SameAs relationship to one another will be mapped to 
the - * same integer - * - * @param i - * @return integer representation of individual - */ - protected int getIndex(OWLNamedIndividual i) { - return getIndexForIndividualNode(individualToNodeMap.get(i)); - } - - /** - * Each set of same individuals (an individual node) is mapped to a unique integer - * - * @param n - * @return integer representation of class node - */ - protected int getIndexForIndividualNode(Node n) { - return individualNodeToIntegerMap.get(n); - } - - - - /** - * @param c - * @return Bitmap representation of set of superclasses of c (direct and indirect) - */ - protected EWAHCompressedBitmap getSuperClassesBM(OWLClass c) { - return ontoEWAHStore.getSuperClasses(getIndex(c)); - } - - /** - * @param c - * @return Bitmap representation of set of direct superclasses of c - */ - protected EWAHCompressedBitmap getDirectSuperClassesBM(OWLClass c) { - return ontoEWAHStore.getDirectSuperClasses(getIndex(c)); - } - - /** - * @param c - * @param isDirect - * @return Bitmap representation of set ofsuperclasses of c - */ - protected EWAHCompressedBitmap getSuperClassesBM(OWLClass c, boolean isDirect) { - return ontoEWAHStore.getSuperClasses(getIndex(c), isDirect); - } - - /** - * @param clsSet - * @return union of all superClasses (direct and indirect) of any input class - */ - protected EWAHCompressedBitmap getSuperClassesBMByOWLClassSet(Set clsSet) { - Set classIndices = new HashSet(); - for (OWLClass c : clsSet) { - classIndices.add(getIndex(c)); - } - return ontoEWAHStore.getSuperClasses(classIndices); - } - - public EWAHCompressedBitmap getSuperClassesBM(String cid) { - return ontoEWAHStore.getSuperClasses(getClassIndex(cid)); - } - - public EWAHCompressedBitmap getDirectSuperClassesBM(String cid) { - return ontoEWAHStore.getDirectSuperClasses(getClassIndex(cid)); - } - - public EWAHCompressedBitmap getSuperClassesBM(int classIndex) { - return ontoEWAHStore.getSuperClasses(classIndex); - } - - public EWAHCompressedBitmap getClassesBM(Set classIds) { - 
EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); - for (String id : classIds) { - bm.set(getClassIndex(id)); - } - return bm; - } - - - public EWAHCompressedBitmap getDirectSuperClassesBM(int classIndex) { - return ontoEWAHStore.getDirectSuperClasses(classIndex); - } - - public EWAHCompressedBitmap getSubClasses(int classIndex) { - return ontoEWAHStore.getSubClasses(classIndex); - } - - public EWAHCompressedBitmap getDirectSubClassesBM(String cid) { - return ontoEWAHStore.getDirectSubClasses(getClassIndex(cid)); - } - - public EWAHCompressedBitmap getDirectSubClassesBM(int classIndex) { - return ontoEWAHStore.getDirectSubClasses(classIndex); - } - - /** - * @param clsIds - * @return union of all subClasses (direct and indirect) of any input class - */ - public EWAHCompressedBitmap getSubClassesBM(Set clsIds) { - Set classIndices = new HashSet(); - for (String id : clsIds) { - classIndices.add(getClassIndex(id)); - } - return ontoEWAHStore.getSubClasses(classIndices); - } - - /** - * @param clsIds - * @return union of all direct subClasses of all input classes - */ - public EWAHCompressedBitmap getDirectSubClassesBM(Set clsIds) { - Set classIndices = new HashSet(); - for (String id : clsIds) { - classIndices.add(getClassIndex(id)); - } - return ontoEWAHStore.getDirectSubClasses(classIndices); - } - - - /** - * @param clsIds - * @return union of all superClasses (direct and indirect) of any input class - */ - public EWAHCompressedBitmap getSuperClassesBM(Set clsIds) { - Set classIndices = new HashSet(); - for (String id : clsIds) { - classIndices.add(getClassIndex(id)); - } - return ontoEWAHStore.getSuperClasses(classIndices); - } - - /** - * @param clsIds - * @return union of all direct superClasses of all input classes - */ - public EWAHCompressedBitmap getDirectSuperClassesBM(Set clsIds) { - Set classIndices = new HashSet(); - for (String id : clsIds) { - classIndices.add(getClassIndex(id)); - } - return ontoEWAHStore.getDirectSuperClasses(classIndices); - } 
- - /** - * @param i - * @return Bitmap representation of set of (direct or indirect) types of i - */ - protected EWAHCompressedBitmap getTypesBM(OWLNamedIndividual i) { - return ontoEWAHStore.getTypes(getIndex(i)); - } - - /** - * @param i - * @return Bitmap representation of set of direct types of i - */ - protected EWAHCompressedBitmap getDirectTypesBM(OWLNamedIndividual i) { - return ontoEWAHStore.getDirectTypes(getIndex(i)); - } - - /** - * @param i - * @param classFilter - * @return Bitmap representation of the subset of direct types of i, which are descendants of - * classFilter - */ - protected EWAHCompressedBitmap getFilteredDirectTypesBM(OWLNamedIndividual i, OWLClass c) { - return ontoEWAHStore.getDirectTypes(getIndex(i), this.getIndex(c)); - } - - /** - * @param i - * @param isDirect - * @return Bitmap representation of set of (direct or indirect) types of i - */ - protected EWAHCompressedBitmap getTypesBM(OWLNamedIndividual i, boolean isDirect) { - return ontoEWAHStore.getTypes(getIndex(i), isDirect); - } - - /** - * @param id - * @return bitmap representation of all (direct and indirect) instantiated classes - */ - public EWAHCompressedBitmap getTypesBM(String id) { - Preconditions.checkNotNull(id); - return ontoEWAHStore.getTypes(getIndividualIndex(id)); - } - - /** - * @param individualIndex - * @return bitmap representation of all (direct and indirect) instantiated classes - */ - public EWAHCompressedBitmap getTypesBM(int individualIndex) { - return ontoEWAHStore.getTypes(individualIndex); - } - - /** - * @param id - * @return bitmap representation of all (direct and indirect) classes known to be NOT instantiated - */ - public EWAHCompressedBitmap getNegatedTypesBM(String id) { - Preconditions.checkNotNull(id); - return ontoEWAHStore.getNegatedTypes(getIndividualIndex(id)); - } - - /** - * @param id - * @return bitmap representation of all (direct and indirect) classes known to be NOT instantiated - */ - public EWAHCompressedBitmap 
getDirectNegatedTypesBM(String id) { - Preconditions.checkNotNull(id); - return ontoEWAHStore.getDirectNegatedTypes(getIndividualIndex(id)); - } - - - /** - * @param id - * @return bitmap representation of all (direct and indirect) instantiated classes - */ - public EWAHCompressedBitmap getDirectTypesBM(String id) { - Preconditions.checkNotNull(id); - return ontoEWAHStore.getDirectTypes(getIndividualIndex(id)); - } - - /** - * @param id - * @return bitmap representation of all (direct and indirect) instantiated classes that are - * subclasses of classId - */ - public EWAHCompressedBitmap getFilteredDirectTypesBM(String id, String classId) { - Preconditions.checkNotNull(id); - Preconditions.checkNotNull(classId); - return ontoEWAHStore.getDirectTypes(getIndividualIndex(id), getClassIndex(classId)); - } - - - - private OWLClass getOWLThing() { - return getOWLDataFactory().getOWLThing(); - } - - private OWLClass getOWLNothing() { - return getOWLDataFactory().getOWLNothing(); - } - - private OWLDataFactory getOWLDataFactory() { - return owlOntology.getOWLOntologyManager().getOWLDataFactory(); - } - - - /** - * @param obj - * @return CURIE-style identifier - */ - protected String getIdentifier(OWLNamedObject obj) { - return obj.getIRI().toString(); - } - - /** - * @param id CURIE-style - * @return OWLAPI Class object - */ - protected OWLClass getOWLClass(String id) { - Preconditions.checkNotNull(id); - if (curieUtil.getCurieMap().isEmpty()) { - return getOWLClass(IRI.create(id)); - } else { - return getOWLClass(IRI.create(curieUtil.getIri(id).or(id))); - } - } - - /** - * @param iri - * @return OWLAPI Class object - */ - protected OWLClass getOWLClass(IRI iri) { - return owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(iri); - } - - /** - * @param iri - * @return OWLAPI Class object - */ - protected OWLNamedIndividual getOWLNamedIndividual(IRI iri) { - return owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLNamedIndividual(iri); - } - - 
/** - * @param id CURIE-style - * @return OWLAPI Class object - */ - public OWLNamedIndividual getOWLNamedIndividual(String id) { - Preconditions.checkNotNull(id); - if (curieUtil.getCurieMap().isEmpty()) { - return getOWLNamedIndividual(IRI.create(id)); - } else { - return getOWLNamedIndividual(IRI.create(curieUtil.getIri(id).or(id))); - } - } - - public Attribute getAttribute(String id) { - Preconditions.checkNotNull(id); - String label = labelMapper.getArbitraryLabel(id); - return new Attribute(id, label); - } - - public Entity getEntity(String id) { - Preconditions.checkNotNull(id); - String label = labelMapper.getArbitraryLabel(id); - return new Entity(id, label); - } - - public int[] getIndividualCountPerClassArray() { - return individualCountPerClassArray; - } - - - - @Override - public Map> getPropertyValueMap(String individualId) { - return propertyValueMapMap.get(individualId); - } - - @Override - public Set getPropertyValues(String individualId, String property) { - Map> m = getPropertyValueMap(individualId); - if (m.containsKey(property)) - return new HashSet(m.get(property)); - else - return Collections.emptySet(); - } - - public EWAHCompressedBitmap[] getStoredDirectSubClassIndex() { - return ontoEWAHStore.getStoredDirectSubClasses(); - } - - @Override - public int getRootIndex() { - return getIndex(getOWLThing()); - } - - - - @Override - public String getIndividualId(int index) { - Node n = getIndividualNode(index); - OWLNamedIndividual ind = n.getRepresentativeElement(); - return getShortForm(ind.getIRI()); - } - - - - @Override - public EWAHCompressedBitmap getFilteredTypesBM(Set ids, String classId) { - - Set classBits = new HashSet(); - for (String id : ids) { - classBits.add(this.getClassIndex(id)); - } - - return ontoEWAHStore.getTypes(classBits, getClassIndex(classId)); - - } - - - public EWAHCompressedBitmap getFilteredDirectTypesBM(Set classIds, String classId) { - - Set classBits = new HashSet(); - for (String id : classIds) { - 
classBits.add(this.getClassIndex(id)); - } - - return ontoEWAHStore.getDirectTypes(classBits, getClassIndex(classId)); - - } - - + private Logger LOG = Logger.getLogger(BMKnowledgeBaseOWLAPIImpl.class); + + private KBMetadata kbMetdata; + + private EWAHKnowledgeBaseStore ontoEWAHStore; + private OWLOntology owlOntology; + private OWLOntology owlDataOntology; + private OWLReasoner owlReasoner; + + private Map, Integer> classNodeToIntegerMap; + private Node[] classNodeArray; + private Map, Integer> individualNodeToIntegerMap; + private Node[] individualNodeArray; + + private Set> classNodes; + private Set> individualNodes; + + private Map> classToNodeMap; + private Map> individualToNodeMap; + // private Set classesInSignature; + private Set individualsInSignature; + private Map>> propertyValueMapMap; + Map> opposingClassMap = new HashMap>(); + + private int[] individualCountPerClassArray; + + CURIEMapper curieMapper; + LabelMapper labelMapper; + CurieUtil curieUtil; + + /** + * @param owlOntology + * @param owlDataOntology + * TODO - fix this + * @param rf + */ + public BMKnowledgeBaseOWLAPIImpl(OWLOntology owlOntology, OWLOntology owlDataOntology, OWLReasonerFactory rf, + CurieUtil curieUtil) { + super(); + curieMapper = new CURIEMapperImpl(); + labelMapper = new LabelMapperImpl(curieMapper); + + this.owlOntology = owlOntology; + this.owlDataOntology = owlDataOntology; + if (owlDataOntology != null) { + translateFromDataOntology(); + } + this.owlReasoner = rf.createReasoner(owlOntology); + this.curieUtil = curieUtil; + createMap(); + ontoEWAHStore = new EWAHKnowledgeBaseStore(classNodes.size(), individualNodes.size()); + storeInferences(); + populateLabelsFromOntology(labelMapper, owlOntology); + if (owlDataOntology != null) { + LOG.info("Fetching labels from " + owlDataOntology); + // the data ontology may contain labels of data items + populateLabelsFromOntology(labelMapper, owlDataOntology); + } + } + + public static BMKnowledgeBase create(OWLOntology 
owlOntology, OWLReasonerFactory rf, CurieUtil curieUtil) { + return new BMKnowledgeBaseOWLAPIImpl(owlOntology, null, rf, curieUtil); + } + + /** + * @param owlOntology + * @param owlDataOntology + * @param rf + * @return + */ + public static BMKnowledgeBase create(OWLOntology owlOntology, OWLOntology owlDataOntology, OWLReasonerFactory rf, + CurieUtil curieUtil) { + return new BMKnowledgeBaseOWLAPIImpl(owlOntology, owlDataOntology, rf, curieUtil); + } + + public KBMetadata getKbMetdata() { + return kbMetdata; + } + + public void setKbMetdata(KBMetadata kbMetdata) { + this.kbMetdata = kbMetdata; + } + + private String getShortForm(IRI iri) { + if (curieUtil.getCurieMap().isEmpty()) { + return iri.toString(); + } else { + Optional curie = curieUtil.getCurie(iri.toString()); + if (curie.isPresent()) { + return curie.get(); + } else { + return iri.toString(); + } + } + } + + private void populateLabelsFromOntology(LabelMapper labelMapper, OWLOntology ontology) { + LOG.info("Populating labels from " + ontology); + int n = 0; + for (OWLAnnotationAssertionAxiom aaa : ontology.getAxioms(AxiomType.ANNOTATION_ASSERTION)) { + if (aaa.getProperty().isLabel()) { + if (aaa.getSubject() instanceof IRI && aaa.getValue() instanceof OWLLiteral) { + labelMapper.add(getShortForm((IRI) aaa.getSubject()), ((OWLLiteral) aaa.getValue()).getLiteral()); + n++; + } + } + } + if (n == 0) { + LOG.info("Setting labels from fragments"); + Set objs = new HashSet(); + objs.addAll(ontology.getClassesInSignature()); + objs.addAll(ontology.getIndividualsInSignature()); + for (OWLNamedObject obj : objs) { + labelMapper.add(getShortForm(obj.getIRI()), obj.getIRI().getFragment()); + n++; + } + } + LOG.info("Label axioms mapped: " + n); + } + + /** + * @return utility object to map labels to ids + */ + public LabelMapper getLabelMapper() { + return labelMapper; + } + + /** + * @return set of all classes + */ + public Set getClassesInSignature() { + return classToNodeMap.keySet(); // TODO - consider 
optimizing + } + + /** + * @return set of all class identifiers + */ + public Set getClassIdsInSignature() { + Set ids = new HashSet(); + for (OWLClass i : getClassesInSignature()) { + ids.add(getShortForm(i.getIRI())); + } + return ids; + } + + public Set getClassIdsByOntology(String ont) { + return getClassIdsInSignature().stream().filter(x -> isIn(x, ont)).collect(Collectors.toSet()); + } + + /** + * @param id + * @param ont + * @return true if id is in ontology + */ + public boolean isIn(String id, String ont) { + // TODO - use curie util + return id.startsWith(ont + ":") || id.contains("/" + ont + "_"); + } + + public int getNumClassNodes() { + return classNodeArray.length; + } + + /** + * @return set of all individual identifiers + */ + protected Set getIndividualsInSignature() { + return individualsInSignature; + } + + /** + * @return ids + */ + public Set getIndividualIdsInSignature() { + Set ids = new HashSet(); + for (OWLNamedIndividual i : getIndividualsInSignature()) { + ids.add(getShortForm(i.getIRI())); + } + return ids; + } + + /** + * @return OWLAPI representation of the ontology + */ + protected OWLOntology getOwlOntology() { + return owlOntology; + } + + // Assumption: data ontology includes ObjectPropertyAssertions + // TODO: make flexible + // TODO: extract associations + private void translateFromDataOntology() { + // TODO: allow other axiom types + for (OWLObjectPropertyAssertionAxiom opa : owlDataOntology.getAxioms(AxiomType.OBJECT_PROPERTY_ASSERTION)) { + OWLIndividual obj = opa.getObject(); + if (obj instanceof OWLNamedIndividual) { + OWLClass type = getOWLDataFactory().getOWLClass(((OWLNamedIndividual) obj).getIRI()); + OWLClassAssertionAxiom ca = getOWLDataFactory().getOWLClassAssertionAxiom(type, opa.getSubject()); + owlOntology.getOWLOntologyManager().addAxiom(owlOntology, ca); + } + } + } + + // Each OWLClass and OWLIndividual is mapped to an Integer index + private void createMap() { + LOG.info("Creating mapping from ontology objects 
to integers"); + classNodes = new HashSet>(); + individualNodes = new HashSet>(); + Set classesInSignature; + classesInSignature = owlOntology.getClassesInSignature(true); + LOG.info("|classes|=" + classesInSignature.size()); + classesInSignature.add(getOWLThing()); + classesInSignature.remove(getOWLNothing()); + individualsInSignature = owlOntology.getIndividualsInSignature(true); + LOG.info("|individuals|=" + individualsInSignature.size()); + classToNodeMap = new HashMap>(); + individualToNodeMap = new HashMap>(); + classNodeToIntegerMap = new HashMap, Integer>(); + individualNodeToIntegerMap = new HashMap, Integer>(); + propertyValueMapMap = new HashMap>>(); + final HashMap, Integer> classNodeToFrequencyMap = new HashMap, Integer>(); + final HashMap, Double> classNodeToFreqDepthMap = new HashMap, Double>(); + for (OWLClass c : classesInSignature) { + if (owlReasoner.getInstances(c, false).isEmpty()) { + // TODO: deal with subclasses + // LOG.info("Skipping non-instantiated class: "+c); + // continue; + } + Node node = owlReasoner.getEquivalentClasses(c); + if (node.contains(getOWLNothing())) { + LOG.warn("Ignoring unsatisfiable class: " + c); + continue; + } + classNodes.add(node); + classToNodeMap.put(c, node); + int numAncNodes = owlReasoner.getSuperClasses(c, false).getNodes().size(); + int freq = owlReasoner.getInstances(c, false).getNodes().size(); + classNodeToFrequencyMap.put(node, freq); + + // freq depth is inversely correlated informativeness; + // frequency is primary measure (high freq = low informativeness); + // if frequency is tied, then tie is broken by number of ancestors + // (high ancestors = high informativeness) + // note that if frequency is not tied, then depth/ancestors should + // make + // no overall difference - we ensure this by taking the proportion + // of + // ancestor nodes divided by number of classes (there are always + // equal + // or more classes than nodes) + double freqDepth = freq + 1 - (numAncNodes / (double) 
classesInSignature.size()); + // LOG.info("freqDepth = "+freq+" "+freqDepth); + classNodeToFreqDepthMap.put(node, freqDepth); + } + + for (OWLNamedIndividual i : individualsInSignature) { + Node node = owlReasoner.getSameIndividuals(i); + individualNodes.add(node); + individualToNodeMap.put(i, node); + setPropertyValues(owlOntology, i); + if (owlDataOntology != null) + setPropertyValues(owlDataOntology, i); + } + + // Order class nodes such that LOW frequencies (HIGH Information + // Content) + // nodes are have LOWER indices + // TODO: use depth as a tie breaker + List> classNodesSorted = new ArrayList>(classNodes); + Collections.sort(classNodesSorted, new Comparator>() { + public int compare(Node n1, Node n2) { + double f1 = classNodeToFreqDepthMap.get(n1); + double f2 = classNodeToFreqDepthMap.get(n2); + if (f1 < f2) + return -1; + if (f1 > f2) + return 1; + return 0; + } + }); + int numClassNodes = classNodesSorted.size(); + classNodeArray = classNodesSorted.toArray(new Node[numClassNodes]); + individualCountPerClassArray = new int[numClassNodes]; + for (int i = 0; i < numClassNodes; i++) { + classNodeToIntegerMap.put(classNodeArray[i], i); + // LOG.info(classNodeArray[i] + " ix="+i + " + // FREQ="+classNodeToFrequencyMap.get(classNodeArray[i])); + // LOG.info(classNodeArray[i] + " ix="+i + " + // IX_REV="+classNodeToIntegerMap.get(classNodeArray[i])); + individualCountPerClassArray[i] = classNodeToFrequencyMap.get(classNodeArray[i]); + } + individualNodeArray = individualNodes.toArray(new Node[individualNodes.size()]); + for (int i = 0; i < individualNodes.size(); i++) { + individualNodeToIntegerMap.put(individualNodeArray[i], i); + } + + } + + private void setPropertyValues(OWLOntology ont, OWLNamedIndividual i) { + Preconditions.checkNotNull(i); + Map> pvm = new HashMap>(); + String id = getShortForm(i.getIRI()); + propertyValueMapMap.put(id, pvm); + for (OWLIndividualAxiom ax : ont.getAxioms(i)) { + if (ax instanceof OWLPropertyAssertionAxiom) { + 
OWLPropertyAssertionAxiom paa = (OWLPropertyAssertionAxiom) ax; + OWLPropertyExpression p = paa.getProperty(); + if (p instanceof OWLObjectProperty || p instanceof OWLDataProperty) { + String pid; + if (p instanceof OWLObjectProperty) + pid = getShortForm(((OWLObjectProperty) p).getIRI()); + else + pid = getShortForm(((OWLDataProperty) p).getIRI()); + OWLPropertyAssertionObject obj = paa.getObject(); + if (obj instanceof OWLLiteral) { + addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral()); + } else if (obj instanceof OWLNamedIndividual) { + addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI())); + + } + + } else if (false) { + String pid = getShortForm(((OWLDataProperty) p).getIRI()); + OWLLiteral obj = ((OWLDataPropertyAssertionAxiom) paa).getObject(); + if (obj instanceof OWLLiteral) { + addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral()); + } else if (obj instanceof OWLNamedIndividual) { + addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI())); + + } + + } + } + } + + } + + private void addPropertyValue(Map> pvm, String pid, String v) { + // LOG.debug("PV="+pid+"="+v); + if (!pvm.containsKey(pid)) + pvm.put(pid, new HashSet()); + pvm.get(pid).add(v); + } + + private void addOpposingClassPair(OWLClass c, OWLClassExpression dc) { + addOpposingClassPairAsym(c, dc); + if (!dc.isAnonymous()) + addOpposingClassPairAsym(dc.asOWLClass(), c); + } + + private void addOpposingClassPairAsym(OWLClass c, OWLClassExpression d) { + if (!opposingClassMap.containsKey(c)) + opposingClassMap.put(c, new HashSet()); + opposingClassMap.get(c).add(d); + } + + private void storeInferences() { + + // Note: if there are any nodes containing >1 class or individual, then + // the store method is called redundantly. 
This is unlikely to affect + // performance, + // and the semantics are unchanged + for (OWLClass c : getClassesInSignature()) { + int clsIndex = getIndex(c); + // LOG.info("Storing inferences for "+c+" --> " + clsIndex); + Set sups = getIntegersForClassSet(owlReasoner.getSuperClasses(c, false)); + sups.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c))); + + Set subs = getIntegersForClassSet(owlReasoner.getSubClasses(c, false)); + subs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c))); + + ontoEWAHStore.setDirectSuperClasses(clsIndex, getIntegersForClassSet(owlReasoner.getSuperClasses(c, true))); + ontoEWAHStore.setSuperClasses(clsIndex, sups); + ontoEWAHStore.setDirectSubClasses(clsIndex, getIntegersForClassSet(owlReasoner.getSubClasses(c, true))); + ontoEWAHStore.setSubClasses(clsIndex, subs); + + // Find all disjoint pairs plus opposing pairs + for (OWLAnnotationAssertionAxiom aaa : owlOntology.getAnnotationAssertionAxioms(c.getIRI())) { + // RO_0002604 is-opposite-of. 
TODO - use a vocabulary object + if (aaa.getProperty().getIRI().toString().equals("http://purl.obolibrary.org/obo/RO_0002604")) { + OWLAnnotationValue v = aaa.getValue(); + if (v instanceof IRI) { + IRI dciri = (IRI) v; + OWLClass dc = owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(dciri); + addOpposingClassPair(c, dc); + + } + } + } + + for (OWLDisjointClassesAxiom dca : owlOntology.getDisjointClassesAxioms(c)) { + for (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) { + addOpposingClassPair(c, dc); + } + } + + // direct individuals are those asserted to be of type c or anything + // equivalent to c + Set individualInts = new HashSet(); + for (OWLClass ec : owlReasoner.getEquivalentClasses(c).getEntities()) { + for (OWLClassAssertionAxiom ax : owlOntology.getClassAssertionAxioms(ec)) { + if (ax.getIndividual().isNamed()) { + individualInts.add(getIndex(ax.getIndividual().asOWLNamedIndividual())); + } + } + } + ontoEWAHStore.setDirectIndividuals(clsIndex, individualInts); + + } + for (OWLNamedIndividual i : individualsInSignature) { + int individualIndex = getIndex(i); + // LOG.info("String inferences for "+i+" --> " +individualIndex); + ontoEWAHStore.setDirectTypes(individualIndex, getIntegersForClassSet(owlReasoner.getTypes(i, true))); + ontoEWAHStore.setTypes(individualIndex, getIntegersForClassSet(owlReasoner.getTypes(i, false))); + + // Treat CLassAssertion( ComplementOf(c) i) as a negative assertion + Set ncs = new HashSet(); + Set ncsDirect = new HashSet(); + for (OWLClassAssertionAxiom cx : owlOntology.getClassAssertionAxioms(i)) { + // TODO: investigate efficiency - number of items set may be + // high + if (cx.getClassExpression() instanceof OWLObjectComplementOf) { + OWLObjectComplementOf nx = (OWLObjectComplementOf) (cx.getClassExpression()); + OWLClassExpression nc = nx.getOperand(); + ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(nc, false))); + 
ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(nc))); + ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(nc))); + } + } + + // Populate negative assertions from DisjointClasses axioms + for (OWLClass c : owlReasoner.getTypes(i, false).getFlattened()) { + LOG.debug("TESTING FOR DCs: " + c); + if (opposingClassMap.containsKey(c)) { + for (OWLClassExpression dc : opposingClassMap.get(c)) { + LOG.info(i + " Type: " + c + " DisjointWith: " + dc); + ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); + ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); + ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); + } + } + /* + * for (OWLDisjointClassesAxiom dca : + * owlOntology.getDisjointClassesAxioms(c)) { for + * (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) { + * LOG.info(i+" Type: "+c+" DisjointWith: "+dc); + * ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses( + * dc, false))); + * ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses + * (dc))); ncsDirect.add(getIndexForClassNode(owlReasoner. + * getEquivalentClasses(dc))); } } for + * (OWLAnnotationAssertionAxiom aaa : + * owlOntology.getAnnotationAssertionAxioms(c.getIRI())) { // + * RO_0002604 is-opposite-of. TODO - use a vocabulary object if + * (aaa.getProperty().getIRI().toString().equals( + * "http://purl.obolibrary.org/obo/RO_0002604" )) { + * OWLAnnotationValue v = aaa.getValue(); if (v instanceof IRI) + * { IRI dciri = (IRI)v; OWLClass dc = + * owlOntology.getOWLOntologyManager().getOWLDataFactory(). + * getOWLClass(dciri); + * ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses( + * dc, false))); + * ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses + * (dc))); ncsDirect.add(getIndexForClassNode(owlReasoner. 
+ * getEquivalentClasses(dc))); + * + * } } } + */ + } + + ontoEWAHStore.setNegatedTypes(individualIndex, ncs); // TODO - + // determine + // if + // storing + // all + // inferred + // negated + // types is + // too + // inefficient + ontoEWAHStore.setDirectNegatedTypes(individualIndex, ncsDirect); + } + + } + + // TODO + private void storeIndividualProperties() { + for (OWLNamedIndividual i : individualsInSignature) { + for (OWLIndividualAxiom ax : owlOntology.getAxioms(i)) { + if (ax instanceof OWLObjectPropertyAssertionAxiom) { + OWLObjectPropertyExpression p = ((OWLObjectPropertyAssertionAxiom) ax).getProperty(); + } + } + } + } + + // TODO - complete this + // TODO - separate this out as it is not an OWLAPI model. Maybe sparql is + // overkill here? + // use sparql to query the memory model + private void storeIndividualToClassFrequencies() { + String sparql = ""; + Query query = QueryFactory.create(sparql); + Model model = null; + QueryExecution qexec = QueryExecutionFactory.create(query, model); + ResultSet results = qexec.execSelect(); + for (; results.hasNext();) { + QuerySolution soln = results.nextSolution(); + RDFNode x = soln.get("varName"); // Get a result variable by name. 
+ Resource r = soln.getResource("VarR"); // Get a result variable - + // must be a resource + Literal l = soln.getLiteral("VarL"); // Get a result variable - must + // be a literal + } + } + + private Set getIntegersForClassSet(NodeSet nodeset) { + Set bits = new HashSet(); + for (Node n : nodeset.getNodes()) { + if (n.contains(getOWLNothing())) + continue; + bits.add(getIndexForClassNode(n)); + } + return bits; + } + + private Set getIntegersForIndividualSet(NodeSet nodeset) { + Set bits = new HashSet(); + for (Node n : nodeset.getNodes()) { + bits.add(getIndexForIndividualNode(n)); + } + return bits; + } + + /** + * Each class is mapped to an integer + * + * Note that equivalent classes will be mapped to the same integer + * + * @param c + * @return integer representation of class + */ + protected int getIndex(OWLClass c) { + Preconditions.checkNotNull(c); + return getIndexForClassNode(classToNodeMap.get(c)); + } + + /** + * @param id + * @return integer representation of class with id + */ + public int getClassIndex(String id) { + Preconditions.checkNotNull(id); + return getIndex(getOWLClass(id)); + } + + /** + * @param index + * @return OWLClass Node that corresponds to this index + */ + public Node getClassNode(int index) { + return classNodeArray[index]; + } + + /** + * @param index + * @return OWLClass Node that corresponds to this index + */ + public Node getIndividualNode(int index) { + return individualNodeArray[index]; + } + + /** + * @param cix + * @return bitmap + */ + public EWAHCompressedBitmap getDirectIndividualsBM(int cix) { + return ontoEWAHStore.getDirectIndividuals(cix); + } + + @Override + public EWAHCompressedBitmap getIndividualsBM(String classId) { + return getIndividualsBM(getClassIndex(classId)); + } + + @Override + public EWAHCompressedBitmap getIndividualsBM(int classIndex) { + if (classIndex == getRootIndex()) { + EWAHCompressedBitmap indsBM = new EWAHCompressedBitmap(); + indsBM.setSizeInBits(getIndividualIdsInSignature().size(), 
true); + return indsBM; + } + EWAHCompressedBitmap subsBM = getSubClasses(classIndex); + EWAHCompressedBitmap indsBM = null; + // Note this implementation iterates through all subclasses + // combining individuals; it is too expensive to store all inferred inds + // by class + for (int subcix : subsBM.getPositions()) { + EWAHCompressedBitmap bm = getDirectIndividualsBM(subcix); + if (indsBM == null) { + indsBM = bm; + } else { + indsBM = indsBM.or(bm); + } + } + return indsBM; + } + + /** + * Note: each index can correspond to multiple classes c1...cn if this set + * is an equivalence set. In this case the representative classId is + * returned + * + * @param index + * @return classId + */ + public String getClassId(int index) { + Node n = getClassNode(index); + OWLClass c = n.getRepresentativeElement(); + return getShortForm(c.getIRI()); + } + + public Set getClassIds(int index) { + Node n = getClassNode(index); + Set cids = new HashSet(); + for (OWLClass c : n.getEntities()) { + cids.add(getShortForm(c.getIRI())); + } + return cids; + } + + public Set getClassIds(EWAHCompressedBitmap bm) { + Set cids = new HashSet(); + for (int x : bm) { + Node n = getClassNode(x); + for (OWLClass c : n.getEntities()) { + cids.add(getShortForm(c.getIRI())); + } + } + return cids; + } + + /** + * @param id + * @return integer representation of class with id + */ + public int getIndividualIndex(String id) { + Preconditions.checkNotNull(id); + return getIndex(getOWLNamedIndividual(id)); + } + + /** + * Each set of equivalent classes (a class node) is mapped to a unique + * integer + * + * @param n + * @return integer representation of class node + */ + protected int getIndexForClassNode(Node n) { + Preconditions.checkNotNull(n); + if (!classNodeToIntegerMap.containsKey(n)) + LOG.error("No such node: " + n); + return classNodeToIntegerMap.get(n); + } + + /** + * Each individual is mapped to an integer + * + * Note that individuals that stand in a SameAs relationship to one another + 
* will be mapped to the same integer + * + * @param i + * @return integer representation of individual + */ + protected int getIndex(OWLNamedIndividual i) { + return getIndexForIndividualNode(individualToNodeMap.get(i)); + } + + /** + * Each set of same individuals (an individual node) is mapped to a unique + * integer + * + * @param n + * @return integer representation of class node + */ + protected int getIndexForIndividualNode(Node n) { + return individualNodeToIntegerMap.get(n); + } + + /** + * @param c + * @return Bitmap representation of set of superclasses of c (direct and + * indirect) + */ + protected EWAHCompressedBitmap getSuperClassesBM(OWLClass c) { + return ontoEWAHStore.getSuperClasses(getIndex(c)); + } + + /** + * @param c + * @return Bitmap representation of set of direct superclasses of c + */ + protected EWAHCompressedBitmap getDirectSuperClassesBM(OWLClass c) { + return ontoEWAHStore.getDirectSuperClasses(getIndex(c)); + } + + /** + * @param c + * @param isDirect + * @return Bitmap representation of set ofsuperclasses of c + */ + protected EWAHCompressedBitmap getSuperClassesBM(OWLClass c, boolean isDirect) { + return ontoEWAHStore.getSuperClasses(getIndex(c), isDirect); + } + + /** + * @param clsSet + * @return union of all superClasses (direct and indirect) of any input + * class + */ + protected EWAHCompressedBitmap getSuperClassesBMByOWLClassSet(Set clsSet) { + Set classIndices = new HashSet(); + for (OWLClass c : clsSet) { + classIndices.add(getIndex(c)); + } + return ontoEWAHStore.getSuperClasses(classIndices); + } + + public EWAHCompressedBitmap getSuperClassesBM(String cid) { + return ontoEWAHStore.getSuperClasses(getClassIndex(cid)); + } + + public EWAHCompressedBitmap getDirectSuperClassesBM(String cid) { + return ontoEWAHStore.getDirectSuperClasses(getClassIndex(cid)); + } + + public EWAHCompressedBitmap getSuperClassesBM(int classIndex) { + return ontoEWAHStore.getSuperClasses(classIndex); + } + + public EWAHCompressedBitmap 
getClassesBM(Set classIds) { + EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); + for (String id : classIds) { + bm.set(getClassIndex(id)); + } + return bm; + } + + public EWAHCompressedBitmap getDirectSuperClassesBM(int classIndex) { + return ontoEWAHStore.getDirectSuperClasses(classIndex); + } + + public EWAHCompressedBitmap getSubClasses(int classIndex) { + return ontoEWAHStore.getSubClasses(classIndex); + } + + public EWAHCompressedBitmap getDirectSubClassesBM(String cid) { + return ontoEWAHStore.getDirectSubClasses(getClassIndex(cid)); + } + + public EWAHCompressedBitmap getDirectSubClassesBM(int classIndex) { + return ontoEWAHStore.getDirectSubClasses(classIndex); + } + + /** + * @param clsIds + * @return union of all subClasses (direct and indirect) of any input class + */ + public EWAHCompressedBitmap getSubClassesBM(Set clsIds) { + Set classIndices = new HashSet(); + for (String id : clsIds) { + classIndices.add(getClassIndex(id)); + } + return ontoEWAHStore.getSubClasses(classIndices); + } + + /** + * @param clsIds + * @return union of all direct subClasses of all input classes + */ + public EWAHCompressedBitmap getDirectSubClassesBM(Set clsIds) { + Set classIndices = new HashSet(); + for (String id : clsIds) { + classIndices.add(getClassIndex(id)); + } + return ontoEWAHStore.getDirectSubClasses(classIndices); + } + + /** + * @param clsIds + * @return union of all superClasses (direct and indirect) of any input + * class + */ + public EWAHCompressedBitmap getSuperClassesBM(Set clsIds) { + Set classIndices = new HashSet(); + for (String id : clsIds) { + classIndices.add(getClassIndex(id)); + } + return ontoEWAHStore.getSuperClasses(classIndices); + } + + /** + * @param clsIds + * @return union of all direct superClasses of all input classes + */ + public EWAHCompressedBitmap getDirectSuperClassesBM(Set clsIds) { + Set classIndices = new HashSet(); + for (String id : clsIds) { + classIndices.add(getClassIndex(id)); + } + return 
ontoEWAHStore.getDirectSuperClasses(classIndices); + } + + /** + * @param i + * @return Bitmap representation of set of (direct or indirect) types of i + */ + protected EWAHCompressedBitmap getTypesBM(OWLNamedIndividual i) { + return ontoEWAHStore.getTypes(getIndex(i)); + } + + /** + * @param i + * @return Bitmap representation of set of direct types of i + */ + protected EWAHCompressedBitmap getDirectTypesBM(OWLNamedIndividual i) { + return ontoEWAHStore.getDirectTypes(getIndex(i)); + } + + /** + * @param i + * @param classFilter + * @return Bitmap representation of the subset of direct types of i, which + * are descendants of classFilter + */ + protected EWAHCompressedBitmap getFilteredDirectTypesBM(OWLNamedIndividual i, OWLClass c) { + return ontoEWAHStore.getDirectTypes(getIndex(i), this.getIndex(c)); + } + + /** + * @param i + * @param isDirect + * @return Bitmap representation of set of (direct or indirect) types of i + */ + protected EWAHCompressedBitmap getTypesBM(OWLNamedIndividual i, boolean isDirect) { + return ontoEWAHStore.getTypes(getIndex(i), isDirect); + } + + /** + * @param id + * @return bitmap representation of all (direct and indirect) instantiated + * classes + */ + public EWAHCompressedBitmap getTypesBM(String id) { + Preconditions.checkNotNull(id); + return ontoEWAHStore.getTypes(getIndividualIndex(id)); + } + + /** + * @param individualIndex + * @return bitmap representation of all (direct and indirect) instantiated + * classes + */ + public EWAHCompressedBitmap getTypesBM(int individualIndex) { + return ontoEWAHStore.getTypes(individualIndex); + } + + /** + * @param id + * @return bitmap representation of all (direct and indirect) classes known + * to be NOT instantiated + */ + public EWAHCompressedBitmap getNegatedTypesBM(String id) { + Preconditions.checkNotNull(id); + return ontoEWAHStore.getNegatedTypes(getIndividualIndex(id)); + } + + /** + * @param id + * @return bitmap representation of all (direct and indirect) classes known + * to 
be NOT instantiated + */ + public EWAHCompressedBitmap getDirectNegatedTypesBM(String id) { + Preconditions.checkNotNull(id); + return ontoEWAHStore.getDirectNegatedTypes(getIndividualIndex(id)); + } + + /** + * @param id + * @return bitmap representation of all directly instantiated + * classes + */ + public EWAHCompressedBitmap getDirectTypesBM(String id) { + Preconditions.checkNotNull(id); + return ontoEWAHStore.getDirectTypes(getIndividualIndex(id)); + } + + /** + * @param id + * @return bitmap representation of all directly instantiated + * classes that are subclasses of classId + */ + public EWAHCompressedBitmap getFilteredDirectTypesBM(String id, String classId) { + Preconditions.checkNotNull(id); + Preconditions.checkNotNull(classId); + return ontoEWAHStore.getDirectTypes(getIndividualIndex(id), getClassIndex(classId)); + } + + private OWLClass getOWLThing() { + return getOWLDataFactory().getOWLThing(); + } + + private OWLClass getOWLNothing() { + return getOWLDataFactory().getOWLNothing(); + } + + private OWLDataFactory getOWLDataFactory() { + return owlOntology.getOWLOntologyManager().getOWLDataFactory(); + } + + /** + * @param obj + * @return identifier of obj (its full IRI as a string) + */ + protected String getIdentifier(OWLNamedObject obj) { + return obj.getIRI().toString(); + } + + /** + * @param id + * CURIE-style + * @return OWLAPI Class object + */ + protected OWLClass getOWLClass(String id) { + Preconditions.checkNotNull(id); + if (curieUtil.getCurieMap().isEmpty()) { + return getOWLClass(IRI.create(id)); + } else { + return getOWLClass(IRI.create(curieUtil.getIri(id).or(id))); + } + } + + /** + * @param iri + * @return OWLAPI Class object + */ + protected OWLClass getOWLClass(IRI iri) { + return owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(iri); + } + + /** + * @param iri + * @return OWLAPI NamedIndividual object + */ + protected OWLNamedIndividual getOWLNamedIndividual(IRI iri) { + return
owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLNamedIndividual(iri); + } + + /** + * @param id + * CURIE-style + * @return OWLAPI Class object + */ + public OWLNamedIndividual getOWLNamedIndividual(String id) { + Preconditions.checkNotNull(id); + if (curieUtil.getCurieMap().isEmpty()) { + return getOWLNamedIndividual(IRI.create(id)); + } else { + return getOWLNamedIndividual(IRI.create(curieUtil.getIri(id).or(id))); + } + } + + public Attribute getAttribute(String id) { + Preconditions.checkNotNull(id); + String label = labelMapper.getArbitraryLabel(id); + return new Attribute(id, label); + } + + public Entity getEntity(String id) { + Preconditions.checkNotNull(id); + String label = labelMapper.getArbitraryLabel(id); + return new Entity(id, label); + } + + public int[] getIndividualCountPerClassArray() { + return individualCountPerClassArray; + } + + @Override + public Map> getPropertyValueMap(String individualId) { + return propertyValueMapMap.get(individualId); + } + + @Override + public Set getPropertyValues(String individualId, String property) { + Map> m = getPropertyValueMap(individualId); + if (m.containsKey(property)) + return new HashSet(m.get(property)); + else + return Collections.emptySet(); + } + + public EWAHCompressedBitmap[] getStoredDirectSubClassIndex() { + return ontoEWAHStore.getStoredDirectSubClasses(); + } + + @Override + public int getRootIndex() { + return getIndex(getOWLThing()); + } + + @Override + public String getIndividualId(int index) { + Node n = getIndividualNode(index); + OWLNamedIndividual ind = n.getRepresentativeElement(); + return getShortForm(ind.getIRI()); + } + + @Override + public EWAHCompressedBitmap getFilteredTypesBM(Set ids, String classId) { + + Set classBits = new HashSet(); + for (String id : ids) { + classBits.add(this.getClassIndex(id)); + } + + return ontoEWAHStore.getTypes(classBits, getClassIndex(classId)); + + } + + public EWAHCompressedBitmap getFilteredDirectTypesBM(Set classIds, String classId) 
{ + + Set classBits = new HashSet(); + for (String id : classIds) { + classBits.add(this.getClassIndex(id)); + } + + return ontoEWAHStore.getDirectTypes(classBits, getClassIndex(classId)); + + } } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/model/match/ProfileQuery.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/model/match/ProfileQuery.java index b9d748c..0b91b7e 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/model/match/ProfileQuery.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/model/match/ProfileQuery.java @@ -3,50 +3,49 @@ import java.util.Set; import org.monarchinitiative.owlsim.kb.filter.Filter; -import org.monarchinitiative.owlsim.model.match.impl.ProfileQueryImpl; - -import com.google.inject.ImplementedBy; /** - * The most basic type of query, a positive conjunction of features to be matched. + * The most basic type of query, a positive conjunction of features to be + * matched. * * * @author cjm * */ -@ImplementedBy(ProfileQueryImpl.class) public interface ProfileQuery { - + /** * @return all (positive) class Ids in query */ public Set getQueryClassIds(); - + /** * @return query filter */ public Filter getFilter(); - + public void setFilter(Filter f); - + public Integer getLimit(); /** * set to -1 for no limit (all) + * * @param limit */ public void setLimit(Integer limit); - + /** * an optional set of individuals for which we wish to test ranking. 
* * This is for use when we with to limit the number of individuals returned, - * but we want to know the ranking and scores of particular individuals outside - * the top N + * but we want to know the ranking and scores of particular individuals + * outside the top N * * @return individual ids */ - public Set getReferenceIndividualIds(); - public void setReferenceIndividualIds(Set indIds); + public Set getReferenceIndividualIds(); + + public void setReferenceIndividualIds(Set indIds); } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/perf/AbstractProfileMatcherPerfIT.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/perf/AbstractProfileMatcherPerfIT.java index e8e4007..1d7df28 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/perf/AbstractProfileMatcherPerfIT.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/perf/AbstractProfileMatcherPerfIT.java @@ -6,8 +6,6 @@ import java.util.List; import java.util.Set; -import javax.inject.Inject; - import org.apache.log4j.Logger; import org.junit.Test; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/mica/MICAStoreBench.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/mica/MICAStoreBench.java index d0b7b44..e8fb58a 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/mica/MICAStoreBench.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/mica/MICAStoreBench.java @@ -2,8 +2,6 @@ import java.util.HashMap; -import javax.inject.Inject; - import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.mica.impl.MICAStoreImpl; import org.monarchinitiative.owlsim.compute.mica.impl.NoRootException; diff --git a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/OwlSimServiceApplication.java 
b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/OwlSimServiceApplication.java index 4ab2e13..9e8da2f 100644 --- a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/OwlSimServiceApplication.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/OwlSimServiceApplication.java @@ -23,10 +23,10 @@ import org.apache.log4j.Logger; import org.eclipse.jetty.servlets.CrossOriginFilter; -import org.monarchinitiative.owlsim.compute.enrich.EnrichmentMapModule; -import org.monarchinitiative.owlsim.compute.matcher.MatcherMapModule; -import org.monarchinitiative.owlsim.kb.KnowledgeBaseModule; import org.monarchinitiative.owlsim.services.configuration.ApplicationConfiguration; +import org.monarchinitiative.owlsim.services.modules.EnrichmentMapModule; +import org.monarchinitiative.owlsim.services.modules.KnowledgeBaseModule; +import org.monarchinitiative.owlsim.services.modules.MatcherMapModule; import org.semanticweb.owlapi.OWLAPIParsersModule; import org.semanticweb.owlapi.OWLAPIServiceLoaderModule; diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentMapModule.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/EnrichmentMapModule.java similarity index 76% rename from owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentMapModule.java rename to owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/EnrichmentMapModule.java index d4b6823..95c03ac 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentMapModule.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/EnrichmentMapModule.java @@ -1,10 +1,11 @@ -package org.monarchinitiative.owlsim.compute.enrich; +package org.monarchinitiative.owlsim.services.modules; import java.io.IOException; import java.util.HashMap; import java.util.Map; import org.apache.log4j.Logger; +import 
org.monarchinitiative.owlsim.compute.enrich.EnrichmentEngine; import org.monarchinitiative.owlsim.compute.enrich.impl.HypergeometricEnrichmentEngine; import com.google.inject.AbstractModule; @@ -13,15 +14,15 @@ public class EnrichmentMapModule extends AbstractModule { - private Logger LOG = Logger.getLogger(EnrichmentMapModule.class); - + private Logger LOG = Logger.getLogger(EnrichmentMapModule.class); @Override protected void configure() { } /*** - *

Note: The class must be injectable by Guice. + *

+ * Note: The class must be injectable by Guice. * * @param injector * @return A mapping of ProfileMatchers @@ -29,7 +30,7 @@ protected void configure() { */ @Provides Map getEnrichmentEngines(Injector injector) throws IOException { - + Map engineMap = new HashMap<>(); EnrichmentEngine e = (EnrichmentEngine) injector.getInstance(HypergeometricEnrichmentEngine.class); engineMap.put(e.getShortName(), e); diff --git a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java new file mode 100644 index 0000000..429aa97 --- /dev/null +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java @@ -0,0 +1,127 @@ +package org.monarchinitiative.owlsim.services.modules; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Collection; +import java.util.Map; +import java.util.Set; +import java.util.zip.GZIPInputStream; + +import javax.inject.Singleton; + +import org.apache.commons.validator.routines.UrlValidator; +import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; +import org.monarchinitiative.owlsim.kb.impl.BMKnowledgeBaseOWLAPIImpl; +import org.monarchinitiative.owlsim.services.modules.bindings.IndicatesDataTsvs; +import org.monarchinitiative.owlsim.services.modules.bindings.IndicatesOwlDataOntologies; +import org.monarchinitiative.owlsim.services.modules.bindings.IndicatesOwlOntologies; +import org.prefixcommons.CurieUtil; +import org.semanticweb.elk.owlapi.ElkReasonerFactory; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import 
org.semanticweb.owlapi.reasoner.OWLReasonerFactory; + +import com.google.common.collect.ImmutableCollection; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.inject.AbstractModule; +import com.google.inject.Provides; + +/** + * TODO - rewrite this + * + * Reduce duplication of code with OWLLoader + * + */ +public class KnowledgeBaseModule extends AbstractModule { + + private final ImmutableCollection ontologyUris; + private final ImmutableCollection ontologyDataUris; + private final ImmutableCollection dataTsvs; + private final ImmutableMap curies; + private final UrlValidator urlValdiator = UrlValidator.getInstance(); + + public KnowledgeBaseModule(Collection ontologyUris, Collection ontologyDataUris, + Set dataTsvs, Map curies) { + this.ontologyUris = new ImmutableSet.Builder().addAll(ontologyUris).build(); + this.ontologyDataUris = new ImmutableSet.Builder().addAll(ontologyDataUris).build(); + this.dataTsvs = new ImmutableSet.Builder().addAll(dataTsvs).build(); + this.curies = new ImmutableMap.Builder().putAll(curies).build(); + } + + @Override + protected void configure() { + bind(BMKnowledgeBase.class).to(BMKnowledgeBaseOWLAPIImpl.class).in(Singleton.class); + bind(OWLReasonerFactory.class).to(ElkReasonerFactory.class); + bind(CurieUtil.class).toInstance(new CurieUtil(curies)); + // bind(OWLOntologyManager.class).to(OWLOntologyManagerImpl.class); + // bind(ReadWriteLock.class).to(NoOpReadWriteLock.class); + // bind(OWLDataFactory.class).to(OWLDataFactoryImpl.class); + // bind(OWLOntologyManager.class).toInstance(OWLManager.createOWLOntologyManager()); + } + + @Provides + BMKnowledgeBaseOWLAPIImpl provideBMKnowledgeBaseOWLAPIImpl(@IndicatesOwlOntologies OWLOntology owlOntology, + @IndicatesOwlDataOntologies OWLOntology owlDataOntology, OWLReasonerFactory rf, CurieUtil curieUtil) { + BMKnowledgeBaseOWLAPIImpl bMKnowledgeBaseOWLAPIImpl = new BMKnowledgeBaseOWLAPIImpl(owlOntology, + owlDataOntology, 
rf, curieUtil); + return bMKnowledgeBaseOWLAPIImpl; + } + + OWLOntology loadOntology(OWLOntologyManager manager, String uri) throws OWLOntologyCreationException { + if (urlValdiator.isValid(uri)) { + return manager.loadOntology(IRI.create(uri)); + } else { + File file = new File(uri); + return manager.loadOntologyFromOntologyDocument(file); + } + } + + OWLOntology mergeOntologies(OWLOntologyManager manager, Collection uris) + throws OWLOntologyCreationException, FileNotFoundException, IOException { + OWLOntology ontology = manager.createOntology(); + for (String uri : uris) { + OWLOntology loadedOntology; + if (uri.endsWith(".gz")) { + GZIPInputStream gis = new GZIPInputStream(new FileInputStream(uri)); + BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8")); + loadedOntology = manager.loadOntologyFromOntologyDocument(gis); + } else { + loadedOntology = loadOntology(manager, uri); + } + manager.addAxioms(ontology, loadedOntology.getAxioms()); + } + return ontology; + } + + @Provides + @IndicatesOwlOntologies + @Singleton + OWLOntology getOwlOntologies(OWLOntologyManager manager) + throws OWLOntologyCreationException, FileNotFoundException, IOException { + return mergeOntologies(manager, ontologyUris); + } + + @Provides + @IndicatesOwlDataOntologies + @Singleton + OWLOntology getOwlDataOntologies(OWLOntologyManager manager) + throws OWLOntologyCreationException, FileNotFoundException, IOException { + return mergeOntologies(manager, ontologyDataUris); + } + + @Provides + @IndicatesDataTsvs + @Singleton + OWLOntology getDataTsvs(OWLOntologyManager manager) + throws OWLOntologyCreationException, FileNotFoundException, IOException { + return mergeOntologies(manager, dataTsvs); + } + +} diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/MatcherMapModule.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/MatcherMapModule.java similarity index 51% rename from 
owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/MatcherMapModule.java rename to owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/MatcherMapModule.java index 36c91d8..49c0e18 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/MatcherMapModule.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/MatcherMapModule.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.owlsim.compute.matcher; +package org.monarchinitiative.owlsim.services.modules; import java.io.IOException; import java.lang.reflect.Modifier; @@ -6,7 +6,7 @@ import java.util.Map; import org.apache.log4j.Logger; -import org.monarchinitiative.owlsim.compute.matcher.impl.JaccardSimilarityProfileMatcher; +import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import com.google.common.reflect.ClassPath; import com.google.inject.AbstractModule; @@ -15,7 +15,7 @@ public class MatcherMapModule extends AbstractModule { - private Logger LOG = Logger.getLogger(MatcherMapModule.class); + private Logger LOG = Logger.getLogger(MatcherMapModule.class); // The package containing ProfileMatcher implementations private static final String matcherPackage = "org.monarchinitiative.owlsim.compute.matcher.impl"; @@ -27,11 +27,14 @@ protected void configure() { /*** * Gets of map of ProfileMatchers. * - *

A convenience method to obviate maintaining hard coded instances of ProfileMatchers. - * matcherPackage is inspected for any non-abstract class that implements ProfileMatcher - * and a map is created between that ProfileMatcher's shortName and an instance of the matcher. + *

+ * A convenience method to obviate maintaining hard coded instances of + * ProfileMatchers. matcherPackage is inspected for any + * non-abstract class that implements ProfileMatcher and a map is created + * between that ProfileMatcher's shortName and an instance of the matcher. * - *

Note: The class must be injectable by Guice. + *

+ * Note: The class must be injectable by Guice. * * @param injector * @return A mapping of ProfileMatchers @@ -40,21 +43,19 @@ protected void configure() { @Provides Map getMatchers(Injector injector) throws IOException { ClassPath classpath = ClassPath.from(getClass().getClassLoader()); - LOG.info("Fetchig classes from: "+classpath.getClass()); - LOG.info("top level of :"+matcherPackage); + LOG.info("Fetchig classes from: " + classpath.getClass()); + LOG.info("top level of :" + matcherPackage); Map matcherMap = new HashMap<>(); - for (ClassPath.ClassInfo info: classpath.getTopLevelClasses(matcherPackage)) { - Class clazz = info.load(); - LOG.info(" Adding: "+info + " class: "+clazz + " ISAB:"+ - Modifier.isAbstract(clazz.getModifiers())); - if (!Modifier.isAbstract(clazz.getModifiers()) && - ProfileMatcher.class.isAssignableFrom(info.load())) { + for (ClassPath.ClassInfo info : classpath.getTopLevelClasses(matcherPackage)) { + Class clazz = info.load(); + LOG.info(" Adding: " + info + " class: " + clazz + " ISAB:" + Modifier.isAbstract(clazz.getModifiers())); + if (!Modifier.isAbstract(clazz.getModifiers()) && ProfileMatcher.class.isAssignableFrom(info.load())) { ProfileMatcher matcher = (ProfileMatcher) injector.getInstance(clazz); matcherMap.put(matcher.getShortName(), matcher); } } - + return matcherMap; } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/runner/MatcherModule.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/MatcherModule.java similarity index 62% rename from owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/runner/MatcherModule.java rename to owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/MatcherModule.java index 1106f92..a9ec8b6 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/runner/MatcherModule.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/MatcherModule.java @@ -1,4 +1,4 
@@ -package org.monarchinitiative.owlsim.compute.runner; +package org.monarchinitiative.owlsim.services.modules; import java.io.IOException; import java.lang.reflect.Modifier; @@ -9,10 +9,8 @@ import com.google.common.reflect.ClassPath; import com.google.inject.AbstractModule; -import com.google.inject.Injector; import com.google.inject.Provides; - // CLONED FROM SERVICES public class MatcherModule extends AbstractModule { @@ -26,28 +24,30 @@ protected void configure() { /*** * Gets of map of ProfileMatchers. * - *

A convenience method to obviate maintaining hard coded instances of ProfileMatchers. - * matcherPackage is inspected for any non-abstract class that implements ProfileMatcher - * and a map is created between that ProfileMatcher's shortName and an instance of the matcher. + *

+ * A convenience method to obviate maintaining hard coded instances of + * ProfileMatchers. matcherPackage is inspected for any + * non-abstract class that implements ProfileMatcher and a map is created + * between that ProfileMatcher's shortName and an instance of the matcher. * - *

Note: The class must be injectable by Guice. + *

+ * Note: The class must be injectable by Guice. * * @param injector * @return A mapping of ProfileMatchers * @throws IOException - * @throws IllegalAccessException - * @throws InstantiationException + * @throws IllegalAccessException + * @throws InstantiationException */ @Provides Map getMatchers() throws IOException, InstantiationException, IllegalAccessException { ClassPath classpath = ClassPath.from(getClass().getClassLoader()); Map matcherMap = new HashMap<>(); - for (ClassPath.ClassInfo info: classpath.getTopLevelClasses(matcherPackage)) { + for (ClassPath.ClassInfo info : classpath.getTopLevelClasses(matcherPackage)) { Class clazz = info.load(); - if (!Modifier.isAbstract(clazz.getModifiers()) && - ProfileMatcher.class.isAssignableFrom(info.load())) { - + if (!Modifier.isAbstract(clazz.getModifiers()) && ProfileMatcher.class.isAssignableFrom(info.load())) { + ProfileMatcher matcher = (ProfileMatcher) clazz.newInstance(); matcherMap.put(matcher.getShortName(), matcher); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesDataTsvs.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesDataTsvs.java similarity index 65% rename from owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesDataTsvs.java rename to owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesDataTsvs.java index c576ce6..b1d0def 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesDataTsvs.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesDataTsvs.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.owlsim.kb.bindings; +package org.monarchinitiative.owlsim.services.modules.bindings; import java.lang.annotation.Retention; import java.lang.annotation.Target; @@ -9,5 +9,8 @@ import static java.lang.annotation.ElementType.FIELD; import static 
java.lang.annotation.ElementType.METHOD; -@BindingAnnotation @Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME) -public @interface IndicatesDataTsvs {} +@BindingAnnotation +@Target({ FIELD, PARAMETER, METHOD }) +@Retention(RUNTIME) +public @interface IndicatesDataTsvs { +} diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesOwlDataOntologies.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesOwlDataOntologies.java similarity index 64% rename from owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesOwlDataOntologies.java rename to owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesOwlDataOntologies.java index 1993f18..9de8cc0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesOwlDataOntologies.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesOwlDataOntologies.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.owlsim.kb.bindings; +package org.monarchinitiative.owlsim.services.modules.bindings; import java.lang.annotation.Retention; import java.lang.annotation.Target; @@ -9,5 +9,8 @@ import static java.lang.annotation.ElementType.FIELD; import static java.lang.annotation.ElementType.METHOD; -@BindingAnnotation @Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME) -public @interface IndicatesOwlDataOntologies {} +@BindingAnnotation +@Target({ FIELD, PARAMETER, METHOD }) +@Retention(RUNTIME) +public @interface IndicatesOwlDataOntologies { +} diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesOwlOntologies.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesOwlOntologies.java similarity index 65% rename from owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesOwlOntologies.java rename to 
owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesOwlOntologies.java index acb815f..97a432e 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesOwlOntologies.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesOwlOntologies.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.owlsim.kb.bindings; +package org.monarchinitiative.owlsim.services.modules.bindings; import java.lang.annotation.Retention; import java.lang.annotation.Target; @@ -9,5 +9,8 @@ import static java.lang.annotation.ElementType.FIELD; import static java.lang.annotation.ElementType.METHOD; -@BindingAnnotation @Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME) -public @interface IndicatesOwlOntologies {} +@BindingAnnotation +@Target({ FIELD, PARAMETER, METHOD }) +@Retention(RUNTIME) +public @interface IndicatesOwlOntologies { +} From d436fdf07a08ef84eeafa0339245b3f03648bbff Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Tue, 7 Mar 2017 09:42:29 -0800 Subject: [PATCH 6/7] typos --- .../matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java | 2 +- .../owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java index d40c7d3..5c379b2 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java @@ -81,7 +81,7 @@ public void testExamplePositiveOnly() throws Exception { } @Test - public void testFrequencyWare() throws Exception { + public void 
testFrequencyAware() throws Exception { loadSimplePhenoWithFrequency(); //LOG.info("INDS="+kb.getIndividualIdsInSignature()); ProfileMatcher profileMatcher = createProfileMatcher(kb); diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java index 3183d95..bdb2a72 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java @@ -75,7 +75,7 @@ public void testCompareProfileFile() throws Exception { Set tcids = kb.getClassIds(kb.getDirectTypesBM(j)); ProfileQuery tp = profileMatcher.createProfileQueryFromClasses(tcids, null); - String fn = i.replaceAll(".*/", ""); + String fn = i.replaceAll(".*/", ""); //eval.writeJsonTo("target/pdgm-test-results-"+fn+".json"); Match pairMatch = profileMatcher.compareProfilePair(qp, tp); From 56a4aa2389918418d53af8fea879b42d5615b33f Mon Sep 17 00:00:00 2001 From: Jeremy Nguyen Xuan Date: Thu, 9 Mar 2017 15:30:34 -0800 Subject: [PATCH 7/7] [#62] we still need the inject indicator for guice. We scan the matcher package through java reflection and let the injector construct the objects.
--- .../owlsim/compute/matcher/ProfileMatcher.java | 2 -- .../AbstractSemanticSimilarityProfileMatcher.java | 2 ++ .../impl/BayesianNetworkProfileMatcher.java | 3 +++ .../matcher/impl/GridNegatedProfileMatcher.java | 3 +++ .../compute/matcher/impl/GridProfileMatcher.java | 3 +++ .../impl/JaccardSimilarityProfileMatcher.java | 3 +++ ...InformationContentSimilarityProfileMatcher.java | 3 +++ ...veBayesFixedWeightThreeStateProfileMatcher.java | 3 +++ ...FixedWeightTwoStateNoBlanketProfileMatcher.java | 3 +++ ...aiveBayesFixedWeightTwoStateProfileMatcher.java | 3 +++ .../NaiveBayesVariableWeightProfileMatcher.java | 3 +++ .../matcher/impl/PhenodigmICProfileMatcher.java | 3 +++ .../ThreeStateBayesianNetworkProfileMatcher.java | 3 +++ .../services/modules/KnowledgeBaseModule.java | 25 ++++++++++++++++++++++ 14 files changed, 60 insertions(+), 2 deletions(-) diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java index 52b58cb..a710112 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java @@ -111,6 +111,4 @@ public ProfileQuery createProfileQueryFromClasses(Set classIds, */ BMKnowledgeBase getKnowledgeBase(); - - } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java index eb042fa..0b8f0ca 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java @@ -1,5 +1,7 @@ package 
org.monarchinitiative.owlsim.compute.matcher.impl; +import javax.inject.Inject; + import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator; import org.monarchinitiative.owlsim.compute.mica.impl.MostInformativeCommonAncestorCalculatorImpl; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java index ee22a60..580dd3b 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java @@ -5,6 +5,8 @@ import java.util.Map; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.cpt.ConditionalProbabilityIndex; import org.monarchinitiative.owlsim.compute.cpt.IncoherentStateException; @@ -101,6 +103,7 @@ private Calculator[] calculatorCache; private Double[][] targetClassProbabilityCache; + @Inject private BayesianNetworkProfileMatcher(BMKnowledgeBase kb) { super(kb); int N = kb.getIndividualIdsInSignature().size(); diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridNegatedProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridNegatedProfileMatcher.java index 04b85c3..0a693e4 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridNegatedProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridNegatedProfileMatcher.java @@ -1,5 +1,7 @@ package org.monarchinitiative.owlsim.compute.matcher.impl; +import javax.inject.Inject; + import org.apache.log4j.Logger; import 
org.monarchinitiative.owlsim.compute.matcher.NegationAwareProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -22,6 +24,7 @@ /** * @param kb */ + @Inject public GridNegatedProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridProfileMatcher.java index c2f4448..499f58e 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridProfileMatcher.java @@ -3,6 +3,8 @@ import java.util.List; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator.ClassInformationContentPair; @@ -32,6 +34,7 @@ /** * @param kb */ + @Inject public GridProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/JaccardSimilarityProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/JaccardSimilarityProfileMatcher.java index cdca4a7..a589c89 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/JaccardSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/JaccardSimilarityProfileMatcher.java @@ -2,6 +2,8 @@ import java.util.List; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -26,6 +28,7 @@ /** * @param kb */ + @Inject public JaccardSimilarityProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git 
a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java index c14bec9..a1f59ce 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java @@ -2,6 +2,8 @@ import java.util.List; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator.ClassInformationContentPair; @@ -28,6 +30,7 @@ /** * @param kb */ + @Inject private MaximumInformationContentSimilarityProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java index bb3b7d2..7f13bc0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java @@ -4,6 +4,8 @@ import java.util.List; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.NegationAwareProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -29,6 +31,7 @@ private Logger LOG = Logger.getLogger(NaiveBayesFixedWeightThreeStateProfileMatcher.class); + @Inject private 
NaiveBayesFixedWeightThreeStateProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher.java index 009b47c..1518072 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher.java @@ -1,5 +1,7 @@ package org.monarchinitiative.owlsim.compute.matcher.impl; +import javax.inject.Inject; + import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; /** @@ -8,6 +10,7 @@ */ public class NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher extends NaiveBayesFixedWeightTwoStateProfileMatcher { + @Inject private NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java index 392a021..31fbc80 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java @@ -7,6 +7,8 @@ import java.util.Map; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -76,6 +78,7 @@ public WeightedTypesBM(EWAHCompressedBitmap typesBM, Double weight) { // for maps a pair of 
(Individual, InterpretationIndex) to a set of inferred (self, direct, indirect) types private Map> individualToInterpretationToTypesBM = new HashMap<>(); + @Inject protected NaiveBayesFixedWeightTwoStateProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java index a06d0ac..8084cc0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java @@ -3,6 +3,8 @@ import java.util.List; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -34,6 +36,7 @@ /** * @param kb */ + @Inject public NaiveBayesVariableWeightProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/PhenodigmICProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/PhenodigmICProfileMatcher.java index 8d3f8f0..62489a6 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/PhenodigmICProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/PhenodigmICProfileMatcher.java @@ -3,6 +3,8 @@ import java.util.List; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator.ClassInformationContentPair; @@ -32,6 +34,7 @@ /** * @param kb */ + 
@Inject public PhenodigmICProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java index 2efe966..e7c207d 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java @@ -6,6 +6,8 @@ import java.util.Map; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.cpt.IncoherentStateException; import org.monarchinitiative.owlsim.compute.cpt.impl.NodeProbabilities; @@ -35,6 +37,7 @@ private ThreeStateConditionalProbabilityIndex cpi = null; private Map targetToQueryCache; + @Inject private ThreeStateBayesianNetworkProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java index 429aa97..e6b76c7 100644 --- a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java @@ -14,6 +14,11 @@ import javax.inject.Singleton; import org.apache.commons.validator.routines.UrlValidator; +import org.monarchinitiative.owlsim.compute.classmatch.ClassMatcher; +import org.monarchinitiative.owlsim.compute.enrich.impl.HypergeometricEnrichmentEngine; +import org.monarchinitiative.owlsim.compute.matcher.impl.BayesianNetworkProfileMatcher; +import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator; 
+import org.monarchinitiative.owlsim.compute.mica.impl.MostInformativeCommonAncestorCalculatorImpl; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.kb.impl.BMKnowledgeBaseOWLAPIImpl; import org.monarchinitiative.owlsim.services.modules.bindings.IndicatesDataTsvs; @@ -123,5 +128,25 @@ OWLOntology getDataTsvs(OWLOntologyManager manager) throws OWLOntologyCreationException, FileNotFoundException, IOException { return mergeOntologies(manager, dataTsvs); } + + @Provides + MostInformativeCommonAncestorCalculator getMostInformativeCommonAncestorCalculator(BMKnowledgeBase knowledgeBase) { + return new MostInformativeCommonAncestorCalculatorImpl(knowledgeBase); + } + + @Provides + HypergeometricEnrichmentEngine getHypergeometricEnrichmentEngine(BMKnowledgeBase knowledgeBase) { + return new HypergeometricEnrichmentEngine(knowledgeBase); + } + + @Provides + BayesianNetworkProfileMatcher getBayesianNetworkProfileMatcher(BMKnowledgeBase knowledgeBase) { + return BayesianNetworkProfileMatcher.create(knowledgeBase); + } + + @Provides + ClassMatcher getClassMatcher(BMKnowledgeBase knowledgeBase) { + return new ClassMatcher(knowledgeBase); + } }