Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

Implemented a simple boolean matcher. #74

Open
wants to merge 1 commit into
from
Jump to file or symbol
Failed to load files and symbols.
+205 −5
Split
@@ -27,8 +27,8 @@ public ClassMatcher(BMKnowledgeBase kb) {
/**
* Find the best match for every class in qOnt, where the best match is a class in tOnt
*
- * @param qOnt
- * @param tOnt
+ * @param qOnt - ontology prefix
+ * @param tOnt - ontology prefix
* @return list of matches
*/
public List<SimpleClassMatch> matchOntologies(String qOnt, String tOnt) {
@@ -37,8 +37,17 @@ public ClassMatcher(BMKnowledgeBase kb) {
return matchClassSets(qids, tids);
}
+ /**
+ * Find best matches for all class combos {qid1, ...} x {tid1, ...}
+ *
+ * @param qids - classes
+ * @param tids - classes
+ * @return list of matches
+ */
public List<SimpleClassMatch> matchClassSets(Set<String> qids, Set<String> tids) {
ArrayList<SimpleClassMatch> matches = new ArrayList<>();
+
+ // TODO: consider optimization, by first grouping by system
for (String q : qids) {
matches.add(getBestMatch(q, tids));
}
@@ -58,6 +67,8 @@ private SimpleClassMatch getBestMatch(String q, Set<String> tids) {
bestEqScore = eqScore;
best = t;
}
+ if (bestEqScore >= 1.0)
+ break;
}
EWAHCompressedBitmap tbm = kb.getSuperClassesBM(best);
@@ -0,0 +1,90 @@
+package org.monarchinitiative.owlsim.compute.matcher.impl;
+
+import java.util.List;
+
+import javax.inject.Inject;
+
+import org.apache.log4j.Logger;
+import org.monarchinitiative.owlsim.compute.matcher.NegationAwareProfileMatcher;
+import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher;
+import org.monarchinitiative.owlsim.kb.BMKnowledgeBase;
+import org.monarchinitiative.owlsim.kb.filter.UnknownFilterException;
+import org.monarchinitiative.owlsim.model.match.MatchSet;
+import org.monarchinitiative.owlsim.model.match.ProfileQuery;
+import org.monarchinitiative.owlsim.model.match.QueryWithNegation;
+import org.monarchinitiative.owlsim.model.match.impl.MatchSetImpl;
+
+import com.googlecode.javaewah.EWAHCompressedBitmap;
+
+/**
+ * Implements a standard boolean query: an individual is returned only if
+ * its type bitmap contains every class of the query profile and, when the
+ * query carries negated classes, none of those negated classes.
+ *
+ * @author cjm
+ *
+ */
+public class BooleanProfileMatcher extends AbstractProfileMatcher implements NegationAwareProfileMatcher {
+
+    // One shared, immutable logger for the class rather than one per instance.
+    private static final Logger LOG = Logger.getLogger(BooleanProfileMatcher.class);
+
+    /**
+     * @param kb knowledge base passed on to the parent matcher
+     */
+    @Inject
+    public BooleanProfileMatcher(BMKnowledgeBase kb) {
+        super(kb);
+    }
+
+    /**
+     * Static factory, equivalent to calling the constructor.
+     *
+     * @param kb knowledge base passed on to the constructor
+     * @return new instance
+     */
+    public static ProfileMatcher create(BMKnowledgeBase kb) {
+        return new BooleanProfileMatcher(kb);
+    }
+
+    /**
+     * @return the fixed short name of this matcher, "boolean"
+     */
+    @Override
+    public String getShortName() {
+        return "boolean";
+    }
+
+    /**
+     * Collects every filtered individual that satisfies the query exactly.
+     * <p>
+     * An individual is added when the intersection of the query bitmap and
+     * the individual's type bitmap has the same cardinality as the query
+     * bitmap (i.e. every query bit is set on the individual) and, if the
+     * query has a non-empty negated bitmap, that negated bitmap shares no
+     * bit with the individual's types. Every match is created with the
+     * constant value 1; no graded score is computed.
+     *
+     * @param q query; negation is honoured only when q is a QueryWithNegation
+     * @return sorted match set holding the individuals that satisfy the query
+     * @throws UnknownFilterException
+     */
+    public MatchSet findMatchProfileImpl(ProfileQuery q) throws UnknownFilterException {
+
+        EWAHCompressedBitmap queryProfileBM = getDirectProfileBM(q);
+        EWAHCompressedBitmap negatedQueryProfileBM = null;
+        boolean hasNegationQuery = false;
+        if (q instanceof QueryWithNegation) {
+            negatedQueryProfileBM = getDirectNegatedProfileBM((QueryWithNegation) q);
+            // An empty negated bitmap is treated the same as no negation at all.
+            hasNegationQuery = negatedQueryProfileBM.cardinality() > 0;
+        }
+
+        // TODO (left open by the original author; no detail given)
+        MatchSet mp = MatchSetImpl.create(q);
+        int qcard = queryProfileBM.cardinality();
+        List<String> indIds = getFilteredIndividualIds(q.getFilter());
+        for (String itemId : indIds) {
+            EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId);
+
+            // Positive part: all query bits must be present on the target.
+            if (queryProfileBM.andCardinality(targetProfileBM) != qcard) {
+                continue;
+            }
+            // Negative part: no negated query bit may be present on the target.
+            if (hasNegationQuery
+                    && negatedQueryProfileBM.andCardinality(targetProfileBM) != 0) {
+                continue;
+            }
+
+            String label = knowledgeBase.getLabelMapper().getArbitraryLabel(itemId);
+            mp.add(createMatch(itemId, label, 1));
+        }
+        mp.sortMatches();
+        return mp;
+    }
+
+}
@@ -78,6 +78,8 @@ public boolean evaluateTestQuery(ProfileMatcher profileMatcher, TestQuery tq) th
if (jsonWriter != null) {
LOG.info("Writing MatchSet using "+jsonWriter+" results will appear in "+jsonWriter);
+ jsonWriter.write(mp.getMatches().get(0));
+ LOG.info("MATCHES:"+mp);
jsonWriter.write(mp);
}
@@ -27,4 +27,14 @@ public TestQuery(ProfileQuery query, String expectedId, int maxRank) {
this.expectedId = expectedId;
this.maxRank = maxRank;
}
+ /* (non-Javadoc)
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString() {
+ return "TestQuery [query=" + query + ", expectedId=" + expectedId
+ + ", maxRank=" + maxRank + ", matchSet=" + matchSet + "]";
+ }
+
+
}
@@ -23,7 +23,8 @@
*
*/
public class MatchSetImpl implements MatchSet {
- private ProfileQuery query;
+
+ private ProfileQuery query;
private List<Match> matches; // TODO - make this neutral
ExecutionMetadata executionMetadata;
MethodMetadata methodMetadata;
@@ -210,8 +211,10 @@ public String toString() {
/**
 * Walks over this match set's matches and stores, as each match's
 * significance, the value returned by TestUtils.tTest for the match score
 * against the supplied background statistics.
 *
 * @param background statistics the scores are tested against
 */
public void calculateMatchSignificance(DescriptiveStatistics background) {
for (Match m : this.matches) {
- double p = TestUtils.tTest(m.getScore(), background);
- m.setSignificance(p);
+ if (background.getN() > 1) { // added guard: the test only runs when the background holds more than one value; otherwise the match's significance is left untouched
+ double p = TestUtils.tTest(m.getScore(), background); // NOTE(review): presumably a p-value, going by the variable name - confirm against TestUtils
+ m.setSignificance(p);
+ }
}
}
@@ -192,6 +192,17 @@ protected boolean isRankedLast(String matchId, MatchSet matchSet) {
return true;
}
+ /**
+  * Checks that no match in the given match set carries the given id.
+  *
+  * @param matchId id to look for
+  * @param matchSet match set whose matches are scanned
+  * @return true if no match has a match id equal to matchId
+  */
+ protected boolean isNotInMatchSet(String matchId, MatchSet matchSet) {
+     boolean found = false;
+     for (Match candidate : matchSet.getMatches()) {
+         if (candidate.getMatchId().equals(matchId)) {
+             // First hit settles the answer; no need to scan the rest.
+             found = true;
+             break;
+         }
+     }
+     return !found;
+ }
+
+
+
protected boolean isRankedAt(String matchId, MatchSet matchSet, int expectedRank) {
int matchRank = 0;
for (Match m : matchSet.getMatches()) {
@@ -0,0 +1,73 @@
+package org.monarchinitiative.owlsim.compute.matcher;
+
+import org.apache.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Test;
+import org.monarchinitiative.owlsim.compute.matcher.impl.BooleanProfileMatcher;
+import org.monarchinitiative.owlsim.eval.TestQuery;
+import org.monarchinitiative.owlsim.kb.BMKnowledgeBase;
+import org.monarchinitiative.owlsim.model.match.ProfileQuery;
+
+/**
+ * Tests for BooleanProfileMatcher, run against the knowledge base loaded by
+ * loadSimplePhenoWithNegation().
+ */
+public class BooleanProfileMatcherTest extends AbstractProfileMatcherTest {
+
+    // One shared, immutable logger for the class rather than one per instance.
+    private static final Logger LOG = Logger.getLogger(BooleanProfileMatcherTest.class);
+
+    /**
+     * @param kb knowledge base to match against
+     * @return a boolean profile matcher over kb
+     */
+    protected ProfileMatcher createProfileMatcher(BMKnowledgeBase kb) {
+        return BooleanProfileMatcher.create(kb);
+    }
+
+    /**
+     * Uses each individual (except two skipped ones) as its own query,
+     * writes the results to JSON, and checks for two specific queries that
+     * a given individual is absent from the returned match set.
+     */
+    @Test
+    public void testBoolean() throws Exception {
+        loadSimplePhenoWithNegation();
+        ProfileMatcher profileMatcher = createProfileMatcher(kb);
+
+        int nOk = 0;
+        for (String i : kb.getIndividualIdsInSignature()) {
+            LOG.info("I: "+i);
+            // These two individuals are not used as queries in this test.
+            if (i.equals("http://x.org/ind-no-brain-phenotype")
+                    || i.equals("http://x.org/ind-unstated-phenotype")) {
+                continue;
+            }
+            ProfileQuery pq = profileMatcher.createProfileQuery(i);
+            TestQuery tq = new TestQuery(pq, i, 1); // self should always be ranked first
+            String fn = i.replaceAll(".*/", "");
+            eval.writeJsonTo("target/boolean-test-results-"+fn+".json");
+
+            LOG.info("Evaluating for "+i);
+            // NOTE(review): the boolean result of evaluateTestQuery is not
+            // asserted here (the author left that assertion disabled) -
+            // confirm whether that is intentional.
+            eval.evaluateTestQuery(profileMatcher, tq);
+
+            if (i.equals("http://x.org/ind-dec-all")) {
+                Assert.assertTrue(isNotInMatchSet("http://x.org/ind-unstated-phenotype", tq.matchSet));
+                nOk++;
+            }
+            if (i.equals("http://x.org/ind-small-heart-big-brain")) {
+                Assert.assertTrue(isNotInMatchSet("http://x.org/ind-bone", tq.matchSet));
+                nOk++;
+            }
+        }
+        // Both targeted individuals must have been seen, otherwise the
+        // absence checks above never ran.
+        Assert.assertEquals(2, nOk);
+    }
+
+    /**
+     * Queries with a single individual, asserts the evaluation succeeds and
+     * that one specific other individual is absent from the match set.
+     */
+    @Test
+    public void testExampleWithNegation() throws Exception {
+        loadSimplePhenoWithNegation();
+        ProfileMatcher profileMatcher = createProfileMatcher(kb);
+
+        String i = "http://x.org/ind-small-heart-big-brain";
+
+        ProfileQuery pq = profileMatcher.createProfileQuery(i);
+        TestQuery tq = new TestQuery(pq, i, 1); // self should always be ranked first
+        String fn = i.replaceAll(".*/", "");
+        eval.writeJsonTo("target/boolean-extra-test-results-"+fn+".json");
+        Assert.assertTrue(eval.evaluateTestQuery(profileMatcher, tq));
+
+        Assert.assertTrue(isNotInMatchSet("http://x.org/ind-no-brain-phenotype", tq.matchSet));
+    }
+}