diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcher.java index e3331c0..f3c784a 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcher.java @@ -3,9 +3,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; - -import javax.inject.Inject; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.kb.LabelMapper; @@ -20,65 +17,57 @@ */ public class ClassMatcher { - BMKnowledgeBase kb; - - - @Inject - public ClassMatcher(BMKnowledgeBase kb) { - super(); - this.kb = kb; - } + BMKnowledgeBase kb; + + public ClassMatcher(BMKnowledgeBase kb) { + super(); + this.kb = kb; + } + + /** + * Find best match for every class in ont1, where the best match is in ont2 + * + * @param qOnt + * @param tOnt + * @return list of matches + */ + public List matchOntologies(String qOnt, String tOnt) { + Set qids = kb.getClassIdsByOntology(qOnt); + Set tids = kb.getClassIdsByOntology(tOnt); + return matchClassSets(qids, tids); + } + + public List matchClassSets(Set qids, Set tids) { + ArrayList matches = new ArrayList<>(); + for (String q : qids) { + matches.add(getBestMatch(q, tids)); + } + return matches; + } - /** - * Find best match for every class in ont1, where the best - * match is in ont2 - * - * @param qOnt - * @param tOnt - * @return list of matches - */ - public List matchOntologies(String qOnt, String tOnt) { - Set qids = kb.getClassIdsByOntology(qOnt); - Set tids = kb.getClassIdsByOntology(tOnt); - return matchClassSets(qids, tids); - } + private SimpleClassMatch getBestMatch(String q, Set tids) { + EWAHCompressedBitmap qbm = kb.getSuperClassesBM(q); + double bestEqScore = 0.0; + String best = null; + for (String t : tids) { + EWAHCompressedBitmap tbm = kb.getSuperClassesBM(t); + int numInQueryAndInTarget = qbm.andCardinality(tbm); + int numInQueryOrInTarget = qbm.orCardinality(tbm); + double eqScore = numInQueryAndInTarget / (double) numInQueryOrInTarget; + if (eqScore > bestEqScore) { + bestEqScore = eqScore; + best = t; + } + } - public List matchClassSets(Set qids, - Set tids) { - ArrayList matches = new ArrayList<>(); - for (String q : qids) { - matches.add(getBestMatch(q, tids)); - } - return matches; - } + EWAHCompressedBitmap tbm = kb.getSuperClassesBM(best); + int numInQueryAndInTarget = qbm.andCardinality(tbm); + double subClassScore = numInQueryAndInTarget / (double) qbm.cardinality(); + double superClassScore = numInQueryAndInTarget / (double) tbm.cardinality(); - private SimpleClassMatch getBestMatch(String q, Set tids) { - EWAHCompressedBitmap qbm = kb.getSuperClassesBM(q); - double bestEqScore = 0.0; - String best = null; - for (String t : tids) { - EWAHCompressedBitmap tbm = kb.getSuperClassesBM(t); - int numInQueryAndInTarget = qbm.andCardinality(tbm); - int numInQueryOrInTarget = qbm.orCardinality(tbm); - double eqScore = numInQueryAndInTarget / (double) numInQueryOrInTarget; - if (eqScore > bestEqScore) { - bestEqScore = eqScore; - best = t; - } - } - - EWAHCompressedBitmap tbm = kb.getSuperClassesBM(best); - int numInQueryAndInTarget = qbm.andCardinality(tbm); - double subClassScore = numInQueryAndInTarget / (double) qbm.cardinality(); - double superClassScore = numInQueryAndInTarget / (double) tbm.cardinality(); - - LabelMapper lm = kb.getLabelMapper(); - return new SimpleClassMatch(q, best, - lm.getArbitraryLabel(q), - lm.getArbitraryLabel(best), - bestEqScore, - subClassScore, - superClassScore); - } + LabelMapper lm = kb.getLabelMapper(); + return new SimpleClassMatch(q, best, lm.getArbitraryLabel(q), lm.getArbitraryLabel(best), bestEqScore, + subClassScore, superClassScore); + } } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/package-info.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/package-info.java index a1b2a88..2a10b9a 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/package-info.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/package-info.java @@ -1,7 +1,4 @@ /** - * - */ -/** * @author cjm * */ diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/cpt/SimplePairwiseConditionalProbabilityIndex.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/cpt/SimplePairwiseConditionalProbabilityIndex.java index a40717c..dc49f02 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/cpt/SimplePairwiseConditionalProbabilityIndex.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/cpt/SimplePairwiseConditionalProbabilityIndex.java @@ -1,7 +1,5 @@ package org.monarchinitiative.owlsim.compute.cpt; -import java.util.Map; - import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; /** diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentConfig.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentConfig.java index 3d02f07..276d490 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentConfig.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentConfig.java @@ -1,7 +1,5 @@ package org.monarchinitiative.owlsim.compute.enrich; -import javax.inject.Inject; - public class EnrichmentConfig { public enum AnalysisType {OVER, UNDER, BOTH}; diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/impl/HypergeometricEnrichmentEngine.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/impl/HypergeometricEnrichmentEngine.java index 5478031..b8fcc19 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/impl/HypergeometricEnrichmentEngine.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/impl/HypergeometricEnrichmentEngine.java @@ -6,16 +6,14 @@ import java.util.Set; import java.util.stream.Collectors; -import javax.inject.Inject; - import org.apache.commons.math3.distribution.HypergeometricDistribution; import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.enrich.EnrichmentConfig; +import org.monarchinitiative.owlsim.compute.enrich.EnrichmentConfig.AnalysisType; import org.monarchinitiative.owlsim.compute.enrich.EnrichmentEngine; import org.monarchinitiative.owlsim.compute.enrich.EnrichmentQuery; import org.monarchinitiative.owlsim.compute.enrich.EnrichmentResult; import org.monarchinitiative.owlsim.compute.enrich.EnrichmentResultSet; -import org.monarchinitiative.owlsim.compute.enrich.EnrichmentConfig.AnalysisType; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.kb.ewah.EWAHUtils; import org.monarchinitiative.owlsim.kb.filter.Filter; @@ -44,7 +42,6 @@ protected EnrichmentConfig enrichmentConfig = new EnrichmentConfig(); - @Inject public HypergeometricEnrichmentEngine(BMKnowledgeBase kb) { super(); this.kb = kb; diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java index 52b58cb..a710112 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java @@ -111,6 +111,4 @@ public ProfileQuery createProfileQueryFromClasses(Set classIds, */ BMKnowledgeBase getKnowledgeBase(); - - } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractProfileMatcher.java index beabc70..d0b15e9 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractProfileMatcher.java @@ -4,8 +4,6 @@ import java.util.List; import java.util.Set; -import javax.inject.Inject; - import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; import org.apache.commons.math3.stat.inference.TestUtils; import org.apache.log4j.Logger; @@ -32,246 +30,235 @@ import com.googlecode.javaewah.EWAHCompressedBitmap; /** - * common methods and variables for all ProfileMatchers + * common methods and variables for all ProfileMatchers * * @author cjm * */ public abstract class AbstractProfileMatcher implements ProfileMatcher { - private Logger LOG = Logger.getLogger(AbstractProfileMatcher.class); - - protected BMKnowledgeBase knowledgeBase; - private FilterEngine filterEngine; - - - /** - * @param knowledgeBase - */ - @Inject - public AbstractProfileMatcher(BMKnowledgeBase knowledgeBase) { - super(); - this.knowledgeBase = knowledgeBase; - this.filterEngine = FilterEngine.create(knowledgeBase); - } - - /** - * @return ontology interface - */ - public BMKnowledgeBase getKnowledgeBase() { - return knowledgeBase; - } - - - - @Inject - private void setKnowledgeBase(BMKnowledgeBase knowledgeBase) { - this.knowledgeBase = knowledgeBase; - } - - public void precompute() { - } - - /** - * all positive nodes in query plus their ancestors - * - * @param q - * @return - */ - protected EWAHCompressedBitmap getProfileBM(ProfileQuery q) { - return knowledgeBase.getSuperClassesBM(q.getQueryClassIds()); - } - protected EWAHCompressedBitmap getDirectProfileBM(ProfileQuery q) { - Set positions = new HashSet(); - for (String cid : q.getQueryClassIds()) { - positions.add(knowledgeBase.getClassIndex(cid)); - } - return EWAHUtils.convertIndexSetToBitmap(positions); - } - - // given an array of class IDs c1...cn, return an array S1...Sn, - // where Si is the set of superclasses (direct and indirect) of ci, - // stored as a bitmap - protected EWAHCompressedBitmap[] getProfileSetBM(String[] qcids) { - EWAHCompressedBitmap[] bms = new EWAHCompressedBitmap[qcids.length]; - for (int i=0; i bits = new HashSet(); - for (String id : nq.getQueryNegatedClassIds()) { - int ci = knowledgeBase.getClassIndex(id); - bits.addAll( knowledgeBase.getSubClasses(ci).getPositions() ); - } - return EWAHUtils.convertIndexSetToBitmap(bits); - } - - protected EWAHCompressedBitmap getDirectNegatedProfileBM(QueryWithNegation q) { - Set bits = new HashSet(); - // TODO: less dumb implementation... - for (String id : q.getQueryNegatedClassIds()) { - int ci = knowledgeBase.getClassIndex(id); - bits.add(ci); - } - return EWAHUtils.convertIndexSetToBitmap(bits); - } - - protected Match createMatch(String matchId, String matchLabel, double s) { - return MatchImpl.create(matchId, matchLabel, s); - } - - /** - * @param filter - * @return list of individuals that satisfy filter - * @throws UnknownFilterException - */ - protected List getFilteredIndividualIds(Filter filter) throws UnknownFilterException { - return filterEngine.applyFilter(filter); - } - - /* (non-Javadoc) - * @see org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher#createProfileQuery(java.lang.String) - */ - public ProfileQuery createProfileQuery(String individualId) { - return createProfileQuery(individualId, null); - } - - public ProfileQuery createPositiveProfileQuery(String individualId) { - return createProfileQuery(individualId, false); - } - - public ProfileQuery createProfileQueryWithNegation(String individualId) { - return createProfileQuery(individualId, true); - } - - public ProfileQuery createProfileQuery(String individualId, Boolean isUseNegation) { - Preconditions.checkNotNull(individualId); - EWAHCompressedBitmap bmi = knowledgeBase.getDirectTypesBM(individualId); - EWAHCompressedBitmap nbmi = knowledgeBase.getDirectNegatedTypesBM(individualId); - Set qcids = knowledgeBase.getClassIds(bmi); - Set nqcids = knowledgeBase.getClassIds(nbmi); - ProfileQuery q; - if (isUseNegation == null) { - if (nqcids.size() == 0) { - q = ProfileQueryImpl.create(qcids); - } - else { - q = QueryWithNegationImpl.create(qcids, nqcids); - } - } - else { - if (isUseNegation) { - q = QueryWithNegationImpl.create(qcids, nqcids); - } - else { - q = ProfileQueryImpl.create(qcids); - } - } - return q; - } - - public ProfileQuery createProfileQueryFromClasses( - Set qcids, Set nqcids) { - ProfileQuery q; - if (nqcids != null && nqcids.size() == 0) { - q = ProfileQueryImpl.create(qcids); - } - else { - q = QueryWithNegationImpl.create(qcids, nqcids); - } - return q; - } - - public MatchSet findMatchProfile(String individualId) throws IncoherentStateException { - ProfileQuery q = createProfileQuery(individualId); - return findMatchProfile(q); - } - - - public MatchSet findMatchProfile(ProfileQuery q) throws IncoherentStateException { - MatchSet ms = findMatchProfileAll(q); - int limit = q.getLimit() == null ? 200 : q.getLimit(); - if (limit > -1) { - ms.truncate(limit); - } - return ms; - } - - public MatchSet findMatchProfile(ProfileQuery q, double alpha) throws IncoherentStateException { - MatchSet ms = findMatchProfileAll(q); - - //use all matches as "background" - //TODO this is a naive assumption, needs refactor - DescriptiveStatistics ds = ms.getScores(); - MatchSet significantMatchingSet = MatchSetImpl.create(q); - - for (Match m : ms.getMatches()) { - double p = TestUtils.tTest(m.getScore(), ds); - if (p < alpha) { - m.setSignificance(p); - significantMatchingSet.add(m); - } - } - return ms; - } - - // additional layer of indirection above Impl, adds standard metadata - private MatchSet findMatchProfileAll(ProfileQuery q) throws IncoherentStateException { - long t1 = System.currentTimeMillis(); - MatchSet ms = findMatchProfileImpl(q); // implementing class - long t2 = System.currentTimeMillis(); - ms.setExecutionMetadata(ExecutionMetadataImpl.create(t1, t2)); - LOG.info("t(ms)="+ms.getExecutionMetadata().getDuration()); - MethodMetadata mmd = new MethodMetadata(); - mmd.methodName = getShortName(); - ms.setMethodMetadata(mmd); - return ms; - } - - public Match compareProfilePair(ProfileQuery q, ProfileQuery t) throws UnknownFilterException, IncoherentStateException { - AnonIndividualFilter filter = new AnonIndividualFilter(t); - q.setFilter(filter); - MatchSet matchSet = findMatchProfile(q); - return matchSet.getMatches().get(0); - } - - - // handling of anonymous individuals - - private boolean isAnonymousIndividual(String individualId) { - return individualId.startsWith(AnonIndividualFilter.PREFIX); - } - - - protected EWAHCompressedBitmap getDirectTypesBM(String individualId) { - if (isAnonymousIndividual(individualId)) { - Set cids = - AnonIndividualFilter.getClassIdsFromExpression(individualId); - return knowledgeBase.getClassesBM(cids); - } - else - return knowledgeBase.getDirectTypesBM(individualId); - } - protected EWAHCompressedBitmap getTypesBM(String individualId) { - if (isAnonymousIndividual(individualId)) { - Set cids = - AnonIndividualFilter.getClassIdsFromExpression(individualId); - return knowledgeBase.getSuperClassesBM(cids); - } - else - return knowledgeBase.getTypesBM(individualId); - } - - protected abstract MatchSet findMatchProfileImpl(ProfileQuery q) throws IncoherentStateException; + private Logger LOG = Logger.getLogger(AbstractProfileMatcher.class); + + protected BMKnowledgeBase knowledgeBase; + private FilterEngine filterEngine; + + /** + * @param knowledgeBase + */ + public AbstractProfileMatcher(BMKnowledgeBase knowledgeBase) { + super(); + this.knowledgeBase = knowledgeBase; + this.filterEngine = FilterEngine.create(knowledgeBase); + } + + /** + * @return ontology interface + */ + public BMKnowledgeBase getKnowledgeBase() { + return knowledgeBase; + } + + private void setKnowledgeBase(BMKnowledgeBase knowledgeBase) { + this.knowledgeBase = knowledgeBase; + } + + public void precompute() { + } + + /** + * all positive nodes in query plus their ancestors + * + * @param q + * @return + */ + protected EWAHCompressedBitmap getProfileBM(ProfileQuery q) { + return knowledgeBase.getSuperClassesBM(q.getQueryClassIds()); + } + + protected EWAHCompressedBitmap getDirectProfileBM(ProfileQuery q) { + Set positions = new HashSet(); + for (String cid : q.getQueryClassIds()) { + positions.add(knowledgeBase.getClassIndex(cid)); + } + return EWAHUtils.convertIndexSetToBitmap(positions); + } + + // given an array of class IDs c1...cn, return an array S1...Sn, + // where Si is the set of superclasses (direct and indirect) of ci, + // stored as a bitmap + protected EWAHCompressedBitmap[] getProfileSetBM(String[] qcids) { + EWAHCompressedBitmap[] bms = new EWAHCompressedBitmap[qcids.length]; + for (int i = 0; i < qcids.length; i++) { + String qc = qcids[i]; + Preconditions.checkNotNull(qc); + Preconditions.checkNotNull(knowledgeBase.getClassIndex(qc)); + bms[i] = knowledgeBase.getSuperClassesBM(qc); + } + return bms; + } + + // a negated profile implicitly includes subclasses + protected EWAHCompressedBitmap getNegatedProfileBM(ProfileQuery q) { + if (!(q instanceof QueryWithNegation)) { + return new EWAHCompressedBitmap(); + } + QueryWithNegation nq = (QueryWithNegation) q; + Set bits = new HashSet(); + for (String id : nq.getQueryNegatedClassIds()) { + int ci = knowledgeBase.getClassIndex(id); + bits.addAll(knowledgeBase.getSubClasses(ci).getPositions()); + } + return EWAHUtils.convertIndexSetToBitmap(bits); + } + + protected EWAHCompressedBitmap getDirectNegatedProfileBM(QueryWithNegation q) { + Set bits = new HashSet(); + // TODO: less dumb implementation... + for (String id : q.getQueryNegatedClassIds()) { + int ci = knowledgeBase.getClassIndex(id); + bits.add(ci); + } + return EWAHUtils.convertIndexSetToBitmap(bits); + } + + protected Match createMatch(String matchId, String matchLabel, double s) { + return MatchImpl.create(matchId, matchLabel, s); + } + + /** + * @param filter + * @return list of individuals that satisfy filter + * @throws UnknownFilterException + */ + protected List getFilteredIndividualIds(Filter filter) throws UnknownFilterException { + return filterEngine.applyFilter(filter); + } + + /* + * (non-Javadoc) + * + * @see org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher# + * createProfileQuery(java.lang.String) + */ + public ProfileQuery createProfileQuery(String individualId) { + return createProfileQuery(individualId, null); + } + + public ProfileQuery createPositiveProfileQuery(String individualId) { + return createProfileQuery(individualId, false); + } + + public ProfileQuery createProfileQueryWithNegation(String individualId) { + return createProfileQuery(individualId, true); + } + + public ProfileQuery createProfileQuery(String individualId, Boolean isUseNegation) { + Preconditions.checkNotNull(individualId); + EWAHCompressedBitmap bmi = knowledgeBase.getDirectTypesBM(individualId); + EWAHCompressedBitmap nbmi = knowledgeBase.getDirectNegatedTypesBM(individualId); + Set qcids = knowledgeBase.getClassIds(bmi); + Set nqcids = knowledgeBase.getClassIds(nbmi); + ProfileQuery q; + if (isUseNegation == null) { + if (nqcids.size() == 0) { + q = ProfileQueryImpl.create(qcids); + } else { + q = QueryWithNegationImpl.create(qcids, nqcids); + } + } else { + if (isUseNegation) { + q = QueryWithNegationImpl.create(qcids, nqcids); + } else { + q = ProfileQueryImpl.create(qcids); + } + } + return q; + } + + public ProfileQuery createProfileQueryFromClasses(Set qcids, Set nqcids) { + ProfileQuery q; + if (nqcids != null && nqcids.size() == 0) { + q = ProfileQueryImpl.create(qcids); + } else { + q = QueryWithNegationImpl.create(qcids, nqcids); + } + return q; + } + + public MatchSet findMatchProfile(String individualId) throws IncoherentStateException { + ProfileQuery q = createProfileQuery(individualId); + return findMatchProfile(q); + } + + public MatchSet findMatchProfile(ProfileQuery q) throws IncoherentStateException { + MatchSet ms = findMatchProfileAll(q); + int limit = q.getLimit() == null ? 200 : q.getLimit(); + if (limit > -1) { + ms.truncate(limit); + } + return ms; + } + + public MatchSet findMatchProfile(ProfileQuery q, double alpha) throws IncoherentStateException { + MatchSet ms = findMatchProfileAll(q); + + // use all matches as "background" + // TODO this is a naive assumption, needs refactor + DescriptiveStatistics ds = ms.getScores(); + MatchSet significantMatchingSet = MatchSetImpl.create(q); + + for (Match m : ms.getMatches()) { + double p = TestUtils.tTest(m.getScore(), ds); + if (p < alpha) { + m.setSignificance(p); + significantMatchingSet.add(m); + } + } + return ms; + } + + // additional layer of indirection above Impl, adds standard metadata + private MatchSet findMatchProfileAll(ProfileQuery q) throws IncoherentStateException { + long t1 = System.currentTimeMillis(); + MatchSet ms = findMatchProfileImpl(q); // implementing class + long t2 = System.currentTimeMillis(); + ms.setExecutionMetadata(ExecutionMetadataImpl.create(t1, t2)); + LOG.info("t(ms)=" + ms.getExecutionMetadata().getDuration()); + MethodMetadata mmd = new MethodMetadata(); + mmd.methodName = getShortName(); + ms.setMethodMetadata(mmd); + return ms; + } + + public Match compareProfilePair(ProfileQuery q, ProfileQuery t) + throws UnknownFilterException, IncoherentStateException { + AnonIndividualFilter filter = new AnonIndividualFilter(t); + q.setFilter(filter); + MatchSet matchSet = findMatchProfile(q); + return matchSet.getMatches().get(0); + } + + // handling of anonymous individuals + + private boolean isAnonymousIndividual(String individualId) { + return individualId.startsWith(AnonIndividualFilter.PREFIX); + } + + protected EWAHCompressedBitmap getDirectTypesBM(String individualId) { + if (isAnonymousIndividual(individualId)) { + Set cids = AnonIndividualFilter.getClassIdsFromExpression(individualId); + return knowledgeBase.getClassesBM(cids); + } else + return knowledgeBase.getDirectTypesBM(individualId); + } + + protected EWAHCompressedBitmap getTypesBM(String individualId) { + if (isAnonymousIndividual(individualId)) { + Set cids = AnonIndividualFilter.getClassIdsFromExpression(individualId); + return knowledgeBase.getSuperClassesBM(cids); + } else + return knowledgeBase.getTypesBM(individualId); + } + + protected abstract MatchSet findMatchProfileImpl(ProfileQuery q) throws IncoherentStateException; } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java index 6adf28a..0b8f0ca 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java @@ -7,32 +7,27 @@ import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; /** - * common methods and variables for all ProfileMatcher that - * implement semantic similarity techniques, i.e. those involving a MRCA + * common methods and variables for all ProfileMatcher that implement semantic + * similarity techniques, i.e. those involving a MRCA * * @author cjm * */ public abstract class AbstractSemanticSimilarityProfileMatcher extends AbstractProfileMatcher { - - //private Logger LOG = Logger.getLogger(AbstractSemanticSimilarityProfileMatcher.class); - private MostInformativeCommonAncestorCalculator micaCalculator; - + // private Logger LOG = + // Logger.getLogger(AbstractSemanticSimilarityProfileMatcher.class); + private MostInformativeCommonAncestorCalculator micaCalculator; /** * @param knowledgeBase */ - @Inject // TODO - public AbstractSemanticSimilarityProfileMatcher( - BMKnowledgeBase knowledgeBase) { + public AbstractSemanticSimilarityProfileMatcher(BMKnowledgeBase knowledgeBase) { super(knowledgeBase); micaCalculator = new MostInformativeCommonAncestorCalculatorImpl(knowledgeBase); } - - /** * @return object used for calculation of most informative common ancestors */ @@ -43,10 +38,8 @@ public MostInformativeCommonAncestorCalculator getMicaCalculator() { /** * @param micaCalculator */ - private void setMicaCalculator( - MostInformativeCommonAncestorCalculator micaCalculator) { + private void setMicaCalculator(MostInformativeCommonAncestorCalculator micaCalculator) { this.micaCalculator = micaCalculator; } - } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java index 4aee08b..580dd3b 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java @@ -21,28 +21,31 @@ import com.googlecode.javaewah.EWAHCompressedBitmap; /** - * Calculate probability of observing query (e.g. patient profile) given target as evidence. + * Calculate probability of observing query (e.g. patient profile) given target + * as evidence. * - * This implementation does not explicitly model NOTs, it - * uses a {@link TwoStateConditionalProbabilityIndex}. - * The two states are ON (true/observed) and OFF (unknown/not observed) - * - note the open world assumptions: that the off state means there is no - * information about the truth of the node, it does not mean the node is false. + * This implementation does not explicitly model NOTs, it uses a + * {@link TwoStateConditionalProbabilityIndex}. The two states are ON + * (true/observed) and OFF (unknown/not observed) - note the open world + * assumptions: that the off state means there is no information about the truth + * of the node, it does not mean the node is false. * - * Although we do not model negation as a 3rd state, we still compute on negation, post-hoc, see below. + * Although we do not model negation as a 3rd state, we still compute on + * negation, post-hoc, see below. * *

Calculating probabilities

*

Calculating probabilities for a single query node

* - * Using a {@link TwoStateConditionalProbabilityIndex}, - * probabilities propagate TO a child FROM its parents. + * Using a {@link TwoStateConditionalProbabilityIndex}, probabilities propagate + * TO a child FROM its parents. * * If the query node is ON, and the node is ON in the target, then Pr = 1-fnr; - * otherwise the probability is calculated based on the probability of the parents. + * otherwise the probability is calculated based on the probability of the + * parents. * * - * The probability of a child node being on C=on is dependent on the state of its - * parents; we sum over 2N states + * The probability of a child node being on C=on is dependent on the state of + * its parents; we sum over 2N states * * * @@ -56,28 +59,31 @@ * * * - * For any given query Q=Q1,...Qm, we assume independent probabilities - * and calculate Pr(Q) = Pq(Q1=on,...,Qm=on) + * For any given query Q=Q1,...Qm, we assume independent probabilities and + * calculate Pr(Q) = Pq(Q1=on,...,Qm=on) * *

Negation

* - * Each node can only have two states in this model; the off state can be thought of - * as being the 'unknown' state. We assume an open world assumption. The absence of - * a node in the query should be thought of as 'not observed' rather than 'not'. + * Each node can only have two states in this model; the off state can be + * thought of as being the 'unknown' state. We assume an open world assumption. + * The absence of a node in the query should be thought of as 'not observed' + * rather than 'not'. * - * We still include negation in the calculation; for any negated query node i, we - * calculate Pr(i) = ON, and assign a final probability of 1-fnr (this is the only circumstance - * a fnr can have an effect, since we have the open world model). + * We still include negation in the calculation; for any negated query node i, + * we calculate Pr(i) = ON, and assign a final probability of 1-fnr (this is the + * only circumstance a fnr can have an effect, since we have the open world + * model). * - * Similarly, for any negated target node j, the Pr of any query under this will be 1-fpr + * Similarly, for any negated target node j, the Pr of any query under this will + * be 1-fpr * *

TODOs

* - * Currently this method is too slow to be used for dynamic queries, taking 1-5s per query. - * Some efficiency could be gained by calculating with log-probs. + * Currently this method is too slow to be used for dynamic queries, taking 1-5s + * per query. Some efficiency could be gained by calculating with log-probs. * - * If we cache probabilities per-node for every target, we would gain a lot of speed, - * space = NumClasses x NumTargets + * If we cache probabilities per-node for every target, we would gain a lot of + * speed, space = NumClasses x NumTargets * * * @author cjm @@ -90,18 +96,19 @@ double falseNegativeRate = 0.01; // TODO - do not hardcode double falsePositiveRate = 0.01; // TODO - do not hardcode - ConditionalProbabilityIndex cpi = null; // index of Pr(Node={on,off}|ParentsState) + ConditionalProbabilityIndex cpi = null; // index of + // Pr(Node={on,off}|ParentsState) @Deprecated private Calculator[] calculatorCache; private Double[][] targetClassProbabilityCache; - @Inject + @Inject private BayesianNetworkProfileMatcher(BMKnowledgeBase kb) { super(kb); int N = kb.getIndividualIdsInSignature().size(); calculatorCache = new Calculator[N]; - for (int i=0; i negatedQueryClassIds = null; if (isUseNegation) { LOG.info("Using QueryWithNegation"); - QueryWithNegation nq = (QueryWithNegation)q; + QueryWithNegation nq = (QueryWithNegation) q; negatedQueryProfileBM = getDirectNegatedProfileBM(nq); negatedQueryClassIds = knowledgeBase.getClassIds(negatedQueryProfileBM); - LOG.info("nqp=" + negatedQueryProfileBM+" // "+negatedQueryClassIds); - } - else { + LOG.info("nqp=" + negatedQueryProfileBM + " // " + negatedQueryClassIds); + } else { LOG.info("Not using QueryWithNegation"); } @@ -170,22 +176,22 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { double pvector[] = new double[indIds.size()]; String indArr[] = new String[indIds.size()]; - int n=0; + int n = 0; // TODO - FOR DEBUGGING ONLY - // int nc=0; - // for (String itemId : indIds) { - // int indIx = knowledgeBase.getIndividualIndex(itemId); - // if (targetClassProbabilityCache[indIx] != null) { - // Double[] a = targetClassProbabilityCache[indIx]; - // for (int i=0; i debugMaxP) { debugMaxP = p; } - + if (Double.isNaN(p)) { - LOG.error("NaN for tgt "+itemId); + LOG.error("NaN for tgt " + itemId); } // NEGATION if (negatedQueryProfileBM != null) { double np = 1 - calc.calculateProbability(negatedQueryClassIds); - //LOG.info("Combined Probability = (POS) "+p+" * (NEG) "+np); - p = p*np; + // LOG.info("Combined Probability = (POS) "+p+" * (NEG) "+np); + p = p * np; } pvector[n] = p; indArr[n] = itemId; sumOfProbs += p; n++; - //LOG.info("p for "+itemId+" = "+p); + // LOG.info("p for "+itemId+" = "+p); } if (sumOfProbs == 0.0) { LOG.error("sumOfProds=0.0"); @@ -236,7 +241,7 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { } int tempNumNans = 0; - for (n = 0; n 0) { - LOG.error("#NaNs "+tempNumNans+" / "+pvector.length); - LOG.error("maxPr = "+debugMaxP); + LOG.error("#NaNs " + tempNumNans + " / " + pvector.length); + LOG.error("maxPr = " + debugMaxP); } mp.sortMatches(); return mp; } /** - * We wrap calculation within a class to allow for cacheing relative to - * a particular targetProfile + * We wrap calculation within a class to allow for cacheing relative to a + * particular targetProfile * * @author cjm * @@ -276,17 +281,16 @@ public Calculator(EWAHCompressedBitmap targetProfileBM, EWAHCompressedBitmap neg /** * Top-level call * - * Calculate the probability of all queryClasses being on, - * given the nodes in the target profile are not + * Calculate the probability of all queryClasses being on, given the + * nodes in the target profile are not * - * Note: currently this is asymmetric; ie we do not calculate - * the probability of the target given the query nodes are on; - * this has the effect of penalizing large queries; for a fixed - * query this is not an issue. However, it also does *not* penalize - * broad-spectrum targets. + * Note: currently this is asymmetric; ie we do not calculate the + * probability of the target given the query nodes are on; this has the + * effect of penalizing large queries; for a fixed query this is not an + * issue. However, it also does *not* penalize broad-spectrum targets. * - * This also means the FNR is meaningless, - * unless negation is explicitly used + * This also means the FNR is meaningless, unless negation is explicitly + * used * * @param queryClassIds * @param targetProfileBM @@ -298,23 +302,25 @@ public double calculateProbability(Set queryClassIds) { // treat set of query class Ids as a leaf node that is the // class intersection of all members; ie q1^...^qn // for a class intersection, the CPT is always such that - // Pr=1.0, if all parents=1 - // Pr=0.0 otherwise + // Pr=1.0, if all parents=1 + // Pr=0.0 otherwise for (String queryClassId : queryClassIds) { double p = calculateProbability(queryClassId); if (Double.isNaN(p)) { - LOG.error("NaN for qc="+queryClassId); + LOG.error("NaN for qc=" + queryClassId); } // NEGATION - // the FNR only comes into play if negation is explicitly specified. - // If the query is on but a superclass in the target has been negated, + // the FNR only comes into play if negation is explicitly + // specified. + // If the query is on but a superclass in the target has been + // negated, // we assume the query is a false positive if (negatedTargetProfileBM != null) { if (knowledgeBase.getSuperClassesBM(queryClassId).andCardinality(negatedTargetProfileBM) > 0) { - LOG.info("NEGATIVE EVIDENCE for "+queryClassId); - p *= falsePositiveRate; + LOG.info("NEGATIVE EVIDENCE for " + queryClassId); + p *= falsePositiveRate; } } cump *= p; @@ -323,8 +329,8 @@ public double calculateProbability(Set queryClassIds) { } /** - * probability of queryClass being true, given that all - * nodes in target profile are on + * probability of queryClass being true, given that all nodes in target + * profile are on * * @param queryClassId * @param targetProfileBM @@ -339,53 +345,55 @@ private double calculateProbability(String queryClassId) { /** * Calculate the probability that a node qc is ON. * - * - If this is specified in the query, then a set value is returned (1-FP); - * - If not specified, equal to sum of probabilities of all states of parents - * - * Side effects: caches probability + * - If this is specified in the query, then a set value is returned + * (1-FP); - If not specified, equal to sum of probabilities of all + * states of parents + * + * Side effects: caches probability * * @param qcix * @return Pr(Qi=on|T) */ private double calculateProbability(int qcix) { if (probCache[qcix] != null) { - LOG.debug("Using cached for "+qcix); + LOG.debug("Using cached for " + qcix); return probCache[qcix]; } BMKnowledgeBase kb = getKnowledgeBase(); - LOG.debug("Calculating probability for "+qcix+" ie "+kb.getClassId(qcix)); + LOG.debug("Calculating probability for " + qcix + " ie " + kb.getClassId(qcix)); double probQiGivenT; - // TODO - optimization: determine efficiency of using get(ix) vs other methods + // TODO - optimization: determine efficiency of using get(ix) vs + // other methods if (targetProfileBM.get(qcix)) { LOG.debug("Qi is in target profile"); - probQiGivenT = 1-falsePositiveRate; - } - else { + probQiGivenT = 1 - falsePositiveRate; + } else { // Qi is NOT in target profile; - // Pr(Qi=on | T) = Pr(QiP1=on, QiP2=on, ..|T)Pr(on on...) + Pr(QiP1=off, ...) + // Pr(Qi=on | T) = Pr(QiP1=on, QiP2=on, ..|T)Pr(on on...) + + // Pr(QiP1=off, ...) List pixs = kb.getDirectSuperClassesBM(qcix).getPositions(); double[] parentProbs = new double[pixs.size()]; LOG.debug("calculating probabilities for parents"); - for (int i=0; i qClassIds = q.getQueryClassIds(); int qsize = qClassIds.size(); queryClassArray = qClassIds.toArray(new String[qsize]); EWAHCompressedBitmap queryProfileBMArr[] = getProfileSetBM(queryClassArray); - - MatchSet mp = MatchSetImpl.create(q); - + + MatchSet mp = MatchSetImpl.create(q); + List indIds = getFilteredIndividualIds(q.getFilter()); for (String itemId : indIds) { EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); - //LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); - + // LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); + double score = 0; ClassInformationContentPair[] qmatchArr = new ClassInformationContentPair[qsize]; - for (int j = 0; j indIds = getFilteredIndividualIds(q.getFilter()); for (String itemId : indIds) { EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); - - //LOG.info("TARGET PROFILE for "+itemId+" "+targetProfileBM); + + // LOG.info("TARGET PROFILE for "+itemId+" "+targetProfileBM); int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); int numInQueryOrInTarget = queryProfileBM.orCardinality(targetProfileBM); double j = numInQueryAndInTarget / (double) numInQueryOrInTarget; @@ -75,8 +73,4 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) throws UnknownFilterExcepti return mp; } - - - - } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java index f2cd581..a1f59ce 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java @@ -15,27 +15,26 @@ import com.googlecode.javaewah.EWAHCompressedBitmap; /** - * Given a query profile (a set of classes c1, .., cn) return a match profile, - * where each candidate individual is assigned a maximum Information Content score + * Given a query profile (a set of classes c1, .., cn) return a match profile, + * where each candidate individual is assigned a maximum Information Content + * score * * @author cjm * */ -public class MaximumInformationContentSimilarityProfileMatcher - extends AbstractSemanticSimilarityProfileMatcher - implements ProfileMatcher { - +public class MaximumInformationContentSimilarityProfileMatcher extends AbstractSemanticSimilarityProfileMatcher + implements ProfileMatcher { + private Logger LOG = Logger.getLogger(MaximumInformationContentSimilarityProfileMatcher.class); - /** * @param kb */ - @Inject + @Inject private MaximumInformationContentSimilarityProfileMatcher(BMKnowledgeBase kb) { super(kb); } - + /** * @param kb * @return new instance @@ -43,7 +42,7 @@ private MaximumInformationContentSimilarityProfileMatcher(BMKnowledgeBase kb) { public static MaximumInformationContentSimilarityProfileMatcher create(BMKnowledgeBase kb) { return new MaximumInformationContentSimilarityProfileMatcher(kb); } - + @Override public String getShortName() { return "max-information"; @@ -54,21 +53,20 @@ public String getShortName() { * @return match profile containing probabilities of each individual */ public MatchSet findMatchProfileImpl(ProfileQuery q) { - + EWAHCompressedBitmap queryProfileBM = getProfileBM(q); - //LOG.info("QUERY PROFILE for "+q+" "+queryProfileBM.getPositions()); - - MatchSet mp = MatchSetImpl.create(q); - + // LOG.info("QUERY PROFILE for "+q+" "+queryProfileBM.getPositions()); + + MatchSet mp = MatchSetImpl.create(q); + List indIds = getFilteredIndividualIds(q.getFilter()); for (String itemId : indIds) { EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); - - //LOG.info("TARGET PROFILE for "+itemId+" "+targetProfileBM); - ClassInformationContentPair mica = - getMicaCalculator().getMostInformativeCommonAncestorWithIC(queryProfileBM, - targetProfileBM); - //LOG.info("mica="+mica); + + // LOG.info("TARGET PROFILE for "+itemId+" "+targetProfileBM); + ClassInformationContentPair mica = getMicaCalculator() + .getMostInformativeCommonAncestorWithIC(queryProfileBM, targetProfileBM); + // LOG.info("mica="+mica); String label = knowledgeBase.getLabelMapper().getArbitraryLabel(itemId); mp.add(createMatch(itemId, label, mica.ic)); } @@ -76,5 +74,4 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { return mp; } - } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java index afcf28c..7f13bc0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java @@ -213,7 +213,7 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { // any node which has an off query parent is discounted //EWAHCompressedBitmap maskedTargetProfileBM = nodesHtBM.and(queryBlanketProfileBM); - LOG.info("TARGET PROFILE for "+itemId+" "+nodesHtBM); + //LOG.info("TARGET PROFILE for "+itemId+" "+nodesHtBM); // cumulative log-probability double logp = 0.0; @@ -398,7 +398,7 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { indArr[n] = itemId; sumOfProbs += p; n++; - LOG.info("logp for "+itemId+" = "+logp+" sumOfLogProbs="+sumOfProbs); + //LOG.info("logp for "+itemId+" = "+logp+" sumOfLogProbs="+sumOfProbs); } for (n = 0; n> individualToInterpretationToTypesBM = new HashMap<>(); - @Inject + @Inject protected NaiveBayesFixedWeightTwoStateProfileMatcher(BMKnowledgeBase kb) { super(kb); } @@ -70,8 +99,31 @@ public boolean isUseBlanket() { public String getShortName() { return "naive-bayes-fixed-weight-two-state"; } + + /** + * @return the kLeastFrequent + */ + public int getkLeastFrequent() { + return kLeastFrequent; + } + + /** + * The default for this should be 0. When 0, the behavior is as for frequency unaware + * (i.e. every instance-class association with frequency info will be treated as normal instance-class) + * + * When k>1, will make use of the k least frequent annotations in probabilistic calculation + * + * @param kLeastFrequent the kLeastFrequent to set + */ + public void setkLeastFrequent(int kLeastFrequent) { + // reset cache + individualToInterpretationToTypesBM = new HashMap<>(); + this.kLeastFrequent = kLeastFrequent; + } + + /** * Extends the query profile - for every node c, all the direct parents of c are in * the query profile, then add c to the query profile. * @@ -132,50 +184,82 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { double pvector[] = new double[indIds.size()]; String indArr[] = new String[indIds.size()]; int n=0; + + for (String itemId : indIds) { - EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); - // any node which has an off query parent is discounted - targetProfileBM = targetProfileBM.and(queryBlanketProfileBM); - LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); - - - // two state model. - // mapping to Bauer et al: these correspond to mxy1, x=Q, y=H/T - int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); - int numInQueryAndNOTInTarget = queryProfileBM.andNotCardinality(targetProfileBM); - int numNOTInQueryAndInTarget = targetProfileBM.andNotCardinality(queryProfileBM); - int numNOTInQueryAndNOTInTarget = - numClassesConsidered - (numInQueryAndInTarget + numInQueryAndNOTInTarget + numNOTInQueryAndInTarget); - - double p = 0.0; - // TODO: optimize this - // integrate over a Dirichlet prior for alpha & beta, rather than gridsearch - // this can be done closed-form - for (double fnr : defaultFalseNegativeRateArr) { - for (double fpr : defaultFalsePositiveRateArr) { - - double pQ1T1 = Math.pow(1-fnr, numInQueryAndInTarget); - double pQ0T1 = Math.pow(fnr, numNOTInQueryAndInTarget); - double pQ1T0 = Math.pow(fpr, numInQueryAndNOTInTarget); - double pQ0T0 = Math.pow(1-fpr, numNOTInQueryAndNOTInTarget); - - - - //LOG.debug("pQ1T1 = "+(1-fnr)+" ^ "+ numInQueryAndInTarget+" = "+pQ1T1); - //LOG.debug("pQ0T1 = "+(fnr)+" ^ "+ numNOTInQueryAndInTarget+" = "+pQ0T1); - //LOG.debug("pQ1T0 = "+(fpr)+" ^ "+ numInQueryAndNOTInTarget+" = "+pQ1T0); - //LOG.debug("pQ0T0 = "+(1-fpr)+" ^ "+ numNOTInQueryAndNOTInTarget+" = "+pQ0T0); - //TODO: optimization. We can precalculate the logs for different integers - p += - Math.exp(Math.log(pQ1T1) + Math.log(pQ0T1) + Math.log(pQ1T0) + Math.log(pQ0T0)); - - } - } - pvector[n] = p; - indArr[n] = itemId; - sumOfProbs += p; + + int effectiveK = kLeastFrequent; + int twoToTheK = (int) Math.pow(2, kLeastFrequent); + int numWeightedTypes = knowledgeBase.getDirectWeightedTypes(itemId).size(); + if (numWeightedTypes < kLeastFrequent) { + twoToTheK = (int) Math.pow(2, numWeightedTypes); + effectiveK = numWeightedTypes; + } + + double cumulativePr = 0; + for (int comboIndex = 0; comboIndex < twoToTheK; comboIndex++) { + + Double comboPr = null; + EWAHCompressedBitmap targetProfileBM; + if (kLeastFrequent == 0) { + targetProfileBM = knowledgeBase.getTypesBM(itemId); + } + else { + WeightedTypesBM wtbm = getTypesFrequencyAware(itemId, comboIndex, effectiveK); + comboPr = wtbm.weight; + targetProfileBM = wtbm.typesBM; + } + + // any node which has an off query parent is discounted + targetProfileBM = targetProfileBM.and(queryBlanketProfileBM); + LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); + + + // two state model. + // mapping to Bauer et al: these correspond to mxy1, x=Q, y=H/T + int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); + int numInQueryAndNOTInTarget = queryProfileBM.andNotCardinality(targetProfileBM); + int numNOTInQueryAndInTarget = targetProfileBM.andNotCardinality(queryProfileBM); + int numNOTInQueryAndNOTInTarget = + numClassesConsidered - (numInQueryAndInTarget + numInQueryAndNOTInTarget + numNOTInQueryAndInTarget); + + double p = 0.0; + // TODO: optimize this + // integrate over a Dirichlet prior for alpha & beta, rather than gridsearch + // this can be done closed-form + for (double fnr : defaultFalseNegativeRateArr) { + for (double fpr : defaultFalsePositiveRateArr) { + + double pQ1T1 = Math.pow(1-fnr, numInQueryAndInTarget); + double pQ0T1 = Math.pow(fnr, numNOTInQueryAndInTarget); + double pQ1T0 = Math.pow(fpr, numInQueryAndNOTInTarget); + double pQ0T0 = Math.pow(1-fpr, numNOTInQueryAndNOTInTarget); + + + + //LOG.debug("pQ1T1 = "+(1-fnr)+" ^ "+ numInQueryAndInTarget+" = "+pQ1T1); + //LOG.debug("pQ0T1 = "+(fnr)+" ^ "+ numNOTInQueryAndInTarget+" = "+pQ0T1); + //LOG.debug("pQ1T0 = "+(fpr)+" ^ "+ numInQueryAndNOTInTarget+" = "+pQ1T0); + //LOG.debug("pQ0T0 = "+(1-fpr)+" ^ "+ numNOTInQueryAndNOTInTarget+" = "+pQ0T0); + //TODO: optimization. We can precalculate the logs for different integers + p += + Math.exp(Math.log(pQ1T1) + Math.log(pQ0T1) + Math.log(pQ1T0) + Math.log(pQ0T0)); + + } + } + + if (comboPr != null) { + p *= comboPr; + } + cumulativePr += p; + } + pvector[n] = cumulativePr; + indArr[n] = itemId; + + sumOfProbs += cumulativePr; n++; - LOG.debug("p for "+itemId+" = "+p); + LOG.debug("p for "+itemId+" = "+cumulativePr); + } for (n = 0; n()); + } + Map m = individualToInterpretationToTypesBM.get(iix); + if (m.containsKey(n)) { + // use cached value + return m.get(n); + } + + // default direct type map. + // note that associations with frequency annotations are includes here alongside + // normal associations + EWAHCompressedBitmap dtmap = knowledgeBase.getDirectTypesBM(itemId); + + // associations with frequency info + // map is from ClassIndex -> Weight + Map wmap = knowledgeBase.getDirectWeightedTypes(itemId); + + // sort with least frequent first + List sortedTypeIndices = new ArrayList<>(wmap.keySet()); + sortedTypeIndices.sort( (Integer i, Integer j) -> wmap.get(i) - wmap.get(j)); + + EWAHCompressedBitmap mask = new EWAHCompressedBitmap(); + double pr = 1.0; + for (int i=0; i< effectiveK; i++) { + Integer iClassIx = sortedTypeIndices.get(i); + Double w = wmap.get(iClassIx) / 100.0; + //LOG.info("Class "+iClassIx +" which is "+i+"-least frequent has weight "+w+" for individual "+itemId+" in combo "+n); + if ( (n >> i) % 2 == 0) { + mask.set(iClassIx); + pr *= 1-w; + } + else { + pr *= w; + } + } + //LOG.info("Instance "+itemId+" in combo "+n+" has Pr = "+pr); + + EWAHCompressedBitmap dtmapMasked = dtmap.xor(mask); + EWAHCompressedBitmap inferredTypesBM = knowledgeBase.getSuperClassesBM(dtmapMasked); + WeightedTypesBM wtbm = new WeightedTypesBM(inferredTypesBM, pr); + m.put(n, wtbm); + return wtbm; + } /** * @return probability a query class is a false positive diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java index b213fc2..8084cc0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java @@ -15,10 +15,10 @@ import com.googlecode.javaewah.EWAHCompressedBitmap; /** - * Given a query profile (a set of classes c1, .., cn) return a match profile, + * Given a query profile (a set of classes c1, .., cn) return a match profile, * where each candidate individual is assigned a probability of being the match, - * based on multiplying the probabilities of the set of all classes being on/off, given - * the item is true. + * based on multiplying the probabilities of the set of all classes being + * on/off, given the item is true. * * TODO: this is INCOMPLETE * @@ -29,13 +29,14 @@ private Logger LOG = Logger.getLogger(NaiveBayesVariableWeightProfileMatcher.class); - private double[][] likelihoods; // [label_j][feature_i] p( feature_i | label_j ) + private double[][] likelihoods; // [label_j][feature_i] p( feature_i | + // label_j ) private double[] priors; // p(label_j) /** * @param kb */ - @Inject + @Inject public NaiveBayesVariableWeightProfileMatcher(BMKnowledgeBase kb) { super(kb); } @@ -61,46 +62,40 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { EWAHCompressedBitmap queryProfileBM = getProfileBM(q); int[] qvector = bmToVector(queryProfileBM, knowledgeBase.getNumClassNodes()); + // LOG.info("QUERY PROFILE for "+q+" "+queryProfileBM.getPositions()); - //LOG.info("QUERY PROFILE for "+q+" "+queryProfileBM.getPositions()); - - MatchSet mp = MatchSetImpl.create(q); + MatchSet mp = MatchSetImpl.create(q); List indIds = getFilteredIndividualIds(q.getFilter()); double sumOfProbs = 0; double[] pvector = new double[indIds.size()]; - String[] indIdsVector = new String[indIds.size()]; + String[] indIdsVector = new String[indIds.size()]; int localItemIndex = 0; for (String itemId : indIds) { LOG.info(itemId); EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); - // TODO - should not need this; tvector already used to calculate likelihoods + // TODO - should not need this; tvector already used to calculate + // likelihoods int[] tvector = bmToVector(targetProfileBM, knowledgeBase.getNumClassNodes()); int j = knowledgeBase.getIndividualIndex(itemId); double[] likelihoodsForItem = likelihoods[j]; double logpsum = 0; - for (int i=0; i qClassIds = q.getQueryClassIds(); int qsize = qClassIds.size(); queryClassArray = qClassIds.toArray(new String[qsize]); - + // array (in same order as queryClassArray) in which each element // is the set of superclasses of the indexed class EWAHCompressedBitmap queryProfileBMArr[] = getProfileSetBM(queryClassArray); EWAHCompressedBitmap queryProfileBM = getProfileBM(q); - MatchSet mp = MatchSetImpl.create(q); - + MatchSet mp = MatchSetImpl.create(q); + // --- // calculate optimal match, based on matching of profile to itself; - // has two components, maxIC and average of each phenotype in profile to itself + // has two components, maxIC and average of each phenotype in profile to + // itself double maxScoreOfOptimalTarget = getScore(queryProfileBM, queryProfileBM); double avgScoreOfOptimalTarget = 0; - - for (int j = 0; j indIds = getFilteredIndividualIds(q.getFilter()); for (String itemId : indIds) { EWAHCompressedBitmap targetProfileBM = getTypesBM(itemId); - + // calculate maximum IC double maxScore = getScore(queryProfileBM, targetProfileBM); - + EWAHCompressedBitmap targetProfileDirectBM = getDirectTypesBM(itemId); int tsize = targetProfileDirectBM.cardinality(); - + // note: this is an experimental implementation that // does not make use of a MICA cache; it may be replaced by // a version that uses a cache later. double score = 0; // find best match for every class j in query profile - for (int j = 0; j targetToQueryCache; - @Inject + @Inject private ThreeStateBayesianNetworkProfileMatcher(BMKnowledgeBase kb) { super(kb); } @@ -191,7 +191,7 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) throws IncoherentStateExcep indArr[n] = itemId; sumOfProbs += p; n++; - LOG.info("p for "+itemId+" = "+p); + //LOG.info("p for "+itemId+" = "+p); } for (n = 0; n lines = FileUtils.readLines(f); - for (String line : lines) { - String[] vals = line.split("\t", 2); - String[] terms = vals[1].split(";"); - for (String t : terms) { - addInstanceOf(vals[0], t); - } - } - Preconditions.checkNotNull(owlDataOntology); - } - - public void loadDataFromTsvGzip(String path) throws OWLOntologyCreationException, IOException { - GZIPInputStream gis = new GZIPInputStream(new FileInputStream(path)); - BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8")); - String line; - while ((line = bf.readLine()) != null) { - String[] vals = line.split("\t", 2); - String[] terms = vals[1].split(";"); - for (String t : terms) { - addInstanceOf(vals[0], t); - } - } - Preconditions.checkNotNull(owlDataOntology); - } - - - - private IRI getIRI(String id) { - // TODO - use json-ld - if (id.contains(":")) { - return IRI.create("http://purl.obolibrary.org/obo/" + id.replace(":", "_")); - } else { - return IRI.create(id); - } - } - - private void mergeOntology(OWLOntology o) { - if (owlOntology == null) { - LOG.info("Ont ontology=" + o); - owlOntology = o; - } else { - LOG.info("Merging ont axioms from=" + o); - owlOntology.getOWLOntologyManager().addAxioms(owlOntology, o.getAxioms()); - } - } - - private void addInstanceOf(String i, String c) { - if (owlDataOntology == null) { - owlDataOntology = owlOntology; - } - OWLDataFactory f = manager.getOWLDataFactory(); - OWLClassAssertionAxiom ax = - f.getOWLClassAssertionAxiom(f.getOWLClass(getIRI(c)), f.getOWLNamedIndividual(getIRI(i))); - manager.addAxiom(owlOntology, ax); - } - - - private void mergeData(OWLOntology o) { - if (owlDataOntology == null) { - LOG.info("Data ontology=" + o); - owlDataOntology = o; - } else { - LOG.info("Merging data axioms from=" + o); - owlDataOntology.getOWLOntologyManager().addAxioms(owlDataOntology, o.getAxioms()); - } - } - - private OWLOntologyManager getOWLOntologyManager() { - if (manager == null) - manager = OWLManager.createOWLOntologyManager(); - return manager; - } - - /** - * @return handle for a Bitmap-based Knowledge Base - */ - public BMKnowledgeBase createKnowledgeBaseInterface() { - // TODO: use factories, or injection - return BMKnowledgeBaseOWLAPIImpl.create(owlOntology, owlDataOntology, owlReasonerFactory, - curieUtil); - } - + private Logger LOG = Logger.getLogger(OWLLoader.class); + + OWLOntologyManager manager; + OWLOntology owlOntology; + OWLOntology owlDataOntology; + OWLReasoner owlReasoner; + OWLReasonerFactory owlReasonerFactory = new ElkReasonerFactory(); + CurieUtil curieUtil = new CurieUtil(new HashMap()); + + /** + * @param iri + * @return OWL Ontology + * @throws OWLOntologyCreationException + */ + public OWLOntology loadOWL(IRI iri) throws OWLOntologyCreationException { + return getOWLOntologyManager().loadOntology(iri); + } + + /** + * @param file + * @return OWL Ontology + * @throws OWLOntologyCreationException + */ + public OWLOntology loadOWL(File file) throws OWLOntologyCreationException { + IRI iri = IRI.create(file); + return getOWLOntologyManager().loadOntologyFromOntologyDocument(iri); + } + + /** + * Loads an OWL ontology from a URI or file + * + * @param path + * @return OWL Ontology + * @throws OWLOntologyCreationException + */ + public OWLOntology loadOWL(String path) throws OWLOntologyCreationException { + if (path.startsWith("http")) { + return loadOWL(IRI.create(path)); + } else { + File file = new File(path); + return loadOWL(file); + } + } + + /** + * @param iri + * @throws OWLOntologyCreationException + */ + public void load(IRI iri) throws OWLOntologyCreationException { + owlOntology = getOWLOntologyManager().loadOntology(iri); + Preconditions.checkNotNull(owlOntology); + } + + /** + * @param file + * @throws OWLOntologyCreationException + */ + public void load(File file) throws OWLOntologyCreationException { + owlOntology = loadOWL(file); + Preconditions.checkNotNull(owlOntology); + } + + public void loadGzippdOntology(String path) + throws FileNotFoundException, IOException, OWLOntologyCreationException { + GZIPInputStream gis = new GZIPInputStream(new FileInputStream(path)); + BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8")); + owlOntology = getOWLOntologyManager().loadOntologyFromOntologyDocument(gis); + Preconditions.checkNotNull(owlOntology); + } + + /** + * Loads an OWL ontology from a URI or file + * + * @param path + * @throws OWLOntologyCreationException + */ + public void load(String path) throws OWLOntologyCreationException { + owlOntology = loadOWL(path); + Preconditions.checkNotNull(owlOntology); + } + + /** + * Loads OWL ontologies from a URI or file + * + * @param path + * @throws OWLOntologyCreationException + */ + public void loadOntologies(String... paths) throws OWLOntologyCreationException { + for (String path : paths) + mergeOntology(loadOWL(path)); + Preconditions.checkNotNull(owlOntology); + } + + /** + * Loads an OWL ontology from a URI or file + * + * @param path + * @throws OWLOntologyCreationException + */ + public void loadData(String... paths) throws OWLOntologyCreationException { + for (String path : paths) + mergeData(loadOWL(path)); + Preconditions.checkNotNull(owlDataOntology); + } + + public void loadDataFromTsv(String path) throws OWLOntologyCreationException, IOException { + File f = new File(path); + // Files.readLines(f, Charset.defaultCharset()); + List lines = FileUtils.readLines(f); + for (String line : lines) { + String[] vals = line.split("\t", 2); + String[] terms = vals[1].split(";"); + for (String t : terms) { + addInstanceOf(vals[0], t); + } + } + Preconditions.checkNotNull(owlDataOntology); + } + + public void loadDataFromTsvGzip(String path) throws OWLOntologyCreationException, IOException { + GZIPInputStream gis = new GZIPInputStream(new FileInputStream(path)); + BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8")); + String line; + while ((line = bf.readLine()) != null) { + String[] vals = line.split("\t", 2); + String[] terms = vals[1].split(";"); + for (String t : terms) { + addInstanceOf(vals[0], t); + } + } + Preconditions.checkNotNull(owlDataOntology); + } + + private IRI getIRI(String id) { + // TODO - use json-ld + if (id.contains(":")) { + return IRI.create("http://purl.obolibrary.org/obo/" + id.replace(":", "_")); + } else { + return IRI.create(id); + } + } + + private void mergeOntology(OWLOntology o) { + if (owlOntology == null) { + LOG.info("Ont ontology=" + o); + owlOntology = o; + } else { + LOG.info("Merging ont axioms from=" + o); + owlOntology.getOWLOntologyManager().addAxioms(owlOntology, o.getAxioms()); + } + } + + private void addInstanceOf(String i, String c) { + if (owlDataOntology == null) { + owlDataOntology = owlOntology; + } + OWLDataFactory f = manager.getOWLDataFactory(); + OWLClassAssertionAxiom ax = f.getOWLClassAssertionAxiom(f.getOWLClass(getIRI(c)), + f.getOWLNamedIndividual(getIRI(i))); + manager.addAxiom(owlOntology, ax); + } + + private void mergeData(OWLOntology o) { + if (owlDataOntology == null) { + LOG.info("Data ontology=" + o); + owlDataOntology = o; + } else { + LOG.info("Merging data axioms from=" + o); + owlDataOntology.getOWLOntologyManager().addAxioms(owlDataOntology, o.getAxioms()); + } + } + + private OWLOntologyManager getOWLOntologyManager() { + if (manager == null) + manager = OWLManager.createOWLOntologyManager(); + return manager; + } + + /** + * @return handle for a Bitmap-based Knowledge Base + */ + public BMKnowledgeBase createKnowledgeBaseInterface() { + // TODO: use factories, or injection + return BMKnowledgeBaseOWLAPIImpl.create(owlOntology, owlDataOntology, owlReasonerFactory, curieUtil); + } } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java index 650f0bf..212adcc 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java @@ -7,7 +7,6 @@ import org.monarchinitiative.owlsim.model.kb.Attribute; import org.monarchinitiative.owlsim.model.kb.Entity; -import com.google.inject.ImplementedBy; import com.googlecode.javaewah.EWAHCompressedBitmap; /** @@ -69,7 +68,6 @@ * * @author cjm */ -@ImplementedBy(BMKnowledgeBaseOWLAPIImpl.class) public interface BMKnowledgeBase { @@ -144,12 +142,13 @@ public EWAHCompressedBitmap getDirectSubClassesBM(String classId); - /** - * @param classIds - * @return union of all superclasses (direct and indirect and equivalent) as a bitmap - */ - public EWAHCompressedBitmap getSubClassesBM(Set classIds); + /** + * @param classIds + * @return union of all superclasses (direct and indirect and equivalent) as a bitmap + */ + public EWAHCompressedBitmap getSubClassesBM(Set classIds); + /** * @param classIds * @return union of all direct subclasses as a bitmap @@ -179,7 +178,13 @@ * @return union of all superclasses as a bitmap */ public EWAHCompressedBitmap getSuperClassesBM(Set classIds); - + + /** + * @param classIds + * @return union of all superclasses (direct and indirect and equivalent) as a bitmap + */ + public EWAHCompressedBitmap getSuperClassesBM(EWAHCompressedBitmap classesBM); + /** * @param classIndex * @return superclasses (direct and indirect and equivalent) of classId as bitmap @@ -199,12 +204,18 @@ */ public EWAHCompressedBitmap getTypesBM(String id); - /** - * @param id - an individual - * @return direct types as bitmap - */ - public EWAHCompressedBitmap getDirectTypesBM(String id); - + /** + * @param id - an individual + * @return direct types as bitmap + */ + public EWAHCompressedBitmap getDirectTypesBM(String id); + + /** + * @param id - an individual + * @return map between Type class index and 0 getDirectWeightedTypes(String id); + /** * @param itemId * @return bitmap representation of all (direct and indirect) classes known to be NOT instantiated diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/KnowledgeBaseModule.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/KnowledgeBaseModule.java deleted file mode 100644 index c2047ba..0000000 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/KnowledgeBaseModule.java +++ /dev/null @@ -1,118 +0,0 @@ -package org.monarchinitiative.owlsim.kb; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.Collection; -import java.util.Map; -import java.util.Set; -import java.util.zip.GZIPInputStream; - -import javax.inject.Singleton; - -import org.apache.commons.validator.routines.UrlValidator; -import org.monarchinitiative.owlsim.kb.bindings.IndicatesDataTsvs; -import org.monarchinitiative.owlsim.kb.bindings.IndicatesOwlDataOntologies; -import org.monarchinitiative.owlsim.kb.bindings.IndicatesOwlOntologies; -import org.monarchinitiative.owlsim.kb.impl.BMKnowledgeBaseOWLAPIImpl; -import org.prefixcommons.CurieUtil; -import org.semanticweb.elk.owlapi.ElkReasonerFactory; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyCreationException; -import org.semanticweb.owlapi.model.OWLOntologyManager; -import org.semanticweb.owlapi.reasoner.OWLReasonerFactory; - -import com.google.common.collect.ImmutableCollection; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.inject.AbstractModule; -import com.google.inject.Provides; - -/** - * TODO - rewrite this - * - * Reduce duplication of code with OWLLoader - * - */ -public class KnowledgeBaseModule extends AbstractModule { - - private final ImmutableCollection ontologyUris; - private final ImmutableCollection ontologyDataUris; - private final ImmutableCollection dataTsvs; - private final ImmutableMap curies; - private final UrlValidator urlValdiator = UrlValidator.getInstance(); - - public KnowledgeBaseModule(Collection ontologyUris, - Collection ontologyDataUris, - Set dataTsvs, - Map curies) { - this.ontologyUris = new ImmutableSet.Builder().addAll(ontologyUris).build(); - this.ontologyDataUris = new ImmutableSet.Builder().addAll(ontologyDataUris).build(); - this.dataTsvs = new ImmutableSet.Builder().addAll(dataTsvs).build(); - this.curies = new ImmutableMap.Builder().putAll(curies).build(); - } - - @Override - protected void configure() { - - bind(BMKnowledgeBase.class).to(BMKnowledgeBaseOWLAPIImpl.class).in(Singleton.class); - bind(OWLReasonerFactory.class).to(ElkReasonerFactory.class); - bind(CurieUtil.class).toInstance(new CurieUtil(curies)); -// bind(OWLOntologyManager.class).to(OWLOntologyManagerImpl.class); -// bind(ReadWriteLock.class).to(NoOpReadWriteLock.class); -// bind(OWLDataFactory.class).to(OWLDataFactoryImpl.class); - //bind(OWLOntologyManager.class).toInstance(OWLManager.createOWLOntologyManager()); - } - - OWLOntology loadOntology(OWLOntologyManager manager, String uri) throws OWLOntologyCreationException { - if (urlValdiator.isValid(uri)) { - return manager.loadOntology(IRI.create(uri)); - } else { - File file = new File(uri); - return manager.loadOntologyFromOntologyDocument(file); - } - } - - OWLOntology mergeOntologies(OWLOntologyManager manager, Collection uris) throws OWLOntologyCreationException, FileNotFoundException, IOException { - OWLOntology ontology = manager.createOntology(); - for (String uri: uris) { - OWLOntology loadedOntology; - if (uri.endsWith(".gz")) { - GZIPInputStream gis = new GZIPInputStream(new FileInputStream(uri)); - BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8")); - loadedOntology = manager.loadOntologyFromOntologyDocument(gis); - } - else { - loadedOntology = loadOntology(manager, uri); - } - manager.addAxioms(ontology, loadedOntology.getAxioms()); - } - return ontology; - } - - @Provides - @IndicatesOwlOntologies - @Singleton - OWLOntology getOwlOntologies(OWLOntologyManager manager) throws OWLOntologyCreationException, FileNotFoundException, IOException { - return mergeOntologies(manager, ontologyUris); - } - - @Provides - @IndicatesOwlDataOntologies - @Singleton - OWLOntology getOwlDataOntologies(OWLOntologyManager manager) throws OWLOntologyCreationException, FileNotFoundException, IOException { - return mergeOntologies(manager, ontologyDataUris); - } - - @Provides - @IndicatesDataTsvs - @Singleton - OWLOntology getDataTsvs(OWLOntologyManager manager) throws OWLOntologyCreationException, FileNotFoundException, IOException { - return mergeOntologies(manager, dataTsvs); - } - -} diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/ewah/EWAHKnowledgeBaseStore.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/ewah/EWAHKnowledgeBaseStore.java index 81c0a2c..2de3213 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/ewah/EWAHKnowledgeBaseStore.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/ewah/EWAHKnowledgeBaseStore.java @@ -1,5 +1,6 @@ package org.monarchinitiative.owlsim.kb.ewah; +import java.util.Collection; import java.util.Set; import com.googlecode.javaewah.EWAHCompressedBitmap; @@ -63,7 +64,7 @@ public EWAHCompressedBitmap getSuperClasses(int clsIndex) { return storedSuperClasses[clsIndex]; } - public EWAHCompressedBitmap getClasses(Set clsIndices) { + public EWAHCompressedBitmap getClasses(Collection clsIndices) { EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); for (int i : clsIndices) { bm.set(i); diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java index 42e2ba8..aa471b5 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java @@ -10,15 +10,11 @@ import java.util.Set; import java.util.stream.Collectors; -import javax.inject.Inject; - import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.io.OWLLoader; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.kb.CURIEMapper; import org.monarchinitiative.owlsim.kb.LabelMapper; -import org.monarchinitiative.owlsim.kb.bindings.IndicatesOwlDataOntologies; -import org.monarchinitiative.owlsim.kb.bindings.IndicatesOwlOntologies; import org.monarchinitiative.owlsim.kb.ewah.EWAHKnowledgeBaseStore; import org.monarchinitiative.owlsim.model.kb.Attribute; import org.monarchinitiative.owlsim.model.kb.Entity; @@ -26,7 +22,9 @@ import org.prefixcommons.CurieUtil; import org.semanticweb.owlapi.model.AxiomType; import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotation; import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; import org.semanticweb.owlapi.model.OWLAnnotationValue; import org.semanticweb.owlapi.model.OWLClass; import org.semanticweb.owlapi.model.OWLClassAssertionAxiom; @@ -106,6 +104,9 @@ private Map>> propertyValueMapMap; Map> opposingClassMap = new HashMap>(); + + Map> individualToWeightedDirectTypeMap = new HashMap<>(); + private int[] individualCountPerClassArray; @@ -118,9 +119,8 @@ * @param owlDataOntology TODO - fix this * @param rf */ - @Inject - public BMKnowledgeBaseOWLAPIImpl(@IndicatesOwlOntologies OWLOntology owlOntology, - @IndicatesOwlDataOntologies OWLOntology owlDataOntology, OWLReasonerFactory rf, + public BMKnowledgeBaseOWLAPIImpl(OWLOntology owlOntology, + OWLOntology owlDataOntology, OWLReasonerFactory rf, CurieUtil curieUtil) { super(); curieMapper = new CURIEMapperImpl(); @@ -511,6 +511,9 @@ private void storeInferences() { ontoEWAHStore.setDirectIndividuals(clsIndex, individualInts); } + + // populate frequency-awareness map + individualToWeightedDirectTypeMap = new HashMap<>(); for (OWLNamedIndividual i : individualsInSignature) { int individualIndex = getIndex(i); // LOG.info("String inferences for "+i+" --> " +individualIndex); @@ -518,8 +521,49 @@ private void storeInferences() { getIntegersForClassSet(owlReasoner.getTypes(i, true))); ontoEWAHStore.setTypes(individualIndex, getIntegersForClassSet(owlReasoner.getTypes(i, false))); + + // TODO - ensure robust for equivalent individuals + Map wmap = new HashMap<>(); + individualToWeightedDirectTypeMap.put(individualIndex, wmap); + for (OWLClassAssertionAxiom caax : owlOntology.getClassAssertionAxioms(i)) { + int cix; + + // only associations to named classes + if (caax.getClassExpression().isAnonymous()) { + continue; + } + cix = getIndex(caax.getClassExpression().asOWLClass()); + + // we use reification to store probability + for (OWLAnnotation ann : caax.getAnnotations()) { + OWLAnnotationProperty prop = ann.getProperty(); + OWLAnnotationValue v = ann.getValue(); + if (v instanceof OWLLiteral) { + OWLLiteral lv = v.asLiteral().get(); + Double pr = null; + if (lv.isDouble()) { + pr = lv.parseDouble(); + } + if (lv.isFloat()) { + pr = (double) lv.parseFloat(); + } + if (pr != null) { + // TODO : decide on a vocabulary + if (prop.getIRI().toString().contains("probability")) { + wmap.put(cix, (int) (pr * 100)); + } + } + if (lv.isInteger()) { + if (prop.getIRI().toString().contains("frequency")) { + wmap.put(cix, lv.parseInteger()); + } + + } + } + } + } - // Treat CLassAssertion( ComplementOf(c) i) as a negative assertion + // Treat ClassAssertion( ComplementOf(c) i) as a negative assertion Set ncs = new HashSet(); Set ncsDirect = new HashSet(); for (OWLClassAssertionAxiom cx : owlOntology.getClassAssertionAxioms(i)) { @@ -813,6 +857,13 @@ protected EWAHCompressedBitmap getSuperClassesBMByOWLClassSet(Set clsS return ontoEWAHStore.getSuperClasses(classIndices); } + /* (non-Javadoc) + * @see org.monarchinitiative.owlsim.kb.BMKnowledgeBase#getSuperClassesBM(com.googlecode.javaewah.EWAHCompressedBitmap) + */ + public EWAHCompressedBitmap getSuperClassesBM(EWAHCompressedBitmap classesBM) { + return ontoEWAHStore.getSuperClasses(new HashSet<>(classesBM.getPositions())); + } + public EWAHCompressedBitmap getSuperClassesBM(String cid) { return ontoEWAHStore.getSuperClasses(getClassIndex(cid)); } @@ -950,6 +1001,15 @@ public EWAHCompressedBitmap getTypesBM(String id) { public EWAHCompressedBitmap getTypesBM(int individualIndex) { return ontoEWAHStore.getTypes(individualIndex); } + + /* (non-Javadoc) + * @see org.monarchinitiative.owlsim.kb.BMKnowledgeBase#getDirectWeightedTypes(java.lang.String) + */ + public Map getDirectWeightedTypes(String id) { + int iix = getIndividualIndex(id); + return individualToWeightedDirectTypeMap.get(iix); + } + /** * @param id diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/model/match/ProfileQuery.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/model/match/ProfileQuery.java index b9d748c..0b91b7e 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/model/match/ProfileQuery.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/model/match/ProfileQuery.java @@ -3,50 +3,49 @@ import java.util.Set; import org.monarchinitiative.owlsim.kb.filter.Filter; -import org.monarchinitiative.owlsim.model.match.impl.ProfileQueryImpl; - -import com.google.inject.ImplementedBy; /** - * The most basic type of query, a positive conjunction of features to be matched. + * The most basic type of query, a positive conjunction of features to be + * matched. * * * @author cjm * */ -@ImplementedBy(ProfileQueryImpl.class) public interface ProfileQuery { - + /** * @return all (positive) class Ids in query */ public Set getQueryClassIds(); - + /** * @return query filter */ public Filter getFilter(); - + public void setFilter(Filter f); - + public Integer getLimit(); /** * set to -1 for no limit (all) + * * @param limit */ public void setLimit(Integer limit); - + /** * an optional set of individuals for which we wish to test ranking. * * This is for use when we with to limit the number of individuals returned, - * but we want to know the ranking and scores of particular individuals outside - * the top N + * but we want to know the ranking and scores of particular individuals + * outside the top N * * @return individual ids */ - public Set getReferenceIndividualIds(); - public void setReferenceIndividualIds(Set indIds); + public Set getReferenceIndividualIds(); + + public void setReferenceIndividualIds(Set indIds); } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java index 9318ede..041b20a 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java @@ -109,10 +109,13 @@ protected void load(String fn, String... ontfns) throws OWLOntologyCreationExcep kb = loader.createKnowledgeBaseInterface(); } - protected void loadSimplePhenoWithNegation() throws OWLOntologyCreationException { - load("simple-pheno-with-negation.owl"); - - } + protected void loadSimplePhenoWithNegation() throws OWLOntologyCreationException { + load("simple-pheno-with-negation.owl"); + } + + protected void loadSimplePhenoWithFrequency() throws OWLOntologyCreationException { + load("simple-pheno-with-freqs.owl"); + } @Deprecated protected void search(ProfileMatcher profileMatcher, @@ -188,5 +191,17 @@ protected boolean isRankedLast(String matchId, MatchSet matchSet) { LOG.info("Rank of match "+matchId+" is "+matchRank+" which is last or joint last"); return true; } + + protected boolean isRankedAt(String matchId, MatchSet matchSet, int expectedRank) { + int matchRank = 0; + for (Match m : matchSet.getMatches()) { + int rank = m.getRank(); + + if (m.getMatchId().equals(matchId)) { + return (rank == expectedRank); + } + } + return false; + } } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java index b83ab08..5c379b2 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java @@ -80,6 +80,59 @@ public void testExamplePositiveOnly() throws Exception { } + @Test + public void testFrequencyAware() throws Exception { + loadSimplePhenoWithFrequency(); + //LOG.info("INDS="+kb.getIndividualIdsInSignature()); + ProfileMatcher profileMatcher = createProfileMatcher(kb); + ((NaiveBayesFixedWeightTwoStateProfileMatcher) profileMatcher).setkLeastFrequent(3); + + Assert.assertTrue(kb.getIndividualIdsInSignature().size() > 0); + + int nOk = 0; + for (String i : kb.getIndividualIdsInSignature()) { + + ProfileQuery pq = profileMatcher.createPositiveProfileQuery(i); + TestQuery tq = new TestQuery(pq, i, 4); // self should always be ranked first + String fn = i.replaceAll(".*/", ""); + eval.writeJsonTo("target/naivebfreq-test-results-"+fn+".json"); + Assert.assertTrue(eval.evaluateTestQuery(profileMatcher, tq)); + + if (i.equals("http://x.org/ind-dec-all")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-no-brain-phenotype", tq.matchSet)); + nOk++; + } + if (i.equals("http://x.org/ind-big-heart-small-brain")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-big-femur", tq.matchSet)); + + // targets with frequency + Assert.assertTrue(isRankedAt("http://x.org/fplus-big-heart-small-brain", tq.matchSet, 2)); + Assert.assertTrue(isRankedAt("http://x.org/f0-big-heart-small-brain", tq.matchSet, 3)); + Assert.assertTrue(isRankedAt("http://x.org/fminus-big-heart-small-brain", tq.matchSet, 4)); + nOk++; + } + if (i.equals("http://x.org/ind-small-heart-big-brain")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-big-femur", tq.matchSet)); + + // targets with frequency + Assert.assertTrue(isRankedAt("http://x.org/fminus-big-heart-small-brain", tq.matchSet, 2)); + Assert.assertTrue(isRankedAt("http://x.org/f0-big-heart-small-brain", tq.matchSet, 3)); + Assert.assertTrue(isRankedAt("http://x.org/fplus-big-heart-small-brain", tq.matchSet, 4)); + nOk++; + } + if (i.equals("http://x.org/ind-unstated-phenotype")) { + //Assert.assertTrue(isRankedLast("http://x.org/ind-no-phenotype", tq.matchSet)); + //temporarily removed the no-phenotype individual from test; auto-pass this for now + nOk++; + } + if (i.equals("http://x.org/ind-no-brain-phenotype")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-inc-all", tq.matchSet)); + nOk++; + } + + } + Assert.assertEquals(5, nOk); + } } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java index a906c89..bdb2a72 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java @@ -75,17 +75,16 @@ public void testCompareProfileFile() throws Exception { Set tcids = kb.getClassIds(kb.getDirectTypesBM(j)); ProfileQuery tp = profileMatcher.createProfileQueryFromClasses(tcids, null); - String fn = i.replaceAll(".*/", ""); + String fn = i.replaceAll(".*/", ""); //eval.writeJsonTo("target/pdgm-test-results-"+fn+".json"); Match pairMatch = profileMatcher.compareProfilePair(qp, tp); // note: scores may deiverge slightly; this is because // disjointness axioms are used for to populate negative class // assertions for individuals at KB creation time - System.out.println("COMPARING: "+i+" -vs- "+j); - System.out.println(pairMatch); - System.out.println(match); - System.out.println("---"); + LOG.debug("COMPARING: "+i+" -vs- "+j); + LOG.debug(pairMatch); + LOG.debug(match); } } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/perf/AbstractProfileMatcherPerfIT.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/perf/AbstractProfileMatcherPerfIT.java index e8e4007..1d7df28 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/perf/AbstractProfileMatcherPerfIT.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/perf/AbstractProfileMatcherPerfIT.java @@ -6,8 +6,6 @@ import java.util.List; import java.util.Set; -import javax.inject.Inject; - import org.apache.log4j.Logger; import org.junit.Test; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/mica/MICAStoreBench.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/mica/MICAStoreBench.java index d0b7b44..e8fb58a 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/mica/MICAStoreBench.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/mica/MICAStoreBench.java @@ -2,8 +2,6 @@ import java.util.HashMap; -import javax.inject.Inject; - import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.mica.impl.MICAStoreImpl; import org.monarchinitiative.owlsim.compute.mica.impl.NoRootException; diff --git a/owlsim-core/src/test/resources/simple-pheno-with-freqs.owl b/owlsim-core/src/test/resources/simple-pheno-with-freqs.owl new file mode 100644 index 0000000..2b5850a --- /dev/null +++ b/owlsim-core/src/test/resources/simple-pheno-with-freqs.owl @@ -0,0 +1,282 @@ +Prefix: : +Prefix: dc: +Prefix: owl: +Prefix: rdf: +Prefix: rdfs: +Prefix: xml: +Prefix: xsd: +Prefix: x: + + +Ontology: + + +AnnotationProperty: x:probability + + +Datatype: xsd:double + + +Class: absent-heart + + SubClassOf: + hypoplastic-heart + + +Class: bone-length + + SubClassOf: + bone-morphology + + +Class: bone-morphology + + SubClassOf: + skeletal-phenotype + + +Class: bone-shape + + SubClassOf: + bone-morphology + + +Class: brain-morphology + + SubClassOf: + neuro-phenotype + + +Class: brain-shape + + SubClassOf: + brain-morphology + + +Class: brain-size + + SubClassOf: + brain-morphology + + +Class: circulatory-phenotype + + SubClassOf: + phenotype + + +Class: dec-bone-length + + SubClassOf: + bone-length + + + +Class: dec-brain-size + + SubClassOf: + brain-size + + + +Class: dec-femur-length + + SubClassOf: + dec-bone-length + + + +Class: heart-morphology + + SubClassOf: + circulatory-phenotype + + +Class: heart-shape + + SubClassOf: + heart-morphology + + +Class: heart-size + + SubClassOf: + heart-morphology + + +Class: hyperplastic-heart + + SubClassOf: + heart-size + + + +Class: hypoplastic-heart + + SubClassOf: + heart-size + + + +Class: inc-bone-length + + SubClassOf: + bone-length + + + +Class: inc-brain-size + + SubClassOf: + brain-size + + + +Class: inc-femur-length + + SubClassOf: + inc-bone-length + + + +Class: neuro-phenotype + + SubClassOf: + phenotype + + +Class: phenotype + + +Class: skeletal-phenotype + + SubClassOf: + phenotype + + +Individual: ind-big-femur + + Types: + inc-femur-length + + +Individual: ind-big-heart-big-brain + + Types: + hyperplastic-heart, + inc-brain-size + +Individual: fplus-big-heart-small-brain + + Types: + Annotations: x:probability "0.75"^^xsd:double dec-brain-size, + Annotations: x:probability "0.25"^^xsd:double inc-brain-size, + Annotations: x:probability "0.75"^^xsd:double hyperplastic-heart, + Annotations: x:probability "0.25"^^xsd:double hypoplastic-heart + +Individual: f0-big-heart-small-brain + + Types: + Annotations: x:probability "0.5"^^xsd:double dec-brain-size, + Annotations: x:probability "0.5"^^xsd:double inc-brain-size, + Annotations: x:probability "0.5"^^xsd:double hyperplastic-heart, + Annotations: x:probability "0.5"^^xsd:double hypoplastic-heart + +Individual: fminus-big-heart-small-brain + + Types: + Annotations: x:probability "0.25"^^xsd:double dec-brain-size, + Annotations: x:probability "0.75"^^xsd:double inc-brain-size, + Annotations: x:probability "0.25"^^xsd:double hyperplastic-heart, + Annotations: x:probability "0.75"^^xsd:double hypoplastic-heart + + +Individual: ind-big-heart-small-brain + + Types: dec-brain-size, + hyperplastic-heart + + +Individual: ind-bone + + Types: + bone-morphology + + +Individual: ind-brain + + Types: + brain-morphology + + +Individual: ind-dec-all + + Types: + dec-bone-length, + dec-brain-size, + hypoplastic-heart + + +Individual: ind-heart-bone + + Types: + bone-morphology, + heart-morphology + + +Individual: ind-heart-brain + + Types: + brain-morphology, + heart-morphology + + +Individual: ind-heart-brain-bone + + Types: + bone-morphology, + brain-morphology, + heart-morphology + + +Individual: ind-inc-all + + Types: + hyperplastic-heart, + inc-bone-length, + inc-brain-size + + +Individual: ind-no-brain-phenotype + + Types: + phenotype, + not (brain-morphology) + + +Individual: ind-small-femur + + Types: + dec-femur-length + + +Individual: ind-small-heart-big-brain + + Types: + hypoplastic-heart, + inc-brain-size + + +Individual: ind-small-heart-small-brain + + Types: + dec-brain-size, + hypoplastic-heart + + +Individual: ind-unstated-phenotype + + Types: + phenotype + + diff --git a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/OwlSimServiceApplication.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/OwlSimServiceApplication.java index 4ab2e13..9e8da2f 100644 --- a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/OwlSimServiceApplication.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/OwlSimServiceApplication.java @@ -23,10 +23,10 @@ import org.apache.log4j.Logger; import org.eclipse.jetty.servlets.CrossOriginFilter; -import org.monarchinitiative.owlsim.compute.enrich.EnrichmentMapModule; -import org.monarchinitiative.owlsim.compute.matcher.MatcherMapModule; -import org.monarchinitiative.owlsim.kb.KnowledgeBaseModule; import org.monarchinitiative.owlsim.services.configuration.ApplicationConfiguration; +import org.monarchinitiative.owlsim.services.modules.EnrichmentMapModule; +import org.monarchinitiative.owlsim.services.modules.KnowledgeBaseModule; +import org.monarchinitiative.owlsim.services.modules.MatcherMapModule; import org.semanticweb.owlapi.OWLAPIParsersModule; import org.semanticweb.owlapi.OWLAPIServiceLoaderModule; diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentMapModule.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/EnrichmentMapModule.java similarity index 76% rename from owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentMapModule.java rename to owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/EnrichmentMapModule.java index d4b6823..95c03ac 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/enrich/EnrichmentMapModule.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/EnrichmentMapModule.java @@ -1,10 +1,11 @@ -package org.monarchinitiative.owlsim.compute.enrich; +package org.monarchinitiative.owlsim.services.modules; import java.io.IOException; import java.util.HashMap; import java.util.Map; import org.apache.log4j.Logger; +import org.monarchinitiative.owlsim.compute.enrich.EnrichmentEngine; import org.monarchinitiative.owlsim.compute.enrich.impl.HypergeometricEnrichmentEngine; import com.google.inject.AbstractModule; @@ -13,15 +14,15 @@ public class EnrichmentMapModule extends AbstractModule { - private Logger LOG = Logger.getLogger(EnrichmentMapModule.class); - + private Logger LOG = Logger.getLogger(EnrichmentMapModule.class); @Override protected void configure() { } /*** - *

Note: The class must be injectable by Guice. + *

+ * Note: The class must be injectable by Guice. * * @param injector * @return A mapping of ProfileMatchers @@ -29,7 +30,7 @@ protected void configure() { */ @Provides Map getEnrichmentEngines(Injector injector) throws IOException { - + Map engineMap = new HashMap<>(); EnrichmentEngine e = (EnrichmentEngine) injector.getInstance(HypergeometricEnrichmentEngine.class); engineMap.put(e.getShortName(), e); diff --git a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java new file mode 100644 index 0000000..e6b76c7 --- /dev/null +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java @@ -0,0 +1,152 @@ +package org.monarchinitiative.owlsim.services.modules; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Collection; +import java.util.Map; +import java.util.Set; +import java.util.zip.GZIPInputStream; + +import javax.inject.Singleton; + +import org.apache.commons.validator.routines.UrlValidator; +import org.monarchinitiative.owlsim.compute.classmatch.ClassMatcher; +import org.monarchinitiative.owlsim.compute.enrich.impl.HypergeometricEnrichmentEngine; +import org.monarchinitiative.owlsim.compute.matcher.impl.BayesianNetworkProfileMatcher; +import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator; +import org.monarchinitiative.owlsim.compute.mica.impl.MostInformativeCommonAncestorCalculatorImpl; +import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; +import org.monarchinitiative.owlsim.kb.impl.BMKnowledgeBaseOWLAPIImpl; +import org.monarchinitiative.owlsim.services.modules.bindings.IndicatesDataTsvs; +import org.monarchinitiative.owlsim.services.modules.bindings.IndicatesOwlDataOntologies; +import org.monarchinitiative.owlsim.services.modules.bindings.IndicatesOwlOntologies; +import org.prefixcommons.CurieUtil; +import org.semanticweb.elk.owlapi.ElkReasonerFactory; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.reasoner.OWLReasonerFactory; + +import com.google.common.collect.ImmutableCollection; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.inject.AbstractModule; +import com.google.inject.Provides; + +/** + * TODO - rewrite this + * + * Reduce duplication of code with OWLLoader + * + */ +public class KnowledgeBaseModule extends AbstractModule { + + private final ImmutableCollection ontologyUris; + private final ImmutableCollection ontologyDataUris; + private final ImmutableCollection dataTsvs; + private final ImmutableMap curies; + private final UrlValidator urlValdiator = UrlValidator.getInstance(); + + public KnowledgeBaseModule(Collection ontologyUris, Collection ontologyDataUris, + Set dataTsvs, Map curies) { + this.ontologyUris = new ImmutableSet.Builder().addAll(ontologyUris).build(); + this.ontologyDataUris = new ImmutableSet.Builder().addAll(ontologyDataUris).build(); + this.dataTsvs = new ImmutableSet.Builder().addAll(dataTsvs).build(); + this.curies = new ImmutableMap.Builder().putAll(curies).build(); + } + + @Override + protected void configure() { + bind(BMKnowledgeBase.class).to(BMKnowledgeBaseOWLAPIImpl.class).in(Singleton.class); + bind(OWLReasonerFactory.class).to(ElkReasonerFactory.class); + bind(CurieUtil.class).toInstance(new CurieUtil(curies)); + // bind(OWLOntologyManager.class).to(OWLOntologyManagerImpl.class); + // bind(ReadWriteLock.class).to(NoOpReadWriteLock.class); + // bind(OWLDataFactory.class).to(OWLDataFactoryImpl.class); + // bind(OWLOntologyManager.class).toInstance(OWLManager.createOWLOntologyManager()); + } + + @Provides + BMKnowledgeBaseOWLAPIImpl provideBMKnowledgeBaseOWLAPIImpl(@IndicatesOwlOntologies OWLOntology owlOntology, + @IndicatesOwlDataOntologies OWLOntology owlDataOntology, OWLReasonerFactory rf, CurieUtil curieUtil) { + BMKnowledgeBaseOWLAPIImpl bMKnowledgeBaseOWLAPIImpl = new BMKnowledgeBaseOWLAPIImpl(owlOntology, + owlDataOntology, rf, curieUtil); + return bMKnowledgeBaseOWLAPIImpl; + } + + OWLOntology loadOntology(OWLOntologyManager manager, String uri) throws OWLOntologyCreationException { + if (urlValdiator.isValid(uri)) { + return manager.loadOntology(IRI.create(uri)); + } else { + File file = new File(uri); + return manager.loadOntologyFromOntologyDocument(file); + } + } + + OWLOntology mergeOntologies(OWLOntologyManager manager, Collection uris) + throws OWLOntologyCreationException, FileNotFoundException, IOException { + OWLOntology ontology = manager.createOntology(); + for (String uri : uris) { + OWLOntology loadedOntology; + if (uri.endsWith(".gz")) { + GZIPInputStream gis = new GZIPInputStream(new FileInputStream(uri)); + BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8")); + loadedOntology = manager.loadOntologyFromOntologyDocument(gis); + } else { + loadedOntology = loadOntology(manager, uri); + } + manager.addAxioms(ontology, loadedOntology.getAxioms()); + } + return ontology; + } + + @Provides + @IndicatesOwlOntologies + @Singleton + OWLOntology getOwlOntologies(OWLOntologyManager manager) + throws OWLOntologyCreationException, FileNotFoundException, IOException { + return mergeOntologies(manager, ontologyUris); + } + + @Provides + @IndicatesOwlDataOntologies + @Singleton + OWLOntology getOwlDataOntologies(OWLOntologyManager manager) + throws OWLOntologyCreationException, FileNotFoundException, IOException { + return mergeOntologies(manager, ontologyDataUris); + } + + @Provides + @IndicatesDataTsvs + @Singleton + OWLOntology getDataTsvs(OWLOntologyManager manager) + throws OWLOntologyCreationException, FileNotFoundException, IOException { + return mergeOntologies(manager, dataTsvs); + } + + @Provides + MostInformativeCommonAncestorCalculator getMostInformativeCommonAncestorCalculator(BMKnowledgeBase knowledgeBase) { + return new MostInformativeCommonAncestorCalculatorImpl(knowledgeBase); + } + + @Provides + HypergeometricEnrichmentEngine getHypergeometricEnrichmentEngine(BMKnowledgeBase knowledgeBase) { + return new HypergeometricEnrichmentEngine(knowledgeBase); + } + + @Provides + BayesianNetworkProfileMatcher getBayesianNetworkProfileMatcher(BMKnowledgeBase knowledgeBase) { + return BayesianNetworkProfileMatcher.create(knowledgeBase); + } + + @Provides + ClassMatcher getClassMatcher(BMKnowledgeBase knowledgeBase) { + return new ClassMatcher(knowledgeBase); + } + +} diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/MatcherMapModule.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/MatcherMapModule.java similarity index 51% rename from owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/MatcherMapModule.java rename to owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/MatcherMapModule.java index 36c91d8..49c0e18 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/MatcherMapModule.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/MatcherMapModule.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.owlsim.compute.matcher; +package org.monarchinitiative.owlsim.services.modules; import java.io.IOException; import java.lang.reflect.Modifier; @@ -6,7 +6,7 @@ import java.util.Map; import org.apache.log4j.Logger; -import org.monarchinitiative.owlsim.compute.matcher.impl.JaccardSimilarityProfileMatcher; +import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import com.google.common.reflect.ClassPath; import com.google.inject.AbstractModule; @@ -15,7 +15,7 @@ public class MatcherMapModule extends AbstractModule { - private Logger LOG = Logger.getLogger(MatcherMapModule.class); + private Logger LOG = Logger.getLogger(MatcherMapModule.class); // The package containing ProfileMatcher implementations private static final String matcherPackage = "org.monarchinitiative.owlsim.compute.matcher.impl"; @@ -27,11 +27,14 @@ protected void configure() { /*** * Gets of map of ProfileMatchers. * - *

A convenience method to obviate maintaining hard coded instances of ProfileMatchers. - * matcherPackage is inspected for any non-abstract class that implements ProfileMatcher - * and a map is created between that ProfileMatcher's shortName and an instance of the matcher. + *

+ * A convenience method to obviate maintaining hard coded instances of + * ProfileMatchers. matcherPackage is inspected for any + * non-abstract class that implements ProfileMatcher and a map is created + * between that ProfileMatcher's shortName and an instance of the matcher. * - *

Note: The class must be injectable by Guice. + *

+ * Note: The class must be injectable by Guice. * * @param injector * @return A mapping of ProfileMatchers @@ -40,21 +43,19 @@ protected void configure() { @Provides Map getMatchers(Injector injector) throws IOException { ClassPath classpath = ClassPath.from(getClass().getClassLoader()); - LOG.info("Fetchig classes from: "+classpath.getClass()); - LOG.info("top level of :"+matcherPackage); + LOG.info("Fetchig classes from: " + classpath.getClass()); + LOG.info("top level of :" + matcherPackage); Map matcherMap = new HashMap<>(); - for (ClassPath.ClassInfo info: classpath.getTopLevelClasses(matcherPackage)) { - Class clazz = info.load(); - LOG.info(" Adding: "+info + " class: "+clazz + " ISAB:"+ - Modifier.isAbstract(clazz.getModifiers())); - if (!Modifier.isAbstract(clazz.getModifiers()) && - ProfileMatcher.class.isAssignableFrom(info.load())) { + for (ClassPath.ClassInfo info : classpath.getTopLevelClasses(matcherPackage)) { + Class clazz = info.load(); + LOG.info(" Adding: " + info + " class: " + clazz + " ISAB:" + Modifier.isAbstract(clazz.getModifiers())); + if (!Modifier.isAbstract(clazz.getModifiers()) && ProfileMatcher.class.isAssignableFrom(info.load())) { ProfileMatcher matcher = (ProfileMatcher) injector.getInstance(clazz); matcherMap.put(matcher.getShortName(), matcher); } } - + return matcherMap; } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/runner/MatcherModule.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/MatcherModule.java similarity index 62% rename from owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/runner/MatcherModule.java rename to owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/MatcherModule.java index 1106f92..a9ec8b6 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/runner/MatcherModule.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/MatcherModule.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.owlsim.compute.runner; +package org.monarchinitiative.owlsim.services.modules; import java.io.IOException; import java.lang.reflect.Modifier; @@ -9,10 +9,8 @@ import com.google.common.reflect.ClassPath; import com.google.inject.AbstractModule; -import com.google.inject.Injector; import com.google.inject.Provides; - // CLONED FROM SERVICES public class MatcherModule extends AbstractModule { @@ -26,28 +24,30 @@ protected void configure() { /*** * Gets of map of ProfileMatchers. * - *

A convenience method to obviate maintaining hard coded instances of ProfileMatchers. - * matcherPackage is inspected for any non-abstract class that implements ProfileMatcher - * and a map is created between that ProfileMatcher's shortName and an instance of the matcher. + *

+ * A convenience method to obviate maintaining hard coded instances of + * ProfileMatchers. matcherPackage is inspected for any + * non-abstract class that implements ProfileMatcher and a map is created + * between that ProfileMatcher's shortName and an instance of the matcher. * - *

Note: The class must be injectable by Guice. + *

+ * Note: The class must be injectable by Guice. * * @param injector * @return A mapping of ProfileMatchers * @throws IOException - * @throws IllegalAccessException - * @throws InstantiationException + * @throws IllegalAccessException + * @throws InstantiationException */ @Provides Map getMatchers() throws IOException, InstantiationException, IllegalAccessException { ClassPath classpath = ClassPath.from(getClass().getClassLoader()); Map matcherMap = new HashMap<>(); - for (ClassPath.ClassInfo info: classpath.getTopLevelClasses(matcherPackage)) { + for (ClassPath.ClassInfo info : classpath.getTopLevelClasses(matcherPackage)) { Class clazz = info.load(); - if (!Modifier.isAbstract(clazz.getModifiers()) && - ProfileMatcher.class.isAssignableFrom(info.load())) { - + if (!Modifier.isAbstract(clazz.getModifiers()) && ProfileMatcher.class.isAssignableFrom(info.load())) { + ProfileMatcher matcher = (ProfileMatcher) clazz.newInstance(); matcherMap.put(matcher.getShortName(), matcher); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesDataTsvs.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesDataTsvs.java similarity index 65% rename from owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesDataTsvs.java rename to owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesDataTsvs.java index c576ce6..b1d0def 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesDataTsvs.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesDataTsvs.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.owlsim.kb.bindings; +package org.monarchinitiative.owlsim.services.modules.bindings; import java.lang.annotation.Retention; import java.lang.annotation.Target; @@ -9,5 +9,8 @@ import static java.lang.annotation.ElementType.FIELD; import static java.lang.annotation.ElementType.METHOD; -@BindingAnnotation @Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME) -public @interface IndicatesDataTsvs {} +@BindingAnnotation +@Target({ FIELD, PARAMETER, METHOD }) +@Retention(RUNTIME) +public @interface IndicatesDataTsvs { +} diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesOwlDataOntologies.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesOwlDataOntologies.java similarity index 64% rename from owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesOwlDataOntologies.java rename to owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesOwlDataOntologies.java index 1993f18..9de8cc0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesOwlDataOntologies.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesOwlDataOntologies.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.owlsim.kb.bindings; +package org.monarchinitiative.owlsim.services.modules.bindings; import java.lang.annotation.Retention; import java.lang.annotation.Target; @@ -9,5 +9,8 @@ import static java.lang.annotation.ElementType.FIELD; import static java.lang.annotation.ElementType.METHOD; -@BindingAnnotation @Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME) -public @interface IndicatesOwlDataOntologies {} +@BindingAnnotation +@Target({ FIELD, PARAMETER, METHOD }) +@Retention(RUNTIME) +public @interface IndicatesOwlDataOntologies { +} diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesOwlOntologies.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesOwlOntologies.java similarity index 65% rename from owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesOwlOntologies.java rename to owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesOwlOntologies.java index acb815f..97a432e 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/bindings/IndicatesOwlOntologies.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/bindings/IndicatesOwlOntologies.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.owlsim.kb.bindings; +package org.monarchinitiative.owlsim.services.modules.bindings; import java.lang.annotation.Retention; import java.lang.annotation.Target; @@ -9,5 +9,8 @@ import static java.lang.annotation.ElementType.FIELD; import static java.lang.annotation.ElementType.METHOD; -@BindingAnnotation @Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME) -public @interface IndicatesOwlOntologies {} +@BindingAnnotation +@Target({ FIELD, PARAMETER, METHOD }) +@Retention(RUNTIME) +public @interface IndicatesOwlOntologies { +}