From 58e533b595596aa68ea51d3d4460a5f8f54ba471 Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Thu, 2 Mar 2017 00:19:43 -0500 Subject: [PATCH 1/8] first pass at frequency-awareness, issue #56 --- ...aiveBayesFixedWeightTwoStateProfileMatcher.java | 223 +++++++++++++--- .../owlsim/kb/BMKnowledgeBase.java | 37 ++- .../owlsim/kb/ewah/EWAHKnowledgeBaseStore.java | 3 +- .../owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java | 67 ++++- .../matcher/AbstractProfileMatcherTest.java | 23 +- ...BayesFixedWeightTwoStateProfileMatcherTest.java | 53 ++++ .../src/test/resources/simple-pheno-with-freqs.owl | 282 +++++++++++++++++++++ 7 files changed, 628 insertions(+), 60 deletions(-) create mode 100644 owlsim-core/src/test/resources/simple-pheno-with-freqs.owl diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java index 9e5d464..31194ed 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java @@ -1,7 +1,10 @@ package org.monarchinitiative.owlsim.compute.matcher.impl; +import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import javax.inject.Inject; @@ -36,18 +39,44 @@ public class NaiveBayesFixedWeightTwoStateProfileMatcher extends AbstractProfileMatcher implements ProfileMatcher { private Logger LOG = Logger.getLogger(NaiveBayesFixedWeightTwoStateProfileMatcher.class); + + // set this to more than 1 for frequency-aware; + // a value of 0 defaults to frequency-unaware + private int kLeastFrequent = 0; + @Deprecated private double defaultFalsePositiveRate = 0.002; // alpha 
@Deprecated private double defaultFalseNegativeRate = 0.10; // beta + + /** + * A tuple of (weight, Classes) + * + */ + private class WeightedTypesBM { + // bitmap representing a set of classes assumed to be on + final EWAHCompressedBitmap typesBM; + + // probability of the state in which all such classes are on + final double weight; + + public WeightedTypesBM(EWAHCompressedBitmap typesBM, Double weight) { + super(); + this.typesBM = typesBM; + this.weight = weight; + } + } // TODO - replace when tetsing is over //private double[] defaultFalsePositiveRateArr = new double[]{0.002}; //private double[] defaultFalseNegativeRateArr = new double[] {0.10}; private double[] defaultFalsePositiveRateArr = new double[]{1e-10,0.0005,0.001,0.005,0.01}; private double[] defaultFalseNegativeRateArr = new double[] {1e-10,0.005,0.01,0.05,0.1,0.2,0.4,0.8,0.9}; + + // maps a pair of (Individual, InterpretationIndex) to a set of inferred (self, direct, indirect) types + private Map> individualToInterpretationToTypesBM = new HashMap<>(); @Inject protected NaiveBayesFixedWeightTwoStateProfileMatcher(BMKnowledgeBase kb) { @@ -70,8 +99,31 @@ public boolean isUseBlanket() { public String getShortName() { return "naive-bayes-fixed-weight-two-state"; } + + /** + * @return the kLeastFrequent + */ + public int getkLeastFrequent() { + return kLeastFrequent; + } + + /** + * The default for this should be 0. When 0, the behavior is as for frequency unaware + * (i.e. 
every instance-class association with frequency info will be treated as normal instance-class) + * + * When k>1, will make use of the k least frequent annotations in probabilistic calculation + * + * @param kLeastFrequent the kLeastFrequent to set + */ + public void setkLeastFrequent(int kLeastFrequent) { + // reset cache + individualToInterpretationToTypesBM = new HashMap<>(); + this.kLeastFrequent = kLeastFrequent; + } + + /** * Extends the query profile - for every node c, all the direct parents of c are in * the query profile, then add c to the query profile. * @@ -132,50 +184,82 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { double pvector[] = new double[indIds.size()]; String indArr[] = new String[indIds.size()]; int n=0; + + for (String itemId : indIds) { - EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); - // any node which has an off query parent is discounted - targetProfileBM = targetProfileBM.and(queryBlanketProfileBM); - LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); - - - // two state model. 
- // mapping to Bauer et al: these correspond to mxy1, x=Q, y=H/T - int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); - int numInQueryAndNOTInTarget = queryProfileBM.andNotCardinality(targetProfileBM); - int numNOTInQueryAndInTarget = targetProfileBM.andNotCardinality(queryProfileBM); - int numNOTInQueryAndNOTInTarget = - numClassesConsidered - (numInQueryAndInTarget + numInQueryAndNOTInTarget + numNOTInQueryAndInTarget); - - double p = 0.0; - // TODO: optimize this - // integrate over a Dirichlet prior for alpha & beta, rather than gridsearch - // this can be done closed-form - for (double fnr : defaultFalseNegativeRateArr) { - for (double fpr : defaultFalsePositiveRateArr) { - - double pQ1T1 = Math.pow(1-fnr, numInQueryAndInTarget); - double pQ0T1 = Math.pow(fnr, numNOTInQueryAndInTarget); - double pQ1T0 = Math.pow(fpr, numInQueryAndNOTInTarget); - double pQ0T0 = Math.pow(1-fpr, numNOTInQueryAndNOTInTarget); - - - - //LOG.debug("pQ1T1 = "+(1-fnr)+" ^ "+ numInQueryAndInTarget+" = "+pQ1T1); - //LOG.debug("pQ0T1 = "+(fnr)+" ^ "+ numNOTInQueryAndInTarget+" = "+pQ0T1); - //LOG.debug("pQ1T0 = "+(fpr)+" ^ "+ numInQueryAndNOTInTarget+" = "+pQ1T0); - //LOG.debug("pQ0T0 = "+(1-fpr)+" ^ "+ numNOTInQueryAndNOTInTarget+" = "+pQ0T0); - //TODO: optimization. 
We can precalculate the logs for different integers - p += - Math.exp(Math.log(pQ1T1) + Math.log(pQ0T1) + Math.log(pQ1T0) + Math.log(pQ0T0)); - - } - } - pvector[n] = p; - indArr[n] = itemId; - sumOfProbs += p; + + int effectiveK = kLeastFrequent; + int twoToTheK = (int) Math.pow(2, kLeastFrequent); + int numWeightedTypes = knowledgeBase.getDirectWeightedTypes(itemId).size(); + if (numWeightedTypes < kLeastFrequent) { + twoToTheK = (int) Math.pow(2, numWeightedTypes); + effectiveK = numWeightedTypes; + } + + double cumulativePr = 0; + for (int comboIndex = 0; comboIndex < twoToTheK; comboIndex++) { + + Double comboPr = null; + EWAHCompressedBitmap targetProfileBM; + if (kLeastFrequent == 0) { + targetProfileBM = knowledgeBase.getTypesBM(itemId); + } + else { + WeightedTypesBM wtbm = getTypesFrequencyAware(itemId, comboIndex, effectiveK); + comboPr = wtbm.weight; + targetProfileBM = wtbm.typesBM; + } + + // any node which has an off query parent is discounted + targetProfileBM = targetProfileBM.and(queryBlanketProfileBM); + LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); + + + // two state model. 
+ // mapping to Bauer et al: these correspond to mxy1, x=Q, y=H/T + int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); + int numInQueryAndNOTInTarget = queryProfileBM.andNotCardinality(targetProfileBM); + int numNOTInQueryAndInTarget = targetProfileBM.andNotCardinality(queryProfileBM); + int numNOTInQueryAndNOTInTarget = + numClassesConsidered - (numInQueryAndInTarget + numInQueryAndNOTInTarget + numNOTInQueryAndInTarget); + + double p = 0.0; + // TODO: optimize this + // integrate over a Dirichlet prior for alpha & beta, rather than gridsearch + // this can be done closed-form + for (double fnr : defaultFalseNegativeRateArr) { + for (double fpr : defaultFalsePositiveRateArr) { + + double pQ1T1 = Math.pow(1-fnr, numInQueryAndInTarget); + double pQ0T1 = Math.pow(fnr, numNOTInQueryAndInTarget); + double pQ1T0 = Math.pow(fpr, numInQueryAndNOTInTarget); + double pQ0T0 = Math.pow(1-fpr, numNOTInQueryAndNOTInTarget); + + + + //LOG.debug("pQ1T1 = "+(1-fnr)+" ^ "+ numInQueryAndInTarget+" = "+pQ1T1); + //LOG.debug("pQ0T1 = "+(fnr)+" ^ "+ numNOTInQueryAndInTarget+" = "+pQ0T1); + //LOG.debug("pQ1T0 = "+(fpr)+" ^ "+ numInQueryAndNOTInTarget+" = "+pQ1T0); + //LOG.debug("pQ0T0 = "+(1-fpr)+" ^ "+ numNOTInQueryAndNOTInTarget+" = "+pQ0T0); + //TODO: optimization. We can precalculate the logs for different integers + p += + Math.exp(Math.log(pQ1T1) + Math.log(pQ0T1) + Math.log(pQ1T0) + Math.log(pQ0T0)); + + } + } + + if (comboPr != null) { + p *= comboPr; + } + cumulativePr += p; + } + pvector[n] = cumulativePr; + indArr[n] = itemId; + + sumOfProbs += cumulativePr; n++; - LOG.debug("p for "+itemId+" = "+p); + LOG.debug("p for "+itemId+" = "+cumulativePr); + } for (n = 0; n()); + } + Map m = individualToInterpretationToTypesBM.get(iix); + if (m.containsKey(n)) { + // use cached value + return m.get(n); + } + + // default direct type map. 
+ // note that associations with frequency annotations are included here alongside + // normal associations + EWAHCompressedBitmap dtmap = knowledgeBase.getDirectTypesBM(itemId); + + // associations with frequency info + // map is from ClassIndex -> Weight + Map wmap = knowledgeBase.getDirectWeightedTypes(itemId); + + // sort with least frequent first + List sortedTypeIndices = new ArrayList<>(wmap.keySet()); + sortedTypeIndices.sort( (Integer i, Integer j) -> wmap.get(i) - wmap.get(j)); + + EWAHCompressedBitmap mask = new EWAHCompressedBitmap(); + double pr = 1.0; + for (int i=0; i< effectiveK; i++) { + Integer iClassIx = sortedTypeIndices.get(i); + Double w = wmap.get(iClassIx) / 100.0; + //LOG.info("Class "+iClassIx +" which is "+i+"-least frequent has weight "+w+" for individual "+itemId+" in combo "+n); + if ( (n >> i) % 2 == 0) { + mask.set(iClassIx); + pr *= 1-w; + } + else { + pr *= w; + } + } + //LOG.info("Instance "+itemId+" in combo "+n+" has Pr = "+pr); + + EWAHCompressedBitmap dtmapMasked = dtmap.xor(mask); + EWAHCompressedBitmap inferredTypesBM = knowledgeBase.getSuperClassesBM(dtmapMasked); + WeightedTypesBM wtbm = new WeightedTypesBM(inferredTypesBM, pr); + m.put(n, wtbm); + return wtbm; + } /** * @return probability a query class is a false positive diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java index 650f0bf..65bbfbd 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/BMKnowledgeBase.java @@ -144,12 +144,13 @@ public EWAHCompressedBitmap getDirectSubClassesBM(String classId); - /** - * @param classIds - * @return union of all superclasses (direct and indirect and equivalent) as a bitmap - */ - public EWAHCompressedBitmap getSubClassesBM(Set classIds); + /** + * @param classIds + * @return union of all superclasses (direct 
and indirect and equivalent) as a bitmap + */ + public EWAHCompressedBitmap getSubClassesBM(Set classIds); + /** * @param classIds * @return union of all direct subclasses as a bitmap @@ -179,7 +180,13 @@ * @return union of all superclasses as a bitmap */ public EWAHCompressedBitmap getSuperClassesBM(Set classIds); - + + /** + * @param classesBM + * @return union of all superclasses (direct and indirect and equivalent) as a bitmap + */ + public EWAHCompressedBitmap getSuperClassesBM(EWAHCompressedBitmap classesBM); + /** * @param classIndex * @return superclasses (direct and indirect and equivalent) of classId as bitmap @@ -199,12 +206,18 @@ */ public EWAHCompressedBitmap getTypesBM(String id); - /** - * @param id - an individual - * @return direct types as bitmap - */ - public EWAHCompressedBitmap getDirectTypesBM(String id); - + /** + * @param id - an individual + * @return direct types as bitmap + */ + public EWAHCompressedBitmap getDirectTypesBM(String id); + + /** + * @param id - an individual + * @return map between Type class index and 0 getDirectWeightedTypes(String id); + /** * @param itemId * @return bitmap representation of all (direct and indirect) classes known to be NOT instantiated diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/ewah/EWAHKnowledgeBaseStore.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/ewah/EWAHKnowledgeBaseStore.java index 81c0a2c..2de3213 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/ewah/EWAHKnowledgeBaseStore.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/ewah/EWAHKnowledgeBaseStore.java @@ -1,5 +1,6 @@ package org.monarchinitiative.owlsim.kb.ewah; +import java.util.Collection; import java.util.Set; import com.googlecode.javaewah.EWAHCompressedBitmap; @@ -63,7 +64,7 @@ public EWAHCompressedBitmap getSuperClasses(int clsIndex) { return storedSuperClasses[clsIndex]; } - public EWAHCompressedBitmap getClasses(Set clsIndices) { + public 
EWAHCompressedBitmap getClasses(Collection clsIndices) { EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); for (int i : clsIndices) { bm.set(i); diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java index 42e2ba8..b3b9fb4 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java @@ -26,7 +26,9 @@ import org.prefixcommons.CurieUtil; import org.semanticweb.owlapi.model.AxiomType; import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotation; import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; import org.semanticweb.owlapi.model.OWLAnnotationValue; import org.semanticweb.owlapi.model.OWLClass; import org.semanticweb.owlapi.model.OWLClassAssertionAxiom; @@ -106,6 +108,9 @@ private Map>> propertyValueMapMap; Map> opposingClassMap = new HashMap>(); + + Map> individualToWeightedDirectTypeMap = new HashMap<>(); + private int[] individualCountPerClassArray; @@ -511,6 +516,9 @@ private void storeInferences() { ontoEWAHStore.setDirectIndividuals(clsIndex, individualInts); } + + // populate frequency-awareness map + individualToWeightedDirectTypeMap = new HashMap<>(); for (OWLNamedIndividual i : individualsInSignature) { int individualIndex = getIndex(i); // LOG.info("String inferences for "+i+" --> " +individualIndex); @@ -518,8 +526,49 @@ private void storeInferences() { getIntegersForClassSet(owlReasoner.getTypes(i, true))); ontoEWAHStore.setTypes(individualIndex, getIntegersForClassSet(owlReasoner.getTypes(i, false))); + + // TODO - ensure robust for equivalent individuals + Map wmap = new HashMap<>(); + individualToWeightedDirectTypeMap.put(individualIndex, wmap); + for 
(OWLClassAssertionAxiom caax : owlOntology.getClassAssertionAxioms(i)) { + int cix; + + // only associations to named classes + if (caax.getClassExpression().isAnonymous()) { + continue; + } + cix = getIndex(caax.getClassExpression().asOWLClass()); + + // we use reification to store probability + for (OWLAnnotation ann : caax.getAnnotations()) { + OWLAnnotationProperty prop = ann.getProperty(); + OWLAnnotationValue v = ann.getValue(); + if (v instanceof OWLLiteral) { + OWLLiteral lv = v.asLiteral().get(); + Double pr = null; + if (lv.isDouble()) { + pr = lv.parseDouble(); + } + if (lv.isFloat()) { + pr = (double) lv.parseFloat(); + } + if (pr != null) { + // TODO : decide on a vocabulary + if (prop.getIRI().toString().contains("probability")) { + wmap.put(cix, (int) (pr * 100)); + } + } + if (lv.isInteger()) { + if (prop.getIRI().toString().contains("frequenct")) { + wmap.put(cix, lv.parseInteger()); + } + + } + } + } + } - // Treat CLassAssertion( ComplementOf(c) i) as a negative assertion + // Treat ClassAssertion( ComplementOf(c) i) as a negative assertion Set ncs = new HashSet(); Set ncsDirect = new HashSet(); for (OWLClassAssertionAxiom cx : owlOntology.getClassAssertionAxioms(i)) { @@ -813,6 +862,13 @@ protected EWAHCompressedBitmap getSuperClassesBMByOWLClassSet(Set clsS return ontoEWAHStore.getSuperClasses(classIndices); } + /* (non-Javadoc) + * @see org.monarchinitiative.owlsim.kb.BMKnowledgeBase#getSuperClassesBM(com.googlecode.javaewah.EWAHCompressedBitmap) + */ + public EWAHCompressedBitmap getSuperClassesBM(EWAHCompressedBitmap classesBM) { + return ontoEWAHStore.getSuperClasses(new HashSet<>(classesBM.getPositions())); + } + public EWAHCompressedBitmap getSuperClassesBM(String cid) { return ontoEWAHStore.getSuperClasses(getClassIndex(cid)); } @@ -950,6 +1006,15 @@ public EWAHCompressedBitmap getTypesBM(String id) { public EWAHCompressedBitmap getTypesBM(int individualIndex) { return ontoEWAHStore.getTypes(individualIndex); } + + /* (non-Javadoc) + * 
@see org.monarchinitiative.owlsim.kb.BMKnowledgeBase#getDirectWeightedTypes(java.lang.String) + */ + public Map getDirectWeightedTypes(String id) { + int iix = getIndividualIndex(id); + return individualToWeightedDirectTypeMap.get(iix); + } + /** * @param id diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java index 9318ede..041b20a 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java @@ -109,10 +109,13 @@ protected void load(String fn, String... ontfns) throws OWLOntologyCreationExcep kb = loader.createKnowledgeBaseInterface(); } - protected void loadSimplePhenoWithNegation() throws OWLOntologyCreationException { - load("simple-pheno-with-negation.owl"); - - } + protected void loadSimplePhenoWithNegation() throws OWLOntologyCreationException { + load("simple-pheno-with-negation.owl"); + } + + protected void loadSimplePhenoWithFrequency() throws OWLOntologyCreationException { + load("simple-pheno-with-freqs.owl"); + } @Deprecated protected void search(ProfileMatcher profileMatcher, @@ -188,5 +191,17 @@ protected boolean isRankedLast(String matchId, MatchSet matchSet) { LOG.info("Rank of match "+matchId+" is "+matchRank+" which is last or joint last"); return true; } + + protected boolean isRankedAt(String matchId, MatchSet matchSet, int expectedRank) { + int matchRank = 0; + for (Match m : matchSet.getMatches()) { + int rank = m.getRank(); + + if (m.getMatchId().equals(matchId)) { + return (rank == expectedRank); + } + } + return false; + } } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java 
b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java index b83ab08..d40c7d3 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java @@ -80,6 +80,59 @@ public void testExamplePositiveOnly() throws Exception { } + @Test + public void testFrequencyWare() throws Exception { + loadSimplePhenoWithFrequency(); + //LOG.info("INDS="+kb.getIndividualIdsInSignature()); + ProfileMatcher profileMatcher = createProfileMatcher(kb); + ((NaiveBayesFixedWeightTwoStateProfileMatcher) profileMatcher).setkLeastFrequent(3); + + Assert.assertTrue(kb.getIndividualIdsInSignature().size() > 0); + + int nOk = 0; + for (String i : kb.getIndividualIdsInSignature()) { + + ProfileQuery pq = profileMatcher.createPositiveProfileQuery(i); + TestQuery tq = new TestQuery(pq, i, 4); // self should always be ranked first + String fn = i.replaceAll(".*/", ""); + eval.writeJsonTo("target/naivebfreq-test-results-"+fn+".json"); + Assert.assertTrue(eval.evaluateTestQuery(profileMatcher, tq)); + + if (i.equals("http://x.org/ind-dec-all")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-no-brain-phenotype", tq.matchSet)); + nOk++; + } + if (i.equals("http://x.org/ind-big-heart-small-brain")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-big-femur", tq.matchSet)); + + // targets with frequency + Assert.assertTrue(isRankedAt("http://x.org/fplus-big-heart-small-brain", tq.matchSet, 2)); + Assert.assertTrue(isRankedAt("http://x.org/f0-big-heart-small-brain", tq.matchSet, 3)); + Assert.assertTrue(isRankedAt("http://x.org/fminus-big-heart-small-brain", tq.matchSet, 4)); + nOk++; + } + if (i.equals("http://x.org/ind-small-heart-big-brain")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-big-femur", tq.matchSet)); + + // 
targets with frequency + Assert.assertTrue(isRankedAt("http://x.org/fminus-big-heart-small-brain", tq.matchSet, 2)); + Assert.assertTrue(isRankedAt("http://x.org/f0-big-heart-small-brain", tq.matchSet, 3)); + Assert.assertTrue(isRankedAt("http://x.org/fplus-big-heart-small-brain", tq.matchSet, 4)); + nOk++; + } + if (i.equals("http://x.org/ind-unstated-phenotype")) { + //Assert.assertTrue(isRankedLast("http://x.org/ind-no-phenotype", tq.matchSet)); + //temporarily removed the no-phenotype individual from test; auto-pass this for now + nOk++; + } + if (i.equals("http://x.org/ind-no-brain-phenotype")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-inc-all", tq.matchSet)); + nOk++; + } + + } + Assert.assertEquals(5, nOk); + } } diff --git a/owlsim-core/src/test/resources/simple-pheno-with-freqs.owl b/owlsim-core/src/test/resources/simple-pheno-with-freqs.owl new file mode 100644 index 0000000..2b5850a --- /dev/null +++ b/owlsim-core/src/test/resources/simple-pheno-with-freqs.owl @@ -0,0 +1,282 @@ +Prefix: : +Prefix: dc: +Prefix: owl: +Prefix: rdf: +Prefix: rdfs: +Prefix: xml: +Prefix: xsd: +Prefix: x: + + +Ontology: + + +AnnotationProperty: x:probability + + +Datatype: xsd:double + + +Class: absent-heart + + SubClassOf: + hypoplastic-heart + + +Class: bone-length + + SubClassOf: + bone-morphology + + +Class: bone-morphology + + SubClassOf: + skeletal-phenotype + + +Class: bone-shape + + SubClassOf: + bone-morphology + + +Class: brain-morphology + + SubClassOf: + neuro-phenotype + + +Class: brain-shape + + SubClassOf: + brain-morphology + + +Class: brain-size + + SubClassOf: + brain-morphology + + +Class: circulatory-phenotype + + SubClassOf: + phenotype + + +Class: dec-bone-length + + SubClassOf: + bone-length + + + +Class: dec-brain-size + + SubClassOf: + brain-size + + + +Class: dec-femur-length + + SubClassOf: + dec-bone-length + + + +Class: heart-morphology + + SubClassOf: + circulatory-phenotype + + +Class: heart-shape + + SubClassOf: + heart-morphology + + 
+Class: heart-size + + SubClassOf: + heart-morphology + + +Class: hyperplastic-heart + + SubClassOf: + heart-size + + + +Class: hypoplastic-heart + + SubClassOf: + heart-size + + + +Class: inc-bone-length + + SubClassOf: + bone-length + + + +Class: inc-brain-size + + SubClassOf: + brain-size + + + +Class: inc-femur-length + + SubClassOf: + inc-bone-length + + + +Class: neuro-phenotype + + SubClassOf: + phenotype + + +Class: phenotype + + +Class: skeletal-phenotype + + SubClassOf: + phenotype + + +Individual: ind-big-femur + + Types: + inc-femur-length + + +Individual: ind-big-heart-big-brain + + Types: + hyperplastic-heart, + inc-brain-size + +Individual: fplus-big-heart-small-brain + + Types: + Annotations: x:probability "0.75"^^xsd:double dec-brain-size, + Annotations: x:probability "0.25"^^xsd:double inc-brain-size, + Annotations: x:probability "0.75"^^xsd:double hyperplastic-heart, + Annotations: x:probability "0.25"^^xsd:double hypoplastic-heart + +Individual: f0-big-heart-small-brain + + Types: + Annotations: x:probability "0.5"^^xsd:double dec-brain-size, + Annotations: x:probability "0.5"^^xsd:double inc-brain-size, + Annotations: x:probability "0.5"^^xsd:double hyperplastic-heart, + Annotations: x:probability "0.5"^^xsd:double hypoplastic-heart + +Individual: fminus-big-heart-small-brain + + Types: + Annotations: x:probability "0.25"^^xsd:double dec-brain-size, + Annotations: x:probability "0.75"^^xsd:double inc-brain-size, + Annotations: x:probability "0.25"^^xsd:double hyperplastic-heart, + Annotations: x:probability "0.75"^^xsd:double hypoplastic-heart + + +Individual: ind-big-heart-small-brain + + Types: dec-brain-size, + hyperplastic-heart + + +Individual: ind-bone + + Types: + bone-morphology + + +Individual: ind-brain + + Types: + brain-morphology + + +Individual: ind-dec-all + + Types: + dec-bone-length, + dec-brain-size, + hypoplastic-heart + + +Individual: ind-heart-bone + + Types: + bone-morphology, + heart-morphology + + +Individual: 
ind-heart-brain + + Types: + brain-morphology, + heart-morphology + + +Individual: ind-heart-brain-bone + + Types: + bone-morphology, + brain-morphology, + heart-morphology + + +Individual: ind-inc-all + + Types: + hyperplastic-heart, + inc-bone-length, + inc-brain-size + + +Individual: ind-no-brain-phenotype + + Types: + phenotype, + not (brain-morphology) + + +Individual: ind-small-femur + + Types: + dec-femur-length + + +Individual: ind-small-heart-big-brain + + Types: + hypoplastic-heart, + inc-brain-size + + +Individual: ind-small-heart-small-brain + + Types: + dec-brain-size, + hypoplastic-heart + + +Individual: ind-unstated-phenotype + + Types: + phenotype + + From b9c84c66db4a08b507cea21268d1d8ac6d04ab1d Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Thu, 2 Mar 2017 12:12:20 -0800 Subject: [PATCH 2/8] fixing typo in property name --- .../org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java index b3b9fb4..cd3f3ef 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/kb/impl/BMKnowledgeBaseOWLAPIImpl.java @@ -559,7 +559,7 @@ private void storeInferences() { } } if (lv.isInteger()) { - if (prop.getIRI().toString().contains("frequenct")) { + if (prop.getIRI().toString().contains("frequency")) { wmap.put(cix, lv.parseInteger()); } From 2c57dc76d27c09eb07029872fac1e4905a309ce3 Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Thu, 2 Mar 2017 12:12:30 -0800 Subject: [PATCH 3/8] reducing logging --- .../matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java | 4 ++-- .../compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java | 2 +- 2 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java index afcf28c..7f13bc0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java @@ -213,7 +213,7 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { // any node which has an off query parent is discounted //EWAHCompressedBitmap maskedTargetProfileBM = nodesHtBM.and(queryBlanketProfileBM); - LOG.info("TARGET PROFILE for "+itemId+" "+nodesHtBM); + //LOG.info("TARGET PROFILE for "+itemId+" "+nodesHtBM); // cumulative log-probability double logp = 0.0; @@ -398,7 +398,7 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { indArr[n] = itemId; sumOfProbs += p; n++; - LOG.info("logp for "+itemId+" = "+logp+" sumOfLogProbs="+sumOfProbs); + //LOG.info("logp for "+itemId+" = "+logp+" sumOfLogProbs="+sumOfProbs); } for (n = 0; n Date: Thu, 2 Mar 2017 16:26:49 -0800 Subject: [PATCH 5/8] [#58] first draft --- Dockerfile | 35 +++++ README.md | 12 +- configuration-samples/configuration-all.yaml | 197 +++++++++++++++++++++++++++ 3 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 Dockerfile create mode 100644 configuration-samples/configuration-all.yaml diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..77ce76d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,35 @@ +# +# Oracle Java 8 Dockerfile +# +# https://github.com/dockerfile/java +# https://github.com/dockerfile/java/tree/master/oracle-java8 +# + +# Pull base image. 
+FROM ubuntu:16.04 + +RUN apt-get -y update && apt-get install -y software-properties-common python-software-properties + +# Install Java. +RUN \ + echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | debconf-set-selections && \ + add-apt-repository -y ppa:webupd8team/java && \ + apt-get update && \ + apt-get install -y oracle-java8-installer && \ + rm -rf /var/lib/apt/lists/* && \ + rm -rf /var/cache/oracle-jdk8-installer + +# Define commonly used JAVA_HOME variable +ENV JAVA_HOME /usr/lib/jvm/java-8-oracle + +# Define working directory. +WORKDIR /data +ADD owlsim-services/target/owlsim-services-3.0-SNAPSHOT.jar /data/ +ADD configuration-samples/configuration-all.yaml /data/configuration.yaml + +ADD http://ci.monarchinitiative.org/view/dev/job/create-owlsim-files-on-dev/lastSuccessfulBuild/artifact/server/all.owl /data/ +ADD https://raw.githubusercontent.com/monarch-initiative/monarch-owlsim-data/master/data/Homo_sapiens/Hs_disease_phenotype.txt /data/ + +CMD java -jar /data/owlsim-services-3.0-SNAPSHOT.jar server /data/configuration.yaml + +EXPOSE 8080 diff --git a/README.md b/README.md index 41485c8..f80f5bd 100644 --- a/README.md +++ b/README.md @@ -56,8 +56,18 @@ Paths: or for some versions of dropwizard: * http://localhost:8080/api/docs/ - * http://localhost:8080/api/match/matchers + * http://localhost:8080/api/match/matchers Example query using default config: http://localhost:8080/api/match/jaccard?id=X:heart-morphology&id=X:brain-morphology + +## Build with Docker + +Run these commands from the root directory: + +``` +mvn package +docker build -t owlsim . 
+docker run -p 8080:8080 owlsim +``` diff --git a/configuration-samples/configuration-all.yaml b/configuration-samples/configuration-all.yaml new file mode 100644 index 0000000..8ea11ad --- /dev/null +++ b/configuration-samples/configuration-all.yaml @@ -0,0 +1,197 @@ +ontologyUris: + - /data/all.owl +ontologyDataUris: [] +dataTsvs: [] +curies: + # Skolemize Blank Nodes + # overwrite iri fragment with '' for unresolvable bnodes + '_': 'https://monarchinitiative.org/.well-known/genid/' + + # Monarch-specific + '': 'https://monarchinitiative.org/' + 'MONARCH': 'https://monarchinitiative.org/MONARCH_' + + 'MonarchData': 'http://data.monarchinitiative.org/ttl/' + 'MonarchArchive': 'http://archive.monarchinitiative.org/ttl/' + + # other semantic-web items + 'Annotation': 'http://www.w3.org/ns/oa#Annotation' # FIXME - i don't think we're using this + 'dc': 'http://purl.org/dc/elements/1.1/' + 'foaf': 'http://xmlns.com/foaf/0.1/' + + # ontologies + # [y] indicates those that the monarch team contributes to + 'AQTLTrait': 'http://identifiers.org/animalqtltrait/' # FIXME - should get integrated into Upheno + 'BFO': 'http://purl.obolibrary.org/obo/BFO_' # BFO: Basic Formal Ontology + 'CHEBI' : 'http://purl.obolibrary.org/obo/CHEBI_' # ChEBI: Chemicals of Biological Interest + 'CHR' : 'http://purl.obolibrary.org/obo/CHR_' # CHR: Chromosome Ontology + 'CL' : 'http://purl.obolibrary.org/obo/CL_' # CL: Cell Ontology (cell types) [y] + 'CLO' : 'http://purl.obolibrary.org/obo/CLO_' # CLO: Cell Line Ontology [y] + 'CMO' : 'http://purl.obolibrary.org/obo/CMO_' # CMO: Clinical Measurements Ontology + 'DATA' : 'http://edamontology.org/data_' # EDAM: Data and Methods Ontology (data artifacts) + 'DC' : 'http://purl.obolibrary.org/obo/DC_' # TODO + 'DECIPHER' : 'http://purl.obolibrary.org/obo/DECIPHER_' # DECIPHER: Deciphering Developmental Disease + 'DOID': 'http://purl.obolibrary.org/obo/DOID_' # DOID: Human Disease Ontology [y] + 'ECO': 'http://purl.obolibrary.org/obo/ECO_' # ECO: 
Evidence Code Ontology [y] + 'EFO' : 'http://www.ebi.ac.uk/efo/EFO_' # EFO: Experimental Factor Ontology (all kinds of stuff) [y] + 'ENVO' : 'http://purl.obolibrary.org/obo/ENVO_' # ENVO: Environment Ontology + 'EOM' : 'http://purl.obolibrary.org/obo/EOM_' # elements of morphology phenotypes + 'ERO' : 'http://purl.obolibrary.org/obo/ERO_' # ERO: eagle-i resource ontology [y] + 'faldo' : 'http://biohackathon.org/resource/faldo#' # FALDO: Feature Annotation Location Description Ontology (genomic feature properties) [y] + 'FBcv' : 'http://purl.obolibrary.org/obo/FBcv_' # FBcv: flybase CV (includes phenotypes) + 'FBbt': 'http://purl.obolibrary.org/obo/FBbt_' # FBbt: flybase anatomy + 'FBdv': 'http://purl.obolibrary.org/obo/FBdv_' # FBdv: flybase developmental stages + 'GENO': 'http://purl.obolibrary.org/obo/GENO_' # GENO: Genotype Partonomy Ontology [y] + 'GO' : 'http://purl.obolibrary.org/obo/GO_' # GO: Gene Ontology [y] + 'HP': 'http://purl.obolibrary.org/obo/HP_' # HP: Human Phenotype Ontology [y] + 'IAO': 'http://purl.obolibrary.org/obo/IAO_' # IAO: Information Artifact Ontology [y] + 'KEGG-ds' : 'http://purl.obolibrary.org/KEGG-ds_' # KEGG-ds: KEGG Disease Ontology + 'LPT': 'http://purl.obolibrary.org/obo/LPT_' # LPT: Livestock Phenotypic Trait Ontology + 'MA': 'http://purl.obolibrary.org/obo/MA_' # MA: Mouse Anatomy Ontology [y] + 'MedGen' : 'http://www.ncbi.nlm.nih.gov/medgen/' # a vocabulary - should this be in purl? 
+ 'MESH': 'http://purl.obolibrary.org/obo/MESH_' # MeSH: Medical Subject Headings (medical diseases, phenotypes, and drugs) + 'MP': 'http://purl.obolibrary.org/obo/MP_' # MP: Mammalian Phenotype Ontology [y] + 'MPATH': 'http://purl.obolibrary.org/obo/MPATH_' # MPATH: Mammalian Pathology Ontology + 'NBO': 'http://purl.obolibrary.org/obo/NBO_' # NBO: NeuroBehavior Ontology [y] + 'OBA': 'http://purl.obolibrary.org/obo/OBA_' # OBA: Ontology of Biological Attributes (traits) + 'OBAN': 'http://purl.org/oban/' # OBAN: Open Biomedical Annotation Model [y] + 'OBI': 'http://purl.obolibrary.org/obo/OBI_' # OBI: Ontology of Biomedical Investigations [y] + 'OBO': 'http://purl.obolibrary.org/obo/' # all ontologies in the OBO namespace (this is not itself an ontology) + 'OIO': 'http://www.geneontology.org/formats/oboInOwl#' # oboInOwl: obo-specific annotation properties, like synonym types + 'OMIA' : 'http://purl.obolibrary.org/obo/OMIA_' # OMIA: Online Mendelian Inheritance in Animals (animal diseases) + 'OMIM' : 'http://purl.obolibrary.org/obo/OMIM_' # OMIM: Online Mendelian Inheritance in Man (human disease and variants) + 'Orphanet' : 'http://www.orpha.net/ORDO/Orphanet_' # Orphanet: rare diseases and orphan drugs + 'PATO': 'http://purl.obolibrary.org/obo/PATO_' # PATO: Phenotypic Quality Ontology [y] + 'PCO': 'http://purl.obolibrary.org/obo/PCO_' # PCO: Population and Community Ontology [y] + 'PR': 'http://purl.obolibrary.org/obo/PR_' # PRO: protein ontology + 'PW' : 'http://purl.obolibrary.org/obo/PW_' # PW: pathway ontology + 'RO': 'http://purl.obolibrary.org/obo/RO_' # RO: Relationship Ontology [y] + 'SIO' : 'http://semanticscience.org/resource/SIO_' # SIO: SemanticScience Integrated Ontology (information artifacts) + 'SNOMED' : 'http://purl.obolibrary.org/obo/SNOMED_' # SNOMED:diseases and phenotypes + 'SO' : 'http://purl.obolibrary.org/obo/SO_' # SO: Sequence Ontology [y] + 'STATO': 'http://purl.obolibrary.org/obo/STATO_' # Statistics Ontology + 'UBERON' : 
'http://purl.obolibrary.org/obo/UBERON_' # UBERON: integrated anatomy ontology (metazoans, mostly) [y] + 'UPHENO' : 'http://purl.obolibrary.org/obo/UPHENO_' # UPHENO: integrated phenotype ontology, and normal traits [y] + 'UMLS' : 'http://purl.obolibrary.org/obo/UMLS_' # UMLS: unified medical language system + 'UO' : 'http://purl.obolibrary.org/obo/UO_' # UO: units of measurements + 'VT' : 'http://purl.obolibrary.org/obo/VT_' # VT: Vertebrate Trait Ontology + 'WBPhenotype': 'http://purl.obolibrary.org/obo/WBPhenotype_' # WBPhenotype: WormBase phenotypes (nematode) [y] + 'XCO' : 'http://purl.obolibrary.org/obo/XCO_' # XCO: Experimental Conditions Ontology + 'ZFA': 'http://purl.obolibrary.org/obo/ZFA_' # ZFA: Zebrafish Anatomy Ontology [y] + 'ZFS': 'http://purl.obolibrary.org/obo/ZFS_' # ZFS: Zebrafish Staging [y] + 'ZP': 'http://purl.obolibrary.org/obo/ZP_' # ZP: Zebrafish Phenotype Ontology [y] + 'WBbt': 'http://purl.obolibrary.org/obo/WBbt_' #WBbt: C. elegans gross anatomy + 'EMAPA': 'http://purl.obolibrary.org/obo/EMAPA_' # EMAPA: Mouse gross anatomy and development, timed + 'XAO': 'http://purl.obolibrary.org/obo/XAO_' # XAO: Xenopus anatomy and development + + # publication/reference sources + 'DOI' : 'http://dx.doi.org/' + 'GeneReviews' : 'http://www.ncbi.nlm.nih.gov/books/' # diseases too + 'ISBN': 'https://monarchinitiative.org/ISBN_' + 'ISBN-10': 'https://monarchinitiative.org/ISBN10_' + 'ISBN-13': 'https://monarchinitiative.org/ISBN13_' + 'ISBN-15': 'https://monarchinitiative.org/ISBN15_' + 'J' : 'http://www.informatics.jax.org/reference/J:' # MGI-internal identifiers for pubs + 'MPD': 'http://phenome.jax.org/' + 'MPD-assay': 'http://phenome.jax.org/db/qp?rtn=views/catlines&keymeas=' + 'PMID': 'http://www.ncbi.nlm.nih.gov/pubmed/' + 'PMCID' : 'http://www.ncbi.nlm.nih.gov/pmc/' + 'AQTLPub' : 'http://www.animalgenome.org/cgi-bin/QTLdb/BT/qabstract?PUBMED_ID=' + 'GO_REF' : 'http://www.geneontology.org/cgi-bin/references.cgi#GO_REF:' + 'HPO' : 
'http://human-phenotype-ontology.org/' # to be used for persons, though they don't resolve with this + + # strains, lines, or organismal reagents + 'APB': 'http://pb.apf.edu.au/phenbank/strain.html?id=' + 'CMMR': 'http://www.cmmr.ca/order.php?t=m&id=' + 'Coriell' : 'https://catalog.coriell.org/0/Sections/Search/Sample_Detail.aspx?Ref=' + 'CoriellCollection' : 'https://catalog.coriell.org/1/' + 'CoriellFamily' : 'https://catalog.coriell.org/0/Sections/BrowseCatalog/FamilyTypeSubDetail.aspx?fam=' + 'CoriellIndividual' : 'https://catalog.coriell.org/Search?q=' + 'dbSNPIndividual' : 'http://www.ncbi.nlm.nih.gov/SNP/snp_ind.cgi?ind_id=' # FIXME + 'EMMA' : 'https://www.infrafrontier.eu/search?keyword=EM:' + 'JAX' : 'http://jaxmice.jax.org/strain/' + 'MMRRC' : 'https://www.mmrrc.org/catalog/sds.php?mmrrc_id=' + 'MPD-strain': 'http://phenome.jax.org/db/q?rtn=strains/details&strainid=' + 'MUGEN': 'http://bioit.fleming.gr/mugen/Controller?workflow=ViewModel&expand_all=true&name_begins=model.block&eid=' + 'NCIMR': 'https://mouse.ncifcrf.gov/available_details.asp?ID=' + 'RBRC': 'http://www2.brc.riken.jp/lab/animal/detail.php?brc_no=RBRC' + + # organisms and genome builds (also NCBITaxon) + 'NCBIAssembly': 'http://www.ncbi.nlm.nih.gov/assembly/' + 'NCBIGenome' : 'http://www.ncbi.nlm.nih.gov/genome/' + 'NCBITaxon' : 'http://purl.obolibrary.org/obo/NCBITaxon_' + 'OMIA-breed': 'https://monarchinitiative.org/model/OMIA-breed:' + 'UCSC' : 'ftp://hgdownload.cse.ucsc.edu/goldenPath/' + + # homology + 'HOMOLOGENE' : 'http://www.ncbi.nlm.nih.gov/homologene/' + 'KEGG-ko' : 'http://www.kegg.jp/dbget-bin/www_bget?ko:' + 'PANTHER' : 'http://www.pantherdb.org/panther/family.do?clsAccession=' # protein/orthologous families + + # variants + 'AQTL' : 'http://identifiers.org/animalqtl/' # FIXME temporary # traits + 'CGD' : 'http://ohsu.edu/cgd/' # diseases, variant instances + 'ClinVar' : 'http://www.ncbi.nlm.nih.gov/clinvar/' # variant+condition + 'ClinVarVariant' : 
'http://www.ncbi.nlm.nih.gov/clinvar/variation/' + 'ClinVarSubmitters' : 'http://www.ncbi.nlm.nih.gov/clinvar/submitters/' + 'COSMIC' : 'http://cancer.sanger.ac.uk/cosmic/mutation/overview?id=' + 'HGMD' : 'http://identifiers.org/hgmd/' + 'dbSNP' : 'http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=' + 'dbVar' : 'http://www.ncbi.nlm.nih.gov/dbvar/' + + # pathways + 'KEGG-path' : 'http://www.kegg.jp/dbget-bin/www_bget?path:' + 'REACT' : 'http://www.reactome.org/PathwayBrowser/#/' + + # genes (and RNAs and transcripts) + 'BIOGRID' : 'http://thebiogrid.org/' # also interactions + 'CCDS' : 'http://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&DATA=' # transcripty things + 'dictyBase' : 'http://dictybase.org/gene/' + 'EcoGene' : 'http://ecogene.org/gene/' + 'ENSEMBL' : 'http://identifiers.org/ensembl/' + 'FlyBase' : 'http://flybase.org/reports/' # also variants, pubs, genotypes, strains + 'GenBank' : 'http://www.ncbi.nlm.nih.gov/nuccore/' + 'HGNC' : 'http://identifiers.org/hgnc/HGNC:' + 'IMPC' : 'http://www.mousephenotype.org/data/genes/' # FIXME + 'KEGG-hsa' : 'http://www.kegg.jp/dbget-bin/www_bget?hsa:' + 'MGI': 'http://www.informatics.jax.org/accession/MGI:' # also variants, pubs, genotypes + 'miRBase' : 'http://identifiers.org/mirbase/' # microRNA genes + 'NCBIGene' : 'http://www.ncbi.nlm.nih.gov/gene/' + 'PomBase' : 'http://identifiers.org/PomBase:' + 'RefSeq' : 'http://www.ncbi.nlm.nih.gov/refseq/?term=' + 'RGD' : 'http://rgd.mcw.edu/rgdweb/report/gene/main.html?id=' + 'SGD' : 'http://identifiers.org/SGD:' + 'TAIR' : 'http://identifiers.org/TAIR:' + 'WormBase' : 'http://identifiers.org/wormbase/' # also variants, pubs, genotypes + 'Xenbase' : 'http://identifiers.org/xenbase/' + 'ZFIN' : 'http://zfin.org/' # also variants, pubs, genotypes + + # proteins + 'EC' : 'http://identifiers.org/ec-code/' + 'HPRD' : 'http://www.hprd.org/protein/' + 'NCBIProtein' : 'http://www.ncbi.nlm.nih.gov/protein/' + 'PDB' : 'http://identifiers.org/PDB:' + 'SwissProt' : 
'http://identifiers.org/SwissProt:' + 'TrEMBL' : 'http://www.uniprot.org/uniprot/' + 'UniProtKB' : 'http://identifiers.org/uniprot/' + + # SEPIO: Scientific Evidence and Provenance Information Ontology + 'SEPIO': 'http://purl.obolibrary.org/obo/SEPIO_' + 'VIVO': 'http://vivoweb.org/ontology/core#' + + #Procedures/protocols + 'IMPRESS-procedure' : 'https://www.mousephenotype.org/impress/procedures/' + 'IMPRESS-protocol' : 'https://www.mousephenotype.org/impress/protocol/' + 'IMPRESS-parameter' : 'https://www.mousephenotype.org/impress/parameterontologies/' + + #Drugs, chemicals, compounds + 'CID' : 'http://pubchem.ncbi.nlm.nih.gov/compound/' + 'DrugBank' : 'http://www.drugbank.ca/drugs/' + 'SIO': 'http://semanticscience.org/resource/SIO_' + 'OAE': 'http://purl.obolibrary.org/obo/OAE_' + 'RXCUI': 'http://purl.bioontology.org/ontology/RXNORM/' + 'MEDDRA': 'http://purl.bioontology.org/ontology/MEDDRA/' + 'FDADrug': 'http://www.fda.gov/Drugs/InformationOnDrugs/' + 'BT': 'http://c.biothings.io/#' + 'UNII': 'http://fdasis.nlm.nih.gov/srs/unii/' + 'GINAS' : 'http://tripod.nih.gov/ginas/app/substance#' From d436fdf07a08ef84eeafa0339245b3f03648bbff Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Tue, 7 Mar 2017 09:42:29 -0800 Subject: [PATCH 6/8] typos --- .../matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java | 2 +- .../owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java index d40c7d3..5c379b2 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java +++ 
b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java @@ -81,7 +81,7 @@ public void testExamplePositiveOnly() throws Exception { } @Test - public void testFrequencyWare() throws Exception { + public void testFrequencyAware() throws Exception { loadSimplePhenoWithFrequency(); //LOG.info("INDS="+kb.getIndividualIdsInSignature()); ProfileMatcher profileMatcher = createProfileMatcher(kb); diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java index 3183d95..bdb2a72 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java @@ -75,7 +75,7 @@ public void testCompareProfileFile() throws Exception { Set tcids = kb.getClassIds(kb.getDirectTypesBM(j)); ProfileQuery tp = profileMatcher.createProfileQueryFromClasses(tcids, null); - String fn = i.replaceAll(".*/", ""); + String fn = i.replaceAll(".*/", ""); //eval.writeJsonTo("target/pdgm-test-results-"+fn+".json"); Match pairMatch = profileMatcher.compareProfilePair(qp, tp); From 56a4aa2389918418d53af8fea879b42d5615b33f Mon Sep 17 00:00:00 2001 From: Jeremy Nguyen Xuan Date: Thu, 9 Mar 2017 15:30:34 -0800 Subject: [PATCH 7/8] [#62] we still need the inject indicator for guice. We can scan the matcher package through java reflection and let the injector construct the objects.
--- .../owlsim/compute/matcher/ProfileMatcher.java | 2 -- .../AbstractSemanticSimilarityProfileMatcher.java | 2 ++ .../impl/BayesianNetworkProfileMatcher.java | 3 +++ .../matcher/impl/GridNegatedProfileMatcher.java | 3 +++ .../compute/matcher/impl/GridProfileMatcher.java | 3 +++ .../impl/JaccardSimilarityProfileMatcher.java | 3 +++ ...InformationContentSimilarityProfileMatcher.java | 3 +++ ...veBayesFixedWeightThreeStateProfileMatcher.java | 3 +++ ...FixedWeightTwoStateNoBlanketProfileMatcher.java | 3 +++ ...aiveBayesFixedWeightTwoStateProfileMatcher.java | 3 +++ .../NaiveBayesVariableWeightProfileMatcher.java | 3 +++ .../matcher/impl/PhenodigmICProfileMatcher.java | 3 +++ .../ThreeStateBayesianNetworkProfileMatcher.java | 3 +++ .../services/modules/KnowledgeBaseModule.java | 25 ++++++++++++++++++++++ 14 files changed, 60 insertions(+), 2 deletions(-) diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java index 52b58cb..a710112 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java @@ -111,6 +111,4 @@ public ProfileQuery createProfileQueryFromClasses(Set classIds, */ BMKnowledgeBase getKnowledgeBase(); - - } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java index eb042fa..0b8f0ca 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java @@ -1,5 +1,7 @@ package 
org.monarchinitiative.owlsim.compute.matcher.impl; +import javax.inject.Inject; + import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator; import org.monarchinitiative.owlsim.compute.mica.impl.MostInformativeCommonAncestorCalculatorImpl; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java index ee22a60..580dd3b 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java @@ -5,6 +5,8 @@ import java.util.Map; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.cpt.ConditionalProbabilityIndex; import org.monarchinitiative.owlsim.compute.cpt.IncoherentStateException; @@ -101,6 +103,7 @@ private Calculator[] calculatorCache; private Double[][] targetClassProbabilityCache; + @Inject private BayesianNetworkProfileMatcher(BMKnowledgeBase kb) { super(kb); int N = kb.getIndividualIdsInSignature().size(); diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridNegatedProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridNegatedProfileMatcher.java index 04b85c3..0a693e4 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridNegatedProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridNegatedProfileMatcher.java @@ -1,5 +1,7 @@ package org.monarchinitiative.owlsim.compute.matcher.impl; +import javax.inject.Inject; + import org.apache.log4j.Logger; import 
org.monarchinitiative.owlsim.compute.matcher.NegationAwareProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -22,6 +24,7 @@ /** * @param kb */ + @Inject public GridNegatedProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridProfileMatcher.java index c2f4448..499f58e 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridProfileMatcher.java @@ -3,6 +3,8 @@ import java.util.List; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator.ClassInformationContentPair; @@ -32,6 +34,7 @@ /** * @param kb */ + @Inject public GridProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/JaccardSimilarityProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/JaccardSimilarityProfileMatcher.java index cdca4a7..a589c89 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/JaccardSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/JaccardSimilarityProfileMatcher.java @@ -2,6 +2,8 @@ import java.util.List; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -26,6 +28,7 @@ /** * @param kb */ + @Inject public JaccardSimilarityProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git 
a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java index c14bec9..a1f59ce 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java @@ -2,6 +2,8 @@ import java.util.List; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator.ClassInformationContentPair; @@ -28,6 +30,7 @@ /** * @param kb */ + @Inject private MaximumInformationContentSimilarityProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java index bb3b7d2..7f13bc0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java @@ -4,6 +4,8 @@ import java.util.List; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.NegationAwareProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -29,6 +31,7 @@ private Logger LOG = Logger.getLogger(NaiveBayesFixedWeightThreeStateProfileMatcher.class); + @Inject private 
NaiveBayesFixedWeightThreeStateProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher.java index 009b47c..1518072 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher.java @@ -1,5 +1,7 @@ package org.monarchinitiative.owlsim.compute.matcher.impl; +import javax.inject.Inject; + import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; /** @@ -8,6 +10,7 @@ */ public class NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher extends NaiveBayesFixedWeightTwoStateProfileMatcher { + @Inject private NaiveBayesFixedWeightTwoStateNoBlanketProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java index 392a021..31fbc80 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightTwoStateProfileMatcher.java @@ -7,6 +7,8 @@ import java.util.Map; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -76,6 +78,7 @@ public WeightedTypesBM(EWAHCompressedBitmap typesBM, Double weight) { // for maps a pair of 
(Individual, InterpretationIndex) to a set of inferred (self, direct, indirect) types private Map> individualToInterpretationToTypesBM = new HashMap<>(); + @Inject protected NaiveBayesFixedWeightTwoStateProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java index a06d0ac..8084cc0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java @@ -3,6 +3,8 @@ import java.util.List; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -34,6 +36,7 @@ /** * @param kb */ + @Inject public NaiveBayesVariableWeightProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/PhenodigmICProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/PhenodigmICProfileMatcher.java index 8d3f8f0..62489a6 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/PhenodigmICProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/PhenodigmICProfileMatcher.java @@ -3,6 +3,8 @@ import java.util.List; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator.ClassInformationContentPair; @@ -32,6 +34,7 @@ /** * @param kb */ + 
@Inject public PhenodigmICProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java index 2efe966..e7c207d 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java @@ -6,6 +6,8 @@ import java.util.Map; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.cpt.IncoherentStateException; import org.monarchinitiative.owlsim.compute.cpt.impl.NodeProbabilities; @@ -35,6 +37,7 @@ private ThreeStateConditionalProbabilityIndex cpi = null; private Map targetToQueryCache; + @Inject private ThreeStateBayesianNetworkProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java index 429aa97..e6b76c7 100644 --- a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java @@ -14,6 +14,11 @@ import javax.inject.Singleton; import org.apache.commons.validator.routines.UrlValidator; +import org.monarchinitiative.owlsim.compute.classmatch.ClassMatcher; +import org.monarchinitiative.owlsim.compute.enrich.impl.HypergeometricEnrichmentEngine; +import org.monarchinitiative.owlsim.compute.matcher.impl.BayesianNetworkProfileMatcher; +import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator; 
+import org.monarchinitiative.owlsim.compute.mica.impl.MostInformativeCommonAncestorCalculatorImpl; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.kb.impl.BMKnowledgeBaseOWLAPIImpl; import org.monarchinitiative.owlsim.services.modules.bindings.IndicatesDataTsvs; @@ -123,5 +128,25 @@ OWLOntology getDataTsvs(OWLOntologyManager manager) throws OWLOntologyCreationException, FileNotFoundException, IOException { return mergeOntologies(manager, dataTsvs); } + + @Provides + MostInformativeCommonAncestorCalculator getMostInformativeCommonAncestorCalculator(BMKnowledgeBase knowledgeBase) { + return new MostInformativeCommonAncestorCalculatorImpl(knowledgeBase); + } + + @Provides + HypergeometricEnrichmentEngine getHypergeometricEnrichmentEngine(BMKnowledgeBase knowledgeBase) { + return new HypergeometricEnrichmentEngine(knowledgeBase); + } + + @Provides + BayesianNetworkProfileMatcher getBayesianNetworkProfileMatcher(BMKnowledgeBase knowledgeBase) { + return BayesianNetworkProfileMatcher.create(knowledgeBase); + } + + @Provides + ClassMatcher getClassMatcher(BMKnowledgeBase knowledgeBase) { + return new ClassMatcher(knowledgeBase); + } } From 4a2210af47dad051b65e1c17dddea1c401be9d0b Mon Sep 17 00:00:00 2001 From: Jeremy Nguyen Xuan Date: Thu, 9 Mar 2017 17:35:09 -0800 Subject: [PATCH 8/8] [#58] added species --- Dockerfile | 10 ++++++++-- README.md | 14 +++++++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 77ce76d..763cc1e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,6 +8,8 @@ # Pull base image. FROM ubuntu:16.04 +ARG species=all + RUN apt-get -y update && apt-get install -y software-properties-common python-software-properties # Install Java. 
@@ -27,8 +29,12 @@ WORKDIR /data ADD owlsim-services/target/owlsim-services-3.0-SNAPSHOT.jar /data/ ADD configuration-samples/configuration-all.yaml /data/configuration.yaml -ADD http://ci.monarchinitiative.org/view/dev/job/create-owlsim-files-on-dev/lastSuccessfulBuild/artifact/server/all.owl /data/ -ADD https://raw.githubusercontent.com/monarch-initiative/monarch-owlsim-data/master/data/Homo_sapiens/Hs_disease_phenotype.txt /data/ +RUN if [ $species = "human" ]; \ + then \ + wget https://data.monarchinitiative.org/owl/all-hp.owl -O /data/all.owl; \ + else \ + wget http://ci.monarchinitiative.org/view/dev/job/create-owlsim-files-on-dev/lastSuccessfulBuild/artifact/server/all.owl -O /data/all.owl; \ + fi CMD java -jar /data/owlsim-services-3.0-SNAPSHOT.jar server /data/configuration.yaml diff --git a/README.md b/README.md index f80f5bd..7fd2ddc 100644 --- a/README.md +++ b/README.md @@ -64,10 +64,18 @@ http://localhost:8080/api/match/jaccard?id=X:heart-morphology&id=X:brain-morphol ## Build with Docker -Run those commands from the root directory: +Run those commands from the root directory (with Docker >= 1.9): ``` mvn package -docker build -t owlsim . -docker run -p 8080:8080 owlsim +docker build -t owlsim-all . # by default contains all the species +docker run -p 8080:8080 owlsim-all +``` + +To restrict to specific species: + +``` +docker build --build-arg species=all -t owlsim-all . # default if no arg is provided or the value does not match any species +docker build --build-arg species=human -t owlsim-human . + ```