diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..763cc1e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,41 @@ +# +# Oracle Java 8 Dockerfile +# +# https://github.com/dockerfile/java +# https://github.com/dockerfile/java/tree/master/oracle-java8 +# + +# Pull base image. +FROM ubuntu:16.04 + +ARG species=all + +RUN apt-get -y update && apt-get install -y software-properties-common python-software-properties + +# Install Java. +RUN \ + echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | debconf-set-selections && \ + add-apt-repository -y ppa:webupd8team/java && \ + apt-get update && \ + apt-get install -y oracle-java8-installer && \ + rm -rf /var/lib/apt/lists/* && \ + rm -rf /var/cache/oracle-jdk8-installer + +# Define commonly used JAVA_HOME variable +ENV JAVA_HOME /usr/lib/jvm/java-8-oracle + +# Define working directory. +WORKDIR /data +ADD owlsim-services/target/owlsim-services-3.0-SNAPSHOT.jar /data/ +ADD configuration-samples/configuration-all.yaml /data/configuration.yaml + +RUN if [ $species = "human" ]; \ + then \ + wget https://data.monarchinitiative.org/owl/all-hp.owl -O /data/all.owl; \ + else \ + wget http://ci.monarchinitiative.org/view/dev/job/create-owlsim-files-on-dev/lastSuccessfulBuild/artifact/server/all.owl -O /data/all.owl; \ + fi + +CMD java -jar /data/owlsim-services-3.0-SNAPSHOT.jar server /data/configuration.yaml + +EXPOSE 8080 diff --git a/README.md b/README.md index 41485c8..7fd2ddc 100644 --- a/README.md +++ b/README.md @@ -56,8 +56,26 @@ Paths: or for some versions of dropwizard: * http://localhost:8080/api/docs/ - * http://localhost:8080/api/match/matchers + * http://localhost:8080/api/match/matchers Example query using default config: http://localhost:8080/api/match/jaccard?id=X:heart-morphology&id=X:brain-morphology + +## Build with Docker + +Run those commands from the root directory (with Docker >= 1.9): + +``` +mvn package +docker build -t owlsim-all . 
# by default contains all the species +docker run -p 8080:8080 owlsim-all +``` + +To restrict to specific species: + +``` +docker build --build-arg species=all -t owlsim-all . # default if no arg is provided or if it does not match any species +docker build --build-arg species=human -t owlsim-human . + +``` diff --git a/configuration-samples/configuration-all.yaml b/configuration-samples/configuration-all.yaml new file mode 100644 index 0000000..8ea11ad --- /dev/null +++ b/configuration-samples/configuration-all.yaml @@ -0,0 +1,197 @@ +ontologyUris: + - /data/all.owl +ontologyDataUris: [] +dataTsvs: [] +curies: + # Skolemize Blank Nodes + # overwrite iri fragment with '' for unresolvable bnodes + '_': 'https://monarchinitiative.org/.well-known/genid/' + + # Monarch-specific + '': 'https://monarchinitiative.org/' + 'MONARCH': 'https://monarchinitiative.org/MONARCH_' + + 'MonarchData': 'http://data.monarchinitiative.org/ttl/' + 'MonarchArchive': 'http://archive.monarchinitiative.org/ttl/' + + # other semantic-web items + 'Annotation': 'http://www.w3.org/ns/oa#Annotation' # FIXME - i don't think we're using this + 'dc': 'http://purl.org/dc/elements/1.1/' + 'foaf': 'http://xmlns.com/foaf/0.1/' + + # ontologies + # [y] indicates those that the monarch team contributes to + 'AQTLTrait': 'http://identifiers.org/animalqtltrait/' # FIXME - should get integrated into Upheno + 'BFO': 'http://purl.obolibrary.org/obo/BFO_' # BFO: Basic Formal Ontology + 'CHEBI' : 'http://purl.obolibrary.org/obo/CHEBI_' # ChEBI: Chemicals of Biological Interest + 'CHR' : 'http://purl.obolibrary.org/obo/CHR_' # CHR: Chromosome Ontology + 'CL' : 'http://purl.obolibrary.org/obo/CL_' # CL: Cell Ontology (cell types) [y] + 'CLO' : 'http://purl.obolibrary.org/obo/CLO_' # CLO: Cell Line Ontology [y] + 'CMO' : 'http://purl.obolibrary.org/obo/CMO_' # CMO: Clinical Measurements Ontology + 'DATA' : 'http://edamontology.org/data_' # EDAM: Data and Methods Ontology (data artifacts) + 'DC' : 
'http://purl.obolibrary.org/obo/DC_' # TODO + 'DECIPHER' : 'http://purl.obolibrary.org/obo/DECIPHER_' # DECIPHER: Deciphering Developmental Disease + 'DOID': 'http://purl.obolibrary.org/obo/DOID_' # DOID: Human Disease Ontology [y] + 'ECO': 'http://purl.obolibrary.org/obo/ECO_' # ECO: Evidence Code Ontology [y] + 'EFO' : 'http://www.ebi.ac.uk/efo/EFO_' # EFO: Experimental Factor Ontology (all kinds of stuff) [y] + 'ENVO' : 'http://purl.obolibrary.org/obo/ENVO_' # ENVO: Environment Ontology + 'EOM' : 'http://purl.obolibrary.org/obo/EOM_' # elements of morphology phenotypes + 'ERO' : 'http://purl.obolibrary.org/obo/ERO_' # ERO: eagle-i resource ontology [y] + 'faldo' : 'http://biohackathon.org/resource/faldo#' # FALDO: Feature Annotation Location Description Ontology (genomic feature properties) [y] + 'FBcv' : 'http://purl.obolibrary.org/obo/FBcv_' # FBcv: flybase CV (includes phenotypes) + 'FBbt': 'http://purl.obolibrary.org/obo/FBbt_' # FBbt: flybase anatomy + 'FBdv': 'http://purl.obolibrary.org/obo/FBdv_' # FBdv: flybase developmental stages + 'GENO': 'http://purl.obolibrary.org/obo/GENO_' # GENO: Genotype Partonomy Ontology [y] + 'GO' : 'http://purl.obolibrary.org/obo/GO_' # GO: Gene Ontology [y] + 'HP': 'http://purl.obolibrary.org/obo/HP_' # HP: Human Phenotype Ontology [y] + 'IAO': 'http://purl.obolibrary.org/obo/IAO_' # IAO: Information Artifact Ontology [y] + 'KEGG-ds' : 'http://purl.obolibrary.org/KEGG-ds_' # KEGG-ds: KEGG Disease Ontology + 'LPT': 'http://purl.obolibrary.org/obo/LPT_' # LPT: Livestock Phenotypic Trait Ontology + 'MA': 'http://purl.obolibrary.org/obo/MA_' # MA: Mouse Anatomy Ontology [y] + 'MedGen' : 'http://www.ncbi.nlm.nih.gov/medgen/' # a vocabulary - should this be in purl? 
+ 'MESH': 'http://purl.obolibrary.org/obo/MESH_' # MeSH: Medical Subject Headings (medical diseases, phenotypes, and drugs) + 'MP': 'http://purl.obolibrary.org/obo/MP_' # MP: Mammalian Phenotype Ontology [y] + 'MPATH': 'http://purl.obolibrary.org/obo/MPATH_' # MPATH: Mammalian Pathology Ontology + 'NBO': 'http://purl.obolibrary.org/obo/NBO_' # NBO: NeuroBehavior Ontology [y] + 'OBA': 'http://purl.obolibrary.org/obo/OBA_' # OBA: Ontology of Biological Attributes (traits) + 'OBAN': 'http://purl.org/oban/' # OBAN: Open Biomedical Annotation Model [y] + 'OBI': 'http://purl.obolibrary.org/obo/OBI_' # OBI: Ontology of Biomedical Investigations [y] + 'OBO': 'http://purl.obolibrary.org/obo/' # all ontologies in the OBO namespace (this is not itself an ontology) + 'OIO': 'http://www.geneontology.org/formats/oboInOwl#' # oboInOwl: obo-specific annotation properties, like synonym types + 'OMIA' : 'http://purl.obolibrary.org/obo/OMIA_' # OMIA: Online Mendelian Inheritance in Animals (animal diseases) + 'OMIM' : 'http://purl.obolibrary.org/obo/OMIM_' # OMIM: Online Mendelian Inheritance in Man (human disease and variants) + 'Orphanet' : 'http://www.orpha.net/ORDO/Orphanet_' # Orphanet: rare diseases and orphan drugs + 'PATO': 'http://purl.obolibrary.org/obo/PATO_' # PATO: Phenotypic Quality Ontology [y] + 'PCO': 'http://purl.obolibrary.org/obo/PCO_' # PCO: Population and Community Ontology [y] + 'PR': 'http://purl.obolibrary.org/obo/PR_' # PRO: protein ontology + 'PW' : 'http://purl.obolibrary.org/obo/PW_' # PW: pathway ontology + 'RO': 'http://purl.obolibrary.org/obo/RO_' # RO: Relationship Ontology [y] + 'SIO' : 'http://semanticscience.org/resource/SIO_' # SIO: SemanticScience Integrated Ontology (information artifacts) + 'SNOMED' : 'http://purl.obolibrary.org/obo/SNOMED_' # SNOMED:diseases and phenotypes + 'SO' : 'http://purl.obolibrary.org/obo/SO_' # SO: Sequence Ontology [y] + 'STATO': 'http://purl.obolibrary.org/obo/STATO_' # Statistics Ontology + 'UBERON' : 
'http://purl.obolibrary.org/obo/UBERON_' # UBERON: integrated anatomy ontology (metazoans, mostly) [y] + 'UPHENO' : 'http://purl.obolibrary.org/obo/UPHENO_' # UPHENO: integrated phenotype ontology, and normal traits [y] + 'UMLS' : 'http://purl.obolibrary.org/obo/UMLS_' # UMLS: unified medical language system + 'UO' : 'http://purl.obolibrary.org/obo/UO_' # UO: units of measurements + 'VT' : 'http://purl.obolibrary.org/obo/VT_' # VT: Vertebrate Trait Ontology + 'WBPhenotype': 'http://purl.obolibrary.org/obo/WBPhenotype_' # WBPhenotype: WormBase phenotypes (nematode) [y] + 'XCO' : 'http://purl.obolibrary.org/obo/XCO_' # XCO: Experimental Conditions Ontology + 'ZFA': 'http://purl.obolibrary.org/obo/ZFA_' # ZFA: Zebrafish Anatomy Ontology [y] + 'ZFS': 'http://purl.obolibrary.org/obo/ZFS_' # ZFS: Zebrafish Staging [y] + 'ZP': 'http://purl.obolibrary.org/obo/ZP_' # ZP: Zebrafish Phenotype Ontology [y] + 'WBbt': 'http://purl.obolibrary.org/obo/WBbt_' #WBbt: C. elegans gross anatomy + 'EMAPA': 'http://purl.obolibrary.org/obo/EMAPA_' # EMAPA: Mouse gross anatomy and development, timed + 'XAO': 'http://purl.obolibrary.org/obo/XAO_' # XAO: Xenopus anatomy and development + + # publication/reference sources + 'DOI' : 'http://dx.doi.org/' + 'GeneReviews' : 'http://www.ncbi.nlm.nih.gov/books/' # diseases too + 'ISBN': 'https://monarchinitiative.org/ISBN_' + 'ISBN-10': 'https://monarchinitiative.org/ISBN10_' + 'ISBN-13': 'https://monarchinitiative.org/ISBN13_' + 'ISBN-15': 'https://monarchinitiative.org/ISBN15_' + 'J' : 'http://www.informatics.jax.org/reference/J:' # MGI-internal identifiers for pubs + 'MPD': 'http://phenome.jax.org/' + 'MPD-assay': 'http://phenome.jax.org/db/qp?rtn=views/catlines&keymeas=' + 'PMID': 'http://www.ncbi.nlm.nih.gov/pubmed/' + 'PMCID' : 'http://www.ncbi.nlm.nih.gov/pmc/' + 'AQTLPub' : 'http://www.animalgenome.org/cgi-bin/QTLdb/BT/qabstract?PUBMED_ID=' + 'GO_REF' : 'http://www.geneontology.org/cgi-bin/references.cgi#GO_REF:' + 'HPO' : 
'http://human-phenotype-ontology.org/' # to be used for persons, though they don't resolve with this + + # strains, lines, or organismal reagents + 'APB': 'http://pb.apf.edu.au/phenbank/strain.html?id=' + 'CMMR': 'http://www.cmmr.ca/order.php?t=m&id=' + 'Coriell' : 'https://catalog.coriell.org/0/Sections/Search/Sample_Detail.aspx?Ref=' + 'CoriellCollection' : 'https://catalog.coriell.org/1/' + 'CoriellFamily' : 'https://catalog.coriell.org/0/Sections/BrowseCatalog/FamilyTypeSubDetail.aspx?fam=' + 'CoriellIndividual' : 'https://catalog.coriell.org/Search?q=' + 'dbSNPIndividual' : 'http://www.ncbi.nlm.nih.gov/SNP/snp_ind.cgi?ind_id=' # FIXME + 'EMMA' : 'https://www.infrafrontier.eu/search?keyword=EM:' + 'JAX' : 'http://jaxmice.jax.org/strain/' + 'MMRRC' : 'https://www.mmrrc.org/catalog/sds.php?mmrrc_id=' + 'MPD-strain': 'http://phenome.jax.org/db/q?rtn=strains/details&strainid=' + 'MUGEN': 'http://bioit.fleming.gr/mugen/Controller?workflow=ViewModel&expand_all=true&name_begins=model.block&eid=' + 'NCIMR': 'https://mouse.ncifcrf.gov/available_details.asp?ID=' + 'RBRC': 'http://www2.brc.riken.jp/lab/animal/detail.php?brc_no=RBRC' + + # organisms and genome builds (also NCBITaxon) + 'NCBIAssembly': 'http://www.ncbi.nlm.nih.gov/assembly/' + 'NCBIGenome' : 'http://www.ncbi.nlm.nih.gov/genome/' + 'NCBITaxon' : 'http://purl.obolibrary.org/obo/NCBITaxon_' + 'OMIA-breed': 'https://monarchinitiative.org/model/OMIA-breed:' + 'UCSC' : 'ftp://hgdownload.cse.ucsc.edu/goldenPath/' + + # homology + 'HOMOLOGENE' : 'http://www.ncbi.nlm.nih.gov/homologene/' + 'KEGG-ko' : 'http://www.kegg.jp/dbget-bin/www_bget?ko:' + 'PANTHER' : 'http://www.pantherdb.org/panther/family.do?clsAccession=' # protein/orthologous families + + # variants + 'AQTL' : 'http://identifiers.org/animalqtl/' # FIXME temporary # traits + 'CGD' : 'http://ohsu.edu/cgd/' # diseases, variant instances + 'ClinVar' : 'http://www.ncbi.nlm.nih.gov/clinvar/' # variant+condition + 'ClinVarVariant' : 
'http://www.ncbi.nlm.nih.gov/clinvar/variation/' + 'ClinVarSubmitters' : 'http://www.ncbi.nlm.nih.gov/clinvar/submitters/' + 'COSMIC' : 'http://cancer.sanger.ac.uk/cosmic/mutation/overview?id=' + 'HGMD' : 'http://identifiers.org/hgmd/' + 'dbSNP' : 'http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=' + 'dbVar' : 'http://www.ncbi.nlm.nih.gov/dbvar/' + + # pathways + 'KEGG-path' : 'http://www.kegg.jp/dbget-bin/www_bget?path:' + 'REACT' : 'http://www.reactome.org/PathwayBrowser/#/' + + # genes (and RNAs and transcripts) + 'BIOGRID' : 'http://thebiogrid.org/' # also interactions + 'CCDS' : 'http://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&DATA=' # transcripty things + 'dictyBase' : 'http://dictybase.org/gene/' + 'EcoGene' : 'http://ecogene.org/gene/' + 'ENSEMBL' : 'http://identifiers.org/ensembl/' + 'FlyBase' : 'http://flybase.org/reports/' # also variants, pubs, genotypes, strains + 'GenBank' : 'http://www.ncbi.nlm.nih.gov/nuccore/' + 'HGNC' : 'http://identifiers.org/hgnc/HGNC:' + 'IMPC' : 'http://www.mousephenotype.org/data/genes/' # FIXME + 'KEGG-hsa' : 'http://www.kegg.jp/dbget-bin/www_bget?hsa:' + 'MGI': 'http://www.informatics.jax.org/accession/MGI:' # also variants, pubs, genotypes + 'miRBase' : 'http://identifiers.org/mirbase/' # microRNA genes + 'NCBIGene' : 'http://www.ncbi.nlm.nih.gov/gene/' + 'PomBase' : 'http://identifiers.org/PomBase:' + 'RefSeq' : 'http://www.ncbi.nlm.nih.gov/refseq/?term=' + 'RGD' : 'http://rgd.mcw.edu/rgdweb/report/gene/main.html?id=' + 'SGD' : 'http://identifiers.org/SGD:' + 'TAIR' : 'http://identifiers.org/TAIR:' + 'WormBase' : 'http://identifiers.org/wormbase/' # also variants, pubs, genotypes + 'Xenbase' : 'http://identifiers.org/xenbase/' + 'ZFIN' : 'http://zfin.org/' # also variants, pubs, genotypes + + # proteins + 'EC' : 'http://identifiers.org/ec-code/' + 'HPRD' : 'http://www.hprd.org/protein/' + 'NCBIProtein' : 'http://www.ncbi.nlm.nih.gov/protein/' + 'PDB' : 'http://identifiers.org/PDB:' + 'SwissProt' : 
'http://identifiers.org/SwissProt:' + 'TrEMBL' : 'http://www.uniprot.org/uniprot/' + 'UniProtKB' : 'http://identifiers.org/uniprot/' + + # SEPIO: Scientific Evidence and Provenance Information Ontology + 'SEPIO': 'http://purl.obolibrary.org/obo/SEPIO_' + 'VIVO': 'http://vivoweb.org/ontology/core#' + + #Procedures/protocols + 'IMPRESS-procedure' : 'https://www.mousephenotype.org/impress/procedures/' + 'IMPRESS-protocol' : 'https://www.mousephenotype.org/impress/protocol/' + 'IMPRESS-parameter' : 'https://www.mousephenotype.org/impress/parameterontologies/' + + #Drugs, chemicals, compounds + 'CID' : 'http://pubchem.ncbi.nlm.nih.gov/compound/' + 'DrugBank' : 'http://www.drugbank.ca/drugs/' + 'SIO': 'http://semanticscience.org/resource/SIO_' + 'OAE': 'http://purl.obolibrary.org/obo/OAE_' + 'RXCUI': 'http://purl.bioontology.org/ontology/RXNORM/' + 'MEDDRA': 'http://purl.bioontology.org/ontology/MEDDRA/' + 'FDADrug': 'http://www.fda.gov/Drugs/InformationOnDrugs/' + 'BT': 'http://c.biothings.io/#' + 'UNII': 'http://fdasis.nlm.nih.gov/srs/unii/' + 'GINAS' : 'http://tripod.nih.gov/ginas/app/substance#' diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java index 52b58cb..a710112 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/ProfileMatcher.java @@ -111,6 +111,4 @@ public ProfileQuery createProfileQueryFromClasses(Set classIds, */ BMKnowledgeBase getKnowledgeBase(); - - } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java index eb042fa..0b8f0ca 100644 --- 
a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/AbstractSemanticSimilarityProfileMatcher.java @@ -1,5 +1,7 @@ package org.monarchinitiative.owlsim.compute.matcher.impl; +import javax.inject.Inject; + import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator; import org.monarchinitiative.owlsim.compute.mica.impl.MostInformativeCommonAncestorCalculatorImpl; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java index ee22a60..580dd3b 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BayesianNetworkProfileMatcher.java @@ -5,6 +5,8 @@ import java.util.Map; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.cpt.ConditionalProbabilityIndex; import org.monarchinitiative.owlsim.compute.cpt.IncoherentStateException; @@ -101,6 +103,7 @@ private Calculator[] calculatorCache; private Double[][] targetClassProbabilityCache; + @Inject private BayesianNetworkProfileMatcher(BMKnowledgeBase kb) { super(kb); int N = kb.getIndividualIdsInSignature().size(); diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridNegatedProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridNegatedProfileMatcher.java index 04b85c3..0a693e4 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridNegatedProfileMatcher.java +++ 
b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridNegatedProfileMatcher.java @@ -1,5 +1,7 @@ package org.monarchinitiative.owlsim.compute.matcher.impl; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.NegationAwareProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -22,6 +24,7 @@ /** * @param kb */ + @Inject public GridNegatedProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridProfileMatcher.java index c2f4448..499f58e 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/GridProfileMatcher.java @@ -3,6 +3,8 @@ import java.util.List; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator.ClassInformationContentPair; @@ -32,6 +34,7 @@ /** * @param kb */ + @Inject public GridProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/JaccardSimilarityProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/JaccardSimilarityProfileMatcher.java index cdca4a7..a589c89 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/JaccardSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/JaccardSimilarityProfileMatcher.java @@ -2,6 +2,8 @@ import java.util.List; +import javax.inject.Inject; + import org.apache.log4j.Logger; import 
org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -26,6 +28,7 @@ /** * @param kb */ + @Inject public JaccardSimilarityProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java index c14bec9..a1f59ce 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/MaximumInformationContentSimilarityProfileMatcher.java @@ -2,6 +2,8 @@ import java.util.List; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator.ClassInformationContentPair; @@ -28,6 +30,7 @@ /** * @param kb */ + @Inject private MaximumInformationContentSimilarityProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java index 1c7bcd6..7f13bc0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesFixedWeightThreeStateProfileMatcher.java @@ -4,6 +4,8 @@ import java.util.List; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import 
org.monarchinitiative.owlsim.compute.matcher.NegationAwareProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -24,379 +26,392 @@ * @author cjm * */ -public class NaiveBayesFixedWeightThreeStateProfileMatcher extends AbstractProfileMatcher - implements NegationAwareProfileMatcher { - - private Logger LOG = Logger.getLogger(NaiveBayesFixedWeightThreeStateProfileMatcher.class); - - private NaiveBayesFixedWeightThreeStateProfileMatcher(BMKnowledgeBase kb) { - super(kb); - } - - /** - * @param kb - * @return new instance - */ - public static NaiveBayesFixedWeightThreeStateProfileMatcher create(BMKnowledgeBase kb) { - return new NaiveBayesFixedWeightThreeStateProfileMatcher(kb); - } - - @Override - public String getShortName() { - return "naive-bayes-fixed-weight-three-state"; - } - - private EWAHCompressedBitmap getQueryBlanketBM(ProfileQuery q) { - EWAHCompressedBitmap onQueryNodesBM = getProfileBM(q); - Set nodesWithOnParents = new HashSet(); - - // there may be more efficient ways of doing this, but this is - // called once at the start of the search... - for (String cid : knowledgeBase.getClassIdsInSignature()) { - int cix = knowledgeBase.getClassIndex(cid); - EWAHCompressedBitmap supsBM = knowledgeBase.getDirectSuperClassesBM(cid); - int nParents = supsBM.cardinality(); - if (supsBM.andCardinality(onQueryNodesBM) == nParents) { - nodesWithOnParents.add(cix); - } - } - - return onQueryNodesBM.or(EWAHUtils.convertIndexSetToBitmap(nodesWithOnParents)); - } - - // any negated query node that has at least one negated parent; - // these are counted as no-transition - private EWAHCompressedBitmap getQueryNegatedNoTransition(EWAHCompressedBitmap negatedQueryProfileBM) { - Set nodes = new HashSet(); - - // there may be more efficient ways of doing this, but this is - // called once at the start of the search... 
- for (int cix : negatedQueryProfileBM.getPositions()) { - EWAHCompressedBitmap supsBM = knowledgeBase.getDirectSuperClassesBM(cix); - int nParents = supsBM.cardinality(); - if (supsBM.andCardinality(negatedQueryProfileBM) > 0) { - nodes.add(cix); - } - } - - return EWAHUtils.convertIndexSetToBitmap(nodes); - } - - /** - * @param q - * @return match profile containing probabilities of each individual - */ - public MatchSet findMatchProfileImpl(ProfileQuery q) { - - // double fpr = getFalsePositiveRate(); - // double fnr = getFalseNegativeRate(); - double sumOfProbs = 0.0; - - EWAHCompressedBitmap nodesQtBM = getProfileBM(q); - EWAHCompressedBitmap nodesQfBM = getNegatedProfileBM(q); - - // first, given a query (on and off states), - // group all nodes according to transitions from parent node - - // nomenclature: QUERY {unk,true,false} PARENTS {unk,true,false}+ - // multiple values taken as union - - // uncommitted nodes with a true parent (trans) - Set nodesQuPt = new HashSet(); - - // uncommitted nodes with an uncommitted parent (no trans) - Set nodesQuPu = new HashSet(); - - // off nodes with on or uncommitted parent (trans) - Set nodesQfPtu = new HashSet(); - - // off nodes with uncommitted parent - Set nodesQfPu = new HashSet(); - - // calculate transitions for all query nodes - /* - * if Q=t, then ALL parents MUST be t (NO transitions) if Q=u, then - * EITHER ALL parents ARE t : TRANSITION T->U AT LEAST ONE parent is=u : - * NO TRANSITION U->U NO PARENT is f if Q=f, then EITHER ONE parent IS f - * : NO TRANSITION F->F ALL parents ARE t : TRANSITION T->F ELSE : - * TRANSITION U->F - */ - for (String cid : knowledgeBase.getClassIdsInSignature()) { - int cix = knowledgeBase.getClassIndex(cid); - if (nodesQtBM.getPositions().contains(cix)) { - // state T, transition must be T->T - continue; - } - EWAHCompressedBitmap parentsBM = knowledgeBase.getDirectSuperClassesBM(cix); - if (nodesQfBM.getPositions().contains(cix)) { - // state = F - if 
(parentsBM.andCardinality(nodesQfBM) == 0) { - // transition T,U -> F - nodesQfPtu.add(cix); - } else { - // F->F - } - } else { - // state = U - if (parentsBM.andCardinality(nodesQtBM) < parentsBM.cardinality()) { - // transition T -> U ( F->U is impossible ) - nodesQuPt.add(cix); - } else { - // U->U - nodesQuPu.add(cix); - } - } - - } - EWAHCompressedBitmap nodesQuPtBM = EWAHUtils.convertIndexSetToBitmap(nodesQuPt); - EWAHCompressedBitmap nodesQuPuBM = EWAHUtils.convertIndexSetToBitmap(nodesQuPu); - EWAHCompressedBitmap nodesQfPtBM = EWAHUtils.convertIndexSetToBitmap(nodesQfPtu); - EWAHCompressedBitmap nodesQfPuBM = EWAHUtils.convertIndexSetToBitmap(nodesQfPu); - - // include subclasses - - EWAHCompressedBitmap queryNegatedNoTransitionBM = getQueryNegatedNoTransition(nodesQfBM); - EWAHCompressedBitmap queryNegatedWithTransitionBM = nodesQfBM.andNot(queryNegatedNoTransitionBM); - - MatchSet mp = MatchSetImpl.create(q); - - List indIds = getFilteredIndividualIds(q.getFilter()); - - double pvector[] = new double[indIds.size()]; - String indArr[] = new String[indIds.size()]; - int n = 0; - - // pr(Q=f | H=t) - double prFalseNegative = 0.000001; - - // pr(Q=t | H=f) - double prFalsePositive = 0.00001; - - // pr(Q=u | H=t) -- like a weaker false negative - double prFalseMiss = 0.01; - - // pr(Q=u | H=f) -- failure to make a call when hidden is false - double prTrueMiss = 0.85; - - // double prWeakFalsePositive = prFalsePositive * 100; - // double prWeakFalsePositive = Math.exp(Math.log(prFalsePositive) /4 ); - double prWeakFalsePositive = 0.1; - - double pprQtHtPt = 1 - (prFalseNegative + prFalseMiss); - double pprQfHfPt = 1 - (prFalsePositive + prTrueMiss); - - // double prWeakTrueMiss = prTrueMiss * 2; // failure to make a call - // when hidden is non-obvious false - double prWeakTrueMiss = 0.85; - for (String itemId : indIds) { - EWAHCompressedBitmap nodesHtBM = knowledgeBase.getTypesBM(itemId); - - // EWAHCompressedBitmap nodesHfBM = - // 
knowledgeBase.getNegatedTypesBM(itemId); - // TODO: consider propagating down - EWAHCompressedBitmap nodesHfBM = knowledgeBase.getDirectNegatedTypesBM(itemId); - - // any node which has an off query parent is discounted - // EWAHCompressedBitmap maskedTargetProfileBM = - // nodesHtBM.and(queryBlanketProfileBM); - - LOG.info("TARGET PROFILE for " + itemId + " " + nodesHtBM); - - // cumulative log-probability - double logp = 0.0; - // 3^3=27 combos for q (query), h (hidden) and p (parents) - // with states t, f and u - - // --- - // *** Hidden/Target=TRUE - // --- - - // ** T,T - - // 1. P(qi=TRUE | hi=TRUE, p(qi)=TRUE) = 1-(FN + FALSEMISS) - // note that if Q=t and H=t then it's impossible for P=u OR P=f; - // hence we use QtBM - int nQtHtPt = nodesQtBM.andCardinality(nodesHtBM); - if (nQtHtPt > 0) { - double cprQtHtPt = Math.pow(pprQtHtPt, nQtHtPt); - LOG.info(" nQtHtPt=" + nQtHtPt + " pr= " + cprQtHtPt); - logp += Math.log(cprQtHtPt); - } - - // P(qi=FALSE | hi=TRUE, p(qi)=TRUE) = FN - // P(qi=FALSE | hi=TRUE, p(qi)=UNK) = FN - // note we can combine P(qi=FALSE | hi=TRUE) for any non-false - // parent setting - // hence we use QfBM - int nQfHtPt = nodesQfBM.andCardinality(nodesHtBM); - if (nQfHtPt > 0) { - double cprQfHtPt = Math.pow(prFalseNegative, nQfHtPt); - LOG.info(" nQfHtPt=" + nQfHtPt + " pr= " + cprQfHtPt); - logp += Math.log(cprQfHtPt); - } - - // P(qi=UNK | hi=TRUE, p(qi)=TRUE) = FALSEMISS - int nQuHtPt = nodesQuPtBM.andCardinality(nodesHtBM); - if (nQuHtPt > 0) { - double cprQuHtPt = Math.pow(prFalseMiss, nQuHtPt); - LOG.info(" nQuHtPt=" + nQuHtPt + " pr= " + cprQuHtPt); - logp += Math.log(cprQuHtPt); - } - - // ** T,F - // none of these contribute to the score - - // P(qi=TRUE | hi=TRUE, p(qi)=FALSE) = 0 - // P(qi=FALSE | hi=TRUE, p(qi)=FALSE) = 1 - // P(qi=UNK | hi=TRUE, p(qi)=FALSE) = 0 - - // T,U - - // P(qi=TRUE | hi=TRUE, p(qi)=UNK) = 0 - // * ALREADY COVERED IN ABOVE: P(qi=FALSE | hi=TRUE, p(qi)=UNK) = FN - - // P(qi=UNK | hi=TRUE, p(qi)=UNK) = 
1-FN - int nQuHtPu = nodesQuPuBM.andCardinality(nodesHtBM); - if (nQuHtPu > 0) { - double cprQuHtPu = Math.pow(1 - prFalseNegative, nQuHtPu); - LOG.info(" nQuHtPu=" + nQuHtPu + " pr= " + cprQuHtPu); - logp += Math.log(cprQuHtPu); - } - - // --- - // *** Hidden/Target is FALSE - // --- - - // F,T - - // P(qi=TRUE | hi=FALSE, p(qi)=TRUE) = FP // e.g. 0.001 - // TODO: should check c(qi), and negation flows in the other - // direction - // note that if Q=t, then P=t, hence we use Qt - int nQtHfPt = nodesQtBM.andCardinality(nodesHfBM); - if (nQtHfPt > 0) { - double cprQtHfPt = Math.pow(prFalsePositive, nQtHfPt); - LOG.info(" nQtHfPt=" + nQtHfPt + " pr= " + cprQtHfPt); - logp += Math.log(cprQtHfPt); - } - // P(qi=FALSE | hi=FALSE, p(qi)=TRUE) = 1-(FP+TRUEMISS) // keep this - // high - int nQfHfPt = nodesQfPtBM.andCardinality(nodesHfBM); - if (nQfHfPt > 0) { - double cprQfHfPt = Math.pow(pprQfHfPt, nQfHfPt); - LOG.info(" nQfHfPt=" + nQfHfPt + " pr= " + cprQfHfPt); - logp += Math.log(cprQfHfPt); - } - - // P(qi=UNK | hi=FALSE, p(qi)=TRUE) = TRUEMISS // e.g. 0.05 - int nQuHfPt = nodesQuPtBM.andCardinality(nodesHfBM); - if (nQuHfPt > 0) { - double cprQuHfPt = Math.pow(prTrueMiss, nQuHfPt); - LOG.info(" nQuHfPt=" + nQuHfPt + " pr= " + cprQuHfPt); - logp += Math.log(cprQuHfPt); - } - - // F,F - - // P(qi=TRUE | hi=FALSE, p(qi)=FALSE) = 0 - // P(qi=FALSE | hi=FALSE, p(qi)=FALSE) = 1 - // P(qi=UNK | hi=FALSE, p(qi)=FALSE) = 0 - - // F,U - - // P(qi=TRUE | hi=FALSE, p(qi)=UNK) = 0 - // P(qi=FALSE | hi=FALSE, p(qi)=UNK) = 1-TRUEMISS - int nQfHfPu = nodesQfPuBM.andCardinality(nodesHfBM); - if (nQfHfPu > 0) { - double cprQfHfPu = Math.pow(1 - prTrueMiss, nQfHfPu); - LOG.info(" nQfHfPu=" + nQfHfPu + " pr= " + cprQfHfPu); - logp += Math.log(cprQfHfPu); - } - - // P(qi=UNK | hi=FALSE, p(qi)=UNK) = TRUEMISS // e.g. 
- int nQuHfPu = nodesQuPuBM.andCardinality(nodesHfBM); - if (nQuHfPu > 0) { - double cprQuHfPu = Math.pow(prTrueMiss, nQuHfPu); - LOG.info(" nQuHfPu=" + nQuHfPu + " pr= " + cprQuHfPu); - logp += Math.log(cprQuHfPu); - } - - // --- - // Hidden/Target is UNKNOWN (aka FALSE') - // --- - // 'unknown' for a hidden state makes no sense; also it would - // introduce combinatorial explosions. - // here we interpret the 3rd state as being logically FALSE, but as - // being false in a non-obvious way, with lower penalties for - // not observing the falseness - - // ** U,T - - // P(qi=TRUE | hi=UNK, p(qi)=TRUE) = FP' // > FP (it's more likely - // to make a false call if it's non-obvious) - // note that if Q=t, then P=t, hence we use Qt - int nQtHuPt = nodesQtBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); - if (nQtHuPt > 0) { - double cprQtHuPt = Math.pow(prWeakFalsePositive, nQtHuPt); - LOG.info(" nQtHuPt=" + nQtHuPt + " pr= " + cprQtHuPt); - logp += Math.log(cprQtHuPt); - } - // P(qi=FALSE | hi=UNK, p(qi)=TRUE) = 1-(FP' + TRUEMISS') - int nQfHuPt = nodesQfPtBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); - if (nQfHuPt > 0) { - double cprQfHuPt = Math.pow(1 - (prWeakFalsePositive + prWeakTrueMiss), nQfHuPt); - LOG.info(" nQfHuPt=" + nQfHuPt + " pr= " + cprQfHuPt); - logp += Math.log(cprQfHuPt); - } - // P(qi=UNK | hi=UNK, p(qi)=TRUE) = TRUEMISS' // > TRUEMISS (it's - // more likely to miss a non-obvious absence than an obvious - // absence) - int nQuHuPt = nodesQuPtBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); - if (nQuHuPt > 0) { - double cprQuHuPt = Math.pow(prWeakTrueMiss, nQuHuPt); - LOG.info(" nQuHuPt=" + nQuHuPt + " pr= " + cprQuHuPt); - logp += Math.log(cprQuHuPt); - } - - // ** U,F - - // P(qi=TRUE | hi=UNK, p(qi)=FALSE) = 0 - // P(qi=FALSE | hi=UNK, p(qi)=FALSE) = 1 - // P(qi=UNK | hi=UNK, p(qi)=FALSE) = 0 - - // ** U,U - - // P(qi=TRUE | hi=UNK, p(qi)=UNK) = 0 - // P(qi=FALSE | hi=UNK, p(qi)=UNK) = 1 - TRUEMISS' - int nQfHuPu = 
nodesQfPuBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); - if (nQfHuPu > 0) { - double cprQfHuPu = Math.pow(1 - prWeakTrueMiss, nQfHuPu); - LOG.info(" nQfHuPu=" + nQfHuPu + " pr= " + cprQfHuPu); - logp += Math.log(cprQfHuPu); - } - // P(qi=UNK | hi=UNK, p(qi)=UNK) = TRUEMISS' - int nQuHuPu = nodesQuPuBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); - if (nQuHuPu > 0) { - double cprQuHuPu = Math.pow(prWeakTrueMiss, nQuHuPu); - LOG.info(" nQuHuPu=" + nQuHuPu + " pr= " + cprQuHuPu); - logp += Math.log(cprQuHuPu); - } - - double p = Math.exp(logp); - pvector[n] = p; - indArr[n] = itemId; - sumOfProbs += p; - n++; - LOG.info("logp for " + itemId + " = " + logp + " sumOfLogProbs=" + sumOfProbs); - } - for (n = 0; n < pvector.length; n++) { - double p = pvector[n] / sumOfProbs; - String id = indArr[n]; - String label = knowledgeBase.getLabelMapper().getArbitraryLabel(id); - mp.add(createMatch(id, label, p)); - } - mp.sortMatches(); - return mp; - } +public class NaiveBayesFixedWeightThreeStateProfileMatcher extends AbstractProfileMatcher implements NegationAwareProfileMatcher { + + private Logger LOG = Logger.getLogger(NaiveBayesFixedWeightThreeStateProfileMatcher.class); + + + @Inject + private NaiveBayesFixedWeightThreeStateProfileMatcher(BMKnowledgeBase kb) { + super(kb); + } + + /** + * @param kb + * @return new instance + */ + public static NaiveBayesFixedWeightThreeStateProfileMatcher create(BMKnowledgeBase kb) { + return new NaiveBayesFixedWeightThreeStateProfileMatcher(kb); + } + + @Override + public String getShortName() { + return "naive-bayes-fixed-weight-three-state"; + } + + private EWAHCompressedBitmap getQueryBlanketBM(ProfileQuery q) { + EWAHCompressedBitmap onQueryNodesBM = getProfileBM(q); + Set nodesWithOnParents = new HashSet(); + + // there may be more efficient ways of doing this, but this is + // called once at the start of the search... 
+ for (String cid : knowledgeBase.getClassIdsInSignature()) { + int cix = knowledgeBase.getClassIndex(cid); + EWAHCompressedBitmap supsBM = knowledgeBase.getDirectSuperClassesBM(cid); + int nParents = supsBM.cardinality(); + if (supsBM.andCardinality(onQueryNodesBM) == nParents) { + nodesWithOnParents.add(cix); + } + } + + return onQueryNodesBM.or(EWAHUtils.convertIndexSetToBitmap(nodesWithOnParents)); + } + + // any negated query node that has at least one negated parent; + // these are counted as no-transition + private EWAHCompressedBitmap getQueryNegatedNoTransition(EWAHCompressedBitmap negatedQueryProfileBM) { + Set nodes = new HashSet(); + + // there may be more efficient ways of doing this, but this is + // called once at the start of the search... + for (int cix : negatedQueryProfileBM.getPositions()) { + EWAHCompressedBitmap supsBM = knowledgeBase.getDirectSuperClassesBM(cix); + int nParents = supsBM.cardinality(); + if (supsBM.andCardinality(negatedQueryProfileBM) > 0) { + nodes.add(cix); + } + } + + return EWAHUtils.convertIndexSetToBitmap(nodes); + } + + + /** + * @param q + * @return match profile containing probabilities of each individual + */ + public MatchSet findMatchProfileImpl(ProfileQuery q) { + + + //double fpr = getFalsePositiveRate(); + //double fnr = getFalseNegativeRate(); + double sumOfProbs = 0.0; + + EWAHCompressedBitmap nodesQtBM = getProfileBM(q); + EWAHCompressedBitmap nodesQfBM = getNegatedProfileBM(q); + + // first, given a query (on and off states), + // group all nodes according to transitions from parent node + + // nomenclature: QUERY {unk,true,false} PARENTS {unk,true,false}+ + // multiple values taken as union + + // uncommitted nodes with a true parent (trans) + Set nodesQuPt = new HashSet(); + + // uncommitted nodes with an uncommitted parent (no trans) + Set nodesQuPu = new HashSet(); + + // off nodes with on or uncommitted parent (trans) + Set nodesQfPtu = new HashSet(); + + // off nodes with uncommitted parent + Set 
nodesQfPu = new HashSet(); + + // calculate transitions for all query nodes + /* + if Q=t, then ALL parents MUST be t (NO transitions) + if Q=u, then EITHER + ALL parents ARE t : TRANSITION T->U + AT LEAST ONE parent is=u : NO TRANSITION U->U + NO PARENT is f + if Q=f, then EITHER + ONE parent IS f : NO TRANSITION F->F + ALL parents ARE t : TRANSITION T->F + ELSE : TRANSITION U->F + */ + for (String cid : knowledgeBase.getClassIdsInSignature()) { + int cix = knowledgeBase.getClassIndex(cid); + if (nodesQtBM.getPositions().contains(cix)) { + // state T, transition must be T->T + continue; + } + EWAHCompressedBitmap parentsBM = knowledgeBase.getDirectSuperClassesBM(cix); + if (nodesQfBM.getPositions().contains(cix)) { + // state = F + if (parentsBM.andCardinality(nodesQfBM) == 0) { + // transition T,U -> F + nodesQfPtu.add(cix); + } + else { + // F->F + } + } + else { + // state = U + if (parentsBM.andCardinality(nodesQtBM) < parentsBM.cardinality()) { + // transition T -> U ( F->U is impossible ) + nodesQuPt.add(cix); + } + else { + // U->U + nodesQuPu.add(cix); + } + } + + } + EWAHCompressedBitmap nodesQuPtBM = EWAHUtils.convertIndexSetToBitmap(nodesQuPt); + EWAHCompressedBitmap nodesQuPuBM = EWAHUtils.convertIndexSetToBitmap(nodesQuPu); + EWAHCompressedBitmap nodesQfPtBM = EWAHUtils.convertIndexSetToBitmap(nodesQfPtu); + EWAHCompressedBitmap nodesQfPuBM = EWAHUtils.convertIndexSetToBitmap(nodesQfPu); + + + // include subclasses + + + EWAHCompressedBitmap queryNegatedNoTransitionBM = + getQueryNegatedNoTransition(nodesQfBM); + EWAHCompressedBitmap queryNegatedWithTransitionBM = + nodesQfBM.andNot(queryNegatedNoTransitionBM); + + MatchSet mp = MatchSetImpl.create(q); + + List indIds = getFilteredIndividualIds(q.getFilter()); + + double pvector[] = new double[indIds.size()]; + String indArr[] = new String[indIds.size()]; + int n=0; + + // pr(Q=f | H=t) + double prFalseNegative = 0.000001; + + // pr(Q=t | H=f) + double prFalsePositive = 0.00001; + + // pr(Q=u | H=t) 
-- like a weaker false negative + double prFalseMiss = 0.01; + + // pr(Q=u | H=f) -- failure to make a call when hidden is false + double prTrueMiss = 0.85; + + //double prWeakFalsePositive = prFalsePositive * 100; + //double prWeakFalsePositive = Math.exp(Math.log(prFalsePositive) /4 ); + double prWeakFalsePositive = 0.1; + + double pprQtHtPt = 1 - (prFalseNegative + prFalseMiss); + double pprQfHfPt = 1 - (prFalsePositive + prTrueMiss); + + //double prWeakTrueMiss = prTrueMiss * 2; // failure to make a call when hidden is non-obvious false + double prWeakTrueMiss = 0.85; + for (String itemId : indIds) { + EWAHCompressedBitmap nodesHtBM = knowledgeBase.getTypesBM(itemId); + + //EWAHCompressedBitmap nodesHfBM = knowledgeBase.getNegatedTypesBM(itemId); + // TODO: consider propagating down + EWAHCompressedBitmap nodesHfBM = knowledgeBase.getDirectNegatedTypesBM(itemId); + + // any node which has an off query parent is discounted + //EWAHCompressedBitmap maskedTargetProfileBM = nodesHtBM.and(queryBlanketProfileBM); + + //LOG.info("TARGET PROFILE for "+itemId+" "+nodesHtBM); + + // cumulative log-probability + double logp = 0.0; + // 3^3=27 combos for q (query), h (hidden) and p (parents) + // with states t, f and u + + // --- + // *** Hidden/Target=TRUE + // --- + + // ** T,T + + // 1. 
P(qi=TRUE | hi=TRUE, p(qi)=TRUE) = 1-(FN + FALSEMISS) + // note that if Q=t and H=t then it's impossible for P=u OR P=f; + // hence we use QtBM + int nQtHtPt = nodesQtBM.andCardinality(nodesHtBM); + if (nQtHtPt > 0) { + double cprQtHtPt = Math.pow(pprQtHtPt, nQtHtPt); + LOG.info(" nQtHtPt="+nQtHtPt+" pr= "+cprQtHtPt); + logp += Math.log(cprQtHtPt); + } + + // P(qi=FALSE | hi=TRUE, p(qi)=TRUE) = FN + // P(qi=FALSE | hi=TRUE, p(qi)=UNK) = FN + // note we can combine P(qi=FALSE | hi=TRUE) for any non-false parent setting + // hence we use QfBM + int nQfHtPt = nodesQfBM.andCardinality(nodesHtBM); + if (nQfHtPt > 0) { + double cprQfHtPt = Math.pow(prFalseNegative, nQfHtPt); + LOG.info(" nQfHtPt="+nQfHtPt+" pr= "+cprQfHtPt); + logp += Math.log(cprQfHtPt); + } + + // P(qi=UNK | hi=TRUE, p(qi)=TRUE) = FALSEMISS + int nQuHtPt = nodesQuPtBM.andCardinality(nodesHtBM); + if (nQuHtPt > 0) { + double cprQuHtPt = Math.pow(prFalseMiss, nQuHtPt); + LOG.info(" nQuHtPt="+nQuHtPt+" pr= "+cprQuHtPt); + logp += Math.log(cprQuHtPt); + } + + + // ** T,F + // none of these contribute to the score + + // P(qi=TRUE | hi=TRUE, p(qi)=FALSE) = 0 + // P(qi=FALSE | hi=TRUE, p(qi)=FALSE) = 1 + // P(qi=UNK | hi=TRUE, p(qi)=FALSE) = 0 + + // T,U + + // P(qi=TRUE | hi=TRUE, p(qi)=UNK) = 0 + // * ALREADY COVERED IN ABOVE: P(qi=FALSE | hi=TRUE, p(qi)=UNK) = FN + + // P(qi=UNK | hi=TRUE, p(qi)=UNK) = 1-FN + int nQuHtPu = nodesQuPuBM.andCardinality(nodesHtBM); + if (nQuHtPu > 0) { + double cprQuHtPu = Math.pow(1-prFalseNegative, nQuHtPu); + LOG.info(" nQuHtPu="+nQuHtPu+" pr= "+cprQuHtPu); + logp += Math.log(cprQuHtPu); + } + + + // --- + // *** Hidden/Target is FALSE + // --- + + // F,T + + // P(qi=TRUE | hi=FALSE, p(qi)=TRUE) = FP // e.g. 
0.001 + // TODO: should check c(qi), and negation flows in the other direction + // note that if Q=t, then P=t, hence we use Qt + int nQtHfPt = nodesQtBM.andCardinality(nodesHfBM); + if (nQtHfPt > 0) { + double cprQtHfPt = Math.pow(prFalsePositive, nQtHfPt); + LOG.info(" nQtHfPt="+nQtHfPt+" pr= "+cprQtHfPt); + logp += Math.log(cprQtHfPt); + } + // P(qi=FALSE | hi=FALSE, p(qi)=TRUE) = 1-(FP+TRUEMISS) // keep this high + int nQfHfPt = nodesQfPtBM.andCardinality(nodesHfBM); + if (nQfHfPt > 0) { + double cprQfHfPt = Math.pow(pprQfHfPt, nQfHfPt); + LOG.info(" nQfHfPt="+nQfHfPt+" pr= "+cprQfHfPt); + logp += Math.log(cprQfHfPt); + } + + + // P(qi=UNK | hi=FALSE, p(qi)=TRUE) = TRUEMISS // e.g. 0.05 + int nQuHfPt = nodesQuPtBM.andCardinality(nodesHfBM); + if (nQuHfPt > 0) { + double cprQuHfPt = Math.pow(prTrueMiss, nQuHfPt); + LOG.info(" nQuHfPt="+nQuHfPt+" pr= "+cprQuHfPt); + logp += Math.log(cprQuHfPt); + } + + // F,F + + // P(qi=TRUE | hi=FALSE, p(qi)=FALSE) = 0 + // P(qi=FALSE | hi=FALSE, p(qi)=FALSE) = 1 + // P(qi=UNK | hi=FALSE, p(qi)=FALSE) = 0 + + // F,U + + // P(qi=TRUE | hi=FALSE, p(qi)=UNK) = 0 + // P(qi=FALSE | hi=FALSE, p(qi)=UNK) = 1-TRUEMISS + int nQfHfPu = nodesQfPuBM.andCardinality(nodesHfBM); + if (nQfHfPu > 0) { + double cprQfHfPu = Math.pow(1-prTrueMiss, nQfHfPu); + LOG.info(" nQfHfPu="+nQfHfPu+" pr= "+cprQfHfPu); + logp += Math.log(cprQfHfPu); + } + + + // P(qi=UNK | hi=FALSE, p(qi)=UNK) = TRUEMISS // e.g. + int nQuHfPu = nodesQuPuBM.andCardinality(nodesHfBM); + if (nQuHfPu > 0) { + double cprQuHfPu = Math.pow(prTrueMiss, nQuHfPu); + LOG.info(" nQuHfPu="+nQuHfPu+" pr= "+cprQuHfPu); + logp += Math.log(cprQuHfPu); + } + + + // --- + // Hidden/Target is UNKNOWN (aka FALSE') + // --- + // 'unknown' for a hidden state makes no sense; also it would introduce combinatorial explosions. 
+ // here we interpret the 3rd state as being logically FALSE, but as being false in a non-obvious way, with lower penalties for + // not observing the falseness + + // ** U,T + + // P(qi=TRUE | hi=UNK, p(qi)=TRUE) = FP' // > FP (it's more likely to make a false call if it's non-obvious) + // note that if Q=t, then P=t, hence we use Qt + int nQtHuPt = nodesQtBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); + if (nQtHuPt > 0) { + double cprQtHuPt = Math.pow(prWeakFalsePositive, nQtHuPt); + LOG.info(" nQtHuPt="+nQtHuPt+" pr= "+cprQtHuPt); + logp += Math.log(cprQtHuPt); + } + // P(qi=FALSE | hi=UNK, p(qi)=TRUE) = 1-(FP' + TRUEMISS') + int nQfHuPt = nodesQfPtBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); + if (nQfHuPt > 0) { + double cprQfHuPt = Math.pow(1-(prWeakFalsePositive + prWeakTrueMiss), nQfHuPt); + LOG.info(" nQfHuPt="+nQfHuPt+" pr= "+cprQfHuPt); + logp += Math.log(cprQfHuPt); + } + // P(qi=UNK | hi=UNK, p(qi)=TRUE) = TRUEMISS' // > TRUEMISS (it's more likely to miss a non-obvious absence than an obvious absence) + int nQuHuPt = nodesQuPtBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); + if (nQuHuPt > 0) { + double cprQuHuPt = Math.pow(prWeakTrueMiss, nQuHuPt); + LOG.info(" nQuHuPt="+nQuHuPt+" pr= "+cprQuHuPt); + logp += Math.log(cprQuHuPt); + } + + // ** U,F + + // P(qi=TRUE | hi=UNK, p(qi)=FALSE) = 0 + // P(qi=FALSE | hi=UNK, p(qi)=FALSE) = 1 + // P(qi=UNK | hi=UNK, p(qi)=FALSE) = 0 + + // ** U,U + + // P(qi=TRUE | hi=UNK, p(qi)=UNK) = 0 + // P(qi=FALSE | hi=UNK, p(qi)=UNK) = 1 - TRUEMISS' + int nQfHuPu = nodesQfPuBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); + if (nQfHuPu > 0) { + double cprQfHuPu = Math.pow(1- prWeakTrueMiss, nQfHuPu); + LOG.info(" nQfHuPu="+nQfHuPu+" pr= "+cprQfHuPu); + logp += Math.log(cprQfHuPu); + } + // P(qi=UNK | hi=UNK, p(qi)=UNK) = TRUEMISS' + int nQuHuPu = nodesQuPuBM.andNot(nodesHtBM).andNotCardinality(nodesHfBM); + if (nQuHuPu > 0) { + double cprQuHuPu = Math.pow(prWeakTrueMiss, nQuHuPu); + LOG.info(" 
nQuHuPu="+nQuHuPu+" pr= "+cprQuHuPu); + logp += Math.log(cprQuHuPu); + } + + + double p = Math.exp(logp); + pvector[n] = p; + indArr[n] = itemId; + sumOfProbs += p; + n++; + //LOG.info("logp for "+itemId+" = "+logp+" sumOfLogProbs="+sumOfProbs); + } + for (n = 0; n> individualToInterpretationToTypesBM = new HashMap<>(); + + @Inject protected NaiveBayesFixedWeightTwoStateProfileMatcher(BMKnowledgeBase kb) { super(kb); } @@ -67,13 +99,35 @@ public boolean isUseBlanket() { public String getShortName() { return "naive-bayes-fixed-weight-two-state"; } + + /** - * Extends the query profile - for every node c, all the direct parents of c - * are in the query profile, then add c to the query profile. + * @return the kLeastFrequent + */ + public int getkLeastFrequent() { + return kLeastFrequent; + } + + /** + * The default for this should be 0. When 0, the behavior is as for frequency unaware + * (i.e. every instance-class association with frequency info will be treated as normal instance-class) + * + * When k>1, will make use of the k least frequent annotations in probabilistic calculation + * + * @param kLeastFrequent the kLeastFrequent to set + */ + public void setkLeastFrequent(int kLeastFrequent) { + // reset cache + individualToInterpretationToTypesBM = new HashMap<>(); + this.kLeastFrequent = kLeastFrequent; + } + + /** + * Extends the query profile - for every node c, all the direct parents of c are in + * the query profile, then add c to the query profile. 
* - * We use this to reduce the size of the network when testing for - * probabilities + * We use this to reduce the size of the network when testing for probabilities * * TODO: fully evaluate the consequences of using this method * @@ -104,21 +158,22 @@ private EWAHCompressedBitmap getQueryBlanketBM(ProfileQuery q) { */ public MatchSet findMatchProfileImpl(ProfileQuery q) { - // double fpr = getFalsePositiveRate(); - // double fnr = getFalseNegativeRate(); + //double fpr = getFalsePositiveRate(); + //double fnr = getFalseNegativeRate(); double sumOfProbs = 0.0; EWAHCompressedBitmap queryProfileBM = getProfileBM(q); EWAHCompressedBitmap queryBlanketProfileBM = getQueryBlanketBM(q); - LOG.info("|OnQueryNodes|=" + queryProfileBM.cardinality()); - LOG.info("|QueryNodesWithOnParents|=" + queryBlanketProfileBM.cardinality()); + LOG.info("|OnQueryNodes|="+queryProfileBM.cardinality()); + LOG.info("|QueryNodesWithOnParents|="+queryBlanketProfileBM.cardinality()); - // int numClassesConsidered = - // knowledgeBase.getClassIdsInSignature().size(); + + //int numClassesConsidered = knowledgeBase.getClassIdsInSignature().size(); int numClassesConsidered; if (isUseBlanket()) { numClassesConsidered = queryBlanketProfileBM.cardinality(); - } else { + } + else { numClassesConsidered = knowledgeBase.getClassIdsInSignature().size(); } @@ -128,55 +183,85 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) { double pvector[] = new double[indIds.size()]; String indArr[] = new String[indIds.size()]; - int n = 0; + int n=0; + + for (String itemId : indIds) { - EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); - // any node which has an off query parent is discounted - targetProfileBM = targetProfileBM.and(queryBlanketProfileBM); - LOG.debug("TARGET PROFILE for " + itemId + " " + targetProfileBM); - - // two state model. 
- // mapping to Bauer et al: these correspond to mxy1, x=Q, y=H/T - int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); - int numInQueryAndNOTInTarget = queryProfileBM.andNotCardinality(targetProfileBM); - int numNOTInQueryAndInTarget = targetProfileBM.andNotCardinality(queryProfileBM); - int numNOTInQueryAndNOTInTarget = numClassesConsidered - - (numInQueryAndInTarget + numInQueryAndNOTInTarget + numNOTInQueryAndInTarget); - - double p = 0.0; - // TODO: optimize this - // integrate over a Dirichlet prior for alpha & beta, rather than - // gridsearch - // this can be done closed-form - for (double fnr : defaultFalseNegativeRateArr) { - for (double fpr : defaultFalsePositiveRateArr) { - - double pQ1T1 = Math.pow(1 - fnr, numInQueryAndInTarget); - double pQ0T1 = Math.pow(fnr, numNOTInQueryAndInTarget); - double pQ1T0 = Math.pow(fpr, numInQueryAndNOTInTarget); - double pQ0T0 = Math.pow(1 - fpr, numNOTInQueryAndNOTInTarget); - - // LOG.debug("pQ1T1 = "+(1-fnr)+" ^ "+ - // numInQueryAndInTarget+" = "+pQ1T1); - // LOG.debug("pQ0T1 = "+(fnr)+" ^ "+ - // numNOTInQueryAndInTarget+" = "+pQ0T1); - // LOG.debug("pQ1T0 = "+(fpr)+" ^ "+ - // numInQueryAndNOTInTarget+" = "+pQ1T0); - // LOG.debug("pQ0T0 = "+(1-fpr)+" ^ "+ - // numNOTInQueryAndNOTInTarget+" = "+pQ0T0); - // TODO: optimization. 
We can precalculate the logs for - // different integers - p += Math.exp(Math.log(pQ1T1) + Math.log(pQ0T1) + Math.log(pQ1T0) + Math.log(pQ0T0)); - - } - } - pvector[n] = p; - indArr[n] = itemId; - sumOfProbs += p; + + int effectiveK = kLeastFrequent; + int twoToTheK = (int) Math.pow(2, kLeastFrequent); + int numWeightedTypes = knowledgeBase.getDirectWeightedTypes(itemId).size(); + if (numWeightedTypes < kLeastFrequent) { + twoToTheK = (int) Math.pow(2, numWeightedTypes); + effectiveK = numWeightedTypes; + } + + double cumulativePr = 0; + for (int comboIndex = 0; comboIndex < twoToTheK; comboIndex++) { + + Double comboPr = null; + EWAHCompressedBitmap targetProfileBM; + if (kLeastFrequent == 0) { + targetProfileBM = knowledgeBase.getTypesBM(itemId); + } + else { + WeightedTypesBM wtbm = getTypesFrequencyAware(itemId, comboIndex, effectiveK); + comboPr = wtbm.weight; + targetProfileBM = wtbm.typesBM; + } + + // any node which has an off query parent is discounted + targetProfileBM = targetProfileBM.and(queryBlanketProfileBM); + LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); + + + // two state model. 
+ // mapping to Bauer et al: these correspond to mxy1, x=Q, y=H/T + int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); + int numInQueryAndNOTInTarget = queryProfileBM.andNotCardinality(targetProfileBM); + int numNOTInQueryAndInTarget = targetProfileBM.andNotCardinality(queryProfileBM); + int numNOTInQueryAndNOTInTarget = + numClassesConsidered - (numInQueryAndInTarget + numInQueryAndNOTInTarget + numNOTInQueryAndInTarget); + + double p = 0.0; + // TODO: optimize this + // integrate over a Dirichlet prior for alpha & beta, rather than gridsearch + // this can be done closed-form + for (double fnr : defaultFalseNegativeRateArr) { + for (double fpr : defaultFalsePositiveRateArr) { + + double pQ1T1 = Math.pow(1-fnr, numInQueryAndInTarget); + double pQ0T1 = Math.pow(fnr, numNOTInQueryAndInTarget); + double pQ1T0 = Math.pow(fpr, numInQueryAndNOTInTarget); + double pQ0T0 = Math.pow(1-fpr, numNOTInQueryAndNOTInTarget); + + + + //LOG.debug("pQ1T1 = "+(1-fnr)+" ^ "+ numInQueryAndInTarget+" = "+pQ1T1); + //LOG.debug("pQ0T1 = "+(fnr)+" ^ "+ numNOTInQueryAndInTarget+" = "+pQ0T1); + //LOG.debug("pQ1T0 = "+(fpr)+" ^ "+ numInQueryAndNOTInTarget+" = "+pQ1T0); + //LOG.debug("pQ0T0 = "+(1-fpr)+" ^ "+ numNOTInQueryAndNOTInTarget+" = "+pQ0T0); + //TODO: optimization. We can precalculate the logs for different integers + p += + Math.exp(Math.log(pQ1T1) + Math.log(pQ0T1) + Math.log(pQ1T0) + Math.log(pQ0T0)); + + } + } + + if (comboPr != null) { + p *= comboPr; + } + cumulativePr += p; + } + pvector[n] = cumulativePr; + indArr[n] = itemId; + + sumOfProbs += cumulativePr; n++; - LOG.debug("p for " + itemId + " = " + p); + LOG.debug("p for "+itemId+" = "+cumulativePr); + } - for (n = 0; n < pvector.length; n++) { + for (n = 0; n()); + } + Map m = individualToInterpretationToTypesBM.get(iix); + if (m.containsKey(n)) { + // use cached value + return m.get(n); + } + + // default direct type map. 
+ // note that associations with frequency annotations are includes here alongside + // normal associations + EWAHCompressedBitmap dtmap = knowledgeBase.getDirectTypesBM(itemId); + + // associations with frequency info + // map is from ClassIndex -> Weight + Map wmap = knowledgeBase.getDirectWeightedTypes(itemId); + + // sort with least frequent first + List sortedTypeIndices = new ArrayList<>(wmap.keySet()); + sortedTypeIndices.sort( (Integer i, Integer j) -> wmap.get(i) - wmap.get(j)); + + EWAHCompressedBitmap mask = new EWAHCompressedBitmap(); + double pr = 1.0; + for (int i=0; i< effectiveK; i++) { + Integer iClassIx = sortedTypeIndices.get(i); + Double w = wmap.get(iClassIx) / 100.0; + //LOG.info("Class "+iClassIx +" which is "+i+"-least frequent has weight "+w+" for individual "+itemId+" in combo "+n); + if ( (n >> i) % 2 == 0) { + mask.set(iClassIx); + pr *= 1-w; + } + else { + pr *= w; + } + } + //LOG.info("Instance "+itemId+" in combo "+n+" has Pr = "+pr); + + EWAHCompressedBitmap dtmapMasked = dtmap.xor(mask); + EWAHCompressedBitmap inferredTypesBM = knowledgeBase.getSuperClassesBM(dtmapMasked); + WeightedTypesBM wtbm = new WeightedTypesBM(inferredTypesBM, pr); + m.put(n, wtbm); + return wtbm; + } /** * @return probability a query class is a false positive */ @Deprecated public double getFalsePositiveRate() { - return defaultFalsePositiveRate; + return defaultFalsePositiveRate; } /** @@ -199,36 +339,39 @@ public double getFalsePositiveRate() { */ @Deprecated public double getFalseNegativeRate() { - return defaultFalseNegativeRate; + return defaultFalseNegativeRate; } + public void compare(String qid, String tid) { ProfileQuery q = createProfileQuery(qid); ProfileQuery t = createProfileQuery(tid); - + EWAHCompressedBitmap queryProfileBM = getProfileBM(q); EWAHCompressedBitmap targetProfileBM = getProfileBM(t); EWAHCompressedBitmap queryBlanketProfileBM = getQueryBlanketBM(q); targetProfileBM = targetProfileBM.and(queryBlanketProfileBM); + int 
numClassesConsidered = queryBlanketProfileBM.cardinality(); - + int numInQuery = queryProfileBM.cardinality(); int numInTarget = targetProfileBM.cardinality(); - + + int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); int numInQueryAndNOTInTarget = queryProfileBM.andNotCardinality(targetProfileBM); int numNOTInQueryAndInTarget = targetProfileBM.andNotCardinality(queryProfileBM); - int numNOTInQueryAndNOTInTarget = numClassesConsidered - - (numInQueryAndInTarget + numInQueryAndNOTInTarget + numNOTInQueryAndInTarget); + int numNOTInQueryAndNOTInTarget = + numClassesConsidered - (numInQueryAndInTarget + numInQueryAndNOTInTarget + numNOTInQueryAndInTarget); - // TODO: return appropriate data structure; this is currently only used - // for testing + // TODO: return appropriate data structure; this is currently only used for testing // LAST = fnr \t fpr - System.out.println(qid + "\t" + tid + "\t" + numInQueryAndInTarget + "\t" + numInQueryAndNOTInTarget + "\t" - + numNOTInQueryAndInTarget + "\t" + numNOTInQueryAndNOTInTarget + "\t" - + numNOTInQueryAndInTarget / (double) numInTarget + "\t" + "\t" - + numInQueryAndNOTInTarget / (double) numInQuery); + System.out.println(qid+"\t"+tid+"\t"+numInQueryAndInTarget+ + "\t"+numInQueryAndNOTInTarget+"\t"+numNOTInQueryAndInTarget+ + "\t"+numNOTInQueryAndNOTInTarget+ + "\t"+numNOTInQueryAndInTarget/(double)numInTarget+"\t"+ + "\t"+numInQueryAndNOTInTarget/(double)numInQuery); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java index a06d0ac..8084cc0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/NaiveBayesVariableWeightProfileMatcher.java @@ 
-3,6 +3,8 @@ import java.util.List; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; @@ -34,6 +36,7 @@ /** * @param kb */ + @Inject public NaiveBayesVariableWeightProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/PhenodigmICProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/PhenodigmICProfileMatcher.java index 8d3f8f0..62489a6 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/PhenodigmICProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/PhenodigmICProfileMatcher.java @@ -3,6 +3,8 @@ import java.util.List; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator.ClassInformationContentPair; @@ -32,6 +34,7 @@ /** * @param kb */ + @Inject public PhenodigmICProfileMatcher(BMKnowledgeBase kb) { super(kb); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java index 065ec3c..e7c207d 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/ThreeStateBayesianNetworkProfileMatcher.java @@ -6,6 +6,8 @@ import java.util.Map; import java.util.Set; +import javax.inject.Inject; + import org.apache.log4j.Logger; import 
org.monarchinitiative.owlsim.compute.cpt.IncoherentStateException; import org.monarchinitiative.owlsim.compute.cpt.impl.NodeProbabilities; @@ -22,7 +24,7 @@ /** * INCOMPLETE * - * Pr(Child = on | P) = SUM[0 targetToQueryCache; + private Map targetToQueryCache; + @Inject private ThreeStateBayesianNetworkProfileMatcher(BMKnowledgeBase kb) { super(kb); } @@ -63,17 +66,16 @@ public void precompute() { } targetToQueryCache = new HashMap(); } - + public class BitMapPair { public final EWAHCompressedBitmap bm1; public final EWAHCompressedBitmap bm2; - + public BitMapPair(EWAHCompressedBitmap bm1, EWAHCompressedBitmap bm2) { super(); this.bm1 = bm1; this.bm2 = bm2; } - @Override public int hashCode() { final int prime = 31; @@ -83,7 +85,6 @@ public int hashCode() { result = prime * result + ((bm2 == null) ? 0 : bm2.hashCode()); return result; } - @Override public boolean equals(Object obj) { if (this == obj) @@ -107,13 +108,15 @@ public boolean equals(Object obj) { return false; return true; } - private ThreeStateBayesianNetworkProfileMatcher getOuterType() { return ThreeStateBayesianNetworkProfileMatcher.this; } - + + } - + + + /** * note that this is exposed primarily for debugging purposes * @@ -137,11 +140,11 @@ public void calculateConditionalProbabilities(BMKnowledgeBase kb) throws Incoher /** * @param q * @return match profile containing probabilities of each individual - * @throws IncoherentStateException + * @throws IncoherentStateException */ public MatchSet findMatchProfileImpl(ProfileQuery q) throws IncoherentStateException { precompute(); - + boolean isUseNegation = q instanceof QueryWithNegation; if (!isUseNegation) { LOG.error("Consider using TwoState BN, this will be inefficient"); @@ -149,19 +152,20 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) throws IncoherentStateExcep EWAHCompressedBitmap negatedQueryProfileBM = null; Set negatedQueryClassIds = null; - // double fpr = getFalsePositiveRate(); - // double fnr = getFalseNegativeRate(); + 
//double fpr = getFalsePositiveRate(); + //double fnr = getFalseNegativeRate(); double sumOfProbs = 0.0; - // int numClasses = knowledgeBase.getClassIdsInSignature().size(); - // EWAHCompressedBitmap queryProfileBM = getProfileBM(q); - // EWAHCompressedBitmap negatedQueryProfileBM = null; + //int numClasses = knowledgeBase.getClassIdsInSignature().size(); + //EWAHCompressedBitmap queryProfileBM = getProfileBM(q); + // EWAHCompressedBitmap negatedQueryProfileBM = null; if (isUseNegation) { LOG.info("Using QueryWithNegation"); - QueryWithNegation nq = (QueryWithNegation) q; + QueryWithNegation nq = (QueryWithNegation)q; negatedQueryProfileBM = getDirectNegatedProfileBM(nq); negatedQueryClassIds = knowledgeBase.getClassIds(negatedQueryProfileBM); LOG.info("nqp=" + negatedQueryProfileBM); - } else { + } + else { LOG.info("Not using QueryWithNegation"); } @@ -172,24 +176,24 @@ public MatchSet findMatchProfileImpl(ProfileQuery q) throws IncoherentStateExcep double pvector[] = new double[indIds.size()]; String indArr[] = new String[indIds.size()]; - int n = 0; + int n=0; for (String itemId : indIds) { EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); EWAHCompressedBitmap negatedTargetProfileBM = knowledgeBase.getNegatedTypesBM(itemId); - LOG.debug("TARGET PROFILE for " + itemId + " " + targetProfileBM); + LOG.debug("TARGET PROFILE for "+itemId+" "+targetProfileBM); Calculator calc = new Calculator(targetProfileBM, negatedTargetProfileBM); - // double p = calculateProbability(queryClassIds, targetProfileBM); + //double p = calculateProbability(queryClassIds, targetProfileBM); double p = calc.calculateProbability(queryClassIds, negatedQueryClassIds); pvector[n] = p; indArr[n] = itemId; sumOfProbs += p; n++; - LOG.info("p for " + itemId + " = " + p); + //LOG.info("p for "+itemId+" = "+p); } - for (n = 0; n < pvector.length; n++) { + for (n = 0; n queryClassIds, Set negatedQueryClassIds) - throws IncoherentStateException { + public double 
calculateProbability(Set queryClassIds, + Set negatedQueryClassIds) throws IncoherentStateException { double cump = 1.0; // treat set of query class Ids as a leaf node that is the // class intersection of all members; ie q1^...^qn // for a class intersection, the CPT is always such that - // Pr=1.0, if all parents=1 - // Pr=0.0 otherwise + // Pr=1.0, if all parents=1 + // Pr=0.0 otherwise for (String queryClassId : queryClassIds) { - // LOG.info("+Q"+queryClassId); + //LOG.info("+Q"+queryClassId); double p = calculateProbability(queryClassId).prOn; cump *= p; } if (negatedQueryClassIds != null) { // TODO: prOff=0 for (String negatedQueryClassId : negatedQueryClassIds) { - LOG.info("-Q" + negatedQueryClassId); + LOG.info("-Q"+negatedQueryClassId); double p = calculateProbability(negatedQueryClassId).prOff; - LOG.info(" prOff=" + p); + LOG.info(" prOff="+p); cump *= p; } } @@ -273,51 +279,54 @@ public double calculateProbability(Set queryClassIds, Set negate * @param queryClassId * @param targetProfileBM * @return probability - * @throws IncoherentStateException + * @throws IncoherentStateException */ private NodeProbabilities calculateProbability(String queryClassId) throws IncoherentStateException { BMKnowledgeBase kb = getKnowledgeBase(); int qcix = kb.getClassIndex(queryClassId); - return calculateProbability(qcix); + return calculateProbability(qcix); } + /** - * Pr(Child = on | P) = SUM[0 pixs = kb.getDirectSuperClassesBM(qcix).getPositions(); NodeProbabilities[] parentOnProbs = new NodeProbabilities[pixs.size()]; LOG.debug("calculating for parents"); - for (int i = 0; i < pixs.size(); i++) { + for (int i=0; i curie = curieUtil.getCurie(iri.toString()); - if (curie.isPresent()) { - return curie.get(); - } else { - return iri.toString(); - } - } - } - - private void populateLabelsFromOntology(LabelMapper labelMapper, OWLOntology ontology) { - LOG.info("Populating labels from " + ontology); - int n = 0; - for (OWLAnnotationAssertionAxiom aaa : 
ontology.getAxioms(AxiomType.ANNOTATION_ASSERTION)) { - if (aaa.getProperty().isLabel()) { - if (aaa.getSubject() instanceof IRI && aaa.getValue() instanceof OWLLiteral) { - labelMapper.add(getShortForm((IRI) aaa.getSubject()), ((OWLLiteral) aaa.getValue()).getLiteral()); - n++; - } - } - } - if (n == 0) { - LOG.info("Setting labels from fragments"); - Set objs = new HashSet<>(); - objs.addAll(ontology.getClassesInSignature()); - objs.addAll(ontology.getIndividualsInSignature()); - for (OWLNamedObject obj : objs) { - labelMapper.add(getShortForm(obj.getIRI()), obj.getIRI().getFragment()); - n++; - } - } - LOG.info("Label axioms mapped: " + n); - } - - /** - * @return utility object to map labels to ids - */ - public LabelMapper getLabelMapper() { - return labelMapper; - } - - /** - * @return set of all classes - */ - public Set getClassesInSignature() { - return classToNodeMap.keySet(); // TODO - consider optimizing - } - - /** - * @return set of all class identifiers - */ - public Set getClassIdsInSignature() { - Set ids = new HashSet<>(); - for (OWLClass i : getClassesInSignature()) { - ids.add(getShortForm(i.getIRI())); - } - return ids; - } - - public Set getClassIdsByOntology(String ont) { - return getClassIdsInSignature().stream().filter(x -> isIn(x, ont)).collect(Collectors.toSet()); - } - - /** - * @param id - * @param ont - * @return true if id is in ontology - */ - public boolean isIn(String id, String ont) { - // TODO - use curie util - return id.startsWith(ont + ":") || id.contains("/" + ont + "_"); - } - - public int getNumClassNodes() { - return classNodeArray.length; - } - - /** - * @return set of all individual identifiers - */ - protected Set getIndividualsInSignature() { - return individualsInSignature; - } - - /** - * @return ids - */ - public Set getIndividualIdsInSignature() { - Set ids = new HashSet<>(); - for (OWLNamedIndividual i : getIndividualsInSignature()) { - ids.add(getShortForm(i.getIRI())); - } - return ids; - } - - /** - * @return 
OWLAPI representation of the ontology - */ - protected OWLOntology getOwlOntology() { - return owlOntology; - } - - // Assumption: data ontology includes ObjectPropertyAssertions - // TODO: make flexible - // TODO: extract associations - private void translateFromDataOntology() { - // TODO: allow other axiom types - for (OWLObjectPropertyAssertionAxiom opa : owlDataOntology.getAxioms(AxiomType.OBJECT_PROPERTY_ASSERTION)) { - OWLIndividual obj = opa.getObject(); - if (obj instanceof OWLNamedIndividual) { - OWLClass type = getOWLDataFactory().getOWLClass(((OWLNamedIndividual) obj).getIRI()); - OWLClassAssertionAxiom ca = getOWLDataFactory().getOWLClassAssertionAxiom(type, opa.getSubject()); - owlOntology.getOWLOntologyManager().addAxiom(owlOntology, ca); - } - } - } - - // Each OWLClass and OWLIndividual is mapped to an Integer index - private void createMap() { - LOG.info("Creating mapping from ontology objects to integers"); - classNodes = new HashSet<>(); - individualNodes = new HashSet<>(); - Set classesInSignature; - classesInSignature = owlOntology.getClassesInSignature(true); - LOG.info("|classes|=" + classesInSignature.size()); - classesInSignature.add(getOWLThing()); - classesInSignature.remove(getOWLNothing()); - individualsInSignature = owlOntology.getIndividualsInSignature(true); - LOG.info("|individuals|=" + individualsInSignature.size()); - classToNodeMap = new HashMap<>(); - individualToNodeMap = new HashMap<>(); - classNodeToIntegerMap = new HashMap<>(); - individualNodeToIntegerMap = new HashMap<>(); - propertyValueMapMap = new HashMap<>(); - final HashMap, Integer> classNodeToFrequencyMap = new HashMap<>(); - final HashMap, Double> classNodeToFreqDepthMap = new HashMap<>(); - for (OWLClass c : classesInSignature) { - if (owlReasoner.getInstances(c, false).isEmpty()) { - // TODO: deal with subclasses - // LOG.info("Skipping non-instantiated class: "+c); - // continue; - } - Node node = owlReasoner.getEquivalentClasses(c); - if 
(node.contains(getOWLNothing())) { - LOG.warn("Ignoring unsatisfiable class: " + c); - continue; - } - classNodes.add(node); - classToNodeMap.put(c, node); - int numAncNodes = owlReasoner.getSuperClasses(c, false).getNodes().size(); - int freq = owlReasoner.getInstances(c, false).getNodes().size(); - classNodeToFrequencyMap.put(node, freq); - - // freq depth is inversely correlated informativeness; - // frequency is primary measure (high freq = low informativeness); - // if frequency is tied, then tie is broken by number of ancestors - // (high ancestors = high informativeness) - // note that if frequency is not tied, then depth/ancestors should - // make - // no overall difference - we ensure this by taking the proportion - // of - // ancestor nodes divided by number of classes (there are always - // equal - // or more classes than nodes) - double freqDepth = freq + 1 - (numAncNodes / (double) classesInSignature.size()); - // LOG.info("freqDepth = "+freq+" "+freqDepth); - classNodeToFreqDepthMap.put(node, freqDepth); - } - - for (OWLNamedIndividual i : individualsInSignature) { - Node node = owlReasoner.getSameIndividuals(i); - individualNodes.add(node); - individualToNodeMap.put(i, node); - setPropertyValues(owlOntology, i); - if (owlDataOntology != null) - setPropertyValues(owlDataOntology, i); - } - - // Order class nodes such that LOW frequencies (HIGH Information - // Content) - // nodes are have LOWER indices - // TODO: use depth as a tie breaker - List> classNodesSorted = new ArrayList<>(classNodes); - classNodesSorted.sort((n1, n2) -> { - double f1 = classNodeToFreqDepthMap.get(n1); - double f2 = classNodeToFreqDepthMap.get(n2); - if (f1 < f2) - return -1; - if (f1 > f2) - return 1; - return 0; - }); - int numClassNodes = classNodesSorted.size(); - classNodeArray = classNodesSorted.toArray(new Node[numClassNodes]); - individualCountPerClassArray = new int[numClassNodes]; - for (int i = 0; i < numClassNodes; i++) { - 
classNodeToIntegerMap.put(classNodeArray[i], i); - // LOG.info(classNodeArray[i] + " ix="+i + " - // FREQ="+classNodeToFrequencyMap.get(classNodeArray[i])); - // LOG.info(classNodeArray[i] + " ix="+i + " - // IX_REV="+classNodeToIntegerMap.get(classNodeArray[i])); - individualCountPerClassArray[i] = classNodeToFrequencyMap.get(classNodeArray[i]); - } - individualNodeArray = individualNodes.toArray(new Node[individualNodes.size()]); - for (int i = 0; i < individualNodes.size(); i++) { - individualNodeToIntegerMap.put(individualNodeArray[i], i); - } - - } - - private void setPropertyValues(OWLOntology ont, OWLNamedIndividual i) { - Preconditions.checkNotNull(i); - Map> pvm = new HashMap<>(); - String id = getShortForm(i.getIRI()); - propertyValueMapMap.put(id, pvm); - for (OWLIndividualAxiom ax : ont.getAxioms(i)) { - if (ax instanceof OWLPropertyAssertionAxiom) { - OWLPropertyAssertionAxiom paa = (OWLPropertyAssertionAxiom) ax; - OWLPropertyExpression p = paa.getProperty(); - if (p instanceof OWLObjectProperty || p instanceof OWLDataProperty) { - String pid; - if (p instanceof OWLObjectProperty) - pid = getShortForm(((OWLObjectProperty) p).getIRI()); - else - pid = getShortForm(((OWLDataProperty) p).getIRI()); - OWLPropertyAssertionObject obj = paa.getObject(); - if (obj instanceof OWLLiteral) { - addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral()); - } else if (obj instanceof OWLNamedIndividual) { - addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI())); - - } - - } else if (false) { - String pid = getShortForm(((OWLDataProperty) p).getIRI()); - OWLLiteral obj = ((OWLDataPropertyAssertionAxiom) paa).getObject(); - if (obj instanceof OWLLiteral) { - addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral()); - } else if (obj instanceof OWLNamedIndividual) { - addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI())); - - } - - } - } - } - - } - - private void addPropertyValue(Map> pvm, String pid, String v) { - // 
LOG.debug("PV="+pid+"="+v); - if (!pvm.containsKey(pid)) - pvm.put(pid, new HashSet<>()); - pvm.get(pid).add(v); - } - - private void addOpposingClassPair(OWLClass c, OWLClassExpression dc) { - addOpposingClassPairAsym(c, dc); - if (!dc.isAnonymous()) - addOpposingClassPairAsym(dc.asOWLClass(), c); - } - - private void addOpposingClassPairAsym(OWLClass c, OWLClassExpression d) { - if (!opposingClassMap.containsKey(c)) - opposingClassMap.put(c, new HashSet<>()); - opposingClassMap.get(c).add(d); - } - - private void storeInferences() { - - // Note: if there are any nodes containing >1 class or individual, then - // the store method is called redundantly. This is unlikely to affect - // performance, - // and the semantics are unchanged - for (OWLClass c : getClassesInSignature()) { - int clsIndex = getIndex(c); - // LOG.info("Storing inferences for "+c+" --> " + clsIndex); - Set sups = getIntegersForClassSet(owlReasoner.getSuperClasses(c, false)); - sups.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c))); - - Set subs = getIntegersForClassSet(owlReasoner.getSubClasses(c, false)); - subs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c))); - - ontoEWAHStore.setDirectSuperClasses(clsIndex, getIntegersForClassSet(owlReasoner.getSuperClasses(c, true))); - ontoEWAHStore.setSuperClasses(clsIndex, sups); - ontoEWAHStore.setDirectSubClasses(clsIndex, getIntegersForClassSet(owlReasoner.getSubClasses(c, true))); - ontoEWAHStore.setSubClasses(clsIndex, subs); - - // Find all disjoint pairs plus opposing pairs - for (OWLAnnotationAssertionAxiom aaa : owlOntology.getAnnotationAssertionAxioms(c.getIRI())) { - // RO_0002604 is-opposite-of. 
TODO - use a vocabulary object - if (aaa.getProperty().getIRI().toString().equals("http://purl.obolibrary.org/obo/RO_0002604")) { - OWLAnnotationValue v = aaa.getValue(); - if (v instanceof IRI) { - IRI dciri = (IRI) v; - OWLClass dc = owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(dciri); - addOpposingClassPair(c, dc); - - } - } - } - - for (OWLDisjointClassesAxiom dca : owlOntology.getDisjointClassesAxioms(c)) { - for (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) { - addOpposingClassPair(c, dc); - } - } - - // direct individuals are those asserted to be of type c or anything - // equivalent to c - Set individualInts = new HashSet<>(); - for (OWLClass ec : owlReasoner.getEquivalentClasses(c).getEntities()) { - for (OWLClassAssertionAxiom ax : owlOntology.getClassAssertionAxioms(ec)) { - if (ax.getIndividual().isNamed()) { - individualInts.add(getIndex(ax.getIndividual().asOWLNamedIndividual())); - } - } - } - ontoEWAHStore.setDirectIndividuals(clsIndex, individualInts); - - } - for (OWLNamedIndividual i : individualsInSignature) { - int individualIndex = getIndex(i); - // LOG.info("String inferences for "+i+" --> " +individualIndex); - ontoEWAHStore.setDirectTypes(individualIndex, getIntegersForClassSet(owlReasoner.getTypes(i, true))); - ontoEWAHStore.setTypes(individualIndex, getIntegersForClassSet(owlReasoner.getTypes(i, false))); - - // Treat CLassAssertion( ComplementOf(c) i) as a negative assertion - Set ncs = new HashSet<>(); - Set ncsDirect = new HashSet<>(); - for (OWLClassAssertionAxiom cx : owlOntology.getClassAssertionAxioms(i)) { - // TODO: investigate efficiency - number of items set may be - // high - if (cx.getClassExpression() instanceof OWLObjectComplementOf) { - OWLObjectComplementOf nx = (OWLObjectComplementOf) (cx.getClassExpression()); - OWLClassExpression nc = nx.getOperand(); - ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(nc, false))); - 
ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(nc))); - ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(nc))); - } - } - - // Populate negative assertions from DisjointClasses axioms - for (OWLClass c : owlReasoner.getTypes(i, false).getFlattened()) { - LOG.debug("TESTING FOR DCs: " + c); - if (opposingClassMap.containsKey(c)) { - for (OWLClassExpression dc : opposingClassMap.get(c)) { - LOG.info(i + " Type: " + c + " DisjointWith: " + dc); - ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); - ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); - ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); - } - } - /* - * for (OWLDisjointClassesAxiom dca : - * owlOntology.getDisjointClassesAxioms(c)) { for - * (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) { - * LOG.info(i+" Type: "+c+" DisjointWith: "+dc); - * ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses( - * dc, false))); - * ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses - * (dc))); ncsDirect.add(getIndexForClassNode(owlReasoner. - * getEquivalentClasses(dc))); } } for - * (OWLAnnotationAssertionAxiom aaa : - * owlOntology.getAnnotationAssertionAxioms(c.getIRI())) { // - * RO_0002604 is-opposite-of. TODO - use a vocabulary object if - * (aaa.getProperty().getIRI().toString().equals( - * "http://purl.obolibrary.org/obo/RO_0002604" )) { - * OWLAnnotationValue v = aaa.getValue(); if (v instanceof IRI) - * { IRI dciri = (IRI)v; OWLClass dc = - * owlOntology.getOWLOntologyManager().getOWLDataFactory(). - * getOWLClass(dciri); - * ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses( - * dc, false))); - * ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses - * (dc))); ncsDirect.add(getIndexForClassNode(owlReasoner. 
- * getEquivalentClasses(dc))); - * - * } } } - */ - } - - ontoEWAHStore.setNegatedTypes(individualIndex, ncs); // TODO - - // determine - // if - // storing - // all - // inferred - // negated - // types is - // too - // inefficient - ontoEWAHStore.setDirectNegatedTypes(individualIndex, ncsDirect); - } - - } - - // TODO - private void storeIndividualProperties() { - for (OWLNamedIndividual i : individualsInSignature) { - for (OWLIndividualAxiom ax : owlOntology.getAxioms(i)) { - if (ax instanceof OWLObjectPropertyAssertionAxiom) { - OWLObjectPropertyExpression p = ((OWLObjectPropertyAssertionAxiom) ax).getProperty(); - } - } - } - } - - // TODO - complete this - // TODO - separate this out as it is not an OWLAPI model. Maybe sparql is - // overkill here? - // use sparql to query the memory model - private void storeIndividualToClassFrequencies() { - String sparql = ""; - Query query = QueryFactory.create(sparql); - Model model = null; - QueryExecution qexec = QueryExecutionFactory.create(query, model); - ResultSet results = qexec.execSelect(); - for (; results.hasNext();) { - QuerySolution soln = results.nextSolution(); - RDFNode x = soln.get("varName"); // Get a result variable by name. 
- Resource r = soln.getResource("VarR"); // Get a result variable - - // must be a resource - Literal l = soln.getLiteral("VarL"); // Get a result variable - must - // be a literal - } - } - - private Set getIntegersForClassSet(NodeSet nodeset) { - Set bits = new HashSet<>(); - for (Node n : nodeset.getNodes()) { - if (n.contains(getOWLNothing())) - continue; - bits.add(getIndexForClassNode(n)); - } - return bits; - } - - private Set getIntegersForIndividualSet(NodeSet nodeset) { - Set bits = new HashSet<>(); - for (Node n : nodeset.getNodes()) { - bits.add(getIndexForIndividualNode(n)); - } - return bits; - } - - /** - * Each class is mapped to an integer - * - * Note that equivalent classes will be mapped to the same integer - * - * @param c - * @return integer representation of class - */ - protected int getIndex(OWLClass c) { - Preconditions.checkNotNull(c); - return getIndexForClassNode(classToNodeMap.get(c)); - } - - /** - * @param id - * @return integer representation of class with id - */ - public int getClassIndex(String id) { - Preconditions.checkNotNull(id); - return getIndex(getOWLClass(id)); - } - - /** - * @param index - * @return OWLClass Node that corresponds to this index - */ - public Node getClassNode(int index) { - return classNodeArray[index]; - } - - /** - * @param index - * @return OWLClass Node that corresponds to this index - */ - public Node getIndividualNode(int index) { - return individualNodeArray[index]; - } - - /** - * @param cix - * @return bitmap - */ - public EWAHCompressedBitmap getDirectIndividualsBM(int cix) { - return ontoEWAHStore.getDirectIndividuals(cix); - } - - @Override - public EWAHCompressedBitmap getIndividualsBM(String classId) { - return getIndividualsBM(getClassIndex(classId)); - } - - @Override - public EWAHCompressedBitmap getIndividualsBM(int classIndex) { - if (classIndex == getRootIndex()) { - EWAHCompressedBitmap indsBM = new EWAHCompressedBitmap(); - indsBM.setSizeInBits(getIndividualIdsInSignature().size(), 
true); - return indsBM; - } - EWAHCompressedBitmap subsBM = getSubClasses(classIndex); - EWAHCompressedBitmap indsBM = null; - // Note this implementation iterates through all subclasses - // combining individuals; it is too expensive to store all inferred inds - // by class - for (int subcix : subsBM.getPositions()) { - EWAHCompressedBitmap bm = getDirectIndividualsBM(subcix); - if (indsBM == null) { - indsBM = bm; - } else { - indsBM = indsBM.or(bm); - } - } - return indsBM; - } - - /** - * Note: each index can correspond to multiple classes c1...cn if this set - * is an equivalence set. In this case the representative classId is - * returned - * - * @param index - * @return classId - */ - public String getClassId(int index) { - Node n = getClassNode(index); - OWLClass c = n.getRepresentativeElement(); - return getShortForm(c.getIRI()); - } - - public Set getClassIds(int index) { - Node n = getClassNode(index); - Set cids = new HashSet<>(); - for (OWLClass c : n.getEntities()) { - cids.add(getShortForm(c.getIRI())); - } - return cids; - } - - public Set getClassIds(EWAHCompressedBitmap bm) { - Set cids = new HashSet<>(); - for (int x : bm) { - Node n = getClassNode(x); - for (OWLClass c : n.getEntities()) { - cids.add(getShortForm(c.getIRI())); - } - } - return cids; - } - - /** - * @param id - * @return integer representation of class with id - */ - public int getIndividualIndex(String id) { - Preconditions.checkNotNull(id); - return getIndex(getOWLNamedIndividual(id)); - } - - /** - * Each set of equivalent classes (a class node) is mapped to a unique - * integer - * - * @param n - * @return integer representation of class node - */ - protected int getIndexForClassNode(Node n) { - Preconditions.checkNotNull(n); - if (!classNodeToIntegerMap.containsKey(n)) - LOG.error("No such node: " + n); - return classNodeToIntegerMap.get(n); - } - - /** - * Each individual is mapped to an integer - * - * Note that individuals that stand in a SameAs relationship to one 
another - * will be mapped to the same integer - * - * @param i - * @return integer representation of individual - */ - protected int getIndex(OWLNamedIndividual i) { - return getIndexForIndividualNode(individualToNodeMap.get(i)); - } - - /** - * Each set of same individuals (an individual node) is mapped to a unique - * integer - * - * @param n - * @return integer representation of class node - */ - protected int getIndexForIndividualNode(Node n) { - return individualNodeToIntegerMap.get(n); - } - - /** - * @param c - * @return Bitmap representation of set of superclasses of c (direct and - * indirect) - */ - protected EWAHCompressedBitmap getSuperClassesBM(OWLClass c) { - return ontoEWAHStore.getSuperClasses(getIndex(c)); - } - - /** - * @param c - * @return Bitmap representation of set of direct superclasses of c - */ - protected EWAHCompressedBitmap getDirectSuperClassesBM(OWLClass c) { - return ontoEWAHStore.getDirectSuperClasses(getIndex(c)); - } - - /** - * @param c - * @param isDirect - * @return Bitmap representation of set ofsuperclasses of c - */ - protected EWAHCompressedBitmap getSuperClassesBM(OWLClass c, boolean isDirect) { - return ontoEWAHStore.getSuperClasses(getIndex(c), isDirect); - } - - /** - * @param clsSet - * @return union of all superClasses (direct and indirect) of any input - * class - */ - protected EWAHCompressedBitmap getSuperClassesBMByOWLClassSet(Set clsSet) { - Set classIndices = new HashSet<>(); - for (OWLClass c : clsSet) { - classIndices.add(getIndex(c)); - } - return ontoEWAHStore.getSuperClasses(classIndices); - } - - public EWAHCompressedBitmap getSuperClassesBM(String cid) { - return ontoEWAHStore.getSuperClasses(getClassIndex(cid)); - } - - public EWAHCompressedBitmap getDirectSuperClassesBM(String cid) { - return ontoEWAHStore.getDirectSuperClasses(getClassIndex(cid)); - } - - public EWAHCompressedBitmap getSuperClassesBM(int classIndex) { - return ontoEWAHStore.getSuperClasses(classIndex); - } - - public 
EWAHCompressedBitmap getClassesBM(Set classIds) { - EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); - for (String id : classIds) { - bm.set(getClassIndex(id)); - } - return bm; - } - - public EWAHCompressedBitmap getDirectSuperClassesBM(int classIndex) { - return ontoEWAHStore.getDirectSuperClasses(classIndex); - } - - public EWAHCompressedBitmap getSubClasses(int classIndex) { - return ontoEWAHStore.getSubClasses(classIndex); - } - - public EWAHCompressedBitmap getDirectSubClassesBM(String cid) { - return ontoEWAHStore.getDirectSubClasses(getClassIndex(cid)); - } - - public EWAHCompressedBitmap getDirectSubClassesBM(int classIndex) { - return ontoEWAHStore.getDirectSubClasses(classIndex); - } - - /** - * @param clsIds - * @return union of all subClasses (direct and indirect) of any input class - */ - public EWAHCompressedBitmap getSubClassesBM(Set clsIds) { - Set classIndices = new HashSet<>(); - for (String id : clsIds) { - classIndices.add(getClassIndex(id)); - } - return ontoEWAHStore.getSubClasses(classIndices); - } - - /** - * @param clsIds - * @return union of all direct subClasses of all input classes - */ - public EWAHCompressedBitmap getDirectSubClassesBM(Set clsIds) { - Set classIndices = new HashSet<>(); - for (String id : clsIds) { - classIndices.add(getClassIndex(id)); - } - return ontoEWAHStore.getDirectSubClasses(classIndices); - } - - /** - * @param clsIds - * @return union of all superClasses (direct and indirect) of any input - * class - */ - public EWAHCompressedBitmap getSuperClassesBM(Set clsIds) { - Set classIndices = new HashSet<>(); - for (String id : clsIds) { - classIndices.add(getClassIndex(id)); - } - return ontoEWAHStore.getSuperClasses(classIndices); - } - - /** - * @param clsIds - * @return union of all direct superClasses of all input classes - */ - public EWAHCompressedBitmap getDirectSuperClassesBM(Set clsIds) { - Set classIndices = new HashSet(); - for (String id : clsIds) { - classIndices.add(getClassIndex(id)); - } - 
return ontoEWAHStore.getDirectSuperClasses(classIndices); - } - - /** - * @param i - * @return Bitmap representation of set of (direct or indirect) types of i - */ - protected EWAHCompressedBitmap getTypesBM(OWLNamedIndividual i) { - return ontoEWAHStore.getTypes(getIndex(i)); - } - - /** - * @param i - * @return Bitmap representation of set of direct types of i - */ - protected EWAHCompressedBitmap getDirectTypesBM(OWLNamedIndividual i) { - return ontoEWAHStore.getDirectTypes(getIndex(i)); - } - - /** - * @param i - * @param classFilter - * @return Bitmap representation of the subset of direct types of i, which - * are descendants of classFilter - */ - protected EWAHCompressedBitmap getFilteredDirectTypesBM(OWLNamedIndividual i, OWLClass c) { - return ontoEWAHStore.getDirectTypes(getIndex(i), this.getIndex(c)); - } - - /** - * @param i - * @param isDirect - * @return Bitmap representation of set of (direct or indirect) types of i - */ - protected EWAHCompressedBitmap getTypesBM(OWLNamedIndividual i, boolean isDirect) { - return ontoEWAHStore.getTypes(getIndex(i), isDirect); - } - - /** - * @param id - * @return bitmap representation of all (direct and indirect) instantiated - * classes - */ - public EWAHCompressedBitmap getTypesBM(String id) { - Preconditions.checkNotNull(id); - return ontoEWAHStore.getTypes(getIndividualIndex(id)); - } - - /** - * @param individualIndex - * @return bitmap representation of all (direct and indirect) instantiated - * classes - */ - public EWAHCompressedBitmap getTypesBM(int individualIndex) { - return ontoEWAHStore.getTypes(individualIndex); - } - - /** - * @param id - * @return bitmap representation of all (direct and indirect) classes known - * to be NOT instantiated - */ - public EWAHCompressedBitmap getNegatedTypesBM(String id) { - Preconditions.checkNotNull(id); - return ontoEWAHStore.getNegatedTypes(getIndividualIndex(id)); - } - - /** - * @param id - * @return bitmap representation of all (direct and indirect) classes known 
- * to be NOT instantiated - */ - public EWAHCompressedBitmap getDirectNegatedTypesBM(String id) { - Preconditions.checkNotNull(id); - return ontoEWAHStore.getDirectNegatedTypes(getIndividualIndex(id)); - } - - /** - * @param id - * @return bitmap representation of all (direct and indirect) instantiated - * classes - */ - public EWAHCompressedBitmap getDirectTypesBM(String id) { - Preconditions.checkNotNull(id); - return ontoEWAHStore.getDirectTypes(getIndividualIndex(id)); - } - - /** - * @param id - * @return bitmap representation of all (direct and indirect) instantiated - * classes that are subclasses of classId - */ - public EWAHCompressedBitmap getFilteredDirectTypesBM(String id, String classId) { - Preconditions.checkNotNull(id); - Preconditions.checkNotNull(classId); - return ontoEWAHStore.getDirectTypes(getIndividualIndex(id), getClassIndex(classId)); - } - - private OWLClass getOWLThing() { - return getOWLDataFactory().getOWLThing(); - } - - private OWLClass getOWLNothing() { - return getOWLDataFactory().getOWLNothing(); - } - - private OWLDataFactory getOWLDataFactory() { - return owlOntology.getOWLOntologyManager().getOWLDataFactory(); - } - - /** - * @param obj - * @return CURIE-style identifier - */ - protected String getIdentifier(OWLNamedObject obj) { - return obj.getIRI().toString(); - } - - /** - * @param id - * CURIE-style - * @return OWLAPI Class object - */ - protected OWLClass getOWLClass(String id) { - Preconditions.checkNotNull(id); - if (curieUtil.getCurieMap().isEmpty()) { - return getOWLClass(IRI.create(id)); - } else { - return getOWLClass(IRI.create(curieUtil.getIri(id).orElse(id))); - } - } - - /** - * @param iri - * @return OWLAPI Class object - */ - protected OWLClass getOWLClass(IRI iri) { - return owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(iri); - } - - /** - * @param iri - * @return OWLAPI Class object - */ - protected OWLNamedIndividual getOWLNamedIndividual(IRI iri) { - return 
owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLNamedIndividual(iri); - } - - /** - * @param id - * CURIE-style - * @return OWLAPI Class object - */ - public OWLNamedIndividual getOWLNamedIndividual(String id) { - Preconditions.checkNotNull(id); - if (curieUtil.getCurieMap().isEmpty()) { - return getOWLNamedIndividual(IRI.create(id)); - } else { - return getOWLNamedIndividual(IRI.create(curieUtil.getIri(id).orElse(id))); - } - } - - public Attribute getAttribute(String id) { - Preconditions.checkNotNull(id); - String label = labelMapper.getArbitraryLabel(id); - return new Attribute(id, label); - } - - public Entity getEntity(String id) { - Preconditions.checkNotNull(id); - String label = labelMapper.getArbitraryLabel(id); - return new Entity(id, label); - } - - public int[] getIndividualCountPerClassArray() { - return individualCountPerClassArray; - } - - @Override - public Map> getPropertyValueMap(String individualId) { - return propertyValueMapMap.get(individualId); - } - - @Override - public Set getPropertyValues(String individualId, String property) { - Map> m = getPropertyValueMap(individualId); - if (m.containsKey(property)) - return new HashSet<>(m.get(property)); - else - return Collections.emptySet(); - } - - public EWAHCompressedBitmap[] getStoredDirectSubClassIndex() { - return ontoEWAHStore.getStoredDirectSubClasses(); - } - - @Override - public int getRootIndex() { - return getIndex(getOWLThing()); - } - - @Override - public String getIndividualId(int index) { - Node n = getIndividualNode(index); - OWLNamedIndividual ind = n.getRepresentativeElement(); - return getShortForm(ind.getIRI()); - } - - @Override - public EWAHCompressedBitmap getFilteredTypesBM(Set ids, String classId) { - - Set classBits = new HashSet<>(); - for (String id : ids) { - classBits.add(this.getClassIndex(id)); - } - - return ontoEWAHStore.getTypes(classBits, getClassIndex(classId)); - - } - - public EWAHCompressedBitmap getFilteredDirectTypesBM(Set classIds, String 
classId) { - - Set classBits = new HashSet<>(); - for (String id : classIds) { - classBits.add(this.getClassIndex(id)); - } - - return ontoEWAHStore.getDirectTypes(classBits, getClassIndex(classId)); - - } + private Logger LOG = Logger.getLogger(BMKnowledgeBaseOWLAPIImpl.class); + + private KBMetadata kbMetdata; + + private EWAHKnowledgeBaseStore ontoEWAHStore; + private OWLOntology owlOntology; + private OWLOntology owlDataOntology; + private OWLReasoner owlReasoner; + + private Map, Integer> classNodeToIntegerMap; + private Node[] classNodeArray; + private Map, Integer> individualNodeToIntegerMap; + private Node[] individualNodeArray; + + private Set> classNodes; + private Set> individualNodes; + + private Map> classToNodeMap; + private Map> individualToNodeMap; + // private Set classesInSignature; + private Set individualsInSignature; + private Map>> propertyValueMapMap; + Map> opposingClassMap = + new HashMap>(); + + Map> individualToWeightedDirectTypeMap = new HashMap<>(); + + + private int[] individualCountPerClassArray; + + CURIEMapper curieMapper; + LabelMapper labelMapper; + CurieUtil curieUtil; + + /** + * @param owlOntology + * @param owlDataOntology TODO - fix this + * @param rf + */ + public BMKnowledgeBaseOWLAPIImpl(OWLOntology owlOntology, + OWLOntology owlDataOntology, OWLReasonerFactory rf, + CurieUtil curieUtil) { + super(); + curieMapper = new CURIEMapperImpl(); + labelMapper = new LabelMapperImpl(curieMapper); + + this.owlOntology = owlOntology; + this.owlDataOntology = owlDataOntology; + if (owlDataOntology != null) { + translateFromDataOntology(); + } + this.owlReasoner = rf.createReasoner(owlOntology); + this.curieUtil = curieUtil; + createMap(); + ontoEWAHStore = new EWAHKnowledgeBaseStore(classNodes.size(), individualNodes.size()); + storeInferences(); + populateLabelsFromOntology(labelMapper, owlOntology); + if (owlDataOntology != null) { + LOG.info("Fetching labels from " + owlDataOntology); + // the data ontology may contain labels of 
data items + populateLabelsFromOntology(labelMapper, owlDataOntology); + } + } + + public static BMKnowledgeBase create(OWLOntology owlOntology, OWLReasonerFactory rf, + CurieUtil curieUtil) { + return new BMKnowledgeBaseOWLAPIImpl(owlOntology, null, rf, curieUtil); + } + + /** + * @param owlOntology + * @param owlDataOntology + * @param rf + * @return + */ + public static BMKnowledgeBase create(OWLOntology owlOntology, OWLOntology owlDataOntology, + OWLReasonerFactory rf, CurieUtil curieUtil) { + return new BMKnowledgeBaseOWLAPIImpl(owlOntology, owlDataOntology, rf, curieUtil); + } + + + + public KBMetadata getKbMetdata() { + return kbMetdata; + } + + + + public void setKbMetdata(KBMetadata kbMetdata) { + this.kbMetdata = kbMetdata; + } + + private String getShortForm(IRI iri) { + if (curieUtil.getCurieMap().isEmpty()) { + return iri.toString(); + } else { + Optional curie = curieUtil.getCurie(iri.toString()); + if (curie.isPresent()) { + return curie.get(); + } + else { + return iri.toString(); + } + } + } + + private void populateLabelsFromOntology(LabelMapper labelMapper, OWLOntology ontology) { + LOG.info("Populating labels from " + ontology); + int n = 0; + for (OWLAnnotationAssertionAxiom aaa : ontology.getAxioms(AxiomType.ANNOTATION_ASSERTION)) { + if (aaa.getProperty().isLabel()) { + if (aaa.getSubject() instanceof IRI && aaa.getValue() instanceof OWLLiteral) { + labelMapper.add(getShortForm((IRI) aaa.getSubject()), + ((OWLLiteral) aaa.getValue()).getLiteral()); + n++; + } + } + } + if (n == 0) { + LOG.info("Setting labels from fragments"); + Set objs = new HashSet(); + objs.addAll(ontology.getClassesInSignature()); + objs.addAll(ontology.getIndividualsInSignature()); + for (OWLNamedObject obj : objs) { + labelMapper.add(getShortForm(obj.getIRI()), obj.getIRI().getFragment()); + n++; + } + } + LOG.info("Label axioms mapped: " + n); + } + + /** + * @return utility object to map labels to ids + */ + public LabelMapper getLabelMapper() { + return labelMapper; 
+ } + + /** + * @return set of all classes + */ + public Set getClassesInSignature() { + return classToNodeMap.keySet(); // TODO - consider optimizing + } + + /** + * @return set of all class identifiers + */ + public Set getClassIdsInSignature() { + Set ids = new HashSet(); + for (OWLClass i : getClassesInSignature()) { + ids.add(getShortForm(i.getIRI())); + } + return ids; + } + + public Set getClassIdsByOntology(String ont) { + return getClassIdsInSignature().stream().filter(x -> isIn(x, ont)).collect(Collectors.toSet()); + } + + /** + * @param id + * @param ont + * @return true if id is in ontology + */ + public boolean isIn(String id, String ont) { + // TODO - use curie util + return id.startsWith(ont+":") || id.contains("/"+ont+"_"); + } + + public int getNumClassNodes() { + return classNodeArray.length; + } + + + + /** + * @return set of all individual identifiers + */ + protected Set getIndividualsInSignature() { + return individualsInSignature; + } + + /** + * @return ids + */ + public Set getIndividualIdsInSignature() { + Set ids = new HashSet(); + for (OWLNamedIndividual i : getIndividualsInSignature()) { + ids.add(getShortForm(i.getIRI())); + } + return ids; + } + + + + /** + * @return OWLAPI representation of the ontology + */ + protected OWLOntology getOwlOntology() { + return owlOntology; + } + + // Assumption: data ontology includes ObjectPropertyAssertions + // TODO: make flexible + // TODO: extract associations + private void translateFromDataOntology() { + // TODO: allow other axiom types + for (OWLObjectPropertyAssertionAxiom opa : owlDataOntology + .getAxioms(AxiomType.OBJECT_PROPERTY_ASSERTION)) { + OWLIndividual obj = opa.getObject(); + if (obj instanceof OWLNamedIndividual) { + OWLClass type = getOWLDataFactory().getOWLClass(((OWLNamedIndividual) obj).getIRI()); + OWLClassAssertionAxiom ca = + getOWLDataFactory().getOWLClassAssertionAxiom(type, opa.getSubject()); + owlOntology.getOWLOntologyManager().addAxiom(owlOntology, ca); + } + } + } + 
+ + // Each OWLClass and OWLIndividual is mapped to an Integer index + private void createMap() { + LOG.info("Creating mapping from ontology objects to integers"); + classNodes = new HashSet>(); + individualNodes = new HashSet>(); + Set classesInSignature; + classesInSignature = owlOntology.getClassesInSignature(true); + LOG.info("|classes|=" + classesInSignature.size()); + classesInSignature.add(getOWLThing()); + classesInSignature.remove(getOWLNothing()); + individualsInSignature = owlOntology.getIndividualsInSignature(true); + LOG.info("|individuals|=" + individualsInSignature.size()); + classToNodeMap = new HashMap>(); + individualToNodeMap = new HashMap>(); + classNodeToIntegerMap = new HashMap, Integer>(); + individualNodeToIntegerMap = new HashMap, Integer>(); + propertyValueMapMap = new HashMap>>(); + final HashMap, Integer> classNodeToFrequencyMap = + new HashMap, Integer>(); + final HashMap, Double> classNodeToFreqDepthMap = + new HashMap, Double>(); + for (OWLClass c : classesInSignature) { + if (owlReasoner.getInstances(c, false).isEmpty()) { + // TODO: deal with subclasses + // LOG.info("Skipping non-instantiated class: "+c); + // continue; + } + Node node = owlReasoner.getEquivalentClasses(c); + if (node.contains(getOWLNothing())) { + LOG.warn("Ignoring unsatisfiable class: " + c); + continue; + } + classNodes.add(node); + classToNodeMap.put(c, node); + int numAncNodes = owlReasoner.getSuperClasses(c, false).getNodes().size(); + int freq = owlReasoner.getInstances(c, false).getNodes().size(); + classNodeToFrequencyMap.put(node, freq); + + // freq depth is inversely correlated informativeness; + // frequency is primary measure (high freq = low informativeness); + // if frequency is tied, then tie is broken by number of ancestors + // (high ancestors = high informativeness) + // note that if frequency is not tied, then depth/ancestors should make + // no overall difference - we ensure this by taking the proportion of + // ancestor nodes divided by 
number of classes (there are always equal + // or more classes than nodes) + double freqDepth = freq + 1 - (numAncNodes / (double) classesInSignature.size()); + // LOG.info("freqDepth = "+freq+" "+freqDepth); + classNodeToFreqDepthMap.put(node, freqDepth); + } + + for (OWLNamedIndividual i : individualsInSignature) { + Node node = owlReasoner.getSameIndividuals(i); + individualNodes.add(node); + individualToNodeMap.put(i, node); + setPropertyValues(owlOntology, i); + if (owlDataOntology != null) + setPropertyValues(owlDataOntology, i); + } + + // Order class nodes such that LOW frequencies (HIGH Information Content) + // nodes are have LOWER indices + // TODO: use depth as a tie breaker + List> classNodesSorted = new ArrayList>(classNodes); + Collections.sort(classNodesSorted, new Comparator>() { + public int compare(Node n1, Node n2) { + double f1 = classNodeToFreqDepthMap.get(n1); + double f2 = classNodeToFreqDepthMap.get(n2); + if (f1 < f2) + return -1; + if (f1 > f2) + return 1; + return 0; + } + }); + int numClassNodes = classNodesSorted.size(); + classNodeArray = classNodesSorted.toArray(new Node[numClassNodes]); + individualCountPerClassArray = new int[numClassNodes]; + for (int i = 0; i < numClassNodes; i++) { + classNodeToIntegerMap.put(classNodeArray[i], i); + // LOG.info(classNodeArray[i] + " ix="+i + " + // FREQ="+classNodeToFrequencyMap.get(classNodeArray[i])); + // LOG.info(classNodeArray[i] + " ix="+i + " + // IX_REV="+classNodeToIntegerMap.get(classNodeArray[i])); + individualCountPerClassArray[i] = classNodeToFrequencyMap.get(classNodeArray[i]); + } + individualNodeArray = individualNodes.toArray(new Node[individualNodes.size()]); + for (int i = 0; i < individualNodes.size(); i++) { + individualNodeToIntegerMap.put(individualNodeArray[i], i); + } + + } + + + private void setPropertyValues(OWLOntology ont, OWLNamedIndividual i) { + Preconditions.checkNotNull(i); + Map> pvm = new HashMap>(); + String id = getShortForm(i.getIRI()); + 
propertyValueMapMap.put(id, pvm); + for (OWLIndividualAxiom ax : ont.getAxioms(i)) { + if (ax instanceof OWLPropertyAssertionAxiom) { + OWLPropertyAssertionAxiom paa = (OWLPropertyAssertionAxiom) ax; + OWLPropertyExpression p = paa.getProperty(); + if (p instanceof OWLObjectProperty || p instanceof OWLDataProperty) { + String pid; + if (p instanceof OWLObjectProperty) + pid = getShortForm(((OWLObjectProperty) p).getIRI()); + else + pid = getShortForm(((OWLDataProperty) p).getIRI()); + OWLPropertyAssertionObject obj = paa.getObject(); + if (obj instanceof OWLLiteral) { + addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral()); + } else if (obj instanceof OWLNamedIndividual) { + addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI())); + + } + + } else if (false) { + String pid = getShortForm(((OWLDataProperty) p).getIRI()); + OWLLiteral obj = ((OWLDataPropertyAssertionAxiom) paa).getObject(); + if (obj instanceof OWLLiteral) { + addPropertyValue(pvm, pid, ((OWLLiteral) obj).getLiteral()); + } else if (obj instanceof OWLNamedIndividual) { + addPropertyValue(pvm, pid, getShortForm(((OWLNamedIndividual) obj).getIRI())); + + } + + } + } + } + + } + + + private void addPropertyValue(Map> pvm, String pid, String v) { + // LOG.debug("PV="+pid+"="+v); + if (!pvm.containsKey(pid)) + pvm.put(pid, new HashSet()); + pvm.get(pid).add(v); + } + + private void addOpposingClassPair(OWLClass c, OWLClassExpression dc) { + addOpposingClassPairAsym(c, dc); + if (!dc.isAnonymous()) + addOpposingClassPairAsym(dc.asOWLClass(), c); + } + + private void addOpposingClassPairAsym(OWLClass c, OWLClassExpression d) { + if (!opposingClassMap.containsKey(c)) + opposingClassMap.put(c, new HashSet()); + opposingClassMap.get(c).add(d); + } + + private void storeInferences() { + + + // Note: if there are any nodes containing >1 class or individual, then + // the store method is called redundantly. 
This is unlikely to affect performance, + // and the semantics are unchanged + for (OWLClass c : getClassesInSignature()) { + int clsIndex = getIndex(c); + // LOG.info("Storing inferences for "+c+" --> " + clsIndex); + Set sups = getIntegersForClassSet(owlReasoner.getSuperClasses(c, false)); + sups.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c))); + + Set subs = getIntegersForClassSet(owlReasoner.getSubClasses(c, false)); + subs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(c))); + + ontoEWAHStore.setDirectSuperClasses(clsIndex, + getIntegersForClassSet(owlReasoner.getSuperClasses(c, true))); + ontoEWAHStore.setSuperClasses(clsIndex, sups); + ontoEWAHStore.setDirectSubClasses(clsIndex, + getIntegersForClassSet(owlReasoner.getSubClasses(c, true))); + ontoEWAHStore.setSubClasses(clsIndex, subs); + + // Find all disjoint pairs plus opposing pairs + for (OWLAnnotationAssertionAxiom aaa : owlOntology.getAnnotationAssertionAxioms(c.getIRI())) { + // RO_0002604 is-opposite-of. 
TODO - use a vocabulary object + if (aaa.getProperty().getIRI().toString() + .equals("http://purl.obolibrary.org/obo/RO_0002604")) { + OWLAnnotationValue v = aaa.getValue(); + if (v instanceof IRI) { + IRI dciri = (IRI) v; + OWLClass dc = + owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(dciri); + addOpposingClassPair(c, dc); + + } + } + } + + for (OWLDisjointClassesAxiom dca : owlOntology.getDisjointClassesAxioms(c)) { + for (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) { + addOpposingClassPair(c, dc); + } + } + + + // direct individuals are those asserted to be of type c or anything equivalent to c + Set individualInts = new HashSet(); + for (OWLClass ec : owlReasoner.getEquivalentClasses(c).getEntities()) { + for (OWLClassAssertionAxiom ax : owlOntology.getClassAssertionAxioms(ec)) { + if (ax.getIndividual().isNamed()) { + individualInts.add(getIndex(ax.getIndividual().asOWLNamedIndividual())); + } + } + } + ontoEWAHStore.setDirectIndividuals(clsIndex, individualInts); + + } + + // populate frequency-awareness map + individualToWeightedDirectTypeMap = new HashMap<>(); + for (OWLNamedIndividual i : individualsInSignature) { + int individualIndex = getIndex(i); + // LOG.info("String inferences for "+i+" --> " +individualIndex); + ontoEWAHStore.setDirectTypes(individualIndex, + getIntegersForClassSet(owlReasoner.getTypes(i, true))); + ontoEWAHStore.setTypes(individualIndex, + getIntegersForClassSet(owlReasoner.getTypes(i, false))); + + // TODO - ensure robust for equivalent individuals + Map wmap = new HashMap<>(); + individualToWeightedDirectTypeMap.put(individualIndex, wmap); + for (OWLClassAssertionAxiom caax : owlOntology.getClassAssertionAxioms(i)) { + int cix; + + // only associations to named classes + if (caax.getClassExpression().isAnonymous()) { + continue; + } + cix = getIndex(caax.getClassExpression().asOWLClass()); + + // we use reification to store probability + for (OWLAnnotation ann : caax.getAnnotations()) { + 
OWLAnnotationProperty prop = ann.getProperty(); + OWLAnnotationValue v = ann.getValue(); + if (v instanceof OWLLiteral) { + OWLLiteral lv = v.asLiteral().get(); + Double pr = null; + if (lv.isDouble()) { + pr = lv.parseDouble(); + } + if (lv.isFloat()) { + pr = (double) lv.parseFloat(); + } + if (pr != null) { + // TODO : decide on a vocabulary + if (prop.getIRI().toString().contains("probability")) { + wmap.put(cix, (int) (pr * 100)); + } + } + if (lv.isInteger()) { + if (prop.getIRI().toString().contains("frequency")) { + wmap.put(cix, lv.parseInteger()); + } + + } + } + } + } + + // Treat ClassAssertion( ComplementOf(c) i) as a negative assertion + Set ncs = new HashSet(); + Set ncsDirect = new HashSet(); + for (OWLClassAssertionAxiom cx : owlOntology.getClassAssertionAxioms(i)) { + // TODO: investigate efficiency - number of items set may be high + if (cx.getClassExpression() instanceof OWLObjectComplementOf) { + OWLObjectComplementOf nx = (OWLObjectComplementOf) (cx.getClassExpression()); + OWLClassExpression nc = nx.getOperand(); + ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(nc, false))); + ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(nc))); + ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(nc))); + } + } + + // Populate negative assertions from DisjointClasses axioms + for (OWLClass c : owlReasoner.getTypes(i, false).getFlattened()) { + LOG.debug("TESTING FOR DCs: " + c); + if (opposingClassMap.containsKey(c)) { + for (OWLClassExpression dc : opposingClassMap.get(c)) { + LOG.info(i + " Type: " + c + " DisjointWith: " + dc); + ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); + ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); + ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); + } + } + /* + * for (OWLDisjointClassesAxiom dca : owlOntology.getDisjointClassesAxioms(c)) { for + * (OWLClassExpression dc : dca.getClassExpressionsMinus(c)) { + 
* LOG.info(i+" Type: "+c+" DisjointWith: "+dc); + * ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); + * ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); + * ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); } } for + * (OWLAnnotationAssertionAxiom aaa : owlOntology.getAnnotationAssertionAxioms(c.getIRI())) + * { // RO_0002604 is-opposite-of. TODO - use a vocabulary object if + * (aaa.getProperty().getIRI().toString().equals("http://purl.obolibrary.org/obo/RO_0002604" + * )) { OWLAnnotationValue v = aaa.getValue(); if (v instanceof IRI) { IRI dciri = (IRI)v; + * OWLClass dc = owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(dciri); + * ncs.addAll(getIntegersForClassSet(owlReasoner.getSubClasses(dc, false))); + * ncs.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); + * ncsDirect.add(getIndexForClassNode(owlReasoner.getEquivalentClasses(dc))); + * + * } } } + */ + } + + ontoEWAHStore.setNegatedTypes(individualIndex, ncs); // TODO - determine if storing all + // inferred negated types is too + // inefficient + ontoEWAHStore.setDirectNegatedTypes(individualIndex, ncsDirect); + } + + } + + // TODO + private void storeIndividualProperties() { + for (OWLNamedIndividual i : individualsInSignature) { + for (OWLIndividualAxiom ax : owlOntology.getAxioms(i)) { + if (ax instanceof OWLObjectPropertyAssertionAxiom) { + OWLObjectPropertyExpression p = ((OWLObjectPropertyAssertionAxiom) ax).getProperty(); + } + } + } + } + + // TODO - complete this + // TODO - separate this out as it is not an OWLAPI model. Maybe sparql is overkill here? 
+ // use sparql to query the memory model + private void storeIndividualToClassFrequencies() { + String sparql = ""; + Query query = QueryFactory.create(sparql); + Model model = null; + QueryExecution qexec = QueryExecutionFactory.create(query, model); + ResultSet results = qexec.execSelect(); + for (; results.hasNext();) { + QuerySolution soln = results.nextSolution(); + RDFNode x = soln.get("varName"); // Get a result variable by name. + Resource r = soln.getResource("VarR"); // Get a result variable - must be a resource + Literal l = soln.getLiteral("VarL"); // Get a result variable - must be a literal + } + } + + + + private Set getIntegersForClassSet(NodeSet nodeset) { + Set bits = new HashSet(); + for (Node n : nodeset.getNodes()) { + if (n.contains(getOWLNothing())) + continue; + bits.add(getIndexForClassNode(n)); + } + return bits; + } + + + private Set getIntegersForIndividualSet(NodeSet nodeset) { + Set bits = new HashSet(); + for (Node n : nodeset.getNodes()) { + bits.add(getIndexForIndividualNode(n)); + } + return bits; + } + + /** + * Each class is mapped to an integer + * + * Note that equivalent classes will be mapped to the same integer + * + * @param c + * @return integer representation of class + */ + protected int getIndex(OWLClass c) { + Preconditions.checkNotNull(c); + return getIndexForClassNode(classToNodeMap.get(c)); + } + + /** + * @param id + * @return integer representation of class with id + */ + public int getClassIndex(String id) { + Preconditions.checkNotNull(id); + return getIndex(getOWLClass(id)); + } + + /** + * @param index + * @return OWLClass Node that corresponds to this index + */ + public Node getClassNode(int index) { + return classNodeArray[index]; + } + + /** + * @param index + * @return OWLClass Node that corresponds to this index + */ + public Node getIndividualNode(int index) { + return individualNodeArray[index]; + } + + /** + * @param cix + * @return bitmap + */ + public EWAHCompressedBitmap getDirectIndividualsBM(int 
cix) { + return ontoEWAHStore.getDirectIndividuals(cix); + } + + @Override + public EWAHCompressedBitmap getIndividualsBM(String classId) { + return getIndividualsBM(getClassIndex(classId)); + } + + @Override + public EWAHCompressedBitmap getIndividualsBM(int classIndex) { + if (classIndex == getRootIndex()) { + EWAHCompressedBitmap indsBM = new EWAHCompressedBitmap(); + indsBM.setSizeInBits(getIndividualIdsInSignature().size(), true); + return indsBM; + } + EWAHCompressedBitmap subsBM = getSubClasses(classIndex); + EWAHCompressedBitmap indsBM = null; + // Note this implementation iterates through all subclasses + // combining individuals; it is too expensive to store all inferred inds by class + for (int subcix : subsBM.getPositions()) { + EWAHCompressedBitmap bm = getDirectIndividualsBM(subcix); + if (indsBM == null) { + indsBM = bm; + } else { + indsBM = indsBM.or(bm); + } + } + return indsBM; + } + + + /** + * Note: each index can correspond to multiple classes c1...cn if this set is an equivalence set. 
+ * In this case the representative classId is returned + * + * @param index + * @return classId + */ + public String getClassId(int index) { + Node n = getClassNode(index); + OWLClass c = n.getRepresentativeElement(); + return getShortForm(c.getIRI()); + } + + public Set getClassIds(int index) { + Node n = getClassNode(index); + Set cids = new HashSet(); + for (OWLClass c : n.getEntities()) { + cids.add(getShortForm(c.getIRI())); + } + return cids; + } + + public Set getClassIds(EWAHCompressedBitmap bm) { + Set cids = new HashSet(); + for (int x : bm) { + Node n = getClassNode(x); + for (OWLClass c : n.getEntities()) { + cids.add(getShortForm(c.getIRI())); + } + } + return cids; + } + + + /** + * @param id + * @return integer representation of class with id + */ + public int getIndividualIndex(String id) { + Preconditions.checkNotNull(id); + return getIndex(getOWLNamedIndividual(id)); + } + + /** + * Each set of equivalent classes (a class node) is mapped to a unique integer + * + * @param n + * @return integer representation of class node + */ + protected int getIndexForClassNode(Node n) { + Preconditions.checkNotNull(n); + if (!classNodeToIntegerMap.containsKey(n)) + LOG.error("No such node: " + n); + return classNodeToIntegerMap.get(n); + } + + /** + * Each individual is mapped to an integer + * + * Note that individuals that stand in a SameAs relationship to one another will be mapped to the + * same integer + * + * @param i + * @return integer representation of individual + */ + protected int getIndex(OWLNamedIndividual i) { + return getIndexForIndividualNode(individualToNodeMap.get(i)); + } + + /** + * Each set of same individuals (an individual node) is mapped to a unique integer + * + * @param n + * @return integer representation of class node + */ + protected int getIndexForIndividualNode(Node n) { + return individualNodeToIntegerMap.get(n); + } + + + + /** + * @param c + * @return Bitmap representation of set of superclasses of c (direct and indirect) + 
*/ + protected EWAHCompressedBitmap getSuperClassesBM(OWLClass c) { + return ontoEWAHStore.getSuperClasses(getIndex(c)); + } + + /** + * @param c + * @return Bitmap representation of set of direct superclasses of c + */ + protected EWAHCompressedBitmap getDirectSuperClassesBM(OWLClass c) { + return ontoEWAHStore.getDirectSuperClasses(getIndex(c)); + } + + /** + * @param c + * @param isDirect + * @return Bitmap representation of set ofsuperclasses of c + */ + protected EWAHCompressedBitmap getSuperClassesBM(OWLClass c, boolean isDirect) { + return ontoEWAHStore.getSuperClasses(getIndex(c), isDirect); + } + + /** + * @param clsSet + * @return union of all superClasses (direct and indirect) of any input class + */ + protected EWAHCompressedBitmap getSuperClassesBMByOWLClassSet(Set clsSet) { + Set classIndices = new HashSet(); + for (OWLClass c : clsSet) { + classIndices.add(getIndex(c)); + } + return ontoEWAHStore.getSuperClasses(classIndices); + } + + /* (non-Javadoc) + * @see org.monarchinitiative.owlsim.kb.BMKnowledgeBase#getSuperClassesBM(com.googlecode.javaewah.EWAHCompressedBitmap) + */ + public EWAHCompressedBitmap getSuperClassesBM(EWAHCompressedBitmap classesBM) { + return ontoEWAHStore.getSuperClasses(new HashSet<>(classesBM.getPositions())); + } + + public EWAHCompressedBitmap getSuperClassesBM(String cid) { + return ontoEWAHStore.getSuperClasses(getClassIndex(cid)); + } + + public EWAHCompressedBitmap getDirectSuperClassesBM(String cid) { + return ontoEWAHStore.getDirectSuperClasses(getClassIndex(cid)); + } + + public EWAHCompressedBitmap getSuperClassesBM(int classIndex) { + return ontoEWAHStore.getSuperClasses(classIndex); + } + + public EWAHCompressedBitmap getClassesBM(Set classIds) { + EWAHCompressedBitmap bm = new EWAHCompressedBitmap(); + for (String id : classIds) { + bm.set(getClassIndex(id)); + } + return bm; + } + + + public EWAHCompressedBitmap getDirectSuperClassesBM(int classIndex) { + return ontoEWAHStore.getDirectSuperClasses(classIndex); + 
} + + public EWAHCompressedBitmap getSubClasses(int classIndex) { + return ontoEWAHStore.getSubClasses(classIndex); + } + + public EWAHCompressedBitmap getDirectSubClassesBM(String cid) { + return ontoEWAHStore.getDirectSubClasses(getClassIndex(cid)); + } + + public EWAHCompressedBitmap getDirectSubClassesBM(int classIndex) { + return ontoEWAHStore.getDirectSubClasses(classIndex); + } + + /** + * @param clsIds + * @return union of all subClasses (direct and indirect) of any input class + */ + public EWAHCompressedBitmap getSubClassesBM(Set clsIds) { + Set classIndices = new HashSet(); + for (String id : clsIds) { + classIndices.add(getClassIndex(id)); + } + return ontoEWAHStore.getSubClasses(classIndices); + } + + /** + * @param clsIds + * @return union of all direct subClasses of all input classes + */ + public EWAHCompressedBitmap getDirectSubClassesBM(Set clsIds) { + Set classIndices = new HashSet(); + for (String id : clsIds) { + classIndices.add(getClassIndex(id)); + } + return ontoEWAHStore.getDirectSubClasses(classIndices); + } + + + /** + * @param clsIds + * @return union of all superClasses (direct and indirect) of any input class + */ + public EWAHCompressedBitmap getSuperClassesBM(Set clsIds) { + Set classIndices = new HashSet(); + for (String id : clsIds) { + classIndices.add(getClassIndex(id)); + } + return ontoEWAHStore.getSuperClasses(classIndices); + } + + /** + * @param clsIds + * @return union of all direct superClasses of all input classes + */ + public EWAHCompressedBitmap getDirectSuperClassesBM(Set clsIds) { + Set classIndices = new HashSet(); + for (String id : clsIds) { + classIndices.add(getClassIndex(id)); + } + return ontoEWAHStore.getDirectSuperClasses(classIndices); + } + + /** + * @param i + * @return Bitmap representation of set of (direct or indirect) types of i + */ + protected EWAHCompressedBitmap getTypesBM(OWLNamedIndividual i) { + return ontoEWAHStore.getTypes(getIndex(i)); + } + + /** + * @param i + * @return Bitmap 
representation of set of direct types of i + */ + protected EWAHCompressedBitmap getDirectTypesBM(OWLNamedIndividual i) { + return ontoEWAHStore.getDirectTypes(getIndex(i)); + } + + /** + * @param i + * @param classFilter + * @return Bitmap representation of the subset of direct types of i, which are descendants of + * classFilter + */ + protected EWAHCompressedBitmap getFilteredDirectTypesBM(OWLNamedIndividual i, OWLClass c) { + return ontoEWAHStore.getDirectTypes(getIndex(i), this.getIndex(c)); + } + + /** + * @param i + * @param isDirect + * @return Bitmap representation of set of (direct or indirect) types of i + */ + protected EWAHCompressedBitmap getTypesBM(OWLNamedIndividual i, boolean isDirect) { + return ontoEWAHStore.getTypes(getIndex(i), isDirect); + } + + /** + * @param id + * @return bitmap representation of all (direct and indirect) instantiated classes + */ + public EWAHCompressedBitmap getTypesBM(String id) { + Preconditions.checkNotNull(id); + return ontoEWAHStore.getTypes(getIndividualIndex(id)); + } + + /** + * @param individualIndex + * @return bitmap representation of all (direct and indirect) instantiated classes + */ + public EWAHCompressedBitmap getTypesBM(int individualIndex) { + return ontoEWAHStore.getTypes(individualIndex); + } + + /* (non-Javadoc) + * @see org.monarchinitiative.owlsim.kb.BMKnowledgeBase#getDirectWeightedTypes(java.lang.String) + */ + public Map getDirectWeightedTypes(String id) { + int iix = getIndividualIndex(id); + return individualToWeightedDirectTypeMap.get(iix); + } + + + /** + * @param id + * @return bitmap representation of all (direct and indirect) classes known to be NOT instantiated + */ + public EWAHCompressedBitmap getNegatedTypesBM(String id) { + Preconditions.checkNotNull(id); + return ontoEWAHStore.getNegatedTypes(getIndividualIndex(id)); + } + + /** + * @param id + * @return bitmap representation of all (direct and indirect) classes known to be NOT instantiated + */ + public EWAHCompressedBitmap 
getDirectNegatedTypesBM(String id) { + Preconditions.checkNotNull(id); + return ontoEWAHStore.getDirectNegatedTypes(getIndividualIndex(id)); + } + + + /** + * @param id + * @return bitmap representation of all (direct and indirect) instantiated classes + */ + public EWAHCompressedBitmap getDirectTypesBM(String id) { + Preconditions.checkNotNull(id); + return ontoEWAHStore.getDirectTypes(getIndividualIndex(id)); + } + + /** + * @param id + * @return bitmap representation of all (direct and indirect) instantiated classes that are + * subclasses of classId + */ + public EWAHCompressedBitmap getFilteredDirectTypesBM(String id, String classId) { + Preconditions.checkNotNull(id); + Preconditions.checkNotNull(classId); + return ontoEWAHStore.getDirectTypes(getIndividualIndex(id), getClassIndex(classId)); + } + + + + private OWLClass getOWLThing() { + return getOWLDataFactory().getOWLThing(); + } + + private OWLClass getOWLNothing() { + return getOWLDataFactory().getOWLNothing(); + } + + private OWLDataFactory getOWLDataFactory() { + return owlOntology.getOWLOntologyManager().getOWLDataFactory(); + } + + + /** + * @param obj + * @return CURIE-style identifier + */ + protected String getIdentifier(OWLNamedObject obj) { + return obj.getIRI().toString(); + } + + /** + * @param id CURIE-style + * @return OWLAPI Class object + */ + protected OWLClass getOWLClass(String id) { + Preconditions.checkNotNull(id); + if (curieUtil.getCurieMap().isEmpty()) { + return getOWLClass(IRI.create(id)); + } else { + return getOWLClass(IRI.create(curieUtil.getIri(id).or(id))); + } + } + + /** + * @param iri + * @return OWLAPI Class object + */ + protected OWLClass getOWLClass(IRI iri) { + return owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(iri); + } + + /** + * @param iri + * @return OWLAPI Class object + */ + protected OWLNamedIndividual getOWLNamedIndividual(IRI iri) { + return owlOntology.getOWLOntologyManager().getOWLDataFactory().getOWLNamedIndividual(iri); + } + + 
/** + * @param id CURIE-style identifier or full IRI; used as-is when the CURIE map is empty or curieUtil yields no expansion for it + * @return OWLAPI NamedIndividual object + */ + public OWLNamedIndividual getOWLNamedIndividual(String id) { + Preconditions.checkNotNull(id); + if (curieUtil.getCurieMap().isEmpty()) { + return getOWLNamedIndividual(IRI.create(id)); + } else { + return getOWLNamedIndividual(IRI.create(curieUtil.getIri(id).or(id))); + } + } + + public Attribute getAttribute(String id) { + Preconditions.checkNotNull(id); + String label = labelMapper.getArbitraryLabel(id); + return new Attribute(id, label); + } + + public Entity getEntity(String id) { + Preconditions.checkNotNull(id); + String label = labelMapper.getArbitraryLabel(id); + return new Entity(id, label); + } + + public int[] getIndividualCountPerClassArray() { + return individualCountPerClassArray; + } + + + + @Override + public Map> getPropertyValueMap(String individualId) { + return propertyValueMapMap.get(individualId); + } + + @Override + public Set getPropertyValues(String individualId, String property) { + Map> m = getPropertyValueMap(individualId); /* NOTE(review): m is whatever propertyValueMapMap.get returns; if that can be null for an unknown individualId, the containsKey call below throws - confirm */ + if (m.containsKey(property)) + return new HashSet(m.get(property)); + else + return Collections.emptySet(); + } + + public EWAHCompressedBitmap[] getStoredDirectSubClassIndex() { + return ontoEWAHStore.getStoredDirectSubClasses(); + } + + @Override + public int getRootIndex() { + return getIndex(getOWLThing()); + } + + + + @Override + public String getIndividualId(int index) { + Node n = getIndividualNode(index); + OWLNamedIndividual ind = n.getRepresentativeElement(); + return getShortForm(ind.getIRI()); + } + + + + @Override + public EWAHCompressedBitmap getFilteredTypesBM(Set ids, String classId) { + + Set classBits = new HashSet(); + for (String id : ids) { + classBits.add(this.getClassIndex(id)); + } + + return ontoEWAHStore.getTypes(classBits, getClassIndex(classId)); + + } + + + public EWAHCompressedBitmap getFilteredDirectTypesBM(Set classIds, String classId) { + + Set classBits = new HashSet(); + for (String id : classIds) { + 
classBits.add(this.getClassIndex(id)); + } + + return ontoEWAHStore.getDirectTypes(classBits, getClassIndex(classId)); + + } + + } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java index 9318ede..041b20a 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java @@ -109,10 +109,13 @@ protected void load(String fn, String... ontfns) throws OWLOntologyCreationExcep kb = loader.createKnowledgeBaseInterface(); } - protected void loadSimplePhenoWithNegation() throws OWLOntologyCreationException { - load("simple-pheno-with-negation.owl"); - - } + protected void loadSimplePhenoWithNegation() throws OWLOntologyCreationException { + load("simple-pheno-with-negation.owl"); + } + + protected void loadSimplePhenoWithFrequency() throws OWLOntologyCreationException { + load("simple-pheno-with-freqs.owl"); + } @Deprecated protected void search(ProfileMatcher profileMatcher, @@ -188,5 +191,17 @@ protected boolean isRankedLast(String matchId, MatchSet matchSet) { LOG.info("Rank of match "+matchId+" is "+matchRank+" which is last or joint last"); return true; } + + /** Returns true iff the first match in matchSet whose id equals matchId has rank expectedRank; returns false when no match has that id. */ protected boolean isRankedAt(String matchId, MatchSet matchSet, int expectedRank) { + int matchRank = 0; /* NOTE(review): matchRank is never read in this method */ + for (Match m : matchSet.getMatches()) { + int rank = m.getRank(); + + if (m.getMatchId().equals(matchId)) { + return (rank == expectedRank); + } + } + return false; + } } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java index b83ab08..5c379b2 100644 --- 
a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/NaiveBayesFixedWeightTwoStateProfileMatcherTest.java @@ -80,6 +80,59 @@ public void testExamplePositiveOnly() throws Exception { } + /** Runs a positive profile query for every individual in the KB loaded by loadSimplePhenoWithFrequency(), with kLeastFrequent set to 3, and asserts the expected rankings, including the order of the fplus/f0/fminus frequency-annotated targets. */ @Test + public void testFrequencyAware() throws Exception { + loadSimplePhenoWithFrequency(); + //LOG.info("INDS="+kb.getIndividualIdsInSignature()); + ProfileMatcher profileMatcher = createProfileMatcher(kb); + ((NaiveBayesFixedWeightTwoStateProfileMatcher) profileMatcher).setkLeastFrequent(3); + + Assert.assertTrue(kb.getIndividualIdsInSignature().size() > 0); + + int nOk = 0; + for (String i : kb.getIndividualIdsInSignature()) { + + ProfileQuery pq = profileMatcher.createPositiveProfileQuery(i); + TestQuery tq = new TestQuery(pq, i, 4); // self should always be ranked first + String fn = i.replaceAll(".*/", ""); + eval.writeJsonTo("target/naivebfreq-test-results-"+fn+".json"); + Assert.assertTrue(eval.evaluateTestQuery(profileMatcher, tq)); + + if (i.equals("http://x.org/ind-dec-all")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-no-brain-phenotype", tq.matchSet)); + nOk++; + } + if (i.equals("http://x.org/ind-big-heart-small-brain")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-big-femur", tq.matchSet)); + + // targets with frequency + Assert.assertTrue(isRankedAt("http://x.org/fplus-big-heart-small-brain", tq.matchSet, 2)); + Assert.assertTrue(isRankedAt("http://x.org/f0-big-heart-small-brain", tq.matchSet, 3)); + Assert.assertTrue(isRankedAt("http://x.org/fminus-big-heart-small-brain", tq.matchSet, 4)); + nOk++; + } + if (i.equals("http://x.org/ind-small-heart-big-brain")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-big-femur", tq.matchSet)); + + // targets with frequency + Assert.assertTrue(isRankedAt("http://x.org/fminus-big-heart-small-brain", tq.matchSet, 2)); + 
Assert.assertTrue(isRankedAt("http://x.org/f0-big-heart-small-brain", tq.matchSet, 3)); + Assert.assertTrue(isRankedAt("http://x.org/fplus-big-heart-small-brain", tq.matchSet, 4)); + nOk++; + } + if (i.equals("http://x.org/ind-unstated-phenotype")) { + //Assert.assertTrue(isRankedLast("http://x.org/ind-no-phenotype", tq.matchSet)); + //temporarily removed the no-phenotype individual from test; auto-pass this for now + nOk++; + } + if (i.equals("http://x.org/ind-no-brain-phenotype")) { + Assert.assertTrue(isRankedLast("http://x.org/ind-inc-all", tq.matchSet)); + nOk++; + } + + } + Assert.assertEquals(5, nOk); + } } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java index a906c89..bdb2a72 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/PhenodigmICProfileMatcherTest.java @@ -75,17 +75,16 @@ public void testCompareProfileFile() throws Exception { Set tcids = kb.getClassIds(kb.getDirectTypesBM(j)); ProfileQuery tp = profileMatcher.createProfileQueryFromClasses(tcids, null); - String fn = i.replaceAll(".*/", ""); + String fn = i.replaceAll(".*/", ""); //eval.writeJsonTo("target/pdgm-test-results-"+fn+".json"); Match pairMatch = profileMatcher.compareProfilePair(qp, tp); // note: scores may deiverge slightly; this is because // disjointness axioms are used for to populate negative class // assertions for individuals at KB creation time - System.out.println("COMPARING: "+i+" -vs- "+j); - System.out.println(pairMatch); - System.out.println(match); - System.out.println("---"); + LOG.debug("COMPARING: "+i+" -vs- "+j); + LOG.debug(pairMatch); + LOG.debug(match); } } diff --git a/owlsim-core/src/test/resources/simple-pheno-with-freqs.owl 
b/owlsim-core/src/test/resources/simple-pheno-with-freqs.owl new file mode 100644 index 0000000..2b5850a --- /dev/null +++ b/owlsim-core/src/test/resources/simple-pheno-with-freqs.owl @@ -0,0 +1,282 @@ +Prefix: : +Prefix: dc: +Prefix: owl: +Prefix: rdf: +Prefix: rdfs: +Prefix: xml: +Prefix: xsd: +Prefix: x: + + +Ontology: + + +AnnotationProperty: x:probability + + +Datatype: xsd:double + + +Class: absent-heart + + SubClassOf: + hypoplastic-heart + + +Class: bone-length + + SubClassOf: + bone-morphology + + +Class: bone-morphology + + SubClassOf: + skeletal-phenotype + + +Class: bone-shape + + SubClassOf: + bone-morphology + + +Class: brain-morphology + + SubClassOf: + neuro-phenotype + + +Class: brain-shape + + SubClassOf: + brain-morphology + + +Class: brain-size + + SubClassOf: + brain-morphology + + +Class: circulatory-phenotype + + SubClassOf: + phenotype + + +Class: dec-bone-length + + SubClassOf: + bone-length + + + +Class: dec-brain-size + + SubClassOf: + brain-size + + + +Class: dec-femur-length + + SubClassOf: + dec-bone-length + + + +Class: heart-morphology + + SubClassOf: + circulatory-phenotype + + +Class: heart-shape + + SubClassOf: + heart-morphology + + +Class: heart-size + + SubClassOf: + heart-morphology + + +Class: hyperplastic-heart + + SubClassOf: + heart-size + + + +Class: hypoplastic-heart + + SubClassOf: + heart-size + + + +Class: inc-bone-length + + SubClassOf: + bone-length + + + +Class: inc-brain-size + + SubClassOf: + brain-size + + + +Class: inc-femur-length + + SubClassOf: + inc-bone-length + + + +Class: neuro-phenotype + + SubClassOf: + phenotype + + +Class: phenotype + + +Class: skeletal-phenotype + + SubClassOf: + phenotype + + +Individual: ind-big-femur + + Types: + inc-femur-length + + +Individual: ind-big-heart-big-brain + + Types: + hyperplastic-heart, + inc-brain-size + +Individual: fplus-big-heart-small-brain + + Types: + Annotations: x:probability "0.75"^^xsd:double dec-brain-size, + Annotations: x:probability 
"0.25"^^xsd:double inc-brain-size, + Annotations: x:probability "0.75"^^xsd:double hyperplastic-heart, + Annotations: x:probability "0.25"^^xsd:double hypoplastic-heart + +Individual: f0-big-heart-small-brain + + Types: + Annotations: x:probability "0.5"^^xsd:double dec-brain-size, + Annotations: x:probability "0.5"^^xsd:double inc-brain-size, + Annotations: x:probability "0.5"^^xsd:double hyperplastic-heart, + Annotations: x:probability "0.5"^^xsd:double hypoplastic-heart + +Individual: fminus-big-heart-small-brain + + Types: + Annotations: x:probability "0.25"^^xsd:double dec-brain-size, + Annotations: x:probability "0.75"^^xsd:double inc-brain-size, + Annotations: x:probability "0.25"^^xsd:double hyperplastic-heart, + Annotations: x:probability "0.75"^^xsd:double hypoplastic-heart + + +Individual: ind-big-heart-small-brain + + Types: dec-brain-size, + hyperplastic-heart + + +Individual: ind-bone + + Types: + bone-morphology + + +Individual: ind-brain + + Types: + brain-morphology + + +Individual: ind-dec-all + + Types: + dec-bone-length, + dec-brain-size, + hypoplastic-heart + + +Individual: ind-heart-bone + + Types: + bone-morphology, + heart-morphology + + +Individual: ind-heart-brain + + Types: + brain-morphology, + heart-morphology + + +Individual: ind-heart-brain-bone + + Types: + bone-morphology, + brain-morphology, + heart-morphology + + +Individual: ind-inc-all + + Types: + hyperplastic-heart, + inc-bone-length, + inc-brain-size + + +Individual: ind-no-brain-phenotype + + Types: + phenotype, + not (brain-morphology) + + +Individual: ind-small-femur + + Types: + dec-femur-length + + +Individual: ind-small-heart-big-brain + + Types: + hypoplastic-heart, + inc-brain-size + + +Individual: ind-small-heart-small-brain + + Types: + dec-brain-size, + hypoplastic-heart + + +Individual: ind-unstated-phenotype + + Types: + phenotype + + diff --git a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java 
b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java index 429aa97..e6b76c7 100644 --- a/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java +++ b/owlsim-services/src/main/java/org/monarchinitiative/owlsim/services/modules/KnowledgeBaseModule.java @@ -14,6 +14,11 @@ import javax.inject.Singleton; import org.apache.commons.validator.routines.UrlValidator; +import org.monarchinitiative.owlsim.compute.classmatch.ClassMatcher; +import org.monarchinitiative.owlsim.compute.enrich.impl.HypergeometricEnrichmentEngine; +import org.monarchinitiative.owlsim.compute.matcher.impl.BayesianNetworkProfileMatcher; +import org.monarchinitiative.owlsim.compute.mica.MostInformativeCommonAncestorCalculator; +import org.monarchinitiative.owlsim.compute.mica.impl.MostInformativeCommonAncestorCalculatorImpl; import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; import org.monarchinitiative.owlsim.kb.impl.BMKnowledgeBaseOWLAPIImpl; import org.monarchinitiative.owlsim.services.modules.bindings.IndicatesDataTsvs; @@ -123,5 +128,25 @@ OWLOntology getDataTsvs(OWLOntologyManager manager) throws OWLOntologyCreationException, FileNotFoundException, IOException { return mergeOntologies(manager, dataTsvs); } + + /** Provides a MostInformativeCommonAncestorCalculatorImpl built on the injected knowledge base. */ @Provides + MostInformativeCommonAncestorCalculator getMostInformativeCommonAncestorCalculator(BMKnowledgeBase knowledgeBase) { + return new MostInformativeCommonAncestorCalculatorImpl(knowledgeBase); + } + + /** Provides a HypergeometricEnrichmentEngine built on the injected knowledge base. */ @Provides + HypergeometricEnrichmentEngine getHypergeometricEnrichmentEngine(BMKnowledgeBase knowledgeBase) { + return new HypergeometricEnrichmentEngine(knowledgeBase); + } + + /** Provides a BayesianNetworkProfileMatcher obtained from its static create factory for the injected knowledge base. */ @Provides + BayesianNetworkProfileMatcher getBayesianNetworkProfileMatcher(BMKnowledgeBase knowledgeBase) { + return BayesianNetworkProfileMatcher.create(knowledgeBase); + } + + /** Provides a ClassMatcher built on the injected knowledge base. NOTE(review): none of the four providers added in this hunk carries a scope annotation, so instances are presumably created per injection - confirm that is intended. */ @Provides + ClassMatcher getClassMatcher(BMKnowledgeBase knowledgeBase) { + return new ClassMatcher(knowledgeBase); + } }