Permalink
Cannot retrieve contributors at this time
Fetching contributors…
| package org.monarchinitiative.owlsim.compute.cpt.impl; | |
| import java.util.HashMap; | |
| import java.util.List; | |
| import java.util.Map; | |
| import org.apache.log4j.Logger; | |
| import org.monarchinitiative.owlsim.compute.cpt.ConditionalProbabilityIndex; | |
| import org.monarchinitiative.owlsim.compute.cpt.IncoherentStateException; | |
| import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; | |
| import com.googlecode.javaewah.EWAHCompressedBitmap; | |
| /** | |
| * An implementation of {@link ConditionalProbabilityIndex} in which only two states are | |
| * possible: | |
| * | |
| * 0 = u (unknown/unobserved/off) | |
| * 1 = t (true/observed/on) | |
| * | |
| * | |
| * | |
| * @author cjm | |
| * | |
| */ | |
| //TODO: Use Abstract parent class | |
| public class TwoStateConditionalProbabilityIndex implements ConditionalProbabilityIndex { | |
| private Logger LOG = Logger.getLogger(TwoStateConditionalProbabilityIndex.class); | |
| BMKnowledgeBase kb; | |
| private char UNKNOWN = 'u'; | |
| private char ON = 't'; | |
| private char[] STATES = { UNKNOWN, ON }; | |
| Double[][] conditionalProbabilityByChildParentState; | |
| Map<Integer,Character>[][] parentStateMapByIndex; | |
| /** | |
| * @param size | |
| */ | |
| public TwoStateConditionalProbabilityIndex(int size) { | |
| super(); | |
| init(size); | |
| } | |
| /** | |
| * @param kb | |
| */ | |
| public TwoStateConditionalProbabilityIndex(BMKnowledgeBase kb) { | |
| super(); | |
| this.kb = kb; | |
| init(kb.getNumClassNodes()); | |
| } | |
| private void init(int size) { | |
| conditionalProbabilityByChildParentState = new Double[size][]; | |
| parentStateMapByIndex = | |
| (Map<Integer,Character>[][])new Map[size][]; | |
| } | |
| /** | |
| * @param kb | |
| * @return CPI | |
| */ | |
| public static ConditionalProbabilityIndex create(BMKnowledgeBase kb) { | |
| return new TwoStateConditionalProbabilityIndex(kb.getNumClassNodes()); | |
| } | |
| /** | |
| * @param size | |
| * @return CPI | |
| */ | |
| public static ConditionalProbabilityIndex create(int size) { | |
| return new TwoStateConditionalProbabilityIndex(size); | |
| } | |
| public Double getConditionalProbabilityChildIsOn(int clsIndex, int parentsState) { | |
| return conditionalProbabilityByChildParentState[clsIndex][parentsState]; | |
| } | |
| public Map<Integer, Character> getParentsToStateMapping(int clsIndex, int parentsState) { | |
| return parentStateMapByIndex[clsIndex][parentsState]; | |
| } | |
| public int getNumberOfParentStates(int clsIndex) { | |
| return parentStateMapByIndex[clsIndex] == null ? | |
| 0 : parentStateMapByIndex[clsIndex].length; | |
| } | |
| public void setConditionalProbabilityChildIsOn(int childClassIndex, int parentsState, int numStates, double cp) throws IncoherentStateException { | |
| if (conditionalProbabilityByChildParentState[childClassIndex] == null) | |
| conditionalProbabilityByChildParentState[childClassIndex] = new Double[numStates]; | |
| if (cp < 0.0) { | |
| throw new IncoherentStateException("Pr(C|Parents)="+cp); | |
| } | |
| if (cp > 1.0) { | |
| throw new IncoherentStateException("Pr(C|Parents)="+cp); | |
| } | |
| conditionalProbabilityByChildParentState[childClassIndex][parentsState] = cp; | |
| } | |
| public void calculateConditionalProbabilities(BMKnowledgeBase kb) throws IncoherentStateException { | |
| this.kb = kb; | |
| //int[] icpca = kb.getIndividualCountPerClassArray(); | |
| int totalInds = kb.getIndividualsBM(kb.getRootIndex()).cardinality(); | |
| LOG.info("Calculating entire CPT..."); | |
| for (String cid : kb.getClassIdsInSignature()) { | |
| LOG.debug(" Calculating CPT for "+cid); | |
| int cix = kb.getClassIndex(cid); | |
| int numIndividualsForChild = kb.getIndividualsBM(cix).cardinality(); | |
| EWAHCompressedBitmap sups = kb.getDirectSuperClassesBM(cid); | |
| List<Integer> pixs = sups.getPositions(); // ASSUME STABLE ORDERING | |
| int numParents = pixs.size(); | |
| // assume two states for now: will be extendable to yes, no, unknown | |
| int numStates = (int) Math.pow(2, numParents); | |
| if (numParents == 0) { | |
| LOG.debug("Root: "+cid); | |
| continue; | |
| } | |
| if (parentStateMapByIndex[cix] == null) | |
| parentStateMapByIndex[cix] = new Map[numStates]; | |
| for (int parentState=0; parentState<numStates; parentState++) { | |
| // Pr(C=on | P1=P1_s, ..., Pn=Pn_s) = |C| / |{ p : p in P & p=on } | | |
| Map<Integer, Character> parentStateMap = calculateParentStateMapForIndex(parentState, pixs); | |
| EWAHCompressedBitmap allIndsForOnParentsBM = null; | |
| for (int pix : parentStateMap.keySet()) { | |
| char state = parentStateMap.get(pix); | |
| if (state == ON) { | |
| EWAHCompressedBitmap indsBM = kb.getIndividualsBM(pix); | |
| if (allIndsForOnParentsBM == null) | |
| allIndsForOnParentsBM = indsBM; | |
| else | |
| allIndsForOnParentsBM = allIndsForOnParentsBM.and(indsBM); | |
| } | |
| } | |
| int numIndividualsForOnParents = | |
| allIndsForOnParentsBM == null ? | |
| totalInds : allIndsForOnParentsBM.cardinality(); | |
| // in any given corpus, there may be unseen classes, which can lead to 0/0=Nan | |
| // we therefore boost the population by making 9 additional 'clones' of any | |
| double conditionalProbability = | |
| (10*numIndividualsForChild+1) / (double) (10*numIndividualsForOnParents+1); | |
| LOG.debug(" CP for "+parentStateMap+" = "+numIndividualsForChild+"/"+numIndividualsForOnParents+" = "+conditionalProbability); | |
| setConditionalProbabilityChildIsOn(cix, parentState, | |
| numStates, conditionalProbability); | |
| parentStateMapByIndex[cix][parentState] = parentStateMap; | |
| } | |
| } | |
| LOG.info("DONE Calculating CPT"); | |
| } | |
| private Map<Integer,Character> calculateParentStateMapForIndex(int parentState, | |
| List<Integer> parentIxs) { | |
| int numStateTypes = STATES.length; | |
| Map<Integer,Character> parentStateMap = new HashMap<Integer,Character>(); | |
| for (int i=0; i<parentIxs.size(); i++) { | |
| int mod = parentState % numStateTypes; | |
| Integer p = parentIxs.get(i); | |
| parentStateMap.put(p, STATES[mod]); | |
| parentState = parentState / numStateTypes; | |
| } | |
| return parentStateMap; | |
| } | |
| } |