Skip to content
Browse files

Merge branch 'master' of https://github.com/wonlay/bobo

  • Loading branch information...
2 parents b715450 + 475f45d commit 90c16234aca55849abf474c160ba37d57a16ab92 Volodymyr Zhabiuk committed Oct 21, 2012
Showing with 838 additions and 244 deletions.
  1. +2 −2 bobo-browse/pom.xml
  2. +26 −0 bobo-browse/src/main/java/com/browseengine/bobo/api/BoboIndexReader.java
  3. +2 −1 bobo-browse/src/main/java/com/browseengine/bobo/api/ComparatorFactory.java
  4. +4 −7 bobo-browse/src/main/java/com/browseengine/bobo/facets/CombinedFacetAccessible.java
  5. +2 −1 bobo-browse/src/main/java/com/browseengine/bobo/facets/FacetCountCollector.java
  6. +2 −1 bobo-browse/src/main/java/com/browseengine/bobo/facets/attribute/AttributesFacetCountCollector.java
  7. +19 −17 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/BucketFacetCountCollector.java
  8. +5 −4 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/CompactMultiValueFacetHandler.java
  9. +9 −8 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultDoubleFacetIterator.java
  10. +24 −22 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultFacetCountCollector.java
  11. +6 −5 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultFacetIterator.java
  12. +9 −8 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultFloatFacetIterator.java
  13. +10 −8 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultIntFacetIterator.java
  14. +9 −8 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultLongFacetIterator.java
  15. +9 −8 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultShortFacetIterator.java
  16. +5 −3 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/FacetHitcountComparatorFactory.java
  17. +2 −1 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/FacetValueComparatorFactory.java
  18. +15 −12 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/GeoFacetCountCollector.java
  19. +9 −7 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/GeoSimpleFacetCountCollector.java
  20. +28 −26 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/HistogramFacetHandler.java
  21. +2 −1 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/MultiValueFacetHandler.java
  22. +2 −1 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/MultiValuedPathFacetCountCollector.java
  23. +16 −13 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/PathFacetCountCollector.java
  24. +26 −23 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/RangeFacetCountCollector.java
  25. +11 −5 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/SimpleFacetHandler.java
  26. +17 −14 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/SimpleGroupbyFacetHandler.java
  27. +3 −2 bobo-browse/src/main/java/com/browseengine/bobo/facets/statistics/FacetCountStatisicsGenerator.java
  28. +25 −0 bobo-browse/src/main/java/com/browseengine/bobo/sort/SortCollector.java
  29. +11 −11 bobo-browse/src/main/java/com/browseengine/bobo/sort/SortCollectorImpl.java
  30. +18 −0 bobo-browse/src/main/java/com/browseengine/bobo/util/BigIntArray.java
  31. +66 −0 bobo-browse/src/main/java/com/browseengine/bobo/util/BigNestedIntArray.java
  32. +12 −8 bobo-browse/src/main/java/com/browseengine/bobo/util/BigSegmentedArray.java
  33. +260 −0 bobo-browse/src/main/java/com/browseengine/bobo/util/LazyBigIntArray.java
  34. +11 −4 bobo-browse/src/test/java/com/browseengine/bobo/test/BoboFacetIteratorTest.java
  35. +8 −6 bobo-browse/src/test/java/com/browseengine/bobo/test/BoboTestCase.java
  36. +147 −0 bobo-browse/src/test/java/com/browseengine/bobo/util/test/BigSegmentedArrayTest.java
  37. +0 −1 bobo-browse/src/test/java/com/browseengine/bobo/util/test/SparseFloatArrayTest.java
  38. +2 −2 bobo-contrib/pom.xml
  39. +1 −1 bobo-parent/pom.xml
  40. +1 −1 bobo-solr/pom.xml
  41. +1 −1 cardemo/pom.xml
  42. +1 −1 pom.xml
View
4 bobo-browse/pom.xml
@@ -17,7 +17,7 @@
<parent>
<groupId>com.browseengine.bobo</groupId>
<artifactId>bobo-parent</artifactId>
- <version>3.1.1-SNAPSHOT</version>
+ <version>3.1.2-SNAPSHOT</version>
<relativePath>../bobo-parent/pom.xml</relativePath>
</parent>
@@ -37,7 +37,7 @@
<version>2.5</version>
<configuration>
<excludes>
- <exclude>com/browseengine/bobo/util/test/*.java</exclude>
+ <exclude>com/browseengine/bobo/util/test/SparseFloatArrayTest.java</exclude>
<exclude>com/browseengine/bobo/test/TestPathMultiVal.java</exclude>
</excludes>
</configuration>
View
26 bobo-browse/src/main/java/com/browseengine/bobo/api/BoboIndexReader.java
@@ -734,6 +734,32 @@ public void dumpFields(File outFile) throws IOException
return _runtimeFacetHandlerFactoryMap;
}
+ /**
+ * @return the map of RuntimeFacetHandlers
+ */
+ public Map<String, RuntimeFacetHandler<?>> getRuntimeFacetHandlerMap()
+ {
+ return _runtimeFacetHandlerMap.get();
+ }
+
+ /**
+ * @return the map of RuntimeFacetData
+ */
+ public Map<String, Object> getRuntimeFacetDataMap()
+ {
+ return _runtimeFacetDataMap.get();
+ }
+
+ public void setRuntimeFacetHandlerMap(Map<String, RuntimeFacetHandler<?>> map)
+ {
+ _runtimeFacetHandlerMap.set(map);
+ }
+
+ public void setRuntimeFacetDataMap(Map<String, Object> map)
+ {
+ _runtimeFacetDataMap.set(map);
+ }
+
@Override
public Document document(int docid) throws IOException
{
View
3 bobo-browse/src/main/java/com/browseengine/bobo/api/ComparatorFactory.java
@@ -2,6 +2,7 @@
import java.util.Comparator;
+import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator;
/**
@@ -15,7 +16,7 @@
* @param counts hit counts
* @return Comparator instance
*/
- IntComparator newComparator(FieldValueAccessor fieldValueAccessor,int[] counts);
+ IntComparator newComparator(FieldValueAccessor fieldValueAccessor,BigSegmentedArray counts);
/**
* Provides a Comparator. This is called when doing a merge across browses.
View
11 bobo-browse/src/main/java/com/browseengine/bobo/facets/CombinedFacetAccessible.java
@@ -34,9 +34,9 @@
public class CombinedFacetAccessible implements FacetAccessible
{
private static final Logger log = Logger.getLogger(CombinedFacetAccessible.class);
- private final List<FacetAccessible> _list;
- private final FacetSpec _fspec;
- private boolean _closed;
+ protected final List<FacetAccessible> _list;
+ protected final FacetSpec _fspec;
+ protected boolean _closed;
public CombinedFacetAccessible(FacetSpec fspec,List<FacetAccessible> list)
{
@@ -85,10 +85,7 @@ public int getCappedFacetCount(Object value, int cap)
{
for (FacetAccessible facetAccessor : _list)
{
- if (facetAccessor instanceof CombinedFacetAccessible)
- sum += ((CombinedFacetAccessible)facetAccessor).getCappedFacetCount(value, cap-sum);
- else
- sum += facetAccessor.getFacetHitsCount(value);
+ sum += facetAccessor.getFacetHitsCount(value);
if (sum >= cap)
return cap;
}
View
3 bobo-browse/src/main/java/com/browseengine/bobo/facets/FacetCountCollector.java
@@ -5,6 +5,7 @@
import com.browseengine.bobo.api.BrowseFacet;
import com.browseengine.bobo.api.FacetAccessible;
+import com.browseengine.bobo.util.BigSegmentedArray;
/**
* Collects facet counts for a given browse request
@@ -32,7 +33,7 @@
* Returns an integer array representing the distribution function of a given facet.
* @return integer array of count values representing distribution of the facet values.
*/
- int[] getCountDistribution();
+ BigSegmentedArray getCountDistribution();
/**
* Empty facet list.
View
3 ...e/src/main/java/com/browseengine/bobo/facets/attribute/AttributesFacetCountCollector.java
@@ -14,6 +14,7 @@
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.facets.data.MultiValueFacetDataCache;
import com.browseengine.bobo.facets.impl.DefaultFacetCountCollector;
+import com.browseengine.bobo.util.BigIntArray;
import com.browseengine.bobo.util.BigNestedIntArray;
public final class AttributesFacetCountCollector extends DefaultFacetCountCollector {
@@ -49,7 +50,7 @@ public final void collect(int docid) {
@Override
public final void collectAll()
{
- _count = _dataCache.freqs;
+ _count = BigIntArray.fromArray(_dataCache.freqs);
}
@Override
public List<BrowseFacet> getFacets() {
View
36 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/BucketFacetCountCollector.java
@@ -14,14 +14,16 @@
import com.browseengine.bobo.facets.data.FacetDataCache;
import com.browseengine.bobo.facets.data.TermStringList;
import com.browseengine.bobo.facets.data.TermValueList;
+import com.browseengine.bobo.util.BigSegmentedArray;
+import com.browseengine.bobo.util.LazyBigIntArray;
public class BucketFacetCountCollector implements FacetCountCollector
{
private final String _name;
private final DefaultFacetCountCollector _subCollector;
private final FacetSpec _ospec;
private final Map<String,String[]> _predefinedBuckets;
- private int[] _collapsedCounts;
+ private BigSegmentedArray _collapsedCounts;
private TermStringList _bucketValues;
private final int _numdocs;
@@ -46,13 +48,13 @@ protected BucketFacetCountCollector(String name, DefaultFacetCountCollector sub
_bucketValues.seal();
}
- private int[] getCollapsedCounts(){
+ private BigSegmentedArray getCollapsedCounts(){
if (_collapsedCounts==null){
- _collapsedCounts = new int[_bucketValues.size()];
+ _collapsedCounts = new LazyBigIntArray(_bucketValues.size());
FacetDataCache dataCache = _subCollector._dataCache;
TermValueList<?> subList = dataCache.valArray;
- int[] subcounts = _subCollector._count;
- BitVector indexSet = new BitVector(subcounts.length);
+ BigSegmentedArray subcounts = _subCollector._count;
+ BitVector indexSet = new BitVector(subcounts.size());
int c = 0;
int i = 0;
for (String val : _bucketValues){
@@ -62,25 +64,25 @@ protected BucketFacetCountCollector(String name, DefaultFacetCountCollector sub
for (String subVal : subVals){
int index = subList.indexOf(subVal);
if (index>0){
- int subcount = subcounts[index];
+ int subcount = subcounts.get(index);
count+=subcount;
if (!indexSet.get(index)){
indexSet.set(index);
c+=dataCache.freqs[index];
}
}
}
- _collapsedCounts[i] = count;
+ _collapsedCounts.add(i, count);
}
i++;
}
- _collapsedCounts[0] = (_numdocs-c);
+ _collapsedCounts.add(0, (_numdocs-c));
}
return _collapsedCounts;
}
// get the total count of all possible elements
- public int[] getCountDistribution()
+ public BigSegmentedArray getCountDistribution()
{
return getCollapsedCounts();
}
@@ -98,9 +100,9 @@ public BrowseFacet getFacet(String bucketValue)
return new BrowseFacet(bucketValue,0);
}
- int[] counts = getCollapsedCounts();
+ BigSegmentedArray counts = getCollapsedCounts();
- return new BrowseFacet(bucketValue,counts[index]);
+ return new BrowseFacet(bucketValue,counts.get(index));
}
public int getFacetHitsCount(Object value)
@@ -110,9 +112,9 @@ public int getFacetHitsCount(Object value)
return 0;
}
- int[] counts = getCollapsedCounts();
+ BigSegmentedArray counts = getCollapsedCounts();
- return counts[index];
+ return counts.get(index);
}
public final void collect(int docid) {
@@ -128,8 +130,8 @@ public final void collectAll()
public List<BrowseFacet> getFacets()
{
- int[] counts = getCollapsedCounts();
- return DefaultFacetCountCollector.getFacets(_ospec, counts, counts.length, _bucketValues);
+ BigSegmentedArray counts = getCollapsedCounts();
+ return DefaultFacetCountCollector.getFacets(_ospec, counts, counts.size(), _bucketValues);
}
@@ -141,8 +143,8 @@ public void close()
public FacetIterator iterator()
{
- int[] counts = getCollapsedCounts();
- return new DefaultFacetIterator(_bucketValues, counts, counts.length, true);
+ BigSegmentedArray counts = getCollapsedCounts();
+ return new DefaultFacetIterator(_bucketValues, counts, counts.size(), true);
}
}
View
9 ...browse/src/main/java/com/browseengine/bobo/facets/impl/CompactMultiValueFacetHandler.java
@@ -386,7 +386,7 @@ public final float score(int docid) {
@Override
public final void collectAll()
{
- _count = _dataCache.freqs;
+ _count = BigIntArray.fromArray(_dataCache.freqs);
_aggregated = true;
}
@@ -425,7 +425,7 @@ public int getFacetHitsCount(Object value)
return super.getFacetHitsCount(value);
}
@Override
- public int[] getCountDistribution()
+ public BigSegmentedArray getCountDistribution()
{
if(!_aggregated) aggregateCounts();
return _count;
@@ -440,7 +440,7 @@ public int getFacetHitsCount(Object value)
private void aggregateCounts()
{
- _count[0] = _noValCount;
+ _count.add(0, _noValCount);
for(int i = 1; i < _combinationCount.length; i++)
{
@@ -454,7 +454,8 @@ private void aggregateCounts()
{
if ((encoded & 0x00000001) != 0x0)
{
- _count[index + offset] += count;
+ int idx = index+offset;
+ _count.add(idx, _count.get(idx) + count);
}
index++;
encoded >>>= 1;
View
17 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultDoubleFacetIterator.java
@@ -4,6 +4,7 @@
import com.browseengine.bobo.api.DoubleFacetIterator;
import com.browseengine.bobo.facets.data.TermDoubleList;
+import com.browseengine.bobo.util.BigSegmentedArray;
/**
* @author "Xiaoyang Gu<xgu@linkedin.com>"
@@ -13,12 +14,12 @@
{
public TermDoubleList _valList;
- private int[] _count;
+ private BigSegmentedArray _count;
private int _countlength;
private int _countLengthMinusOne;
private int _index;
- public DefaultDoubleFacetIterator(TermDoubleList valList, int[] countarray, int countlength,
+ public DefaultDoubleFacetIterator(TermDoubleList valList, BigSegmentedArray countarray, int countlength,
boolean zeroBased)
{
_valList = valList;
@@ -84,7 +85,7 @@ public String next()
throw new NoSuchElementException("No more facets in this iteration");
_index++;
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return _valList.get(_index);
}
@@ -97,7 +98,7 @@ public double nextDouble()
throw new NoSuchElementException("No more facets in this iteration");
_index++;
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return facet;
}
@@ -121,10 +122,10 @@ public String next(int minHits)
{
while (++_index < _countlength)
{
- if (_count[_index] >= minHits)
+ if (_count.get(_index) >= minHits)
{
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return _valList.format(facet);
}
}
@@ -140,10 +141,10 @@ public double nextDouble(int minHits)
{
while (++_index < _countlength)
{
- if (_count[_index] >= minHits)
+ if (_count.get(_index) >= minHits)
{
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return facet;
}
}
View
46 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultFacetCountCollector.java
@@ -1,7 +1,6 @@
package com.browseengine.bobo.facets.impl;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@@ -31,40 +30,42 @@
import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.IntBoundedPriorityQueue;
import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator;
+import com.browseengine.bobo.util.LazyBigIntArray;
import com.browseengine.bobo.util.MemoryManager;
import com.browseengine.bobo.util.MemoryManagerAdminMBean;
public abstract class DefaultFacetCountCollector implements FacetCountCollector
{
private static final Logger log = Logger.getLogger(DefaultFacetCountCollector.class.getName());
protected final FacetSpec _ospec;
- public int[] _count;
+ public BigSegmentedArray _count;
+
public int _countlength;
protected FacetDataCache _dataCache;
private final String _name;
protected final BrowseSelection _sel;
protected final BigSegmentedArray _array;
private int _docBase;
- protected final LinkedList<int[]> intarraylist = new LinkedList<int[]>();
+ protected final LinkedList<BigSegmentedArray> intarraylist = new LinkedList<BigSegmentedArray>();
private Iterator _iterator;
private boolean _closed = false;
- protected static MemoryManager<int[]> intarraymgr = new MemoryManager<int[]>(new MemoryManager.Initializer<int[]>()
+ protected static MemoryManager<BigSegmentedArray> intarraymgr = new MemoryManager<BigSegmentedArray>(new MemoryManager.Initializer<BigSegmentedArray>()
{
- public void init(int[] buf)
+ public void init(BigSegmentedArray buf)
{
- Arrays.fill(buf, 0);
+ buf.fill(0);
}
- public int[] newInstance(int size)
+ public BigSegmentedArray newInstance(int size)
{
- return new int[size];
+ return new LazyBigIntArray(size);
}
- public int size(int[] buf)
+ public int size(BigSegmentedArray buf)
{
- assert buf!=null;
- return buf.length;
+ assert buf != null;
+ return buf.size();
}
});
@@ -89,16 +90,17 @@ public DefaultFacetCountCollector(String name,FacetDataCache dataCache,int docBa
_ospec = ospec;
_name = name;
_dataCache=dataCache;
+ _countlength = _dataCache.freqs.length;
+
if (_dataCache.freqs.length <= 3096)
{
- _countlength = _dataCache.freqs.length;
- _count = new int[_countlength];
+ _count = new LazyBigIntArray(_countlength);
} else
{
- _countlength = _dataCache.freqs.length;
- _count = intarraymgr.get(_countlength);//new int[_dataCache.freqs.length];
+ _count = intarraymgr.get(_countlength);
intarraylist.add(_count);
}
+
_array = _dataCache.orderArray;
_docBase = docBase;
}
@@ -121,7 +123,7 @@ public BrowseFacet getFacet(String value)
BrowseFacet facet = null;
int index=_dataCache.valArray.indexOf(value);
if (index >=0 ){
- facet = new BrowseFacet(_dataCache.valArray.get(index),_count[index]);
+ facet = new BrowseFacet(_dataCache.valArray.get(index),_count.get(index));
}
else{
facet = new BrowseFacet(_dataCache.valArray.format(value),0);
@@ -138,14 +140,14 @@ public int getFacetHitsCount(Object value)
int index=_dataCache.valArray.indexOf(value);
if (index >= 0)
{
- return _count[index];
+ return _count.get(index);
}
else{
return 0;
}
}
- public int[] getCountDistribution()
+ public BigSegmentedArray getCountDistribution()
{
return _count;
}
@@ -154,7 +156,7 @@ public FacetDataCache getFacetDataCache(){
return _dataCache;
}
- public static List<BrowseFacet> getFacets(FacetSpec ospec,int[] count, int countlength, final TermValueList<?> valList){
+ public static List<BrowseFacet> getFacets(FacetSpec ospec, BigSegmentedArray count, int countlength, final TermValueList<?> valList){
if (ospec!=null)
{
int minCount=ospec.getMinHitCount();
@@ -168,7 +170,7 @@ public FacetDataCache getFacetDataCache(){
facetColl=new ArrayList<BrowseFacet>(max);
for (int i = 1; i < countlength;++i) // exclude zero
{
- int hits=count[i];
+ int hits=count.get(i);
if (hits>=minCount)
{
BrowseFacet facet=new BrowseFacet(valList.get(i),hits);
@@ -208,7 +210,7 @@ public Object getRawValue(int index) {
for (int i=1;i<countlength;++i)
{
- int hits=count[i];
+ int hits=count.get(i);
if (hits>=minCount)
{
pq.offer(i);
@@ -218,7 +220,7 @@ public Object getRawValue(int index) {
int val;
while((val = pq.pollInt()) != forbidden)
{
- BrowseFacet facet=new BrowseFacet(valList.get(val),count[val]);
+ BrowseFacet facet=new BrowseFacet(valList.get(val),count.get(val));
((LinkedList<BrowseFacet>)facetColl).addFirst(facet);
}
}
View
11 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultFacetIterator.java
@@ -5,6 +5,7 @@
import com.browseengine.bobo.api.FacetIterator;
import com.browseengine.bobo.facets.data.TermValueList;
+import com.browseengine.bobo.util.BigSegmentedArray;
/**
* @author nnarkhed
@@ -13,12 +14,12 @@
public class DefaultFacetIterator extends FacetIterator {
private TermValueList _valList;
- private int[] _count;
+ private BigSegmentedArray _count;
private int _countlength;
private int _index;
private int _lastIndex;
- public DefaultFacetIterator(TermValueList valList, int[] counts, int countlength, boolean zeroBased)
+ public DefaultFacetIterator(TermValueList valList, BigSegmentedArray counts, int countlength, boolean zeroBased)
{
_valList = valList;
_count = counts;
@@ -45,7 +46,7 @@ public boolean hasNext() {
public Comparable next() {
_index++;
facet = (Comparable)_valList.getRawValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return format(facet);
}
@@ -63,10 +64,10 @@ public Comparable next(int minHits)
{
while(++_index < _countlength)
{
- if(_count[_index] >= minHits)
+ if(_count.get(_index) >= minHits)
{
facet = (Comparable)_valList.getRawValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return format(facet);
}
}
View
17 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultFloatFacetIterator.java
@@ -7,6 +7,7 @@
import com.browseengine.bobo.api.FloatFacetIterator;
import com.browseengine.bobo.facets.data.TermFloatList;
+import com.browseengine.bobo.util.BigSegmentedArray;
/**
* @author "Xiaoyang Gu<xgu@linkedin.com>"
@@ -16,12 +17,12 @@
{
public TermFloatList _valList;
- private int[] _count;
+ private BigSegmentedArray _count;
private int _countlength;
private int _countLengthMinusOne;
private int _index;
- public DefaultFloatFacetIterator(TermFloatList valList, int[] countarray, int countlength,
+ public DefaultFloatFacetIterator(TermFloatList valList, BigSegmentedArray countarray, int countlength,
boolean zeroBased)
{
_valList = valList;
@@ -87,7 +88,7 @@ public String next()
throw new NoSuchElementException("No more facets in this iteration");
_index++;
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return _valList.get(_index);
}
@@ -102,7 +103,7 @@ public float nextFloat()
throw new NoSuchElementException("No more facets in this iteration");
_index++;
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return facet;
}
@@ -126,10 +127,10 @@ public String next(int minHits)
{
while (++_index < _countlength)
{
- if (_count[_index] >= minHits)
+ if (_count.get(_index) >= minHits)
{
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return _valList.format(facet);
}
}
@@ -147,10 +148,10 @@ public float nextFloat(int minHits)
{
while (++_index < _countlength)
{
- if (_count[_index] >= minHits)
+ if (_count.get(_index) >= minHits)
{
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return facet;
}
}
View
18 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultIntFacetIterator.java
@@ -4,17 +4,19 @@
import com.browseengine.bobo.api.IntFacetIterator;
import com.browseengine.bobo.facets.data.TermIntList;
+import com.browseengine.bobo.util.BigIntArray;
+import com.browseengine.bobo.util.BigSegmentedArray;
public class DefaultIntFacetIterator extends IntFacetIterator
{
public TermIntList _valList;
- private int[] _count;
+ private BigSegmentedArray _count;
private int _countlength;
private int _countLengthMinusOne;
private int _index;
- public DefaultIntFacetIterator(TermIntList valList, int[] countarray, int countlength, boolean zeroBased)
+ public DefaultIntFacetIterator(TermIntList valList, BigSegmentedArray countarray, int countlength, boolean zeroBased)
{
_valList = valList;
_count = countarray;
@@ -64,7 +66,7 @@ public String next() {
throw new NoSuchElementException("No more facets in this iteration");
_index++;
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return _valList.get(_index);
}
@@ -77,7 +79,7 @@ public int nextInt()
throw new NoSuchElementException("No more facets in this iteration");
_index++;
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return facet;
}
@@ -95,10 +97,10 @@ public String next(int minHits)
{
while(++_index < _countlength)
{
- if(_count[_index] >= minHits)
+ if(_count.get(_index) >= minHits)
{
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return _valList.format(facet);
}
}
@@ -113,10 +115,10 @@ public int nextInt(int minHits)
{
while(++_index < _countlength)
{
- if(_count[_index] >= minHits)
+ if(_count.get(_index) >= minHits)
{
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return facet;
}
}
View
17 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultLongFacetIterator.java
@@ -7,6 +7,7 @@
import com.browseengine.bobo.api.LongFacetIterator;
import com.browseengine.bobo.facets.data.TermLongList;
+import com.browseengine.bobo.util.BigSegmentedArray;
/**
* @author "Xiaoyang Gu<xgu@linkedin.com>"
@@ -16,12 +17,12 @@
{
public TermLongList _valList;
- private int[] _count;
+ private BigSegmentedArray _count;
private int _countlength;
private int _countLengthMinusOne;
private int _index;
- public DefaultLongFacetIterator(TermLongList valList, int[] countarray, int countlength,
+ public DefaultLongFacetIterator(TermLongList valList, BigSegmentedArray countarray, int countlength,
boolean zeroBased)
{
_valList = valList;
@@ -87,7 +88,7 @@ public String next()
throw new NoSuchElementException("No more facets in this iteration");
_index++;
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return _valList.get(_index);
}
@@ -100,7 +101,7 @@ public long nextLong()
throw new NoSuchElementException("No more facets in this iteration");
_index++;
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return facet;
}
@@ -124,10 +125,10 @@ public String next(int minHits)
{
while (++_index < _countlength)
{
- if (_count[_index] >= minHits)
+ if (_count.get(_index) >= minHits)
{
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return _valList.format(facet);
}
}
@@ -143,10 +144,10 @@ public long nextLong(int minHits)
{
while (++_index < _countlength)
{
- if (_count[_index] >= minHits)
+ if (_count.get(_index) >= minHits)
{
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return facet;
}
}
View
17 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultShortFacetIterator.java
@@ -7,6 +7,7 @@
import com.browseengine.bobo.api.ShortFacetIterator;
import com.browseengine.bobo.facets.data.TermShortList;
+import com.browseengine.bobo.util.BigSegmentedArray;
/**
* @author "Xiaoyang Gu<xgu@linkedin.com>"
@@ -16,12 +17,12 @@
{
public TermShortList _valList;
- private int[] _count;
+ private BigSegmentedArray _count;
private int _countlength;
private int _countLengthMinusOne;
private int _index;
- public DefaultShortFacetIterator(TermShortList valList, int[] countarray, int countlength,
+ public DefaultShortFacetIterator(TermShortList valList, BigSegmentedArray countarray, int countlength,
boolean zeroBased)
{
_valList = valList;
@@ -87,7 +88,7 @@ public String next()
throw new NoSuchElementException("No more facets in this iteration");
_index++;
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return _valList.get(_index);
}
@@ -100,7 +101,7 @@ public short nextShort()
throw new NoSuchElementException("No more facets in this iteration");
_index++;
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return facet;
}
@@ -124,10 +125,10 @@ public String next(int minHits)
{
while (++_index < _countlength)
{
- if (_count[_index] >= minHits)
+ if (_count.get(_index) >= minHits)
{
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return _valList.format(facet);
}
}
@@ -143,10 +144,10 @@ public short nextShort(int minHits)
{
while (++_index < _countlength)
{
- if (_count[_index] >= minHits)
+ if (_count.get(_index) >= minHits)
{
facet = _valList.getPrimitiveValue(_index);
- count = _count[_index];
+ count = _count.get(_index);
return facet;
}
}
View
8 ...rowse/src/main/java/com/browseengine/bobo/facets/impl/FacetHitcountComparatorFactory.java
@@ -5,15 +5,17 @@
import com.browseengine.bobo.api.BrowseFacet;
import com.browseengine.bobo.api.ComparatorFactory;
import com.browseengine.bobo.api.FieldValueAccessor;
+import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator;
public class FacetHitcountComparatorFactory implements ComparatorFactory {
public IntComparator newComparator(FieldValueAccessor valueList,
- final int[] counts) {
+ final BigSegmentedArray counts) {
+
return new IntComparator(){
public int compare(Integer f1, Integer f2) {
- int val = counts[f1] - counts[f2];
+ int val = counts.get(f1) - counts.get(f2);
if (val==0)
{
val=f2-f1;
@@ -24,7 +26,7 @@ public int compare(Integer f1, Integer f2) {
// use polymorphism to avoid auto-boxing
public int compare(int f1, int f2)
{
- int val = counts[f1] - counts[f2];
+ int val = counts.get(f1) - counts.get(f2);
if (val==0)
{
val=f2-f1;
View
3 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/FacetValueComparatorFactory.java
@@ -5,12 +5,13 @@
import com.browseengine.bobo.api.BrowseFacet;
import com.browseengine.bobo.api.ComparatorFactory;
import com.browseengine.bobo.api.FieldValueAccessor;
+import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator;
public class FacetValueComparatorFactory implements ComparatorFactory {
public IntComparator newComparator(
- FieldValueAccessor fieldValueAccessor, int[] counts) {
+ FieldValueAccessor fieldValueAccessor, BigSegmentedArray counts) {
return new IntComparator(){
public int compare(Integer o1, Integer o2) {
return o2-o1;
View
27 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/GeoFacetCountCollector.java
@@ -15,7 +15,9 @@
import com.browseengine.bobo.facets.filter.GeoFacetFilter;
import com.browseengine.bobo.facets.impl.GeoFacetHandler.GeoFacetData;
import com.browseengine.bobo.util.BigFloatArray;
+import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.GeoMatchUtil;
+import com.browseengine.bobo.util.LazyBigIntArray;
/**
* @author nnarkhed
@@ -25,7 +27,7 @@
private final String _name;
private final FacetSpec _spec;
- private int[] _count;
+ private BigSegmentedArray _count;
private int _countlength;
private GeoFacetData _dataCache;
private final TermStringList _predefinedRanges;
@@ -92,7 +94,7 @@ protected GeoFacetCountCollector(String name, GeoFacetData dataCache,
_predefinedRanges.addAll(predefinedRanges);
_docBase = docBase;
_countlength = predefinedRanges.size();
- _count = new int[_countlength];
+ _count = new LazyBigIntArray(_countlength);
_ranges = new GeoRange[predefinedRanges.size()];
int index = 0;
for(String range: predefinedRanges) {
@@ -149,7 +151,7 @@ public void collect(int docid) {
if(GeoFacetFilter.inCircle(docX, docY, docZ, targetX, targetY, targetZ, radius)) {
// if the lat, lon values of this docid match the current user-specified range, then increment the
// appropriate count[] value
- _count[countIndex]++;
+ _count.add(countIndex, _count.get(countIndex) + 1);
// do not break here, since one document could lie in multiple user-specified ranges
}
}
@@ -162,13 +164,14 @@ public void collectAll() {
/**
* @return Count distribution for all the user specified range values
*/
- public int[] getCountDistribution() {
- int[] dist = null;
+ public BigSegmentedArray getCountDistribution() {
+ BigSegmentedArray dist = null;
if(_predefinedRanges != null) {
- dist = new int[_predefinedRanges.size()];
+ dist = new LazyBigIntArray(_predefinedRanges.size());
int distIdx = 0;
- for(int count : _count) {
- dist[distIdx++] = count;
+ for (int i = 0; i < _count.size(); i++) {
+ int count = _count.get(i);
+ dist.add(distIdx++, count);
}
}
return dist;
@@ -188,7 +191,7 @@ public BrowseFacet getFacet(String value) {
int index = 0;
if((index = _predefinedRanges.indexOf(value)) != -1) {
BrowseFacet choice = new BrowseFacet();
- choice.setHitCount(_count[index]);
+ choice.setHitCount(_count.get(index));
choice.setValue(value);
return choice;
}
@@ -211,7 +214,7 @@ public int getFacetHitsCount(Object value)
int index = 0;
if((index = _predefinedRanges.indexOf(value)) != -1)
{
- return _count[index];
+ return _count.get(index);
}
else
{
@@ -235,9 +238,9 @@ public int getFacetHitsCount(Object value)
int countIndex = -1;
for(String value : _predefinedRanges) {
countIndex++;
- if(_count[countIndex] >= minHitCount) {
+ if(_count.get(countIndex) >= minHitCount) {
BrowseFacet choice = new BrowseFacet();
- choice.setHitCount(_count[countIndex]);
+ choice.setHitCount(_count.get(countIndex));
choice.setValue(value);
facets.add(choice);
}
View
16 ...-browse/src/main/java/com/browseengine/bobo/facets/impl/GeoSimpleFacetCountCollector.java
@@ -17,7 +17,9 @@
import com.browseengine.bobo.facets.data.TermStringList;
import com.browseengine.bobo.facets.filter.FacetRangeFilter;
import com.browseengine.bobo.facets.filter.GeoSimpleFacetFilter;
+import com.browseengine.bobo.util.BigIntArray;
import com.browseengine.bobo.util.BigSegmentedArray;
+import com.browseengine.bobo.util.LazyBigIntArray;
/**
* @author nnarkhed
@@ -103,10 +105,10 @@ public void collectAll() {
/* (non-Javadoc)
* @see com.browseengine.bobo.facets.FacetCountCollector#getCountDistribution()
*/
- public int[] getCountDistribution() {
- int[] dist = null;
+ public BigSegmentedArray getCountDistribution() {
+ BigSegmentedArray dist = null;
if(_latPredefinedRangeIndexes != null) {
- dist = new int[_latPredefinedRangeIndexes.length];
+ dist = new LazyBigIntArray(_latPredefinedRangeIndexes.length);
int n = 0;
int start;
int end;
@@ -117,7 +119,7 @@ public void collectAll() {
for(int i = start; i < end; i++) {
sum += _latCount[i];
}
- dist[n++] = sum;
+ dist.add(n++, sum);
}
}
return dist;
@@ -215,18 +217,18 @@ public void close()
public FacetIterator iterator() {
// each range is of the form <lat, lon, radius>
- int[] rangeCounts = new int[_latPredefinedRangeIndexes.length];
+ LazyBigIntArray rangeCounts = new LazyBigIntArray(_latPredefinedRangeIndexes.length);
for (int i=0;i<_latCount.length;++i){
if (_latCount[i] >0 ){
for (int k=0;k<_latPredefinedRangeIndexes.length;++k)
{
if (i>=_latPredefinedRangeIndexes[k][0] && i<=_latPredefinedRangeIndexes[k][1])
{
- rangeCounts[k]+=_latCount[i];
+ rangeCounts.add(k, rangeCounts.get(k) + _latCount[i]);
}
}
}
}
- return new DefaultFacetIterator(_predefinedRanges, rangeCounts, rangeCounts.length, true);
+ return new DefaultFacetIterator(_predefinedRanges, rangeCounts, rangeCounts.size(), true);
}
}
View
54 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/HistogramFacetHandler.java
@@ -26,6 +26,8 @@
import com.browseengine.bobo.facets.data.TermValueList;
import com.browseengine.bobo.facets.filter.RandomAccessFilter;
import com.browseengine.bobo.sort.DocComparatorSource;
+import com.browseengine.bobo.util.BigSegmentedArray;
+import com.browseengine.bobo.util.LazyBigIntArray;
public class HistogramFacetHandler<T extends Number> extends RuntimeFacetHandler<FacetDataNone>
{
@@ -119,7 +121,7 @@ public FacetCountCollector getFacetCountCollector(BoboIndexReader reader, int do
private final T _start;
private final T _end;
private final T _unit;
- private final int[] _count;
+ private final BigSegmentedArray _count;
private final TermValueList<?> _valArray;
private final FacetCountCollector _baseCollector;
private final String _facetName;
@@ -136,7 +138,7 @@ protected HistogramCollector(String facetName,FacetCountCollector baseCollector,
_start = start;
_end = end;
_unit = unit;
- _count = new int[countArraySize()];
+ _count = new LazyBigIntArray(countArraySize());
}
private int countArraySize()
@@ -161,7 +163,7 @@ else if(_start instanceof Integer)
/**
* not supported
*/
- public int[] getCountDistribution()
+ public BigSegmentedArray getCountDistribution()
{
if(!_isAggregated) aggregate();
return _count;
@@ -172,9 +174,9 @@ public BrowseFacet getFacet(String value)
if(!_isAggregated) aggregate();
int idx = Integer.parseInt(value);
- if(idx >= 0 && idx < _count.length)
+ if(idx >= 0 && idx < _count.size())
{
- return new BrowseFacet(value, _count[idx]);
+ return new BrowseFacet(value, _count.get(idx));
}
return null;
}
@@ -188,9 +190,9 @@ public int getFacetHitsCount(Object value)
idx = Integer.parseInt((String)value);
else
idx = ((Number)value).intValue();
- if(idx >= 0 && idx < _count.length)
+ if(idx >= 0 && idx < _count.size())
{
- return _count[idx];
+ return _count.get(idx);
}
return 0;
}
@@ -217,7 +219,7 @@ private void aggregate()
int endIdx = _valArray.indexOf(_end);
if (endIdx < 0) endIdx = -(endIdx + 1);
- int[] baseCounts = _baseCollector.getCountDistribution();
+ BigSegmentedArray baseCounts = _baseCollector.getCountDistribution();
if(_start instanceof Long)
{
long start = _start.longValue();
@@ -227,9 +229,9 @@ private void aggregate()
{
long val = valArray.getPrimitiveValue(i);
int idx = (int)((val - start) / unit);
- if(idx >= 0 && idx < _count.length)
+ if(idx >= 0 && idx < _count.size())
{
- _count[idx] += baseCounts[i];
+ _count.add(idx, _count.get(idx) + baseCounts.get(i));
}
}
}
@@ -242,9 +244,9 @@ else if(_start instanceof Integer)
{
int val = valArray.getPrimitiveValue(i);
int idx = ((val - start) / unit);
- if(idx >= 0 && idx < _count.length)
+ if(idx >= 0 && idx < _count.size())
{
- _count[idx] += baseCounts[i];
+ _count.add(idx, _count.get(idx) + baseCounts.get(i));
}
}
}
@@ -256,9 +258,9 @@ else if(_start instanceof Integer)
{
Number val = (Number)_valArray.getRawValue(i);
int idx = (int)((val.doubleValue() - start) / unit);
- if(idx >= 0 && idx < _count.length)
+ if(idx >= 0 && idx < _count.size())
{
- _count[idx] += baseCounts[i];
+ _count.add(idx, _count.get(idx) + baseCounts.get(i));
}
}
}
@@ -270,16 +272,16 @@ else if(_start instanceof Integer)
{
int minCount = _ospec.getMinHitCount();
int max = _ospec.getMaxCount();
- if (max <= 0) max = _count.length;
+ if (max <= 0) max = _count.size();
List<BrowseFacet> facetColl;
FacetSortSpec sortspec = _ospec.getOrderBy();
if (sortspec == FacetSortSpec.OrderValueAsc)
{
facetColl = new ArrayList<BrowseFacet>(max);
- for (int i = 0; i < _count.length; ++i)
+ for (int i = 0; i < _count.size(); ++i)
{
- int hits = _count[i];
+ int hits = _count.get(i);
if (hits >= minCount)
{
BrowseFacet facet = new BrowseFacet(_formatter.format(i),hits);
@@ -319,23 +321,23 @@ public void close()
public static class HistogramFacetIterator extends IntFacetIterator
{
private final DecimalFormat _formatter;
- private final int[] _count;
+ private final BigSegmentedArray _count;
private final int _maxMinusOne;
private int _idx;
- public HistogramFacetIterator(int count[], DecimalFormat formatter)
+ public HistogramFacetIterator(BigSegmentedArray count, DecimalFormat formatter)
{
_idx = -1;
_count = count;
- _maxMinusOne = count.length - 1;
+ _maxMinusOne = count.size() - 1;
_formatter = formatter;
}
public Integer next()
{
if(hasNext())
{
- count = _count[++_idx];
+ count = _count.get(++_idx);
return (facet = _idx);
}
return null;
@@ -345,9 +347,9 @@ public Integer next(int minHits)
{
while(_idx < _maxMinusOne)
{
- if(_count[++_idx] >= minHits)
+ if(_count.get(++_idx) >= minHits)
{
- count = _count[_idx];
+ count = _count.get(_idx);
return (facet = _idx);
}
}
@@ -358,7 +360,7 @@ public int nextInt()
{
if(hasNext())
{
- count = _count[++_idx];
+ count = _count.get(++_idx);
return (facet = _idx);
}
return TermIntList.VALUE_MISSING;
@@ -368,9 +370,9 @@ public int nextInt(int minHits)
{
while(_idx < _maxMinusOne)
{
- if(_count[++_idx] >= minHits)
+ if(_count.get(++_idx) >= minHits)
{
- count = _count[_idx];
+ count = _count.get(_idx);
return (facet = _idx);
}
}
View
3 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/MultiValueFacetHandler.java
@@ -35,6 +35,7 @@
import com.browseengine.bobo.query.scoring.FacetScoreable;
import com.browseengine.bobo.query.scoring.FacetTermScoringFunctionFactory;
import com.browseengine.bobo.sort.DocComparatorSource;
+import com.browseengine.bobo.util.BigIntArray;
import com.browseengine.bobo.util.BigNestedIntArray;
public class MultiValueFacetHandler extends FacetHandler<MultiValueFacetDataCache> implements FacetScoreable
@@ -303,7 +304,7 @@ public final void collect(int docid)
@Override
public final void collectAll()
{
- _count = _dataCache.freqs;
+ _count = BigIntArray.fromArray(_dataCache.freqs);
}
}
}
View
3 ...e/src/main/java/com/browseengine/bobo/facets/impl/MultiValuedPathFacetCountCollector.java
@@ -4,6 +4,7 @@
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.facets.data.FacetDataCache;
import com.browseengine.bobo.facets.data.MultiValueFacetDataCache;
+import com.browseengine.bobo.util.BigIntArray;
import com.browseengine.bobo.util.BigNestedIntArray;
public class MultiValuedPathFacetCountCollector extends PathFacetCountCollector {
@@ -25,6 +26,6 @@ public final void collect(int docid)
@Override
public final void collectAll()
{
- _count = _dataCache.freqs;
+ _count = BigIntArray.fromArray(_dataCache.freqs);
}
}
View
29 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/PathFacetCountCollector.java
@@ -18,16 +18,18 @@
import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.data.FacetDataCache;
+import com.browseengine.bobo.util.BigIntArray;
import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.BoundedPriorityQueue;
+import com.browseengine.bobo.util.LazyBigIntArray;
import com.browseengine.bobo.util.ListMerger;
public class PathFacetCountCollector implements FacetCountCollector
{
private static final Logger log = Logger.getLogger(PathFacetCountCollector.class.getName());
private final BrowseSelection _sel;
private final FacetSpec _ospec;
- protected int[] _count;
+ protected BigSegmentedArray _count;
private final String _name;
private final String _sep;
private final BigSegmentedArray _orderArray;
@@ -49,13 +51,13 @@
_dataCache = dataCache;
_sep = sep;
_sepArray = sep.toCharArray();
- _count=new int[_dataCache.freqs.length];
- log.info(name +": " + _count.length);
+ _count = new LazyBigIntArray(_dataCache.freqs.length);
+ log.info(name +": " + _count.size());
_orderArray = _dataCache.orderArray;
_minHitCount = ospec.getMinHitCount();
_maxCount = ospec.getMaxCount();
if (_maxCount<1){
- _maxCount = _count.length;
+ _maxCount = _count.size();
}
FacetSortSpec sortOption = ospec.getOrderBy();
switch(sortOption){
@@ -71,7 +73,7 @@
}
- public int[] getCountDistribution()
+ public BigSegmentedArray getCountDistribution()
{
return _count;
}
@@ -82,12 +84,13 @@ public String getName()
}
public void collect(int docid) {
- _count[_orderArray.get(docid)]++;
+ int i = _orderArray.get(docid);
+ _count.add(i, _count.get(i) + 1);
}
public void collectAll()
{
- _count = _dataCache.freqs;
+ _count = BigIntArray.fromArray(_dataCache.freqs);
}
public BrowseFacet getFacet(String value)
@@ -208,12 +211,12 @@ public int compare(BrowseFacet o1, BrowseFacet o2) {
String[] pathParts;
StringBuffer buf = new StringBuffer();
- for (int i=index;i<_count.length;++i){
- if (_count[i] >= minCount){
+ for (int i=index;i<_count.size();++i){
+ if (_count.get(i) >= minCount){
String path=_dataCache.valArray.get(i);
//if (path==null || path.equals(selectedPath)) continue;
- int subCount=_count[i];
+ int subCount=_count.get(i);
// do not use Java split string in a loop !
// String[] pathParts=path.split(_sep);
@@ -371,20 +374,20 @@ public FacetIterator iterator() {
String[] paths= _sel == null ? null : _sel.getValues();
if (paths==null || paths.length == 0)
{
- finalList = getFacetsForPath(null, depth, strict, Integer.MIN_VALUE, _count.length);
+ finalList = getFacetsForPath(null, depth, strict, Integer.MIN_VALUE, _count.size());
return new PathFacetIterator(finalList);
}
if (paths.length==1) {
- finalList = getFacetsForPath(paths[0],depth,strict, Integer.MIN_VALUE, _count.length);
+ finalList = getFacetsForPath(paths[0],depth,strict, Integer.MIN_VALUE, _count.size());
return new PathFacetIterator(finalList);
}
finalList=new LinkedList<BrowseFacet>();
ArrayList<Iterator<BrowseFacet>> iterList = new ArrayList<Iterator<BrowseFacet>>(paths.length);
for (String path : paths)
{
- List<BrowseFacet> subList=getFacetsForPath(path, depth, strict, Integer.MIN_VALUE, _count.length);
+ List<BrowseFacet> subList=getFacetsForPath(path, depth, strict, Integer.MIN_VALUE, _count.size());
if (subList.size() > 0)
{
iterList.add(subList.iterator());
View
49 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/RangeFacetCountCollector.java
@@ -15,14 +15,16 @@
import com.browseengine.bobo.facets.data.FacetDataCache;
import com.browseengine.bobo.facets.data.TermStringList;
import com.browseengine.bobo.facets.filter.FacetRangeFilter;
+import com.browseengine.bobo.util.BigIntArray;
import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.IntBoundedPriorityQueue;
import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator;
+import com.browseengine.bobo.util.LazyBigIntArray;
public class RangeFacetCountCollector implements FacetCountCollector
{
private final FacetSpec _ospec;
- protected int[] _count;
+ protected BigSegmentedArray _count;
private int _countlength;
private final BigSegmentedArray _array;
protected FacetDataCache _dataCache;
@@ -36,7 +38,7 @@ public RangeFacetCountCollector(String name,FacetDataCache dataCache,int docBase
_name = name;
_dataCache = dataCache;
_countlength = _dataCache.freqs.length;
- _count=new int[_countlength];
+ _count= new LazyBigIntArray(_countlength);
_array = _dataCache.orderArray;
_docBase = docBase;
_ospec=ospec;
@@ -62,12 +64,12 @@ public RangeFacetCountCollector(String name,FacetDataCache dataCache,int docBase
/**
* gets distribution of the value arrays. When predefined ranges are available, this returns distribution by predefined ranges.
*/
- public int[] getCountDistribution()
+ public BigSegmentedArray getCountDistribution()
{
- int[] dist;
+ BigSegmentedArray dist;
if (_predefinedRangeIndexes!=null)
{
- dist = new int[_predefinedRangeIndexes.length];
+ dist = new LazyBigIntArray(_predefinedRangeIndexes.length);
int n=0;
for (int[] range : _predefinedRangeIndexes)
{
@@ -77,9 +79,9 @@ public RangeFacetCountCollector(String name,FacetDataCache dataCache,int docBase
int sum = 0;
for (int i=start;i<end;++i)
{
- sum += _count[i];
+ sum += _count.get(i);
}
- dist[n++]=sum;
+ dist.add(n++, sum);
}
}
else
@@ -104,7 +106,7 @@ public BrowseFacet getFacet(String value)
int sum=0;
for (int i=range[0];i<=range[1];++i)
{
- sum+=_count[i];
+ sum+=_count.get(i);
}
facet = new BrowseFacet(value,sum);
}
@@ -119,19 +121,20 @@ public int getFacetHitsCount(Object value)
{
for (int i=range[0]; i<=range[1]; ++i)
{
- sum += _count[i];
+ sum += _count.get(i);
}
}
return sum;
}
public void collect(int docid) {
- _count[_array.get(docid)]++;
+ int i = _array.get(docid);
+ _count.add(i, _count.get(i) + 1);
}
public final void collectAll()
{
- _count = _dataCache.freqs;
+ _count = BigIntArray.fromArray(_dataCache.freqs);
_countlength = _dataCache.freqs.length;
}
@@ -204,7 +207,7 @@ void convertFacets(BrowseFacet[] facets){
int end = _predefinedRangeIndexes[k][1];
while(idx <= end)
{
- count += _count[idx++];
+ count += _count.get(idx++);
}
rangeCount[k] = count;
}
@@ -240,7 +243,7 @@ void convertFacets(BrowseFacet[] facets){
int maxNumOfFacets = _ospec.getMaxCount();
if (maxNumOfFacets <= 0 || maxNumOfFacets > _predefinedRangeIndexes.length) maxNumOfFacets = _predefinedRangeIndexes.length;
- int[] rangeCount = new int[_predefinedRangeIndexes.length];
+ BigSegmentedArray rangeCount = new LazyBigIntArray(_predefinedRangeIndexes.length);
for (int k=0;k<_predefinedRangeIndexes.length;++k)
{
@@ -249,9 +252,9 @@ void convertFacets(BrowseFacet[] facets){
int end = _predefinedRangeIndexes[k][1];
while(idx <= end)
{
- count += _count[idx++];
+ count += _count.get(idx++);
}
- rangeCount[k] = count;
+ rangeCount.add(k, count);
}
List<BrowseFacet> facetColl;
@@ -261,9 +264,9 @@ void convertFacets(BrowseFacet[] facets){
facetColl = new ArrayList<BrowseFacet>(maxNumOfFacets);
for (int k=0;k<_predefinedRangeIndexes.length;++k)
{
- if(rangeCount[k] >= minCount)
+ if(rangeCount.get(k) >= minCount)
{
- BrowseFacet choice=new BrowseFacet(_predefinedRanges.get(k), rangeCount[k]);
+ BrowseFacet choice=new BrowseFacet(_predefinedRanges.get(k), rangeCount.get(k));
facetColl.add(choice);
}
if(facetColl.size() >= maxNumOfFacets) break;
@@ -300,14 +303,14 @@ public Object getRawValue(int index) {
IntBoundedPriorityQueue pq=new IntBoundedPriorityQueue(comparator, maxNumOfFacets, forbidden);
for (int i=0; i<_predefinedRangeIndexes.length; ++i)
{
- if (rangeCount[i]>=minCount) pq.offer(i);
+ if (rangeCount.get(i)>=minCount) pq.offer(i);
}
int val;
facetColl=new LinkedList<BrowseFacet>();
while((val = pq.pollInt()) != forbidden)
{
- BrowseFacet facet=new BrowseFacet(_predefinedRanges.get(val),rangeCount[val]);
+ BrowseFacet facet=new BrowseFacet(_predefinedRanges.get(val),rangeCount.get(val));
((LinkedList<BrowseFacet>)facetColl).addFirst(facet);
}
}
@@ -350,19 +353,19 @@ public void close()
public FacetIterator iterator() {
if(_predefinedRanges != null) {
- int[] rangeCounts = new int[_predefinedRangeIndexes.length];
+ BigSegmentedArray rangeCounts = new LazyBigIntArray(_predefinedRangeIndexes.length);
for (int k=0;k<_predefinedRangeIndexes.length;++k)
{
int count = 0;
int idx = _predefinedRangeIndexes[k][0];
int end = _predefinedRangeIndexes[k][1];
while(idx <= end)
{
- count += _count[idx++];
+ count += _count.get(idx++);
}
- rangeCounts[k] += count;
+ rangeCounts.add(k, rangeCounts.get(k) + count);
}
- return new DefaultFacetIterator(_predefinedRanges, rangeCounts, rangeCounts.length, true);
+ return new DefaultFacetIterator(_predefinedRanges, rangeCounts, rangeCounts.size(), true);
}
return null;
}
View
16 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/SimpleFacetHandler.java
@@ -30,6 +30,7 @@
import com.browseengine.bobo.query.scoring.FacetScoreable;
import com.browseengine.bobo.query.scoring.FacetTermScoringFunctionFactory;
import com.browseengine.bobo.sort.DocComparatorSource;
+import com.browseengine.bobo.util.BigIntArray;
public class SimpleFacetHandler extends FacetHandler<FacetDataCache> implements FacetScoreable
{
@@ -231,11 +232,12 @@ public SimpleFacetCountCollector(String name,FacetDataCache dataCache,int docBas
}
public final void collect(int docid) {
- _count[_array.get(docid)]++;
+ int index = _array.get(docid);
+ _count.add(index, _count.get(index) + 1);
}
public final void collectAll() {
- _count = _dataCache.freqs;
+ _count = BigIntArray.fromArray(_dataCache.freqs);
}
}
@@ -250,12 +252,15 @@ public SimpleGroupByFacetCountCollector(String name,FacetDataCache dataCache,int
}
public final void collect(int docid) {
- if(++_count[_array.get(docid)] <= 1)
+ int index = _array.get(docid);
+ int newValue = _count.get(index) + 1;
+ _count.add(index, newValue);
+ if(newValue <= 1)
++_totalGroups;
}
public final void collectAll() {
- _count = _dataCache.freqs;
+ _count = BigIntArray.fromArray(_dataCache.freqs);
_totalGroups = -1;
}
@@ -265,7 +270,8 @@ public final int getTotalGroups() {
// If the user calls collectAll instead of collect, we have to collect all the groups here:
_totalGroups = 0;
- for (int c: _count) {
+ for (int i = 0; i < _count.size(); i++) {
+ int c = _count.get(i);
if (c > 0)
++_totalGroups;
}
View
31 bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/SimpleGroupbyFacetHandler.java
@@ -19,8 +19,8 @@
import com.browseengine.bobo.api.ComparatorFactory;
import com.browseengine.bobo.api.FacetIterator;
import com.browseengine.bobo.api.FacetSpec;
-import com.browseengine.bobo.api.FieldValueAccessor;
import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
+import com.browseengine.bobo.api.FieldValueAccessor;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.FacetCountCollectorSource;
import com.browseengine.bobo.facets.FacetHandler;
@@ -29,8 +29,11 @@
import com.browseengine.bobo.facets.filter.RandomAccessFilter;
import com.browseengine.bobo.sort.DocComparator;
import com.browseengine.bobo.sort.DocComparatorSource;
+import com.browseengine.bobo.util.BigIntArray;
+import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.IntBoundedPriorityQueue;
import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator;
+import com.browseengine.bobo.util.LazyBigIntArray;
public class SimpleGroupbyFacetHandler extends FacetHandler<FacetDataNone> {
private final LinkedHashSet<String> _fieldsSet;
@@ -182,7 +185,7 @@ public int compareTo(Object o) {
private final DefaultFacetCountCollector[] _subcollectors;
private final String _name;
private final FacetSpec _fspec;
- private final int[] _count;
+ private final BigSegmentedArray _count;
private final int _countlength;
private final int[] _lens;
private final int _maxdoc;
@@ -200,7 +203,7 @@ public GroupbyFacetCountCollector(String name,FacetSpec fspec,DefaultFacetCountC
totalLen*=_lens[i];
}
_countlength = totalLen;
- _count = new int[_countlength];
+ _count = new LazyBigIntArray(_countlength);
_maxdoc = maxdoc;
}
@@ -212,7 +215,7 @@ final public void collect(int docid) {
segsize = segsize / _lens[i++];
idx+=(subcollector._dataCache.orderArray.get(docid) * segsize);
}
- _count[idx]++;
+ _count.add(idx, _count.get(idx) + 1);
}
public void collectAll() {
@@ -221,7 +224,7 @@ public void collectAll() {
}
}
- public int[] getCountDistribution() {
+ public BigSegmentedArray getCountDistribution() {
return _count;
}
@@ -250,7 +253,7 @@ public BrowseFacet getFacet(String value) {
int count = 0;
for (int i = startIdx;i<startIdx+segLen;++i){
- count+=_count[i];
+ count+=_count.get(i);
}
BrowseFacet f = new BrowseFacet(buf.toString(),count);
@@ -273,7 +276,7 @@ public int getFacetHitsCount(Object value)
int count = 0;
for (int i=startIdx; i<startIdx+segLen; ++i)
- count += _count[i];
+ count += _count.get(i);
return count;
}
@@ -320,7 +323,7 @@ private final String getFacetString(int idx){
facetColl=new ArrayList<BrowseFacet>(max);
for (int i = 1; i < _countlength;++i) // exclude zero
{
- int hits=_count[i];
+ int hits=_count.get(i);
if (hits>=minCount)
{
BrowseFacet facet=new BrowseFacet(getFacetString(i),hits);
@@ -359,7 +362,7 @@ public Object getRawValue(int index) {
for (int i=1;i<_countlength;++i) // exclude zero
{
- int hits=_count[i];
+ int hits=_count.get(i);
if (hits>=minCount)
{
if(!pq.offer(i))
@@ -373,7 +376,7 @@ public Object getRawValue(int index) {
int val;
while((val = pq.pollInt()) != forbidden)
{
- BrowseFacet facet=new BrowseFacet(getFacetString(val),_count[val]);
+ BrowseFacet facet=new BrowseFacet(getFacetString(val),_count.get(val));
((LinkedList<BrowseFacet>)facetColl).addFirst(facet);
}
}
@@ -412,7 +415,7 @@ public Comparable next() {
throw new NoSuchElementException("No more facets in this iteration");
_index++;
facet = getFacetString(_index);
- count = _count[_index];
+ count = _count.get(_index);
return facet;
}
@@ -444,11 +447,11 @@ public Comparable next(int minHits)
do
{
_index++;
- }while( (_index < (_countlength-1)) && (_count[_index] < minHits) );
- if(_count[_index] >= minHits)
+ }while( (_index < (_countlength-1)) && (_count.get(_index) < minHits) );
+ if(_count.get(_index) >= minHits)
{
facet = getFacetString(_index);
- count = _count[_index];
+ count = _count.get(_index);
}
else
{
View
5 ...e/src/main/java/com/browseengine/bobo/facets/statistics/FacetCountStatisicsGenerator.java
@@ -24,6 +24,7 @@
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
import com.browseengine.bobo.facets.FacetCountCollector;
+import com.browseengine.bobo.util.BigIntArray;
public abstract class FacetCountStatisicsGenerator
{
@@ -89,7 +90,7 @@ public FacetCountStatistics generateStatistic(int[] distribution,int n)
public FacetCountStatistics generateStatistic(FacetCountCollector countHitCollector,int n)
{
- return generateStatistic(countHitCollector.getCountDistribution(),n);
+ return generateStatistic(BigIntArray.toArray(countHitCollector.getCountDistribution()),n);
}
public static void main(String[] args) throws Exception
@@ -133,7 +134,7 @@ public static void main(String[] args) throws Exception
{
System.out.println("====================================");
FacetCountCollector fc = (FacetCountCollector)f;
- int[] dist = fc.getCountDistribution();
+ int[] dist = BigIntArray.toArray(fc.getCountDistribution());