Permalink
Browse files

Nice threshold support

  • Loading branch information...
1 parent 853ae70 commit 879bfa76c673fbf26302eeb4b3de0984c686aa8e @lemire committed Jan 3, 2014
View
@@ -53,8 +53,7 @@ public static void main(final String[] args) throws java.io.IOException {
// mark as true a bit that occurs at least T times in the source
// bitmaps
//
- EWAHCompressedBitmap threshold2 = new EWAHCompressedBitmap();
- (new RunningBitmapMerge()).symmetric(new ThresholdFuncBitmap(2), threshold2,ewahBitmap1,ewahBitmap2,
+ EWAHCompressedBitmap threshold2 = EWAHCompressedBitmap.threshold(2, ewahBitmap1,ewahBitmap2,
ewahBitmap3,ewahBitmap4);
System.out.println("threshold 2 : "+threshold2);
@@ -8,6 +8,9 @@
import java.util.*;
import java.io.*;
+import com.googlecode.javaewah.symmetric.RunningBitmapMerge;
+import com.googlecode.javaewah.symmetric.ThresholdFuncBitmap;
+
/**
@@ -1130,7 +1133,7 @@ public boolean setSizeInBits(final int size, final boolean defaultvalue) {
* @return the size in bits
*/
@Override
-public int sizeInBits() {
+ public int sizeInBits() {
return this.sizeinbits;
}
@@ -1141,9 +1144,43 @@ public int sizeInBits() {
* @return the size in bytes
*/
@Override
-public int sizeInBytes() {
+ public int sizeInBytes() {
return this.actualsizeinwords * (wordinbits / 8);
}
+
+ /**
+ *
+ * Computes a Boolean threshold function: a bit is true in the result where at
+ * least T of the input bitmaps have a true bit.
+ *
+ * The result is written into a newly created EWAHCompressedBitmap; the actual
+ * work is delegated to
+ * {@link #thresholdWithContainer(BitmapStorage, int, EWAHCompressedBitmap...)}.
+ * By the definition above, T = 1 corresponds to the union (OR) of the inputs
+ * and T equal to the number of inputs corresponds to their intersection (AND).
+ *
+ * @since 0.8.1
+ * @param T the threshold (minimum number of inputs that must have the bit set)
+ * @param bitmaps input data
+ * @return the aggregated bitmap (a new object, distinct from the inputs)
+ */
+ public static EWAHCompressedBitmap threshold(final int T, final EWAHCompressedBitmap... bitmaps) {
+ final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); // fresh, empty result bitmap
+ thresholdWithContainer(container,T,bitmaps);
+ return container;
+ }
+
+
+
+ /**
+ *
+ * Computes a Boolean threshold function: a bit is true in the result where at
+ * least T of the input bitmaps have a true bit.
+ *
+ * Nothing is returned: the result is written to the provided container. Each
+ * call creates its own RunningBitmapMerge and ThresholdFuncBitmap, so no
+ * threshold-function object is shared between calls.
+ *
+ * @since 0.8.1
+ * @param container where we write the aggregated bitmap
+ * @param T the threshold (minimum number of inputs that must have the bit set)
+ * @param bitmaps input data
+ */
+ public static void thresholdWithContainer(final BitmapStorage container,final int T,
+ final EWAHCompressedBitmap... bitmaps) {
+ (new RunningBitmapMerge()).symmetric(new ThresholdFuncBitmap(T), container,bitmaps);
+ }
+
/**
* Populate an array of (sorted integers) corresponding to the location of the
@@ -1,12 +1,15 @@
package com.googlecode.javaewah.symmetric;
+import java.util.ArrayList;
import java.util.Arrays;
import com.googlecode.javaewah.BitmapStorage;
/**
* A threshold Boolean function returns true if the number of true values is at
* least a given threshold. It is a symmetric Boolean function.
*
+ * This object is not thread safe: you should use one function per thread.
+ *
* @see <a href="http://en.wikipedia.org/wiki/Symmetric_Boolean_function">http://en.wikipedia.org/wiki/Symmetric_Boolean_function</a>
* @author Daniel Lemire
* @since 0.8.0
@@ -16,6 +19,7 @@
int min;
long[] buffers;
int bufferUsed;
+ ArrayList<EWAHPointer> litbuffer = new ArrayList<EWAHPointer>();
/**
* Constructs a threshold function with a given threshold
@@ -38,14 +42,62 @@ public void dispatch(BitmapStorage out, int runbegin, int runend) {
out.addStreamOfEmptyWords(false, runlength);
} else {
int deficit = min - hammingWeight;
- bufferUsed = this.getNumberOfLiterals();
+ litbuffer.clear();
+ this.fillWithLiterals(litbuffer);
+ bufferUsed = litbuffer.size();
+ // trivial case where there is just one lit. word (we copy it)
+ if(bufferUsed == 1) {
+ EWAHPointer R = this.litbuffer.get(0);
+ for (int i = 0; i < runlength; ++i) {
+ out.add(R.iterator.getLiteralWordAt(i + runbegin
+ - R.beginOfRun()));
+ }
+ return;
+ }
+ // next if deficit is 1 we need to compute OR, this can be done fast
+ if(deficit == 1) {
+ long[] w = new long[runlength];
+ for (EWAHPointer R : this.litbuffer) {
+ for (int i = 0; i < runlength; ++i) {
+ w[i] |= R.iterator
+ .getLiteralWordAt(i + runbegin
+ - R.beginOfRun());
+ }
+ }
+ for (int i = 0; i < runlength; ++i) {
+ out.add(w[i]);
+ }
+ return;
+ }
+ // next if deficit is bufferUsed, we use AND
+ if(bufferUsed == deficit) {
+ long[] w = new long[runlength];
+ for (int i = 0; i < runlength; ++i) {
+ w[i] = litbuffer.get(0).iterator
+ .getLiteralWordAt(i + runbegin
+ - litbuffer.get(0).beginOfRun());
+ }
+ for(int k = 1; k < litbuffer.size(); ++k) {
+ for (int i = 0; i < runlength; ++i) {
+ w[i] &= litbuffer.get(k).iterator
+ .getLiteralWordAt(i + runbegin
+ - litbuffer.get(k).beginOfRun());
+ }
+ }
+ for (int i = 0; i < runlength; ++i) {
+ out.add(w[i]);
+ }
+
+ return;
+ }
+ // general case
if(bufferUsed > buffers.length)
buffers = Arrays.copyOf(
buffers,
2 * bufferUsed);
for (int i = 0; i < runlength; ++i) {
int p = 0;
- for (EWAHPointer R : this.getLiterals()) {
+ for (EWAHPointer R : litbuffer) {
buffers[p++] = R.iterator
.getLiteralWordAt(i + runbegin
- R.beginOfRun());
@@ -58,10 +110,10 @@ public void dispatch(BitmapStorage out, int runbegin, int runend) {
- private static int[] bufcounters = new int[64];
+ private int[] bufcounters = new int[64];
private static final int[] zeroes64 = new int[64];
- private static long threshold2buf(int T, long[] buffers, int bufUsed) {
+ private long threshold2buf(int T, long[] buffers, int bufUsed) {
long result = 0L;
int[] counters = bufcounters;
System.arraycopy(zeroes64, 0, counters, 0, 64);
@@ -80,7 +132,7 @@ private static long threshold2buf(int T, long[] buffers, int bufUsed) {
return result;
}
- private static long threshold3(int T, long[] buffers, int bufUsed) {
+ private long threshold3(int T, long[] buffers, int bufUsed) {
if (buffers.length == 0)
return 0;
long[] v = new long[T];
@@ -96,7 +148,7 @@ private static long threshold3(int T, long[] buffers, int bufUsed) {
return v[T - 1];
}
- private static long threshold4(int T, long[] buffers, int bufUsed) {
+ private long threshold4(int T, long[] buffers, int bufUsed) {
if (T >= 128)
return threshold2buf(T, buffers,bufUsed);
int B = 0;
@@ -1,6 +1,8 @@
package com.googlecode.javaewah.symmetric;
import java.util.Iterator;
+import java.util.List;
+
import com.googlecode.javaewah.datastructure.StaticBitSet;
import com.googlecode.javaewah.BitmapStorage;
@@ -34,6 +36,7 @@
public int getNumberOfLiterals() {
return litwlist.cardinality(); // NOTE(review): presumably the number of set entries in litwlist, i.e. inputs currently flagged as literal words — confirm against StaticBitSet
}
+
/**
* Goes through the literals.
@@ -70,6 +73,17 @@ public void remove() {
}
/**
+ * Appends to the given list the EWAHPointer of every input currently flagged
+ * in litwlist (the literal words). The list is only appended to: it is not
+ * cleared here, so callers that reuse a list must clear it themselves.
+ * Entries are visited by scanning litwlist with nextSetBit, starting at index 0.
+ *
+ * @param container where we write
+ */
+ public void fillWithLiterals(final List<EWAHPointer> container) {
+ for(int k = litwlist.nextSetBit(0);k >= 0;k = litwlist.nextSetBit(k + 1)) { // a negative index ends the scan
+ container.add(rw[k]);
+ }
+ }
+
+
+ /**
* @param newsize the number of inputs
*/
public void resize(final int newsize) {
@@ -19,19 +19,29 @@ public void basictest() {
EWAHCompressedBitmap ewah1 = EWAHCompressedBitmap.bitmapOf(1,53,110,1000, 1201,50000);
EWAHCompressedBitmap ewah2 = EWAHCompressedBitmap.bitmapOf(1,100,1000,1100,1200,31416,50001);
EWAHCompressedBitmap ewah3 = EWAHCompressedBitmap.bitmapOf(1,110,1000,1101,1200, 1201,31416, 31417);
- EWAHCompressedBitmap ewahorth = new EWAHCompressedBitmap();
- (new RunningBitmapMerge()).symmetric(new ThresholdFuncBitmap(1), ewahorth, ewah1, ewah2, ewah3);
+ Assert.assertTrue(EWAHCompressedBitmap.threshold(1, ewah1).equals(ewah1));
+ Assert.assertTrue(EWAHCompressedBitmap.threshold(1, ewah2).equals(ewah2));
+ Assert.assertTrue(EWAHCompressedBitmap.threshold(1, ewah3).equals(ewah3));
+ Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah1, ewah1).equals(ewah1));
+ Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah2, ewah2).equals(ewah2));
+ Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah3, ewah3).equals(ewah3));
+
+ EWAHCompressedBitmap zero = new EWAHCompressedBitmap();
+ Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah1).equals(zero));
+ Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah2).equals(zero));
+ Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah3).equals(zero));
+ Assert.assertTrue(EWAHCompressedBitmap.threshold(4, ewah1, ewah2, ewah3).equals(zero));
+
+ EWAHCompressedBitmap ewahorth = EWAHCompressedBitmap.threshold(1, ewah1, ewah2, ewah3);
EWAHCompressedBitmap ewahtrueor = EWAHCompressedBitmap.or(ewah1,ewah2,ewah3);
Assert.assertTrue(ewahorth.equals(ewahtrueor));
- EWAHCompressedBitmap ewahandth = new EWAHCompressedBitmap();
- (new RunningBitmapMerge()).symmetric(new ThresholdFuncBitmap(3), ewahandth, ewah1, ewah2, ewah3);
+ EWAHCompressedBitmap ewahandth = EWAHCompressedBitmap.threshold(3, ewah1, ewah2, ewah3);
EWAHCompressedBitmap ewahtrueand = EWAHCompressedBitmap.and(ewah1,ewah2,ewah3);
Assert.assertTrue(ewahandth.equals(ewahtrueand));
- EWAHCompressedBitmap ewahmajth = new EWAHCompressedBitmap();
- (new RunningBitmapMerge()).symmetric(new ThresholdFuncBitmap(2), ewahmajth, ewah1, ewah2, ewah3);
+ EWAHCompressedBitmap ewahmajth = EWAHCompressedBitmap.threshold(2, ewah1, ewah2, ewah3);
EWAHCompressedBitmap ewahtruemaj = EWAHCompressedBitmap.or(ewah1.and(ewah2),ewah1.and(ewah3),ewah2.and(ewah3));
Assert.assertTrue(ewahmajth.equals(ewahtruemaj));
}

0 comments on commit 879bfa7

Please sign in to comment.