Skip to content

Commit

Permalink
Nice threshold support
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire committed Jan 3, 2014
1 parent 853ae70 commit 879bfa7
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 16 deletions.
3 changes: 1 addition & 2 deletions example.java
Expand Up @@ -53,8 +53,7 @@ public static void main(final String[] args) throws java.io.IOException {
// mark as true a bit that occurs at least T times in the source
// bitmaps
//
EWAHCompressedBitmap threshold2 = new EWAHCompressedBitmap();
(new RunningBitmapMerge()).symmetric(new ThresholdFuncBitmap(2), threshold2,ewahBitmap1,ewahBitmap2,
EWAHCompressedBitmap threshold2 = EWAHCompressedBitmap.threshold(2, ewahBitmap1,ewahBitmap2,
ewahBitmap3,ewahBitmap4);
System.out.println("threshold 2 : "+threshold2);

Expand Down
41 changes: 39 additions & 2 deletions src/main/java/com/googlecode/javaewah/EWAHCompressedBitmap.java
Expand Up @@ -8,6 +8,9 @@
import java.util.*;
import java.io.*;

import com.googlecode.javaewah.symmetric.RunningBitmapMerge;
import com.googlecode.javaewah.symmetric.ThresholdFuncBitmap;



/**
Expand Down Expand Up @@ -1130,7 +1133,7 @@ public boolean setSizeInBits(final int size, final boolean defaultvalue) {
* @return the size in bits
*/
@Override
public int sizeInBits() {
public int sizeInBits() {
return this.sizeinbits;
}

Expand All @@ -1141,9 +1144,43 @@ public int sizeInBits() {
* @return the size in bytes
*/
@Override
public int sizeInBytes() {
public int sizeInBytes() {
return this.actualsizeinwords * (wordinbits / 8);
}

/**
 * Computes a Boolean threshold function over the given bitmaps: a bit of the
 * result is true where at least T of the input bitmaps have a true bit.
 *
 * The result is written into a freshly created EWAHCompressedBitmap by
 * delegating to thresholdWithContainer.
 *
 * @since 0.8.1
 * @param T the threshold
 * @param bitmaps input data
 * @return the aggregated bitmap
 */
public static EWAHCompressedBitmap threshold(final int T, final EWAHCompressedBitmap... bitmaps) {
    final EWAHCompressedBitmap answer = new EWAHCompressedBitmap();
    thresholdWithContainer(answer, T, bitmaps);
    return answer;
}



/**
 * Computes a Boolean threshold function over the given bitmaps and writes the
 * result to the provided container: bits are true where at least T bitmaps
 * have a true bit.
 *
 * @since 0.8.1
 * @param container where we write the aggregated bitmap
 * @param T the threshold
 * @param bitmaps input data
 */
public static void thresholdWithContainer(final BitmapStorage container, final int T,
        final EWAHCompressedBitmap... bitmaps) {
    // Same evaluation order as a single chained expression: the merger is
    // created first, then the threshold function, then the merge runs.
    final RunningBitmapMerge merger = new RunningBitmapMerge();
    final ThresholdFuncBitmap atLeastT = new ThresholdFuncBitmap(T);
    merger.symmetric(atLeastT, container, bitmaps);
}


/**
* Populate an array of (sorted integers) corresponding to the location of the
Expand Down
@@ -1,12 +1,15 @@
package com.googlecode.javaewah.symmetric;

import java.util.ArrayList;
import java.util.Arrays;
import com.googlecode.javaewah.BitmapStorage;

/**
* A threshold Boolean function returns true if the number of true values is at
* least a given threshold. It is a symmetric Boolean function.
*
* This object is not thread safe: you should use one function per thread.
*
* @see <a href="http://en.wikipedia.org/wiki/Symmetric_Boolean_function">http://en.wikipedia.org/wiki/Symmetric_Boolean_function</a>
* @author Daniel Lemire
* @since 0.8.0
Expand All @@ -16,6 +19,7 @@ public class ThresholdFuncBitmap extends UpdateableBitmapFunction {
int min;
long[] buffers;
int bufferUsed;
ArrayList<EWAHPointer> litbuffer = new ArrayList<EWAHPointer>();

/**
* Construct a threshold function with a given threshold
Expand All @@ -38,14 +42,62 @@ public void dispatch(BitmapStorage out, int runbegin, int runend) {
out.addStreamOfEmptyWords(false, runlength);
} else {
int deficit = min - hammingWeight;
bufferUsed = this.getNumberOfLiterals();
litbuffer.clear();
this.fillWithLiterals(litbuffer);
bufferUsed = litbuffer.size();
// trivial case where there is just one lit. word (we copy it)
if(bufferUsed == 1) {
EWAHPointer R = this.litbuffer.get(0);
for (int i = 0; i < runlength; ++i) {
out.add(R.iterator.getLiteralWordAt(i + runbegin
- R.beginOfRun()));
}
return;
}
// next if deficit is 1 we need to compute OR, this can be done fast
if(deficit == 1) {
long[] w = new long[runlength];
for (EWAHPointer R : this.litbuffer) {
for (int i = 0; i < runlength; ++i) {
w[i] |= R.iterator
.getLiteralWordAt(i + runbegin
- R.beginOfRun());
}
}
for (int i = 0; i < runlength; ++i) {
out.add(w[i]);
}
return;
}
// next if deficit is bufferUsed, we use AND
if(bufferUsed == deficit) {
long[] w = new long[runlength];
for (int i = 0; i < runlength; ++i) {
w[i] = litbuffer.get(0).iterator
.getLiteralWordAt(i + runbegin
- litbuffer.get(0).beginOfRun());
}
for(int k = 1; k < litbuffer.size(); ++k) {
for (int i = 0; i < runlength; ++i) {
w[i] &= litbuffer.get(k).iterator
.getLiteralWordAt(i + runbegin
- litbuffer.get(k).beginOfRun());
}
}
for (int i = 0; i < runlength; ++i) {
out.add(w[i]);
}

return;
}
// general case
if(bufferUsed > buffers.length)
buffers = Arrays.copyOf(
buffers,
2 * bufferUsed);
for (int i = 0; i < runlength; ++i) {
int p = 0;
for (EWAHPointer R : this.getLiterals()) {
for (EWAHPointer R : litbuffer) {
buffers[p++] = R.iterator
.getLiteralWordAt(i + runbegin
- R.beginOfRun());
Expand All @@ -58,10 +110,10 @@ public void dispatch(BitmapStorage out, int runbegin, int runend) {



private static int[] bufcounters = new int[64];
private int[] bufcounters = new int[64];
private static final int[] zeroes64 = new int[64];

private static long threshold2buf(int T, long[] buffers, int bufUsed) {
private long threshold2buf(int T, long[] buffers, int bufUsed) {
long result = 0L;
int[] counters = bufcounters;
System.arraycopy(zeroes64, 0, counters, 0, 64);
Expand All @@ -80,7 +132,7 @@ private static long threshold2buf(int T, long[] buffers, int bufUsed) {
return result;
}

private static long threshold3(int T, long[] buffers, int bufUsed) {
private long threshold3(int T, long[] buffers, int bufUsed) {
if (buffers.length == 0)
return 0;
long[] v = new long[T];
Expand All @@ -96,7 +148,7 @@ private static long threshold3(int T, long[] buffers, int bufUsed) {
return v[T - 1];
}

private static long threshold4(int T, long[] buffers, int bufUsed) {
private long threshold4(int T, long[] buffers, int bufUsed) {
if (T >= 128)
return threshold2buf(T, buffers,bufUsed);
int B = 0;
Expand Down
@@ -1,6 +1,8 @@
package com.googlecode.javaewah.symmetric;

import java.util.Iterator;
import java.util.List;

import com.googlecode.javaewah.datastructure.StaticBitSet;
import com.googlecode.javaewah.BitmapStorage;

Expand Down Expand Up @@ -34,6 +36,7 @@ public abstract class UpdateableBitmapFunction {
public int getNumberOfLiterals() {
return litwlist.cardinality();
}


/**
* Goes through the literals.
Expand Down Expand Up @@ -69,6 +72,17 @@ public void remove() {
};
}

/**
 * Appends the literal words, as EWAHPointer instances, to the given list:
 * one entry of rw is added for each set bit of litwlist.
 *
 * The list is not cleared first; entries are added after any existing ones.
 *
 * @param container where we write
 */
public void fillWithLiterals(final List<EWAHPointer> container) {
    int pos = litwlist.nextSetBit(0);
    // A negative position ends the walk over the set bits.
    while (pos >= 0) {
        container.add(rw[pos]);
        pos = litwlist.nextSetBit(pos + 1);
    }
}


/**
* @param newsize the number of inputs
*/
Expand Down
22 changes: 16 additions & 6 deletions src/test/java/com/googlecode/javaewah/ThresholdFuncBitmapTest.java
Expand Up @@ -19,19 +19,29 @@ public void basictest() {
EWAHCompressedBitmap ewah1 = EWAHCompressedBitmap.bitmapOf(1,53,110,1000, 1201,50000);
EWAHCompressedBitmap ewah2 = EWAHCompressedBitmap.bitmapOf(1,100,1000,1100,1200,31416,50001);
EWAHCompressedBitmap ewah3 = EWAHCompressedBitmap.bitmapOf(1,110,1000,1101,1200, 1201,31416, 31417);
EWAHCompressedBitmap ewahorth = new EWAHCompressedBitmap();
(new RunningBitmapMerge()).symmetric(new ThresholdFuncBitmap(1), ewahorth, ewah1, ewah2, ewah3);

Assert.assertTrue(EWAHCompressedBitmap.threshold(1, ewah1).equals(ewah1));
Assert.assertTrue(EWAHCompressedBitmap.threshold(1, ewah2).equals(ewah2));
Assert.assertTrue(EWAHCompressedBitmap.threshold(1, ewah3).equals(ewah3));
Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah1, ewah1).equals(ewah1));
Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah2, ewah2).equals(ewah2));
Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah3, ewah3).equals(ewah3));

EWAHCompressedBitmap zero = new EWAHCompressedBitmap();
Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah1).equals(zero));
Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah2).equals(zero));
Assert.assertTrue(EWAHCompressedBitmap.threshold(2, ewah3).equals(zero));
Assert.assertTrue(EWAHCompressedBitmap.threshold(4, ewah1, ewah2, ewah3).equals(zero));

EWAHCompressedBitmap ewahorth = EWAHCompressedBitmap.threshold(1, ewah1, ewah2, ewah3);
EWAHCompressedBitmap ewahtrueor = EWAHCompressedBitmap.or(ewah1,ewah2,ewah3);
Assert.assertTrue(ewahorth.equals(ewahtrueor));

EWAHCompressedBitmap ewahandth = new EWAHCompressedBitmap();
(new RunningBitmapMerge()).symmetric(new ThresholdFuncBitmap(3), ewahandth, ewah1, ewah2, ewah3);
EWAHCompressedBitmap ewahandth = EWAHCompressedBitmap.threshold(3, ewah1, ewah2, ewah3);
EWAHCompressedBitmap ewahtrueand = EWAHCompressedBitmap.and(ewah1,ewah2,ewah3);
Assert.assertTrue(ewahandth.equals(ewahtrueand));

EWAHCompressedBitmap ewahmajth = new EWAHCompressedBitmap();
(new RunningBitmapMerge()).symmetric(new ThresholdFuncBitmap(2), ewahmajth, ewah1, ewah2, ewah3);
EWAHCompressedBitmap ewahmajth = EWAHCompressedBitmap.threshold(2, ewah1, ewah2, ewah3);
EWAHCompressedBitmap ewahtruemaj = EWAHCompressedBitmap.or(ewah1.and(ewah2),ewah1.and(ewah3),ewah2.and(ewah3));
Assert.assertTrue(ewahmajth.equals(ewahtruemaj));
}
Expand Down

0 comments on commit 879bfa7

Please sign in to comment.