Permalink
Browse files

Initial release

  • Loading branch information...
0 parents commit 78679b837819ff42936c0ccf8187d216d2a2ecff @lemire committed May 28, 2012
@@ -0,0 +1 @@
+May 28th 2012 : Initial release
@@ -0,0 +1,28 @@
+Simple benchmark comparing compressed bitmap libraries in Java
+
+Author: Daniel Lemire
+
+
+=== Copyright and licensing ===
+
+The testing Java code in this benchmark is
+released in the public domain. Note that this says nothing about the
+libraries necessary to run the benchmark.
+
+=== Questions ===
+
+- How fast can you compute logical ORs between many bitmaps?
+- What is the space-speed trade-off offered by the different libraries?
+
+
+
+=== Libraries being tested ===
+
+- compressedbitset 0.1
+- extendedset 2.2
+- JavaEWAH 0.5
+
+
+=== How to use ===
+
+Under MacOS or Linux, just execute the run.sh script.
@@ -0,0 +1,41 @@
+# For each instance, we report the size, the construction time,
+# the time required to recover the set bits,
+# and the time required to compute logical ors (unions) between lots of bitmaps.
+# sparsity 1 average set bit per 32-bit word = 16.0
+# generating random data...
+# generating random data... ok.
+# ConciseSet 32 bit using the extendedset_2.2 library
+# size, construction time, time to recover set bits, time to compute unions
+ 624 0.685 0.536 0.272
+# WAH 32 bit using the compressedbitset library
+# size, construction time, time to recover set bits, time to compute unions
+ 634 0.566 4.751 0.452
+# EWAH using the javaewah library
+# size, construction time, time to recover set bits, time to compute unions
+ 625 0.57 0.179 0.223
+
+# sparsity 5 average set bit per 32-bit word = 1.0
+# generating random data...
+# generating random data... ok.
+# ConciseSet 32 bit using the extendedset_2.2 library
+# size, construction time, time to recover set bits, time to compute unions
+ 4391 0.937 1.535 2.711
+# WAH 32 bit using the compressedbitset library
+# size, construction time, time to recover set bits, time to compute unions
+ 5738 0.802 0.984 2.442
+# EWAH using the javaewah library
+# size, construction time, time to recover set bits, time to compute unions
+ 7341 0.647 0.249 0.821
+
+# sparsity 9 average set bit per 32-bit word = 0.0625
+# generating random data...
+# generating random data... ok.
+# ConciseSet 32 bit using the extendedset_2.2 library
+# size, construction time, time to recover set bits, time to compute unions
+ 9084 1.283 0.919 12.027
+# WAH 32 bit using the compressedbitset library
+# size, construction time, time to recover set bits, time to compute unions
+ 16044 1.017 1.505 15.275
+# EWAH using the javaewah library
+# size, construction time, time to recover set bits, time to compute unions
+ 28489 2.252 0.623 4.281
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,3 @@
+mkdir -p bin
+javac -sourcepath ./src -classpath .:lib/JavaEWAH-0.5.0.jar:lib/compressedbitset-0.1.jar:lib/extendedset_2.2.jar -d bin ./src/bitmapbenchmarks/synth/benchmark.java
+java -server -cp bin:lib/JavaEWAH-0.5.0.jar:lib/compressedbitset-0.1.jar:lib/extendedset_2.2.jar bitmapbenchmarks.synth.benchmark
@@ -0,0 +1,63 @@
+package bitmapbenchmarks.synth;
+
+
+/**
+ * This class will generate "clustered" lists of random integers. That is, the
+ * integers tend not to be uniformly spread over the range: the range is split
+ * recursively at random cut points and each part is filled separately.
+ *
+ * @author Daniel Lemire
+ */
+public class ClusteredDataGenerator {
+
+  UniformDataGenerator unidg = new UniformDataGenerator();
+
+  public ClusteredDataGenerator() {
+  }
+
+  /**
+   * Writes {@code length} values into {@code array}, starting at index
+   * {@code offset}. The values come from
+   * {@code generateUniform(length, Max - Min)} and are shifted up by
+   * {@code Min}.
+   */
+  void fillUniform(int[] array, int offset, int length, int Min, int Max) {
+    final int[] drawn = this.unidg.generateUniform(length, Max - Min);
+    int target = offset;
+    for (int value : drawn) {
+      array[target++] = Min + value;
+    }
+  }
+
+  /**
+   * Writes {@code length} values into {@code array}, starting at index
+   * {@code offset}, by splitting the range at a random cut point and filling
+   * each half either uniformly or, recursively, in clustered fashion.
+   * Requests of at most 10 values, or requests that need every value of the
+   * range, are delegated to {@link #fillUniform}.
+   */
+  void fillClustered(int[] array, int offset, int length, int Min, int Max) {
+    final int range = Max - Min;
+    final boolean smallOrFull = (length <= 10) || (range == length);
+    if (smallOrFull) {
+      fillUniform(array, offset, length, Min, Max);
+      return;
+    }
+    final int firstCount = length / 2;
+    final int secondCount = length - firstCount;
+    // The cut leaves at least firstCount values below it; the random extra is
+    // only drawn when there is slack in the range.
+    final int slack = range - length - 1;
+    int cut = firstCount;
+    if (slack > 0) {
+      cut += this.unidg.rand.nextInt(slack);
+    }
+    final int boundary = Min + cut;
+    // One draw decides the mix: [0, 0.25) uniform then clustered,
+    // [0.25, 0.5) clustered then uniform, otherwise clustered on both sides.
+    final double p = this.unidg.rand.nextDouble();
+    final boolean firstUniform = p < 0.25;
+    final boolean secondUniform = !firstUniform && p < 0.5;
+    if (firstUniform) {
+      fillUniform(array, offset, firstCount, Min, boundary);
+    } else {
+      fillClustered(array, offset, firstCount, Min, boundary);
+    }
+    if (secondUniform) {
+      fillUniform(array, offset + firstCount, secondCount, boundary, Max);
+    } else {
+      fillClustered(array, offset + firstCount, secondCount, boundary, Max);
+    }
+  }
+
+  /**
+   * Generates N clustered integers by filling a fresh array over the range
+   * that starts at 0 and is bounded by Max.
+   *
+   * @param N number of integers to generate
+   * @param Max bound of the range handed to {@link #fillClustered}
+   * @return a new array of length N
+   */
+  public int[] generateClustered(int N, int Max) {
+    final int[] result = new int[N];
+    fillClustered(result, 0, N, 0, Max);
+    return result;
+  }
+
+  /** Prints a sample of 20 clustered integers bounded by 1000, one per line. */
+  public static void main(String[] args) {
+    final ClusteredDataGenerator generator = new ClusteredDataGenerator();
+    for (int value : generator.generateClustered(20, 1000)) {
+      System.out.println(value);
+    }
+  }
+
+}
+
+
@@ -0,0 +1,41 @@
+package bitmapbenchmarks.synth;
+
+import java.util.Iterator;
+import java.util.Random;
+import java.util.TreeSet;
+
+/**
+ * This class will generate uniformly distributed lists of random integers.
+ *
+ * @author Daniel Lemire
+ */
+public class UniformDataGenerator {
+  Random rand = new Random();
+
+  public UniformDataGenerator() {
+  }
+
+  /**
+   * Generates N distinct integers drawn at random from the range
+   * [0, Max), returned in increasing order.
+   *
+   * @param N number of distinct integers to generate
+   * @param Max exclusive upper bound of the range
+   * @return a new array of N distinct values in increasing order
+   * @throws IllegalArgumentException if N is larger than Max, since the
+   *         range cannot supply that many distinct values
+   */
+  int[] generateUniform(int N, int Max) {
+    if (N > Max) {
+      throw new IllegalArgumentException(
+          "not possible: cannot draw " + N + " distinct integers from a range of size " + Max);
+    }
+    int[] ans = new int[N];
+    if (N == Max) {
+      // Every value of the range is needed: no randomness involved.
+      for (int k = 0; k < N; ++k) {
+        ans[k] = k;
+      }
+      return ans;
+    }
+    // can be done faster:
+    // Rejection sampling: keep drawing until N distinct values are collected.
+    // The TreeSet both removes duplicates and yields the values in order.
+    TreeSet<Integer> s = new TreeSet<Integer>();
+    while (s.size() < N) {
+      s.add(this.rand.nextInt(Max));
+    }
+    int k = 0;
+    for (int value : s) {
+      ans[k++] = value;
+    }
+    return ans;
+  }
+
+}
@@ -0,0 +1,189 @@
+package bitmapbenchmarks.synth;
+
+
+import it.uniroma3.mat.extendedset.intset.ConciseSet;
+import java.text.DecimalFormat;
+import java.util.BitSet;
+import java.util.Iterator;
+import java.util.List;
+import org.devbrat.util.WAHBitSet;
+import javaewah.EWAHCompressedBitmap;
+import javaewah32.EWAHCompressedBitmap32;
+
+/**
+ * Benchmark comparing three compressed bitmap implementations (ConciseSet,
+ * WAHBitSet and EWAHCompressedBitmap) on synthetic clustered data. For each
+ * implementation it reports the compressed size, the time to build the
+ * bitmaps, the time to recover the set bits, and the time to compute unions
+ * over growing prefixes of the bitmap list.
+ */
+public class benchmark {
+
+  public static void main(String args[]) {
+    test(10, 18, 10);
+  }
+
+  /**
+   * Formats the time elapsed between two {@link System#nanoTime()} readings
+   * as seconds. nanoTime is used because, unlike currentTimeMillis, it is not
+   * affected by adjustments of the wall clock.
+   */
+  private static String elapsedSeconds(long startNanos, long endNanos, DecimalFormat df) {
+    return df.format((endNanos - startNanos) / 1.0e9);
+  }
+
+  /**
+   * Benchmarks WAHBitSet and prints one tab-separated result line.
+   *
+   * @param data one array of bit positions per bitmap
+   * @param repeat number of times each timed phase is repeated
+   * @param df formatter applied to the timings (in seconds)
+   * @return a checksum over the computed results; it exists so that the
+   *         timed work has an observable outcome
+   */
+  public static long testWAH32(int[][] data, int repeat, DecimalFormat df) {
+    System.out.println("# WAH 32 bit using the compressedbitset library");
+    System.out.println("# size, construction time, time to recover set bits, time to compute unions");
+    long bef, aft;
+    StringBuilder line = new StringBuilder();
+    long bogus = 0;
+    int N = data.length;
+    // construction
+    bef = System.nanoTime();
+    WAHBitSet[] bitmap = new WAHBitSet[N];
+    int size = 0;
+    for (int r = 0; r < repeat; ++r) {
+      size = 0;
+      for (int k = 0; k < N; ++k) {
+        bitmap[k] = new WAHBitSet();
+        for (int x = 0; x < data[k].length; ++x) {
+          bitmap[k].set(data[k][x]);
+        }
+        // presumably memSize() counts 32-bit words, hence the factor 4
+        // to get bytes -- TODO confirm against the library
+        size += bitmap[k].memSize() * 4;
+      }
+    }
+    aft = System.nanoTime();
+    line.append("\t").append(size / 1024);
+    line.append("\t").append(elapsedSeconds(bef, aft, df));
+    // uncompressing
+    bef = System.nanoTime();
+    for (int r = 0; r < repeat; ++r) {
+      for (int k = 0; k < N; ++k) {
+        int[] array = new int[bitmap[k].cardinality()];
+        int c = 0;
+        @SuppressWarnings("unchecked")
+        Iterator<Integer> i = bitmap[k].iterator();
+        while (i.hasNext()) {
+          array[c++] = i.next().intValue();
+        }
+        bogus += c;
+      }
+    }
+    aft = System.nanoTime();
+    line.append("\t").append(elapsedSeconds(bef, aft, df));
+    // logical or: for each k, union of bitmap[0..k] (same inputs as the
+    // EWAH test, so that the timings are comparable)
+    WAHBitSet[] unions = new WAHBitSet[N];
+    bef = System.nanoTime();
+    for (int r = 0; r < repeat; ++r) {
+      for (int k = 0; k < N; ++k) {
+        WAHBitSet bitmapor = bitmap[0];
+        for (int j = 1; j <= k; ++j) {
+          bitmapor = bitmapor.or(bitmap[j]);
+        }
+        unions[k] = bitmapor;
+      }
+    }
+    aft = System.nanoTime();
+    // Consume the unions after the clock has stopped so that the timed
+    // region is not inflated by the checksum computation.
+    for (int k = 0; k < N; ++k) {
+      if (unions[k] != null) {
+        bogus += unions[k].cardinality();
+      }
+    }
+    line.append("\t").append(elapsedSeconds(bef, aft, df));
+    System.out.println(line);
+    return bogus;
+  }
+
+  /**
+   * Benchmarks ConciseSet and prints one tab-separated result line.
+   *
+   * @param data one array of bit positions per bitmap
+   * @param repeat number of times each timed phase is repeated
+   * @param df formatter applied to the timings (in seconds)
+   * @return a checksum over the computed results; it exists so that the
+   *         timed work has an observable outcome
+   */
+  public static long testConciseSet(int[][] data, int repeat, DecimalFormat df) {
+    System.out.println("# ConciseSet 32 bit using the extendedset_2.2 library");
+    System.out.println("# size, construction time, time to recover set bits, time to compute unions");
+    long bef, aft;
+    StringBuilder line = new StringBuilder();
+    long bogus = 0;
+    int N = data.length;
+    // construction
+    bef = System.nanoTime();
+    ConciseSet[] bitmap = new ConciseSet[N];
+    int size = 0;
+    for (int r = 0; r < repeat; ++r) {
+      size = 0;
+      for (int k = 0; k < N; ++k) {
+        bitmap[k] = new ConciseSet();
+        for (int x = 0; x < data[k].length; ++x) {
+          bitmap[k].add(data[k][x]);
+        }
+        // presumably size() * collectionCompressionRatio() estimates the
+        // number of 32-bit words used -- TODO confirm against the library
+        size += (int) (bitmap[k].size() * bitmap[k].collectionCompressionRatio()) * 4;
+      }
+    }
+    aft = System.nanoTime();
+    line.append("\t").append(size / 1024);
+    line.append("\t").append(elapsedSeconds(bef, aft, df));
+    // uncompressing
+    bef = System.nanoTime();
+    for (int r = 0; r < repeat; ++r) {
+      for (int k = 0; k < N; ++k) {
+        int[] array = bitmap[k].toArray();
+        bogus += array.length;
+      }
+    }
+    aft = System.nanoTime();
+    line.append("\t").append(elapsedSeconds(bef, aft, df));
+    // logical or: for each k, union of bitmap[0..k] (same inputs as the
+    // EWAH test, so that the timings are comparable)
+    ConciseSet[] unions = new ConciseSet[N];
+    bef = System.nanoTime();
+    for (int r = 0; r < repeat; ++r) {
+      for (int k = 0; k < N; ++k) {
+        ConciseSet bitmapor = bitmap[0];
+        for (int j = 1; j <= k; ++j) {
+          bitmapor = bitmapor.union(bitmap[j]);
+        }
+        unions[k] = bitmapor;
+      }
+    }
+    aft = System.nanoTime();
+    // Consume the unions after the clock has stopped so that the timed
+    // region is not inflated by the checksum computation.
+    for (int k = 0; k < N; ++k) {
+      if (unions[k] != null) {
+        bogus += unions[k].size();
+      }
+    }
+    line.append("\t").append(elapsedSeconds(bef, aft, df));
+    System.out.println(line);
+    return bogus;
+  }
+
+  /**
+   * Benchmarks EWAHCompressedBitmap and prints one tab-separated result line.
+   *
+   * @param data one array of bit positions per bitmap
+   * @param repeat number of times each timed phase is repeated
+   * @param df formatter applied to the timings (in seconds)
+   * @return a checksum over the computed results; it exists so that the
+   *         timed work has an observable outcome
+   */
+  public static long testEWAH64(int[][] data, int repeat, DecimalFormat df) {
+    System.out.println("# EWAH using the javaewah library");
+    System.out.println("# size, construction time, time to recover set bits, time to compute unions");
+    long bef, aft;
+    StringBuilder line = new StringBuilder();
+    long bogus = 0;
+    int N = data.length;
+    // construction
+    bef = System.nanoTime();
+    EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N];
+    int size = 0;
+    for (int r = 0; r < repeat; ++r) {
+      size = 0;
+      for (int k = 0; k < N; ++k) {
+        ewah[k] = new EWAHCompressedBitmap();
+        for (int x = 0; x < data[k].length; ++x) {
+          ewah[k].set(data[k][x]);
+        }
+        size += ewah[k].sizeInBytes();
+      }
+    }
+    aft = System.nanoTime();
+    line.append("\t").append(size / 1024);
+    line.append("\t").append(elapsedSeconds(bef, aft, df));
+    // uncompressing
+    bef = System.nanoTime();
+    for (int r = 0; r < repeat; ++r) {
+      for (int k = 0; k < N; ++k) {
+        int[] array = ewah[k].toArray();
+        bogus += array.length;
+      }
+    }
+    aft = System.nanoTime();
+    line.append("\t").append(elapsedSeconds(bef, aft, df));
+    // fast logical or: for each k, union of ewah[0..k]
+    bef = System.nanoTime();
+    for (int r = 0; r < repeat; ++r) {
+      for (int k = 0; k < N; ++k) {
+        EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
+        for (int j = 0; j < k + 1; ++j) {
+          // take the j-th bitmap; indexing with k here would OR k + 1
+          // copies of one bitmap instead of k + 1 different bitmaps
+          ewahcp[j] = ewah[j];
+        }
+        EWAHCompressedBitmap bitmapor = EWAHCompressedBitmap.or(ewahcp);
+        bogus += bitmapor.sizeInBits();
+      }
+    }
+    aft = System.nanoTime();
+    line.append("\t").append(elapsedSeconds(bef, aft, df));
+    System.out.println(line);
+    return bogus;
+  }
+
+  /**
+   * Runs all three benchmarks over a series of increasingly sparse data sets.
+   *
+   * @param N number of bitmaps per data set
+   * @param nbr each bitmap receives {@code 1 << nbr} values
+   * @param repeat number of times each timed phase is repeated
+   */
+  public static void test(int N, int nbr, int repeat) {
+    DecimalFormat df = new DecimalFormat("0.###");
+    ClusteredDataGenerator cdg = new ClusteredDataGenerator();
+    System.out.println("# For each instance, we report the size, the construction time, ");
+    System.out.println("# the time required to recover the set bits,");
+    System.out.println("# and the time required to compute logical ors (unions) between lots of bitmaps.");
+    // The bound keeps nbr + sparsity below 31 so that the shifts below stay
+    // within the positive int range.
+    for (int sparsity = 1; sparsity < 31 - nbr; sparsity += 4) {
+      System.out.println("# sparsity "+sparsity+" average set bit per 32-bit word = "+(1<<nbr)*32.0/(1 << (nbr + sparsity)));
+      int[][] data = new int[N][];
+      int Max = (1 << (nbr + sparsity));
+      System.out.println("# generating random data...");
+      for (int k = 0; k < N; ++k) {
+        data[k] = cdg.generateClustered(1 << nbr, Max);
+      }
+      System.out.println("# generating random data... ok.");
+      // building
+      testConciseSet(data, repeat, df);
+      testWAH32(data, repeat, df);
+      testEWAH64(data, repeat, df);
+      System.out.println();
+    }
+  }
+}

0 comments on commit 78679b8

Please sign in to comment.