Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Preparing new release

  • Loading branch information...
commit 629577a3243c4361f09295e5e3389a86eb242586 1 parent 60351c5
@lemire authored
View
4 CHANGELOG
@@ -1,3 +1,7 @@
+0.0.11 (Feb. 14th 2014)
+ - Fix rare bug in FastPFOR (reported by Stefan Ackermann (https://github.com/Stivo))
+ - Improved API documentation
+
0.0.10 (Jan. 25th 2014)
- cleaning the code and improving the documentation
View
21 src/main/java/me/lemire/integercompression/BinaryPacking.java
@@ -8,9 +8,30 @@
/**
* Scheme based on a commonly used idea: can be extremely fast.
+ * It encodes integers in blocks of 128 integers. For arrays containing
+ * an arbitrary number of integers, you should use it in conjunction
+ * with another CODEC:
+ *
+ * <pre>IntegerCODEC ic =
+ * new Composition(new BinaryPacking(), new VariableByte()).</pre>
*
* Note that this does not use differential coding: if you are working on sorted
* lists, use IntegratedBinaryPacking instead.
+ *
+ * <p>
+ * For details, please see
+ * </p>
+ * <p>
+ * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second
+ * through vectorization Software: Practice &amp; Experience
+ * http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract
+ * http://arxiv.org/abs/1209.2137
+ * </p>
+ * <p>
+ * Daniel Lemire, Leonid Boytsov, Nathan Kurz,
+ * SIMD Compression and the Intersection of Sorted Integers
+ * http://arxiv.org/abs/1401.6399
+ * </p>
*
* @author Daniel Lemire
*/
View
7 src/main/java/me/lemire/integercompression/DeltaZigzagBinaryPacking.java
@@ -8,6 +8,13 @@
/**
* BinaryPacking with Delta+Zigzag Encoding.
*
+ * It encodes integers in blocks of 128 integers. For arrays containing
+ * an arbitrary number of integers, you should use it in conjunction
+ * with another CODEC:
+ *
+ * <pre>IntegerCODEC ic = new Composition(new DeltaZigzagBinaryPacking(),
+ * new DeltaZigzagVariableByte()).</pre>
+ *
* @author MURAOKA Taro http://github.com/koron
*/
public final class DeltaZigzagBinaryPacking implements IntegerCODEC {
View
9 src/main/java/me/lemire/integercompression/FastPFOR.java
@@ -11,6 +11,11 @@
/**
* This is a patching scheme designed for speed.
+ * It encodes integers in blocks of 128 integers. For arrays containing
+ * an arbitrary number of integers, you should use it in conjunction
+ * with another CODEC:
+ *
+ * <pre>IntegerCODEC ic = new Composition(new FastPFOR(), new VariableByte()).</pre>
* <p/>
* For details, please see
* <p/>
@@ -189,7 +194,7 @@ private void encodePage(int[] in, IntWrapper inpos, int thissize,
bitmap |= (1 << (k - 1));
}
out[tmpoutpos++] = bitmap;
- for (int k = 1; k <= 31; ++k) {
+ for (int k = 1; k <= 32; ++k) {
if (dataPointers[k] != 0) {
out[tmpoutpos++] = dataPointers[k];// size
for (int j = 0; j < dataPointers[k]; j += 32) {
@@ -242,7 +247,7 @@ private void decodePage(int[] in, IntWrapper inpos, int[] out,
inexcept += bytesize / 4;
final int bitmap = in[inexcept++];
- for (int k = 1; k <= 31; ++k) {
+ for (int k = 1; k <= 32; ++k) {
if ((bitmap & (1 << (k - 1))) != 0) {
int size = in[inexcept++];
if (dataTobePacked[k].length < size)
View
23 src/main/java/me/lemire/integercompression/IntegratedBinaryPacking.java
@@ -13,6 +13,29 @@
* You should only use this scheme on sorted arrays. Use BinaryPacking if you
* have unsorted arrays.
*
+ * It encodes integers in blocks of 128 integers. For arrays containing
+ * an arbitrary number of integers, you should use it in conjunction
+ * with another CODEC:
+ *
+ * <pre>IntegratedIntegerCODEC is =
+ * new IntegratedComposition(new IntegratedBinaryPacking(),
+ * new IntegratedVariableByte())</pre>
+ *
+ * <p>
+ * For details, please see
+ * </p>
+ * <p>
+ * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second
+ * through vectorization Software: Practice &amp; Experience
+ * http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract
+ * http://arxiv.org/abs/1209.2137
+ * </p>
+ * <p>
+ * Daniel Lemire, Leonid Boytsov, Nathan Kurz,
+ * SIMD Compression and the Intersection of Sorted Integers
+ * http://arxiv.org/abs/1401.6399
+ * </p>
+ *
* @author Daniel Lemire
*
*/
View
16 src/main/java/me/lemire/integercompression/IntegratedFastPFOR.java
@@ -15,6 +15,13 @@
* differential coding as part of the compression.
* </p>
*
+ * It encodes integers in blocks of 128 integers. For arrays containing
+ * an arbitrary number of integers, you should use it in conjunction
+ * with another CODEC:
+ * <pre>IntegratedIntegerCODEC is =
+ * new IntegratedComposition(new IntegratedFastPFOR(),
+ * new IntegratedVariableByte())</pre>
+ *
* <p>
* For details, please see
* </p>
@@ -25,6 +32,11 @@
* http://arxiv.org/abs/1209.2137
* </p>
* <p>
+ * Daniel Lemire, Leonid Boytsov, Nathan Kurz,
+ * SIMD Compression and the Intersection of Sorted Integers
+ * http://arxiv.org/abs/1401.6399
+ * </p>
+ * <p>
* For multi-threaded applications, each thread should use its own
* IntegratedFastPFOR object.
* </p>
@@ -199,7 +211,7 @@ private void encodePage(int[] constin, IntWrapper constinpos,
bitmap |= (1 << (k - 1));
}
out[tmpoutpos++] = bitmap;
- for (int k = 1; k <= 31; ++k) {
+ for (int k = 1; k <= 32; ++k) {
if (dataPointers[k] != 0) {
out[tmpoutpos++] = dataPointers[k];// size
for (int j = 0; j < dataPointers[k]; j += 32) {
@@ -253,7 +265,7 @@ private void decodePage(int[] in, IntWrapper inpos, int[] out,
inexcept += bytesize / 4;
final int bitmap = in[inexcept++];
- for (int k = 1; k <= 31; ++k) {
+ for (int k = 1; k <= 32; ++k) {
if ((bitmap & (1 << (k - 1))) != 0) {
int size = in[inexcept++];
if (dataTobePacked[k].length < size)
View
1  src/main/java/me/lemire/integercompression/IntegratedVariableByte.java
@@ -16,7 +16,6 @@
* You should only use this scheme on sorted arrays. Use VariableByte if you
* have unsorted arrays.
*
- *
* @author Daniel Lemire
*/
public class IntegratedVariableByte implements IntegratedIntegerCODEC,
View
7 src/main/java/me/lemire/integercompression/NewPFD.java
@@ -17,6 +17,13 @@
* <p/>
* using Simple16 as the secondary coder.
*
+ * It encodes integers in blocks of 128 integers. For arrays containing
+ * an arbitrary number of integers, you should use it in conjunction
+ * with another CODEC:
+ *
+ * <pre>IntegerCODEC ic =
+ * new Composition(new NewPFD(), new VariableByte()).</pre>
+ *
* Note that this does not use differential coding: if you are working on sorted
* lists, you must compute the deltas separately. (Yes, this is true even though
* the "D" at the end of the name probably stands for delta.)
View
7 src/main/java/me/lemire/integercompression/NewPFDS16.java
@@ -17,6 +17,13 @@
* <p/>
* using Simple16 as the secondary coder.
*
+ * It encodes integers in blocks of 128 integers. For arrays containing
+ * an arbitrary number of integers, you should use it in conjunction
+ * with another CODEC:
+ *
+ * <pre>IntegerCODEC ic =
+ * new Composition(new NewPFDS16(), new VariableByte()).</pre>
+ *
* Note that this does not use differential coding: if you are working on sorted
* lists, you must compute the deltas separately.
*
View
6 src/main/java/me/lemire/integercompression/NewPFDS9.java
@@ -17,6 +17,12 @@
* <p/>
* using Simple9 as the secondary coder.
*
+ * It encodes integers in blocks of 128 integers. For arrays containing
+ * an arbitrary number of integers, you should use it in conjunction
+ * with another CODEC:
+ *
+ * <pre>IntegerCODEC ic = new Composition(new NewPFDS9(), new VariableByte()).</pre>
+ *
* Note that this does not use differential coding: if you are working on sorted
* lists, you must compute the deltas separately.
*
View
6 src/main/java/me/lemire/integercompression/OptPFD.java
@@ -16,6 +16,12 @@
* <p/>
* using Simple16 as the secondary coder.
*
+ * It encodes integers in blocks of 128 integers. For arrays containing
+ * an arbitrary number of integers, you should use it in conjunction
+ * with another CODEC:
+ *
+ * <pre>IntegerCODEC ic = new Composition(new OptPFD(), new VariableByte()).</pre>
+ *
* Note that this does not use differential coding: if you are working on sorted
* lists, you must compute the deltas separately. (Yes, this is true even though
* the "D" at the end of the name probably stands for delta.)
View
6 src/main/java/me/lemire/integercompression/OptPFDS16.java
@@ -17,6 +17,12 @@
* <p/>
* using Simple16 as the secondary coder.
*
+ * It encodes integers in blocks of 128 integers. For arrays containing
+ * an arbitrary number of integers, you should use it in conjunction
+ * with another CODEC:
+ *
+ * <pre>IntegerCODEC ic = new Composition(new OptPFDS16(), new VariableByte()).</pre>
+ *
* Note that this does not use differential coding: if you are working on sorted
* lists, you must compute the deltas separately.
*
View
5 src/main/java/me/lemire/integercompression/OptPFDS9.java
@@ -17,6 +17,11 @@
* <p/>
* using Simple9 as the secondary coder.
*
+ * It encodes integers in blocks of 128 integers. For arrays containing
+ * an arbitrary number of integers, you should use it in conjunction
+ * with another CODEC:
+ *
+ * <pre> IntegerCODEC ic = new Composition(new OptPFDS9(), new VariableByte()).</pre>
*
* Note that this does not use differential coding: if you are working on sorted
* lists, you must compute the deltas separately.
View
5 src/main/java/me/lemire/integercompression/XorBinaryPacking.java
@@ -5,7 +5,10 @@
package me.lemire.integercompression;
/**
- * XOR + BinaryPacking.
+ * BinaryPacking over XOR differential.
+ *
+ * <pre>IntegratedIntegerCODEC is =
+ * new IntegratedComposition(new XorBinaryPacking(), new IntegratedVariableByte())</pre>
*
* @author MURAOKA Taro http://github.com/koron
*/
View
97 src/test/java/me/lemire/integercompression/BasicTest.java
@@ -100,10 +100,12 @@ public void varyingLengthTest2() {
@Test
public void checkDeltaZigzagVB() {
DeltaZigzagVariableByte codec = new DeltaZigzagVariableByte();
+ DeltaZigzagVariableByte codeco = new DeltaZigzagVariableByte();
+
testZeroInZeroOut(codec);
- test(codec, 5, 10);
- test(codec, 5, 14);
- test(codec, 2, 18);
+ test(codec, codeco, 5, 10);
+ test(codec, codeco, 5, 14);
+ test(codec, codeco, 2, 18);
}
@Test
@@ -115,11 +117,15 @@ public void checkDeltaZigzagPacking() {
IntegerCODEC compo = new Composition(
new DeltaZigzagBinaryPacking(),
new VariableByte());
+ IntegerCODEC compo2 = new Composition(
+ new DeltaZigzagBinaryPacking(),
+ new VariableByte());
+
testZeroInZeroOut(compo);
testUnsorted(compo);
- test(compo, 5, 10);
- test(compo, 5, 14);
- test(compo, 2, 18);
+ test(compo, compo2, 5, 10);
+ test(compo, compo2, 5, 14);
+ test(compo, compo2, 2, 18);
}
@Test
@@ -146,9 +152,11 @@ public void checkXorBinaryPacking2() {
public void checkXorBinaryPacking3() {
IntegerCODEC c = new IntegratedComposition(new XorBinaryPacking(),
new IntegratedVariableByte());
- test(c, 5, 10);
- test(c, 5, 14);
- test(c, 2, 18);
+ IntegerCODEC co = new IntegratedComposition(new XorBinaryPacking(),
+ new IntegratedVariableByte());
+ test(c, co, 5, 10);
+ test(c, co, 5, 14);
+ test(c, co, 2, 18);
}
/**
@@ -310,7 +318,7 @@ private static void testZeroInZeroOut(IntegerCODEC c) {
assertEquals(0, outpos.intValue());
}
- private static void test(IntegerCODEC c, int N, int nbr) {
+ private static void test(IntegerCODEC c,IntegerCODEC co, int N, int nbr) {
ClusteredDataGenerator cdg = new ClusteredDataGenerator();
for (int sparsity = 1; sparsity < 31 - nbr; sparsity += 4) {
int[][] data = new int[N][];
@@ -318,7 +326,7 @@ private static void test(IntegerCODEC c, int N, int nbr) {
for (int k = 0; k < N; ++k) {
data[k] = cdg.generateClustered((1 << nbr), max);
}
- testCodec(c, data, max);
+ testCodec(c, co, data, max);
}
}
@@ -332,31 +340,40 @@ private static void test(int N, int nbr) {
}
testCodec(new IntegratedComposition(new IntegratedBinaryPacking(),
- new IntegratedVariableByte()), data, max);
- testCodec(new JustCopy(), data, max);
- testCodec(new VariableByte(), data, max);
- testCodec(new IntegratedVariableByte(), data, max);
+ new IntegratedVariableByte()),new IntegratedComposition(new IntegratedBinaryPacking(),
+ new IntegratedVariableByte()), data, max);
+ testCodec(new JustCopy(),new JustCopy(), data, max);
+ testCodec(new VariableByte(), new VariableByte(), data, max);
+ testCodec(new IntegratedVariableByte(),new IntegratedVariableByte(), data, max);
testCodec(new Composition(new BinaryPacking(), new VariableByte()),
+ new Composition(new BinaryPacking(), new VariableByte()),
data, max);
- testCodec(new Composition(new NewPFD(), new VariableByte()), data,
+ testCodec(new Composition(new NewPFD(), new VariableByte()),
+ new Composition(new NewPFD(), new VariableByte()), data,
max);
testCodec(new Composition(new NewPFDS9(), new VariableByte()),
+ new Composition(new NewPFDS9(), new VariableByte()),
data, max);
testCodec(new Composition(new NewPFDS16(), new VariableByte()),
+ new Composition(new NewPFDS16(), new VariableByte()),
data, max);
- testCodec(new Composition(new OptPFD(), new VariableByte()), data,
+ testCodec(new Composition(new OptPFD(), new VariableByte()),
+ new Composition(new OptPFD(), new VariableByte()),data,
max);
testCodec(new Composition(new OptPFDS9(), new VariableByte()),
+ new Composition(new OptPFDS9(), new VariableByte()),
data, max);
testCodec(new Composition(new OptPFDS16(), new VariableByte()),
+ new Composition(new OptPFDS16(), new VariableByte()),
data, max);
testCodec(new Composition(new FastPFOR(), new VariableByte()),
+ new Composition(new FastPFOR(), new VariableByte()),
data, max);
- testCodec(new Simple9(), data, max);
+ testCodec(new Simple9(),new Simple9(), data, max);
}
}
- private static void testCodec(IntegerCODEC c, int[][] data, int max) {
+ private static void testCodec(IntegerCODEC c,IntegerCODEC co, int[][] data, int max) {
int N = data.length;
int maxlength = 0;
for (int k = 0; k < N; ++k) {
@@ -383,7 +400,7 @@ private static void testCodec(IntegerCODEC c, int[][] data, int max) {
inpos.set(0);
outpos.set(1);
buffer[0] = backupdata[0];
- c.uncompress(dataout, inpos, thiscompsize - 1, buffer, outpos);
+ co.uncompress(dataout, inpos, thiscompsize - 1, buffer, outpos);
if (!(c instanceof IntegratedIntegerCODEC))
Delta.fastinverseDelta(buffer);
@@ -517,5 +534,43 @@ public void testUnsorted3(IntegerCODEC codec) {
compressed.length, recovered, recoffset);
assertArrayEquals(data, recovered);
}
-
+
+
+ @Test
+ public void fastPforTest() {
+ // proposed by Stefan Ackermann (https://github.com/Stivo)
+ FastPFOR codec1 = new FastPFOR();
+ FastPFOR codec2 = new FastPFOR();
+ int N = 128;
+ int[] data = new int[N];
+ for (int i = 0; i < N; i++)
+ data[i] = 0;
+ data[126] = -1;
+ int[] comp = TestUtils.compress(codec1,
+ Arrays.copyOf(data, N));
+ int[] answer = TestUtils.uncompress(codec2, comp, N);
+ for (int k = 0; k < N; ++k)
+ if (answer[k] != data[k])
+ throw new RuntimeException(
+ "bug "+k+" "+answer[k]+" != "+data[k]);
+ }
+ @Test
+ public void ifastPforTest() {
+ // inspired by Stefan Ackermann (https://github.com/Stivo)
+ IntegratedFastPFOR codec1 = new IntegratedFastPFOR();
+ IntegratedFastPFOR codec2 = new IntegratedFastPFOR();
+ int N = 128;
+ int[] data = new int[N];
+ for (int i = 0; i < N; i++)
+ data[i] = 0;
+ data[126] = -1;
+ int[] comp = TestUtils.compress(codec1,
+ Arrays.copyOf(data, N));
+ int[] answer = TestUtils.uncompress(codec2, comp, N);
+ for (int k = 0; k < N; ++k)
+ if (answer[k] != data[k])
+ throw new RuntimeException(
+ "bug "+k+" "+answer[k]+" != "+data[k]);
+ }
+
}
Please sign in to comment.
Something went wrong with that request. Please try again.