Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Putting back the moved files.

  • Loading branch information...
commit d7769049c6b30590e19b834d3083b6268b7b849a 1 parent c93421b
@lemire authored
View
2  build.xml
@@ -14,7 +14,7 @@
<target name="Benchmark" depends="compile">
<java
classpath="target/classes"
- classname="me.lemire.integercompression.${benchmark.target}"
+ classname="me.lemire.integercompression.benchmarktools.${benchmark.target}"
fork="true">
<jvmarg value="-server" />
</java>
View
674 src/main/java/me/lemire/integercompression/benchmarktools/Benchmark.java
@@ -0,0 +1,674 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.integercompression.benchmarktools;
+
+import com.kamikaze.pfordelta.PForDelta;
+
+import me.lemire.integercompression.BinaryPacking;
+import me.lemire.integercompression.ByteIntegerCODEC;
+import me.lemire.integercompression.Composition;
+import me.lemire.integercompression.Delta;
+import me.lemire.integercompression.DeltaZigzagBinaryPacking;
+import me.lemire.integercompression.DeltaZigzagVariableByte;
+import me.lemire.integercompression.FastPFOR;
+import me.lemire.integercompression.IntWrapper;
+import me.lemire.integercompression.IntegerCODEC;
+import me.lemire.integercompression.IntegratedBinaryPacking;
+import me.lemire.integercompression.IntegratedByteIntegerCODEC;
+import me.lemire.integercompression.IntegratedComposition;
+import me.lemire.integercompression.IntegratedFastPFOR;
+import me.lemire.integercompression.IntegratedIntegerCODEC;
+import me.lemire.integercompression.IntegratedVariableByte;
+import me.lemire.integercompression.JustCopy;
+import me.lemire.integercompression.NewPFD;
+import me.lemire.integercompression.NewPFDS16;
+import me.lemire.integercompression.NewPFDS9;
+import me.lemire.integercompression.OptPFD;
+import me.lemire.integercompression.OptPFDS16;
+import me.lemire.integercompression.OptPFDS9;
+import me.lemire.integercompression.Simple9;
+import me.lemire.integercompression.VariableByte;
+import me.lemire.integercompression.XorBinaryPacking;
+import me.lemire.integercompression.synth.ClusteredDataGenerator;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.PrintWriter;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+/**
+ *
+ * Simple class meant to compare the speed of different schemes.
+ *
+ * @author Daniel Lemire
+ *
+ */
+public class Benchmark {
+
    /**
     * Standard benchmark: measures bits per integer and compression /
     * decompression speed of an IntegerCODEC, verifying the round trip.
     *
     * @param csvLog
     *            Writer for CSV log.
     * @param sparsity
     *            sparsity level of the data (only logged to the CSV)
     * @param c
     *            the codec
     * @param data
     *            arrays of input data
     * @param repeat
     *            How many times to repeat the test
     * @param verbose
     *            whether to output result on screen
     */
    private static void testCodec(PrintWriter csvLog, int sparsity,
            IntegerCODEC c, int[][] data, int repeat, boolean verbose) {
        if (verbose) {
            System.out.println("# " + c.toString());
            System.out
                    .println("# bits per int, compress speed (mis), decompression speed (mis) ");
        }

        int N = data.length;

        int totalSize = 0;
        int maxLength = 0;
        for (int k = 0; k < N; ++k) {
            totalSize += data[k].length;
            if (data[k].length > maxLength) {
                maxLength = data[k].length;
            }
        }

        // 4x + 1024 to account for the possibility of some negative
        // compression.
        int[] compressBuffer = new int[4 * maxLength + 1024];
        int[] decompressBuffer = new int[maxLength + 1024];

        // These variables hold time in microseconds (10^-6).
        long compressTime = 0;
        long decompressTime = 0;

        int size = 0;
        IntWrapper inpos = new IntWrapper();
        IntWrapper outpos = new IntWrapper();

        for (int r = 0; r < repeat; ++r) {
            size = 0;
            for (int k = 0; k < N; ++k) {
                // work on a copy: delta coding mutates the array in place
                int[] backupdata = Arrays.copyOf(data[k],
                        data[k].length);

                // compress data.
                long beforeCompress = System.nanoTime() / 1000;
                // the first value is kept uncompressed (restored below),
                // so compression starts at index 1
                inpos.set(1);
                outpos.set(0);
                if (!(c instanceof IntegratedIntegerCODEC)) {
                    // non-integrated codecs need explicit delta coding
                    Delta.delta(backupdata);
                }
                c.compress(backupdata, inpos, backupdata.length
                        - inpos.get(), compressBuffer, outpos);
                long afterCompress = System.nanoTime() / 1000;

                // measure time of compression.
                compressTime += afterCompress - beforeCompress;
                // +1 accounts for the uncompressed leading value
                final int thiscompsize = outpos.get() + 1;
                size += thiscompsize;

                // extract (uncompress) data
                long beforeDecompress = System.nanoTime() / 1000;
                inpos.set(0);
                outpos.set(1);
                decompressBuffer[0] = backupdata[0];
                c.uncompress(compressBuffer, inpos,
                        thiscompsize - 1, decompressBuffer,
                        outpos);
                if (!(c instanceof IntegratedIntegerCODEC))
                    Delta.fastinverseDelta(decompressBuffer);
                long afterDecompress = System.nanoTime() / 1000;

                // measure time of extraction (uncompression).
                decompressTime += afterDecompress
                        - beforeDecompress;
                if (outpos.get() != data[k].length)
                    throw new RuntimeException(
                            "we have a bug (diff length) "
                                    + c + " expected "
                                    + data[k].length
                                    + " got "
                                    + outpos.get());

                // verify: compare original array with
                // compressed and
                // uncompressed.
                for (int m = 0; m < outpos.get(); ++m) {
                    if (decompressBuffer[m] != data[k][m]) {
                        throw new RuntimeException(
                                "we have a bug (actual difference), expected "
                                        + data[k][m]
                                        + " found "
                                        + decompressBuffer[m]
                                        + " at " + m);
                    }
                }
            }
        }

        if (verbose) {
            // "size" counts 32-bit output words, hence the factor 32
            double bitsPerInt = size * 32.0 / totalSize;
            // time is in microseconds, so this yields millions of ints/second
            long compressSpeed = totalSize * repeat
                    / (compressTime);
            long decompressSpeed = totalSize * repeat
                    / (decompressTime);
            System.out.println(String.format(
                    "\t%1$.2f\t%2$d\t%3$d", bitsPerInt,
                    compressSpeed, decompressSpeed));
            csvLog.format("\"%1$s\",%2$d,%3$.2f,%4$d,%5$d\n",
                    c.toString(), sparsity, bitsPerInt,
                    compressSpeed, decompressSpeed);
            csvLog.flush();
        }
    }
+
    /**
     * Standard benchmark for byte-aligned schemes: measures bits per integer
     * and compression / decompression speed of a ByteIntegerCODEC, verifying
     * the round trip.
     *
     * @param csvLog
     *            Writer for CSV log.
     * @param sparsity
     *            sparsity level of the data (only logged to the CSV)
     * @param c
     *            the codec
     * @param data
     *            arrays of input data
     * @param repeat
     *            How many times to repeat the test
     * @param verbose
     *            whether to output result on screen
     */
    private static void testByteCodec(PrintWriter csvLog, int sparsity,
            ByteIntegerCODEC c, int[][] data, int repeat, boolean verbose) {
        if (verbose) {
            System.out.println("# " + c.toString());
            System.out
                    .println("# bits per int, compress speed (mis), decompression speed (mis) ");
        }

        int N = data.length;

        int totalSize = 0;
        int maxLength = 0;
        for (int k = 0; k < N; ++k) {
            totalSize += data[k].length;
            if (data[k].length > maxLength) {
                maxLength = data[k].length;
            }
        }

        // 8 bytes per int (plus slack) allows for negative compression
        byte[] compressBuffer = new byte[8 * maxLength + 1024];
        int[] decompressBuffer = new int[maxLength + 1024];

        // These variables hold time in microseconds (10^-6).
        long compressTime = 0;
        long decompressTime = 0;

        int size = 0;
        IntWrapper inpos = new IntWrapper();
        IntWrapper outpos = new IntWrapper();

        for (int r = 0; r < repeat; ++r) {
            size = 0;
            for (int k = 0; k < N; ++k) {
                // work on a copy: delta coding mutates the array in place
                int[] backupdata = Arrays.copyOf(data[k],
                        data[k].length);

                // compress data.
                long beforeCompress = System.nanoTime() / 1000;
                // the first value is kept uncompressed (restored below),
                // so compression starts at index 1
                inpos.set(1);
                outpos.set(0);
                if (!(c instanceof IntegratedByteIntegerCODEC)) {
                    // non-integrated codecs need explicit delta coding
                    Delta.delta(backupdata);
                }
                c.compress(backupdata, inpos, backupdata.length
                        - inpos.get(), compressBuffer, outpos);
                long afterCompress = System.nanoTime() / 1000;

                // measure time of compression.
                compressTime += afterCompress - beforeCompress;
                // NOTE(review): the uncompressed leading int is counted as
                // a single byte here, slightly undercounting its true size
                final int thiscompsize = outpos.get() + 1;
                size += thiscompsize;

                // extract (uncompress) data
                long beforeDecompress = System.nanoTime() / 1000;
                inpos.set(0);
                outpos.set(1);
                decompressBuffer[0] = backupdata[0];
                c.uncompress(compressBuffer, inpos,
                        thiscompsize - 1, decompressBuffer,
                        outpos);
                if (!(c instanceof IntegratedByteIntegerCODEC))
                    Delta.fastinverseDelta(decompressBuffer);
                long afterDecompress = System.nanoTime() / 1000;

                // measure time of extraction (uncompression).
                decompressTime += afterDecompress
                        - beforeDecompress;
                if (outpos.get() != data[k].length)
                    throw new RuntimeException(
                            "we have a bug (diff length) "
                                    + c + " expected "
                                    + data[k].length
                                    + " got "
                                    + outpos.get());

                // verify: compare original array with
                // compressed and
                // uncompressed.
                for (int m = 0; m < outpos.get(); ++m) {
                    if (decompressBuffer[m] != data[k][m]) {
                        throw new RuntimeException(
                                "we have a bug (actual difference), expected "
                                        + data[k][m]
                                        + " found "
                                        + decompressBuffer[m]
                                        + " at " + m);
                    }
                }
            }
        }

        if (verbose) {
            // "size" counts bytes, hence the factor 8
            double bitsPerInt = size * 8.0 / totalSize;
            // time is in microseconds, so this yields millions of ints/second
            long compressSpeed = totalSize * repeat
                    / (compressTime);
            long decompressSpeed = totalSize * repeat
                    / (decompressTime);
            System.out.println(String.format(
                    "\t%1$.2f\t%2$d\t%3$d", bitsPerInt,
                    compressSpeed, decompressSpeed));
            csvLog.format("\"%1$s\",%2$d,%3$.2f,%4$d,%5$d\n",
                    c.toString(), sparsity, bitsPerInt,
                    compressSpeed, decompressSpeed);
            csvLog.flush();
        }
    }
+
+ /**
+ * Main method.
+ *
+ * @param args
+ * command-line arguments
+ * @throws FileNotFoundException
+ */
+ public static void main(String args[]) throws FileNotFoundException {
+ System.out
+ .println("# benchmark based on the ClusterData model from:");
+ System.out.println("# Vo Ngoc Anh and Alistair Moffat. ");
+ System.out.println("# Index compression using 64-bit words.");
+ System.out
+ .println("# Softw. Pract. Exper.40, 2 (February 2010), 131-147. ");
+ System.out.println();
+
+ PrintWriter writer = null;
+ try {
+ File csvFile = new File(
+ String.format(
+ "benchmark-%1$tY%1$tm%1$tdT%1$tH%1$tM%1$tS.csv",
+ System.currentTimeMillis()));
+ writer = new PrintWriter(csvFile);
+ System.out
+ .println("# Results will be written into a CSV file: "
+ + csvFile.getName());
+ System.out.println();
+ test(writer, 20, 18, 10);
+ System.out.println();
+ System.out
+ .println("Results were written into a CSV file: "
+ + csvFile.getName());
+ } finally {
+ if (writer != null) {
+ writer.close();
+ }
+ }
+ }
+
+ /**
+ * Standard test for the Kamikaze library
+ *
+ * @param data
+ * input data
+ * @param repeat
+ * how many times to repeat
+ * @param verbose
+ * whether to output data on screen
+ */
+ public static void testKamikaze(int[][] data, int repeat,
+ boolean verbose) {
+ DecimalFormat df = new DecimalFormat("0.00");
+ DecimalFormat dfspeed = new DecimalFormat("0");
+ if (verbose)
+ System.out.println("# kamikaze PForDelta");
+ if (verbose)
+ System.out
+ .println("# bits per int, compress speed (mis), decompression speed (mis) ");
+ long bef, aft;
+ String line = "";
+ int N = data.length;
+ int totalsize = 0;
+ int maxlength = 0;
+ for (int k = 0; k < N; ++k) {
+ totalsize += data[k].length;
+ if (data[k].length > maxlength)
+ maxlength = data[k].length;
+ }
+ int[] buffer = new int[maxlength + 1024];
+ /*
+ * 4x + 1024 to account for the possibility of some negative
+ * compression
+ */
+ int size = 0;
+ int comptime = 0;
+ long decomptime = 0;
+ for (int r = 0; r < repeat; ++r) {
+ size = 0;
+ for (int k = 0; k < N; ++k) {
+ int outpos = 0;
+ int[] backupdata = Arrays.copyOf(data[k],
+ data[k].length);
+ //
+ bef = System.nanoTime() / 1000;
+ Delta.delta(backupdata);
+ ArrayList<int[]> dataout = new ArrayList<int[]>(
+ data[k].length / 128);
+ for (int K = 0; K < data[k].length; K += 128) {
+ final int[] compressedbuf = PForDelta
+ .compressOneBlockOpt(Arrays
+ .copyOfRange(
+ backupdata, K,
+ K + 128), 128);
+ dataout.add(compressedbuf);
+ outpos += compressedbuf.length;
+ }
+ aft = System.nanoTime() / 1000;
+ //
+ comptime += aft - bef;
+ final int thiscompsize = outpos;
+ size += thiscompsize;
+ //
+ bef = System.nanoTime() / 1000;
+ // buffer[0] = backupdata[0];
+ ArrayList<int[]> datauncomp = new ArrayList<int[]>(
+ dataout.size());
+ int deltaoffset = 0;
+
+ for (int[] compbuf : dataout) {
+ int[] tmpbuf = new int[128];
+ PForDelta.decompressOneBlock(tmpbuf,
+ compbuf, 128);
+ tmpbuf[0] += deltaoffset;
+ Delta.fastinverseDelta(tmpbuf);
+ deltaoffset = tmpbuf[127];
+ datauncomp.add(tmpbuf);
+ }
+ aft = System.nanoTime() / 1000;
+ //
+ decomptime += aft - bef;
+ if (datauncomp.size() * 128 != data[k].length)
+ throw new RuntimeException(
+ "we have a bug (diff length) "
+ + " expected "
+ + data[k].length
+ + " got "
+ + datauncomp.size()
+ * 128);
+ for (int m = 0; m < data[k].length; ++m)
+ if (datauncomp.get(m / 128)[m % 128] != data[k][m]) {
+ throw new RuntimeException(
+ "we have a bug (actual difference), expected "
+ + data[k][m]
+ + " found "
+ + buffer[m]
+ + " at " + m);
+ }
+
+ }
+ }
+
+ line += "\t" + df.format(size * 32.0 / totalsize);
+ line += "\t" + dfspeed.format(totalsize * repeat / (comptime));
+ line += "\t"
+ + dfspeed.format(totalsize * repeat / (decomptime));
+ if (verbose)
+ System.out.println(line);
+ }
+
+ /**
+ * Generate test data.
+ *
+ * @param N
+ * How many input arrays to generate
+ * @param nbr
+ * How big (in log2) should the arrays be
+ * @param sparsity
+ * How sparse test data generated
+ */
+ private static int[][] generateTestData(ClusteredDataGenerator dataGen,
+ int N, int nbr, int sparsity) {
+ final int[][] data = new int[N][];
+ final int dataSize = (1 << (nbr + sparsity));
+ for (int i = 0; i < N; ++i) {
+ data[i] = dataGen.generateClustered((1 << nbr),
+ dataSize);
+ }
+ return data;
+ }
+
+ /**
+ * Generates data and calls other tests.
+ *
+ * @param csvLog
+ * Writer for CSV log.
+ * @param N
+ * How many input arrays to generate
+ * @param nbr
+ * how big (in log2) should the arrays be
+ * @param repeat
+ * How many times should we repeat tests.
+ */
+ private static void test(PrintWriter csvLog, int N, int nbr, int repeat) {
+ csvLog.format("\"Algorithm\",\"Sparsity\",\"Bits per int\",\"Compress speed (MiS)\",\"Decompress speed (MiS)\"\n");
+ ClusteredDataGenerator cdg = new ClusteredDataGenerator();
+ final int max_sparsity = 31 - nbr;
+ for (int sparsity = 1; sparsity < max_sparsity; ++sparsity) {
+ System.out.println("# sparsity " + sparsity);
+ System.out.println("# generating random data...");
+ int[][] data = generateTestData(cdg, N, nbr, sparsity);
+ System.out.println("# generating random data... ok.");
+
+ // TODO: support CSV log output.
+ testKamikaze(data, repeat, false);
+ testKamikaze(data, repeat, false);
+ testKamikaze(data, repeat, true);
+ System.out.println();
+
+ testCodec(csvLog, sparsity, new IntegratedComposition(
+ new IntegratedBinaryPacking(),
+ new IntegratedVariableByte()), data, repeat,
+ false);
+ testCodec(csvLog, sparsity, new IntegratedComposition(
+ new IntegratedBinaryPacking(),
+ new IntegratedVariableByte()), data, repeat,
+ false);
+ testCodec(csvLog, sparsity, new IntegratedComposition(
+ new IntegratedBinaryPacking(),
+ new IntegratedVariableByte()), data, repeat,
+ true);
+ System.out.println();
+
+ testCodec(csvLog, sparsity, new JustCopy(), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new JustCopy(), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new JustCopy(), data,
+ repeat, true);
+ System.out.println();
+
+ testByteCodec(csvLog, sparsity, new VariableByte(),
+ data, repeat, false);
+ testByteCodec(csvLog, sparsity, new VariableByte(),
+ data, repeat, false);
+ testByteCodec(csvLog, sparsity, new VariableByte(),
+ data, repeat, true);
+ System.out.println();
+
+ testByteCodec(csvLog, sparsity,
+ new IntegratedVariableByte(), data, repeat,
+ false);
+ testByteCodec(csvLog, sparsity,
+ new IntegratedVariableByte(), data, repeat,
+ false);
+ testByteCodec(csvLog, sparsity,
+ new IntegratedVariableByte(), data, repeat,
+ true);
+ System.out.println();
+
+ testCodec(csvLog, sparsity, new Composition(
+ new BinaryPacking(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new BinaryPacking(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new BinaryPacking(), new VariableByte()), data,
+ repeat, true);
+ System.out.println();
+
+ testCodec(csvLog, sparsity, new Composition(
+ new NewPFD(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new NewPFD(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new NewPFD(), new VariableByte()), data,
+ repeat, true);
+ System.out.println();
+
+ testCodec(csvLog, sparsity, new Composition(
+ new NewPFDS9(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new NewPFDS9(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new NewPFDS9(), new VariableByte()), data,
+ repeat, true);
+ System.out.println();
+
+ testCodec(csvLog, sparsity, new Composition(
+ new NewPFDS16(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new NewPFDS16(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new NewPFDS16(), new VariableByte()), data,
+ repeat, true);
+ System.out.println();
+
+ testCodec(csvLog, sparsity, new Composition(
+ new OptPFD(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new OptPFD(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new OptPFD(), new VariableByte()), data,
+ repeat, true);
+ System.out.println();
+
+ testCodec(csvLog, sparsity, new Composition(
+ new OptPFDS9(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new OptPFDS9(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new OptPFDS9(), new VariableByte()), data,
+ repeat, true);
+ System.out.println();
+
+ testCodec(csvLog, sparsity, new Composition(
+ new OptPFDS16(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new OptPFDS16(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new OptPFDS16(), new VariableByte()), data,
+ repeat, true);
+ System.out.println();
+
+ {
+ IntegerCODEC c = new IntegratedComposition(
+ new IntegratedFastPFOR(),
+ new IntegratedVariableByte());
+ testCodec(csvLog, sparsity, c, data, repeat,
+ false);
+ testCodec(csvLog, sparsity, c, data, repeat,
+ false);
+ testCodec(csvLog, sparsity, c, data, repeat,
+ true);
+ System.out.println();
+ }
+
+ testCodec(csvLog, sparsity, new Composition(
+ new FastPFOR(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new FastPFOR(), new VariableByte()), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Composition(
+ new FastPFOR(), new VariableByte()), data,
+ repeat, true);
+ System.out.println();
+
+ testCodec(csvLog, sparsity, new Simple9(), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Simple9(), data,
+ repeat, false);
+ testCodec(csvLog, sparsity, new Simple9(), data,
+ repeat, true);
+ System.out.println();
+
+ {
+ IntegerCODEC c = new Composition(
+ new XorBinaryPacking(),
+ new VariableByte());
+ testCodec(csvLog, sparsity, c, data, repeat,
+ false);
+ testCodec(csvLog, sparsity, c, data, repeat,
+ false);
+ testCodec(csvLog, sparsity, c, data, repeat,
+ true);
+ System.out.println();
+ }
+
+ {
+ IntegerCODEC c = new Composition(
+ new DeltaZigzagBinaryPacking(),
+ new DeltaZigzagVariableByte());
+ testCodec(csvLog, sparsity, c, data, repeat,
+ false);
+ testCodec(csvLog, sparsity, c, data, repeat,
+ false);
+ testCodec(csvLog, sparsity, c, data, repeat,
+ true);
+ System.out.println();
+ }
+
+ }
+ }
+}
View
154 src/main/java/me/lemire/integercompression/benchmarktools/BenchmarkBitPacking.java
@@ -0,0 +1,154 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.integercompression.benchmarktools;
+
+import java.text.DecimalFormat;
+import java.util.Arrays;
+import java.util.Random;
+
+import me.lemire.integercompression.BitPacking;
+import me.lemire.integercompression.Delta;
+import me.lemire.integercompression.IntegratedBitPacking;
+
+/**
+ * Class used to benchmark the speed of bit packing. (For expert use.)
+ *
+ * @author Daniel Lemire
+ *
+ */
+public class BenchmarkBitPacking {
+
    /**
     * Benchmarks raw bit packing/unpacking for each bit width 0..30,
     * printing pack, pack-without-mask and unpack speeds (millions of
     * integers per second) when verbose.
     */
    private static void test(boolean verbose) {
        DecimalFormat dfspeed = new DecimalFormat("0");
        final int N = 32; // bit packing operates on blocks of 32 integers
        final int times = 100000;
        Random r = new Random(0);
        int[] data = new int[N];
        int[] compressed = new int[N];
        int[] uncompressed = new int[N];
        for (int bit = 0; bit < 31; ++bit) {
            // accumulated times, in nanoseconds
            long comp = 0;
            long compwm = 0;
            long decomp = 0;
            for (int t = 0; t < times; ++t) {
                for (int k = 0; k < N; ++k) {
                    // each value fits in "bit" bits
                    data[k] = r.nextInt(1 << bit);
                }
                long time1 = System.nanoTime();
                BitPacking
                        .fastpack(data, 0, compressed, 0, bit);
                long time2 = System.nanoTime();
                BitPacking.fastpackwithoutmask(data, 0,
                        compressed, 0, bit);
                long time3 = System.nanoTime();
                BitPacking.fastunpack(compressed, 0,
                        uncompressed, 0, bit);
                long time4 = System.nanoTime();
                comp += time2 - time1;
                compwm += time3 - time2;
                decomp += time4 - time3;
            }
            if (verbose)
                // N*times ints over "comp" ns => millions of ints per second
                System.out.println("bit = "
                        + bit
                        + " comp. speed = "
                        + dfspeed.format(N * times * 1000.0
                                / (comp))
                        + " comp. speed wm = "
                        + dfspeed.format(N * times * 1000.0
                                / (compwm))
                        + " decomp. speed = "
                        + dfspeed.format(N * times * 1000.0
                                / (decomp)));
        }
    }
+
    /**
     * Benchmarks delta coding followed by bit packing against the fused
     * "integrated" pack/unpack routines, for bit widths 1..30, checking
     * that both produce identical compressed output and correct round
     * trips.
     */
    private static void testWithDeltas(boolean verbose) {
        DecimalFormat dfspeed = new DecimalFormat("0");
        final int N = 32; // bit packing operates on blocks of 32 integers
        final int times = 100000;
        Random r = new Random(0);
        int[] data = new int[N];
        int[] compressed = new int[N];
        int[] icompressed = new int[N];
        int[] uncompressed = new int[N];
        for (int bit = 1; bit < 31; ++bit) {
            long comp = 0;
            long decomp = 0;
            long icomp = 0;
            long idecomp = 0;
            for (int t = 0; t < times; ++t) {
                // build a non-decreasing sequence whose deltas fit in
                // "bit" bits
                data[0] = r.nextInt(1 << bit);
                for (int k = 1; k < N; ++k) {
                    data[k] = r.nextInt(1 << bit)
                            + data[k - 1];
                }
                // copy, since delta coding mutates in place
                int[] tmpdata = Arrays
                        .copyOf(data, data.length);
                long time1 = System.nanoTime();
                Delta.delta(tmpdata);
                BitPacking.fastpackwithoutmask(tmpdata, 0,
                        compressed, 0, bit);
                long time2 = System.nanoTime();
                BitPacking.fastunpack(compressed, 0,
                        uncompressed, 0, bit);
                Delta.fastinverseDelta(uncompressed);
                long time3 = System.nanoTime();
                if (!Arrays.equals(data, uncompressed))
                    throw new RuntimeException("bug");
                comp += time2 - time1;
                decomp += time3 - time2;
                // now the integrated (fused delta+pack) variant
                tmpdata = Arrays.copyOf(data, data.length);
                time1 = System.nanoTime();
                IntegratedBitPacking.integratedpack(0, tmpdata,
                        0, icompressed, 0, bit);
                time2 = System.nanoTime();
                IntegratedBitPacking.integratedunpack(0,
                        icompressed, 0, uncompressed, 0, bit);
                time3 = System.nanoTime();
                // both paths must produce bit-identical compressed output
                if (!Arrays.equals(icompressed, compressed))
                    throw new RuntimeException("ibug "
                            + bit);
                if (!Arrays.equals(data, uncompressed))
                    throw new RuntimeException("bug " + bit);
                icomp += time2 - time1;
                idecomp += time3 - time2;
            }
            if (verbose)
                // N*times ints over the accumulated ns => millions of
                // ints per second
                System.out.println("bit = "
                        + bit
                        + " comp. speed = "
                        + dfspeed.format(N * times * 1000.0
                                / (comp))
                        + " decomp. speed = "
                        + dfspeed.format(N * times * 1000.0
                                / (decomp))
                        + " icomp. speed = "
                        + dfspeed.format(N * times * 1000.0
                                / (icomp))
                        + " idecomp. speed = "
                        + dfspeed.format(N * times * 1000.0
                                / (idecomp)));
        }
    }
+
+ /**
+ * Main method
+ *
+ * @param args
+ * command-line arguments
+ */
+ public static void main(String[] args) {
+ System.out.println("Testing packing and delta ");
+ testWithDeltas(false);
+ testWithDeltas(true);
+ System.out.println("Testing packing alone ");
+ test(false);
+ test(true);
+ }
+
+}
View
294 src/main/java/me/lemire/integercompression/benchmarktools/BenchmarkOffsettedSeries.java
@@ -0,0 +1,294 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ */
+package me.lemire.integercompression.benchmarktools;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.util.Arrays;
+import java.util.Random;
+
+import me.lemire.integercompression.BinaryPacking;
+import me.lemire.integercompression.DeltaZigzagBinaryPacking;
+import me.lemire.integercompression.DeltaZigzagVariableByte;
+import me.lemire.integercompression.FastPFOR;
+import me.lemire.integercompression.IntWrapper;
+import me.lemire.integercompression.IntegerCODEC;
+import me.lemire.integercompression.IntegratedBinaryPacking;
+import me.lemire.integercompression.JustCopy;
+import me.lemire.integercompression.XorBinaryPacking;
+
+/**
+ * Simple synthetic benchmark
+ *
+ */
+public class BenchmarkOffsettedSeries {
+ private static final int DEFAULT_MEAN = 1 << 20;
+ private static final int DEFAULT_RANGE = 1 << 10;
+ private static final int DEFAULT_REPEAT = 5;
+ private static final int DEFAULT_WARMUP = 2;
+
+
+ /**
+ * Run benchmark.
+ *
+ * @param csvWriter
+ * Write for results in CSV.
+ * @param count
+ * Count of data chunks.
+ * @param length
+ * Length of a data chunk.
+ */
+ public static void run(final PrintWriter csvWriter, final int count, final int length) {
+ IntegerCODEC[] codecs = { new JustCopy(), new BinaryPacking(),
+ new DeltaZigzagBinaryPacking(),
+ new DeltaZigzagVariableByte(),
+ new IntegratedBinaryPacking(), new XorBinaryPacking(),
+ new FastPFOR(), };
+
+ csvWriter
+ .format("\"Dataset\",\"CODEC\",\"Bits per int\","
+ + "\"Compress speed (MiS)\",\"Decompress speed (MiS)\"\n");
+
+ benchmark(csvWriter, codecs, count, length, DEFAULT_MEAN,
+ DEFAULT_RANGE);
+ benchmark(csvWriter, codecs, count, length, DEFAULT_MEAN >> 5,
+ DEFAULT_RANGE);
+
+ IntegerCODEC[] codecs2 = { new JustCopy(), new BinaryPacking(),
+ new DeltaZigzagBinaryPacking(),
+ new DeltaZigzagVariableByte(),
+ new IntegratedBinaryPacking(), new XorBinaryPacking(),
+ new FastPFOR(), };
+
+ int freq = length / 4;
+ benchmarkSine(csvWriter, codecs2, count, length,
+ DEFAULT_MEAN >> 0, DEFAULT_RANGE >> 0, freq);
+ benchmarkSine(csvWriter, codecs2, count, length,
+ DEFAULT_MEAN >> 5, DEFAULT_RANGE >> 0, freq);
+ benchmarkSine(csvWriter, codecs2, count, length,
+ DEFAULT_MEAN >> 10, DEFAULT_RANGE >> 0, freq);
+ benchmarkSine(csvWriter, codecs2, count, length,
+ DEFAULT_MEAN >> 0, DEFAULT_RANGE >> 2, freq);
+ benchmarkSine(csvWriter, codecs2, count, length,
+ DEFAULT_MEAN >> 5, DEFAULT_RANGE >> 2, freq);
+ benchmarkSine(csvWriter, codecs2, count, length,
+ DEFAULT_MEAN >> 10, DEFAULT_RANGE >> 2, freq);
+ benchmarkSine(csvWriter, codecs2, count, length,
+ DEFAULT_MEAN >> 0, DEFAULT_RANGE >> 4, freq);
+ benchmarkSine(csvWriter, codecs2, count, length,
+ DEFAULT_MEAN >> 5, DEFAULT_RANGE >> 4, freq);
+ benchmarkSine(csvWriter, codecs2, count, length,
+ DEFAULT_MEAN >> 10, DEFAULT_RANGE >> 4, freq);
+ }
+
+ private static void benchmarkSine(final PrintWriter csvWriter,
+ final IntegerCODEC[] codecs, final int count, final int length, final int mean,
+ final int range, final int freq) {
+ String dataProp = String.format(
+ "(mean=%1$d range=%2$d freq=%2$d)", mean, range, freq);
+ int[][] data = generateSineDataChunks(0, count, length, mean,
+ range, freq);
+ benchmark(csvWriter, "Sine " + dataProp, codecs, data,
+ DEFAULT_REPEAT, DEFAULT_WARMUP);
+ benchmark(csvWriter, "Sine+delta " + dataProp, codecs, data,
+ DEFAULT_REPEAT, DEFAULT_WARMUP);
+ }
+
+ private static void benchmark(final PrintWriter csvWriter, final IntegerCODEC[] codecs,
+ final int count, final int length, final int mean, final int range) {
+ String dataProp = String.format("(mean=%1$d range=%2$d)", mean,
+ range);
+
+ int[][] randData = generateDataChunks(0, count, length, mean,
+ range);
+ int[][] deltaData = deltaDataChunks(randData);
+ int[][] sortedData = sortDataChunks(randData);
+ int[][] sortedDeltaData = deltaDataChunks(sortedData);
+
+ benchmark(csvWriter, "Random " + dataProp, codecs, randData,
+ DEFAULT_REPEAT, DEFAULT_WARMUP);
+ benchmark(csvWriter, "Random+delta " + dataProp, codecs,
+ deltaData, DEFAULT_REPEAT, DEFAULT_WARMUP);
+ benchmark(csvWriter, "Sorted " + dataProp, codecs, sortedData,
+ DEFAULT_REPEAT, DEFAULT_WARMUP);
+ benchmark(csvWriter, "Sorted+delta " + dataProp, codecs,
+ sortedDeltaData, DEFAULT_REPEAT, DEFAULT_WARMUP);
+ }
+
+ private static void benchmark(final PrintWriter csvWriter, final String dataName,
+ final IntegerCODEC[] codecs, final int[][] data, final int repeat, final int warmup) {
+ System.out.println("Processing: " + dataName);
+ for (IntegerCODEC codec : codecs) {
+ String codecName = codec.toString();
+ for (int i = 0; i < warmup; ++i) {
+ benchmark(null, null, null, codec, data, repeat);
+ }
+ benchmark(csvWriter, dataName, codecName, codec, data,
+ repeat);
+ }
+ }
+
    /**
     * Runs one measured benchmark of a codec over all data chunks,
     * verifying every round trip, and — when csvWriter is non-null —
     * appends one CSV row with the results. Warm-up rounds pass null.
     */
    private static void benchmark(PrintWriter csvWriter, String dataName,
            String codecName, IntegerCODEC codec, int[][] data, int repeat) {
        PerformanceLogger logger = new PerformanceLogger();

        int maxLen = getMaxLen(data);
        // 4x + 1024 leaves room for possible negative compression
        int[] compressBuffer = new int[4 * maxLen + 1024];
        int[] decompressBuffer = new int[maxLen];

        for (int i = 0; i < repeat; ++i) {
            for (int[] array : data) {
                int compSize = compress(logger, codec, array,
                        compressBuffer);
                int decompSize = decompress(logger, codec,
                        compressBuffer, compSize,
                        decompressBuffer);
                // round-trip verification against the original array
                checkArray(array, decompressBuffer, decompSize,
                        codec);
            }
        }

        if (csvWriter != null) {
            csvWriter.format(
                    "\"%1$s\",\"%2$s\",%3$.2f,%4$.0f,%5$.0f\n",
                    dataName, codecName, logger.getBitPerInt(),
                    logger.getCompressSpeed(),
                    logger.getDecompressSpeed());
        }
    }
+
+ private static void checkArray(int[] expected, int[] actualArray,
+ int actualLen, IntegerCODEC codec) {
+ if (actualLen != expected.length) {
+ throw new RuntimeException("Length mismatch:"
+ + " expected=" + expected.length + " actual="
+ + actualLen + " codec=" + codec.toString());
+ }
+ for (int i = 0; i < expected.length; ++i) {
+ if (actualArray[i] != expected[i]) {
+ throw new RuntimeException("Value mismatch: "
+ + " where=" + i + " expected="
+ + expected[i] + " actual="
+ + actualArray[i] + " codec="
+ + codec.toString());
+ }
+ }
+ }
+
+ private static int compress(PerformanceLogger logger, IntegerCODEC codec,
+ int[] src, int[] dst) {
+ IntWrapper inpos = new IntWrapper();
+ IntWrapper outpos = new IntWrapper();
+ logger.compressionTimer.start();
+ codec.compress(src, inpos, src.length, dst, outpos);
+ logger.compressionTimer.end();
+ int outSize = outpos.get();
+ logger.addOriginalSize(src.length);
+ logger.addCompressedSize(outSize);
+ return outSize;
+ }
+
+ private static int decompress(PerformanceLogger logger, IntegerCODEC codec,
+ int[] src, int srcLen, int[] dst) {
+ IntWrapper inpos = new IntWrapper();
+ IntWrapper outpos = new IntWrapper();
+ logger.decompressionTimer.start();
+ codec.uncompress(src, inpos, srcLen, dst, outpos);
+ logger.decompressionTimer.end();
+ return outpos.get();
+ }
+
+ private static int getMaxLen(int[][] data) {
+ int maxLen = 0;
+ for (int[] array : data) {
+ if (array.length > maxLen) {
+ maxLen = array.length;
+ }
+ }
+ return maxLen;
+ }
+
+ private static int[][] generateSineDataChunks(long seed, int count,
+ int length, int mean, int range, int freq) {
+ int[][] chunks = new int[count][];
+ Random r = new Random(seed);
+ for (int i = 0; i < count; ++i) {
+ int[] chunk = chunks[i] = new int[length];
+ int phase = r.nextInt(2 * freq);
+ for (int j = 0; j < length; ++j) {
+ double angle = 2.0 * Math.PI * (j + phase)
+ / freq;
+ chunk[j] = (int) (mean + Math.sin(angle)
+ * range);
+ }
+ }
+ return chunks;
+ }
+
+ private static int[][] generateDataChunks(long seed, int count,
+ int length, int mean, int range) {
+ int offset = mean - range / 2;
+ int[][] chunks = new int[count][];
+ Random r = new Random(seed);
+ for (int i = 0; i < count; ++i) {
+ int[] chunk = chunks[i] = new int[length];
+ for (int j = 0; j < length; ++j) {
+ chunk[j] = r.nextInt(range) + offset;
+ }
+ }
+ return chunks;
+ }
+
+ private static int[][] deltaDataChunks(int[][] src) {
+ int[][] dst = new int[src.length][];
+ for (int i = 0; i < src.length; ++i) {
+ int[] s = src[i];
+ int[] d = dst[i] = new int[s.length];
+ int prev = 0;
+ for (int j = 0; j < s.length; ++j) {
+ d[j] = s[j] - prev;
+ prev = s[j];
+ }
+ }
+ return dst;
+ }
+
+ private static int[][] sortDataChunks(int[][] src) {
+ int[][] dst = new int[src.length][];
+ for (int i = 0; i < src.length; ++i) {
+ dst[i] = Arrays.copyOf(src[i], src[i].length);
+ Arrays.sort(dst[i]);
+ }
+ return dst;
+ }
+
+ /**
+ * @param args
+ * @throws Exception
+ */
+ public static void main(final String[] args) throws Exception {
+ File csvFile = new File(
+ String.format(
+ "benchmark-offsetted-%1$tY%1$tm%1$tdT%1$tH%1$tM%1$tS.csv",
+ System.currentTimeMillis()));
+ PrintWriter writer = null;
+ try {
+ writer = new PrintWriter(csvFile);
+ System.out
+ .println("# Results will be written into a CSV file: "
+ + csvFile.getName());
+ System.out.println();
+ BenchmarkOffsettedSeries.run(writer, 8 * 1024, 1280);
+ System.out.println();
+ System.out
+ .println("# Results were written into a CSV file: "
+ + csvFile.getName());
+ } finally {
+ if (writer != null) {
+ writer.close();
+ }
+ }
+ }
+}
View
75 src/main/java/me/lemire/integercompression/benchmarktools/PerformanceLogger.java
@@ -0,0 +1,75 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ */
+package me.lemire.integercompression.benchmarktools;
+
+/**
+ * PerformanceLogger for IntegerCODEC.
+ *
+ * @author MURAOKA Taro http://github.com/koron
+ */
+public final class PerformanceLogger {
+ static class Timer {
+ private long startNano;
+ private long duration = 0;
+
+ public Timer() {
+ }
+
+ public void start() {
+ this.startNano = System.nanoTime();
+ }
+
+ public long end() {
+ return this.duration += System.nanoTime()
+ - this.startNano;
+ }
+
+ public long getDuration() {
+ return this.duration;
+ }
+ }
+
+ final Timer compressionTimer = new Timer();
+
+ final Timer decompressionTimer = new Timer();
+
+ private long originalSize = 0;
+
+ private long compressedSize = 0;
+
+ long addOriginalSize(long value) {
+ return this.originalSize += value;
+ }
+
+ long addCompressedSize(long value) {
+ return this.compressedSize += value;
+ }
+
+ long getOriginalSize() {
+ return this.originalSize;
+ }
+
+ long getCompressedSize() {
+ return this.compressedSize;
+ }
+
+ double getBitPerInt() {
+ return this.compressedSize * 32.0 / this.originalSize;
+ }
+
+ private static double getMiS(long size, long nanoTime) {
+ return (size * 1e-6) / (nanoTime * 1.0e-9);
+ }
+
+ double getCompressSpeed() {
+ return getMiS(this.originalSize,
+ this.compressionTimer.getDuration());
+ }
+
+ double getDecompressSpeed() {
+ return getMiS(this.originalSize,
+ this.decompressionTimer.getDuration());
+ }
+}
Please sign in to comment.
Something went wrong with that request. Please try again.