Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Implemented compression - for now, the 'Prefix-copy' and 'Offset-copy' chunk methods.
  • Loading branch information...
commit 811c0f57ab781206e8e18e9e5b8aca07991032eb 1 parent 15a5eba
@eriksoe eriksoe authored
View
162 src/main/java/com/trifork/deltazip/DeltaZip.java
@@ -8,9 +8,12 @@
import java.nio.channels.WritableByteChannel;
import java.nio.channels.Channels;
import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
import java.io.OutputStream;
import java.io.IOException;
+import java.util.ArrayList;
+
public class DeltaZip {
//==================== Constants =======================================
@@ -65,7 +68,8 @@ public DeltaZip(Access access) throws IOException {
/** Get the revision pointed to by the cursor. */
public ByteBuffer get() {
- return exposed_current_version;
+ // duplicate() yields a buffer with its own position/limit/mark that shares
+ // the underlying content, so a caller reading from the result does not
+ // move the cursor of the buffer held in this object.
+ return (exposed_current_version==null) ? null
+ : exposed_current_version.duplicate();
}
/** Tells whether there are older revisions. */
@@ -95,7 +99,7 @@ public AppendSpecification add(ByteBuffer new_version) throws IOException {
}
pack_uncompressed(new_version, baos);
- new_version.position(save_pos);
+ new_version.position(save_pos); // Restore as-was.
return new AppendSpecification(current_pos, baos.toByteArray());
}
@@ -184,7 +188,48 @@ public void compress(ByteBuffer org, byte[] ref_data, OutputStream dst) {
private static final int CHUNK_METHOD_OFFSET_COPY = 2;
public void compress(ByteBuffer org, byte[] ref_data, OutputStream dst) {
- //TODO
+ try {
+ ArrayList<ChunkOption> chunk_options = new ArrayList<ChunkOption>();
+ DataOutputStream dos = new DataOutputStream(dst);
+
+ int ref_data_offset = 0;
+ while (org.hasRemaining()) {
+ System.err.println("DB| Chunking from ("+org.position()+","+ref_data_offset+")");
+ System.err.println("DB| Remaining: ("+org.remaining()+","+(ref_data.length - ref_data_offset)+")");
+ chunk_options.clear();
+
+ int save_pos = org.position();
+ addIfApplicable(chunk_options, PrefixChunkOption.create(org, ref_data, ref_data_offset));
+ org.position(save_pos);
+ addIfApplicable(chunk_options, SuffixChunkOption.create(org, ref_data, ref_data_offset));
+ org.position(save_pos);
+
+ ChunkOption chunk_option = findBestCandidate(chunk_options);
+ chunk_option.write(dos);
+
+ System.err.println("DB| setting pos: "+org.position()+", "+org.limit()+", "+(save_pos + chunk_option.uncomp_size));
+ org.position(save_pos + chunk_option.uncomp_size);
+ ref_data_offset += chunk_option.rskip;
+ }
+ } catch (IOException ioe) {throw new RuntimeException(ioe);}
+ }
+
+ /** Appends {@code option} to {@code list}; a null {@code option} is ignored. */
+ protected static void addIfApplicable(ArrayList<ChunkOption> list, ChunkOption option) {
+     if (option == null) {
+         return;
+     }
+     list.add(option);
+ }
+
+ protected static ChunkOption findBestCandidate(Iterable<ChunkOption> chunk_options) {
+ double best_ratio = Double.MAX_VALUE;
+ ChunkOption best_candidate = null;
+ for (ChunkOption co : chunk_options) {
+ double candidate_ratio = co.ratio();
+ if (candidate_ratio < best_ratio) {
+ best_candidate = co;
+ best_ratio = candidate_ratio;
+ }
+ }
+ System.err.println("DB| choosing chunk option "+best_candidate+" with ratio "+best_ratio);
+ return best_candidate;
}
public byte[] uncompress(ByteBuffer org, byte[] ref_data, Inflater inflater) throws IOException {
@@ -256,7 +301,116 @@ protected static void inflate(Inflater inflater, ByteBuffer src, int comp_length
channel.write(src2);
ios.finish();
}
- }
+
+ /**
+  * One candidate way of encoding the next stretch of data as a single chunk.
+  * Subclasses supply the chunk method code and the chunk payload.
+  */
+ static abstract class ChunkOption {
+     /** Payload size in bytes, and number of data bytes the chunk reproduces. */
+     public final int comp_size, uncomp_size;
+     /** Number of reference-data bytes to advance past after this chunk. */
+     public final int rskip;
+
+     public ChunkOption(int comp_size, int uncomp_size, int rskip) {
+         this.comp_size = comp_size;
+         this.uncomp_size = uncomp_size;
+         this.rskip = rskip;
+     }
+
+     /**
+      * Cost of this option: compressed bytes (plus a fixed per-chunk penalty)
+      * per uncompressed byte. Lower is better.
+      */
+     public double ratio() {
+         final int OVERHEAD_PENALTY_BYTES = 30;
+         // Divide in floating point. Integer division truncated the result, so
+         // every chunk longer than the numerator scored 0 and options of very
+         // different coverage became indistinguishable.
+         return (comp_size + OVERHEAD_PENALTY_BYTES) / (double) uncomp_size;
+     }
+
+     /**
+      * Writes the chunk: one byte of method code, the payload size as an
+      * unsigned 16-bit value, then the payload itself.
+      */
+     public final void write(DataOutputStream dos) throws IOException {
+         dos.write(chunkMethod());
+         dos.writeChar(comp_size); // writeChar emits the low 16 bits, high byte first.
+         writeCompData(dos);
+     }
+
+     /** Method code byte written at the start of the chunk. */
+     public abstract int chunkMethod();
+     /** Writes the chunk payload; expected to emit {@code comp_size} bytes. */
+     public abstract void writeCompData(DataOutputStream dos) throws IOException;
+ }
+
+ /**
+  * Chunk option covering the run of bytes that the data (from its current
+  * position) has in common with the reference data (from the given offset).
+  */
+ static class PrefixChunkOption extends ChunkOption {
+     static final int SIZE_LIMIT = (1<<16);
+
+     /**
+      * Measures the common prefix, capped at {@code SIZE_LIMIT}, using absolute
+      * reads so the position of {@code data} is not moved.
+      *
+      * @return the option, or null when not even the first byte matches
+      */
+     public static PrefixChunkOption create(ByteBuffer data, byte[] ref_data, int ref_data_offset) {
+         final int base = data.position();
+         final int max_len = Math.min(SIZE_LIMIT,
+                                      Math.min(data.remaining(), ref_data.length - ref_data_offset));
+
+         if (max_len > 0) {
+             System.err.println("PrefixChunkOption.create(): first data byte is "+data.get(base));
+             System.err.println("PrefixChunkOption.create(): first ref byte is "+ref_data[ref_data_offset]);
+         }
+
+         int matched = 0;
+         for (; matched < max_len; matched++) {
+             if (data.get(base + matched) != ref_data[ref_data_offset + matched]) {
+                 break;
+             }
+         }
+
+         if (matched <= 0) {
+             return null;
+         }
+         return new PrefixChunkOption(matched);
+     }
+
+     public PrefixChunkOption(int prefix_length) {
+         // The payload is a single 16-bit length field: 2 bytes.
+         super(2, prefix_length, prefix_length);
+     }
+
+     @Override
+     public int chunkMethod() {
+         return CHUNK_METHOD_PREFIX_COPY << 3;
+     }
+
+     /** Writes the prefix length minus one as a 16-bit value. */
+     @Override
+     public void writeCompData(DataOutputStream dos) throws IOException {
+         dos.writeShort((short)(this.uncomp_size - 1));
+     }
+
+     @Override
+     public String toString() {
+         return "PrefixChunkOption("+uncomp_size+")";
+     }
+ }
+
+ /**
+  * Chunk option for the case where all of the remaining data equals the tail
+  * of the reference data: skip {@code offset} reference bytes, then copy.
+  */
+ static class SuffixChunkOption extends ChunkOption {
+     static final int SIZE_LIMIT = (1<<16);
+
+     /**
+      * Compares data and reference backwards from their ends, using absolute
+      * reads so the position of {@code data} is not moved.
+      *
+      * @return the option, or null when the offset is not in 1..SIZE_LIMIT,
+      *         when the match does not reach back to the current data
+      *         position, or when there is no data left
+      */
+     public static SuffixChunkOption create(ByteBuffer data, byte[] ref_data, int ref_data_offset) {
+         final int data_end = data.limit();
+         final int data_left = data.remaining();
+         final int ref_left = ref_data.length - ref_data_offset;
+
+         // How many reference bytes precede the part that lines up with the data.
+         final int offset = ref_left - data_left;
+         if (offset <= 0) return null;
+         if (offset > SIZE_LIMIT) return null;
+
+         final int max_len = Math.min(data_left, ref_left);
+         int matched = 0;
+         for (; matched < max_len; matched++) {
+             byte from_data = data.get(data_end - 1 - matched);
+             byte from_ref = ref_data[ref_data.length - 1 - matched];
+             if (from_data != from_ref) {
+                 break;
+             }
+         }
+
+         if (matched < data_left) return null; // First part of data is not covered.
+
+         final int suffix_length = Math.min(matched, SIZE_LIMIT);
+         if (suffix_length <= 0) {
+             return null;
+         }
+         return new SuffixChunkOption(offset, suffix_length);
+     }
+
+     public SuffixChunkOption(int offset, int suffix_length) {
+         // The payload is two 16-bit fields (offset and length): 4 bytes.
+         super(4, suffix_length, offset+suffix_length);
+     }
+
+     @Override
+     public int chunkMethod() {
+         return CHUNK_METHOD_OFFSET_COPY << 3;
+     }
+
+     /** Writes offset minus one, then length minus one, each as a 16-bit value. */
+     @Override
+     public void writeCompData(DataOutputStream dos) throws IOException {
+         final int length = this.uncomp_size;
+         final int skip = this.rskip - length; // rskip was stored as offset + length.
+         dos.writeShort((short)(skip - 1));
+         dos.writeShort((short)(length - 1));
+     }
+
+     @Override
+     public String toString() {
+         final int length = this.uncomp_size;
+         final int skip = this.rskip - length;
+         return "SuffixChunkOption("+skip+","+length+")";
+     }
+ }
+
+ }// class ChunkedMethod
public static byte[] toByteArray(ByteBuffer org) {
if (org.hasArray()) return org.array();
View
12 src/test/java/com/trifork/deltazip/DeltaZipTest.java
@@ -64,11 +64,17 @@ public void test_two_revs_with(byte[] file, ByteBuffer exp_rev1, ByteBuffer exp_
@Test
public void test_add_get() throws Exception {
ByteBuffer rev1 = ByteBuffer.wrap("Hello".getBytes("ISO-8859-1"));
+ ByteBuffer rev1a = ByteBuffer.wrap("World!".getBytes("ISO-8859-1"));
ByteBuffer rev2 = ByteBuffer.wrap("Hello, World!".getBytes("ISO-8859-1"));
- System.err.println("DB| rev1: "+rev1);
- System.err.println("DB| rev1: "+rev1);
+
byte[] file0 = new byte[] {};
+ test_add_get_with(file0, rev1,rev2); // Would use 'prefix-copy' chunk.
+ test_add_get_with(file0, rev1a,rev2); // Would use 'offset-copy' chunk.
+ //test_add_get_with(file0, rev2,rev1); // Would use 'deflate' chunk.
+ }
+
+ public void test_add_get_with(byte[] file0, ByteBuffer rev1, ByteBuffer rev2) throws IOException {
ByteArrayAccess access0 = new ByteArrayAccess(file0);
DeltaZip dz0 = new DeltaZip(access0);
AppendSpecification app1 = dz0.add(rev1);
@@ -85,7 +91,7 @@ public void test_add_get() throws Exception {
DeltaZip dz2 = new DeltaZip(access2);
// Tests:
- assertEquals(dz1.get(), rev1); //rev1.rewind();
+ assertEquals(dz1.get(), rev1);
assertEquals(dz2.get(), rev2);
dz2.previous();
Please sign in to comment.
Something went wrong with that request. Please try again.