33import com .github .myibu .algorithm .data .Bits ;
44import com .github .myibu .algorithm .endode .GolombEncoder ;
55
6- import java .util .ArrayList ;
7- import java .util .Arrays ;
8- import java .util .List ;
6+ import java .util .*;
7+ import java .util .stream .Collectors ;
98
109/**
1110 * LZ77 compress algorithm
@@ -60,19 +59,18 @@ public int compress(byte[] in_data, int in_len, byte[] out_data) {
6059 byte [] lWindow = new byte [l ];
6160 int sp = 0 , lp = l , ip = 0 , op = 0 ;
6261 while (lWindow .length > 0 && ip < in_len ) {
63- // abracadabrad
6462 // update search buffer
6563 int sStart = 0 , sEnd = sp < s ? sp : s ;
6664 for (int i = sStart ; i < sEnd ; i ++) {
67- System .out .println ("ip=" + ip + ", i=" + i + ", sEnd=" + sEnd + ", sp=" + sp );
6865 sBuf [i ] = in_data [ip - i - 1 ];
6966 }
7067 // update look ahead window
7168 int lStart = 0 , lEnd = ip + l < in_len ? l : in_len - ip ;
7269 for (int i = lStart ; i < lEnd ; i ++) {
7370 lWindow [i ] = in_data [ip + i ];
7471 }
75- System .out .println ("all=abracadabrad, sBuf=" + new StringBuilder (new String (sBuf )).reverse ().toString () + ", lWindow=" + new String (lWindow ));
72+ //System.out.println("txt=" + new String() + new String(in_data) + ", sBuf="
73+ // + new StringBuilder(new String(sBuf)).reverse().toString() + ", lWindow=" + new String(lWindow));
7674
7775 int llStart = sEnd - 1 , rrStart = 0 , llEnd = 0 , rrEnd = (lp = lEnd );
7876 int minMatched = 1 , minIndex = 0 ;
@@ -87,64 +85,41 @@ public int compress(byte[] in_data, int in_len, byte[] out_data) {
8785 }
8886 llStart --;
8987 }
90- System .out .println ("minIndex=" + minIndex + ", all=abracadabrad, sBuf=" + new StringBuilder (new String (sBuf )).reverse ().toString () + ", lWindow=" + new String (lWindow ));
9188 // matched
9289 if (minIndex > 0 ) {
93- // byte[] tuple = String.format("(%d,%d,%s)", minIndex + 1, minMatched, new String(new byte[]{lWindow[minMatched]})).getBytes();
94- // System.arraycopy(tuple, 0, out_data, (op++) * tuple.length, tuple.length);
95- System .out .println (String .format ("(%d, %d, %s)" , minIndex + 1 , minMatched , new String (new byte []{lWindow [minMatched ]})));
9690 tuples .add (Arrays .asList ( minIndex + 1 , minMatched , (int )lWindow [minMatched ]));
9791 sp += (minMatched + 1 );
98- // if (sp > s) {
99- // sp = s-1;
100- // }
10192 ip += (minMatched + 1 );
10293 } else {
10394 sp ++;
104- // if (sp > s) {
105- // sp = s-1;
106- // }
10795 ip ++;
108- // byte[] tuple = String.format("(%d,%d,%s)", 0, 0, new String(new byte[]{lWindow[0]})).getBytes();
109- // System.arraycopy(tuple, 0, out_data, (op++) * tuple.length, tuple.length);
110- System .out .println (String .format ("(%d, %d, %s)" , 0 , 0 , new String (new byte []{lWindow [0 ]})));
11196 tuples .add (Arrays .asList (0 , 0 , (int )lWindow [0 ]));
11297 }
11398 }
114- System .out .println (tuples );
115- int sum = 0 ;
99+ // System.out.println(tuples);
100+ return doEncode (tuples , out_data );
101+ }
102+
103+ private int doEncode (List <List <Integer >> tuples , byte [] out_data ) {
104+ Bits finalRes = new Bits ();
116105 GolombEncoder encoder = new GolombEncoder ();
117106 for (List <Integer > tuple : tuples ) {
118107 Bits bits = new Bits ();
119- bits .append (encoder .encodeToBinary (tuple .get (0 ), (int )(Math .ceil (Math .log (s ) / Math .log (2 )))));
120- System .out .println ("1" + bits );
121- bits .append (encoder .encode (tuple .get (1 ), 5 ));
122- System .out .println ("2" + bits );
123- bits .append (Bits .ofByte ((byte )tuple .get (2 ).intValue ()));
124- System .out .println ("3" + bits );
125- sum += bits .length ();
108+ Bits bits1 = encoder .encodeToBinary (tuple .get (0 ), (int )(Math .ceil (Math .log (s ) / Math .log (2 ))));
109+ bits .append (bits1 );
110+ Bits bits2 = encoder .encode (tuple .get (1 ), l );
111+ bits .append (bits2 );
112+ Bits bits3 = Bits .ofByte ((byte )tuple .get (2 ).intValue ());
113+ bits .append (bits3 );
114+ // System.out.println("("+ bits1 + ", "+ bits2 + ", "+ bits3 + ")");
115+ finalRes .append (bits );
126116 }
127- System .out .println ("compressed length: " + sum );
128- return 0 ;
117+ byte [] fr = finalRes .toByteArray ();
118+ System .arraycopy (fr , 0 , out_data , 0 , fr .length );
119+ // System.out.println("bits: " + finalRes);
120+ return fr .length ;
129121 }
130122
131- // private int indexOf(int llStart, int rrStart, int llEnd, int rrEnd, byte[] sBuf, byte[] lWindow) {
132- // int minMatched = 1, minIndex = 0;
133- // while (llStart >= 0) {
134- // int matched = 0, left = llStart, right = rrStart;
135- // while (left >= 0 && right < rrEnd && sBuf[left--] == lWindow[right++]) {
136- // matched++;
137- // }
138- // if (matched >= minMatched) {
139- // minIndex = llStart;
140- // minMatched = matched;
141- // }
142- // llStart--;
143- // }
144- // System.out.println("minIndex=" + minIndex + ", all=abracadabrad, sBuf=" + new StringBuilder(new String(sBuf)).reverse().toString() + ", lWindow=" + new String(lWindow));
145- // return minIndex;
146- // }
147-
148123 /**
149124 * for each token (offset, length, symbol)
150125 * if offset = 0 then
@@ -162,6 +137,57 @@ public int compress(byte[] in_data, int in_len, byte[] out_data) {
162137 */
163138 @ Override
164139 public int decompress (byte [] in_data , int in_len , byte [] out_data ) {
165- return 0 ;
140+ int e1 = (int )(Math .ceil (Math .log (s ) / Math .log (2 )));
141+ GolombEncoder encoder = new GolombEncoder ();
142+ Set <Bits > allEncodeSeq = new HashSet <>();
143+ for (int i = 0 ; i <= l ; i ++) {
144+ allEncodeSeq .add (encoder .encode (i , l ));
145+ }
146+ List <Bits > sortedEncodeSeq = allEncodeSeq .stream ().sorted (Comparator .comparingInt (Bits ::length )).collect (Collectors .toList ());
147+ Bits bits = Bits .ofByte (in_data );
148+ int ip = 0 ;
149+ List <List <Integer >> tuples = new ArrayList <>();
150+ while (ip < bits .length ()) {
151+ Bits b1 = bits .subBits (ip , ip + e1 );
152+ ip = ip + e1 ;
153+ int offset = encoder .encodeToBinary (b1 );
154+ int length = -1 ;
155+ for (Bits sortedEncode : sortedEncodeSeq ) {
156+ if (ip + sortedEncode .length () < bits .length ()) {
157+ if (sortedEncode .equals (bits .subBits (ip , ip +sortedEncode .length ()))) {
158+ length = encoder .decode (sortedEncode , l );
159+ ip += sortedEncode .length ();
160+ break ;
161+ }
162+ }
163+ }
164+ if (length == -1 || ip +8 > bits .length ()) {
165+ break ;
166+ }
167+ int symbol = (int )bits .subBits (ip , ip +8 ).toByte ();
168+ tuples .add (Arrays .asList (offset , length , symbol ));
169+ ip += 8 ;
170+ }
171+ // System.out.println(tuples);
172+ return doDecode (tuples , out_data );
173+ }
174+
175+ private int doDecode (List <List <Integer >> tuples , byte [] out_data ) {
176+ Bits seq = new Bits ();
177+ for (List <Integer > tuple : tuples ) {
178+ int offset = tuple .get (0 ), length = tuple .get (1 ), symbol = tuple .get (2 );
179+ Bits sb = Bits .ofByte ((byte ) symbol );
180+ if (offset == 0 ) {
181+ seq .append (sb );
182+ } else {
183+ int start = seq .byteLength () < s ? seq .byteLength () - offset : s - offset ;
184+ seq .append (seq .subBits (start * 8 , (start + length ) * 8 )).append (sb );
185+ }
186+ }
187+ int len = seq .byteLength ();
188+ for (int i = 0 ; i < len ; i ++) {
189+ out_data [i ] = seq .getByte (i ).toByte ();
190+ }
191+ return len ;
166192 }
167193}
0 commit comments