Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 502 lines (440 sloc) 16.464 kb
c3d8254 Add a command-line tool that can generate and apply a binary delta betwe...
Mark Rowe authored
1 /*-
2 * Copyright 2003 - 2005 Colin Percival
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #if 0
28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c, v 1.1 2005/08/06 01:59:05 cperciva Exp $");
29 #endif
30
31 #include <sys/types.h>
32
33 #include <err.h>
34 #include <fcntl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39
40 #define MIN(x, y) (((x)<(y)) ? (x) : (y))
41
42 static void split(off_t *I, off_t *V, off_t start, off_t len, off_t h)
43 {
44 off_t i, j, k, x, tmp, jj, kk;
45
46 if (len < 16) {
47 for (k = start; k < start + len; k += j) {
48 j = 1; x = V[I[k] + h];
49 for (i = 1; k + i < start + len; i++) {
50 if (V[I[k + i] + h] < x) {
51 x = V[I[k + i] + h];
52 j = 0;
53 };
54 if (V[I[k + i] + h] == x) {
55 tmp = I[k + j]; I[k + j] = I[k + i]; I[k + i] = tmp;
56 j++;
57 };
58 };
59 for (i = 0; i < j; i++)
60 V[I[k + i]] = k + j - 1;
61 if (j == 1)
62 I[k] = -1;
63 };
64 return;
65 };
66
67 x = V[I[start + len/2] + h];
68 jj = 0; kk = 0;
69 for (i = start; i < start + len; i++) {
70 if (V[I[i] + h] < x)
71 jj++;
72 if (V[I[i] + h] == x)
73 kk++;
74 };
75 jj += start; kk += jj;
76
77 i = start; j = 0; k = 0;
78 while (i < jj) {
79 if (V[I[i] + h] < x) {
80 i++;
81 } else if (V[I[i] + h] == x) {
82 tmp = I[i]; I[i] = I[jj + j]; I[jj + j] = tmp;
83 j++;
84 } else {
85 tmp = I[i]; I[i] = I[kk + k]; I[kk + k] = tmp;
86 k++;
87 };
88 };
89
90 while (jj + j < kk) {
91 if (V[I[jj + j] + h] == x) {
92 j++;
93 } else {
94 tmp = I[jj + j]; I[jj + j] = I[kk + k]; I[kk + k] = tmp;
95 k++;
96 };
97 };
98
99 if (jj > start)
100 split(I, V, start, jj - start, h);
101
102 for (i = 0; i < kk - jj; i++)
103 V[I[jj + i]] = kk - 1;
104 if (jj == kk - 1)
105 I[jj] = -1;
106
107 if (start + len > kk)
108 split(I, V, kk, start + len - kk, h);
109 }
110
111 /* qsufsort(I, V, old, oldsize)
112 *
113 * Computes the suffix sort of the string at 'old' and stores the resulting
114 * indices in 'I', using 'V' as a temporary array for the computation. */
115 static void qsufsort(off_t *I, off_t *V, u_char *old, off_t oldsize)
116 {
117 off_t buckets[256];
118 off_t i, h, len;
119
120 /* count number of each byte */
121 for (i = 0; i < 256; i++)
122 buckets[i] = 0;
123 for (i = 0; i < oldsize; i++)
124 buckets[old[i]]++;
125 /* make buckets cumulative */
126 for (i = 1; i < 256; i++)
127 buckets[i] += buckets[i - 1];
128 /* shift right by one */
129 for (i = 255; i > 0; i--)
130 buckets[i] = buckets[i - 1];
131 buckets[0] = 0;
132 /* at this point, buckets[c] is the number of bytes in the old file with
133 * value less than c. */
134
135 /* set up the sort order of the suffixes based solely on the first
136 * character */
137 for (i = 0; i < oldsize; i++)
138 I[++buckets[old[i]]] = i;
139 I[0] = oldsize;
140 /* ? */
141 for (i = 0; i < oldsize; i++)
142 V[i] = buckets[old[i]];
143 V[oldsize] = 0;
144 /* forward any entries in the ordering which have the same initial
145 * character */
146 for (i = 1; i < 256; i++) {
147 if (buckets[i] == buckets[i - 1] + 1)
148 I[buckets[i]] = -1;
149 }
150 I[0] = -1;
151
152 for (h = 1; I[0] != -(oldsize + 1); h += h) {
153 len = 0;
154 for (i = 0; i < oldsize + 1;) {
155 if (I[i] < 0) {
156 len -= I[i];
157 i -= I[i];
158 } else {
159 if (len)
160 I[i - len] = -len;
161 len = V[I[i]] + 1 - i;
162 split(I, V, i, len, h);
163 i += len;
164 len = 0;
165 }
166 }
167 if (len)
168 I[i - len] = -len;
169 };
170
171 for (i = 0; i < oldsize + 1; i++) I[V[i]] = i;
172 }
173
174 /* matchlen(old, oldsize, new, newsize)
175 *
176 * Returns the length of the longest common prefix between 'old' and 'new'. */
177 static off_t matchlen(u_char *old, off_t oldsize, u_char *new, off_t newsize)
178 {
179 off_t i;
180
181 for (i = 0; (i < oldsize) && (i < newsize); i++)
182 {
183 if (old[i] != new[i])
184 break;
185 }
186
187 return i;
188 }
189
190 /* search(I, old, oldsize, new, newsize, st, en, pos)
191 *
192 * Searches for the longest prefix of 'new' that occurs in 'old', stores its
193 * offset in '*pos', and returns its length. 'I' should be the suffix sort of
194 * 'old', and 'st' and 'en' are the lowest and highest indices in the suffix
195 * sort to consider. If you're searching all suffixes, 'st = 0' and 'en =
196 * oldsize - 1'. */
197 static off_t search(off_t *I, u_char *old, off_t oldsize,
198 u_char *new, off_t newsize, off_t st, off_t en, off_t *pos)
199 {
200 off_t x, y;
201
202 if (en - st < 2) {
203 x = matchlen(old + I[st], oldsize - I[st], new, newsize);
204 y = matchlen(old + I[en], oldsize - I[en], new, newsize);
205
206 if (x > y) {
207 *pos = I[st];
208 return x;
209 } else {
210 *pos = I[en];
211 return y;
212 }
213 }
214
215 x = st + (en - st)/2;
216 if (memcmp(old + I[x], new, MIN(oldsize - I[x], newsize)) < 0) {
217 return search(I, old, oldsize, new, newsize, x, en, pos);
218 } else {
219 return search(I, old, oldsize, new, newsize, st, x, pos);
220 };
221 }
222
223 /* offtout(x, buf)
224 *
225 * Writes the off_t 'x' portably to the array 'buf'. */
226 static void offtout(off_t x, u_char *buf)
227 {
228 off_t y;
229
230 if (x < 0)
231 y = -x;
232 else
233 y = x;
234
235 buf[0] = y % 256;
236 y -= buf[0];
237 y = y/256; buf[1] = y%256; y -= buf[1];
238 y = y/256; buf[2] = y%256; y -= buf[2];
239 y = y/256; buf[3] = y%256; y -= buf[3];
240 y = y/256; buf[4] = y%256; y -= buf[4];
241 y = y/256; buf[5] = y%256; y -= buf[5];
242 y = y/256; buf[6] = y%256; y -= buf[6];
243 y = y/256; buf[7] = y%256;
244
245 if (x < 0)
246 buf[7] |= 0x80;
247 }
248
5975703 @andymatuschak Fixed some silly compiler warnings.
andymatuschak authored
249 int bsdiff(int argc, char *argv[]); // Added by AMM: suppresses a warning about the following not having a prototype.
250
c3d8254 Add a command-line tool that can generate and apply a binary delta betwe...
Mark Rowe authored
251 int bsdiff(int argc, char *argv[])
252 {
253 int fd;
254 u_char *old,*new; /* contents of old, new files */
255 off_t oldsize, newsize; /* length of old, new files */
256 off_t *I,*V; /* arrays used for suffix sort; I is ordering */
257 off_t scan; /* position of current match in old file */
258 off_t pos; /* position of current match in new file */
259 off_t len; /* length of current match */
260 off_t lastscan; /* position of previous match in old file */
261 off_t lastpos; /* position of previous match in new file */
262 off_t lastoffset; /* lastpos - lastscan */
263 off_t oldscore, scsc; /* temp variables in match search */
264 off_t s, Sf, lenf, Sb, lenb; /* temp vars in match extension */
265 off_t overlap, Ss, lens;
266 off_t i;
267 off_t dblen, eblen; /* length of diff, extra sections */
268 u_char *db,*eb; /* contents of diff, extra sections */
269 u_char buf[8];
270 u_char header[32];
271 FILE * pf;
272
273 if (argc != 4)
274 errx(1,"usage: %s oldfile newfile patchfile\n", argv[0]);
275
276 /* Allocate oldsize + 1 bytes instead of oldsize bytes to ensure
277 that we never try to malloc(0) and get a NULL pointer */
278 if (((fd = open(argv[1], O_RDONLY, 0)) < 0) ||
279 ((oldsize = lseek(fd, 0, SEEK_END)) == -1) ||
280 ((old = malloc(oldsize + 1)) == NULL) ||
281 (lseek(fd, 0, SEEK_SET) != 0) ||
282 (read(fd, old, oldsize) != oldsize) ||
283 (close(fd) == -1))
284 err(1,"%s", argv[1]);
285
286 if (((I = malloc((oldsize + 1) * sizeof(off_t))) == NULL) ||
287 ((V = malloc((oldsize + 1) * sizeof(off_t))) == NULL))
288 err(1, NULL);
289
290 /* Do a suffix sort on the old file. */
291 qsufsort(I, V, old, oldsize);
292
293 free(V);
294
295 /* Allocate newsize + 1 bytes instead of newsize bytes to ensure
296 that we never try to malloc(0) and get a NULL pointer */
297 if (((fd = open(argv[2], O_RDONLY, 0)) < 0) ||
298 ((newsize = lseek(fd, 0, SEEK_END)) == -1) ||
299 ((new = malloc(newsize + 1)) == NULL) ||
300 (lseek(fd, 0, SEEK_SET) != 0) ||
301 (read(fd, new, newsize) != newsize) ||
302 (close(fd) == -1))
303 err(1,"%s", argv[2]);
304
305 if (((db = malloc(newsize + 1)) == NULL) ||
306 ((eb = malloc(newsize + 1)) == NULL))
307 err(1, NULL);
308 dblen = 0;
309 eblen = 0;
310
311 /* Create the patch file */
312 if ((pf = fopen(argv[3], "w")) == NULL)
313 err(1, "%s", argv[3]);
314
315 /* Header is
39bb82b @vslavik Don't compress in bsdiff.
vslavik authored
316 0 8 "BSDIFN40"
317 8 8 length of ctrl block
318 16 8 length of diff block
c3d8254 Add a command-line tool that can generate and apply a binary delta betwe...
Mark Rowe authored
319 24 8 length of new file */
320 /* File is
321 0 32 Header
39bb82b @vslavik Don't compress in bsdiff.
vslavik authored
322 32 ?? ctrl block
323 ?? ?? diff block
324 ?? ?? extra block */
325 memcpy(header, "BSDIFN40", 8);
c3d8254 Add a command-line tool that can generate and apply a binary delta betwe...
Mark Rowe authored
326 offtout(0, header + 8);
327 offtout(0, header + 16);
328 offtout(newsize, header + 24);
329 if (fwrite(header, 32, 1, pf) != 1)
330 err(1, "fwrite(%s)", argv[3]);
331
332 /* Compute the differences, writing ctrl as we go */
333 scan = 0;
334 len = 0;
335 lastscan = 0;
336 lastpos = 0;
337 lastoffset = 0;
338 while (scan < newsize) {
339 oldscore = 0;
340
341 for (scsc = scan += len; scan < newsize; scan++) {
342 /* 'oldscore' is the number of characters that match between the
343 * substrings 'old[lastoffset + scan:lastoffset + scsc]' and
344 * 'new[scan:scsc]'. */
345 len = search(I, old, oldsize, new + scan, newsize - scan,
346 0, oldsize, &pos);
347
348 /* If this match extends further than the last one, add any new
349 * matching characters to 'oldscore'. */
350 for (; scsc < scan + len; scsc++) {
351 if ((scsc + lastoffset < oldsize) &&
352 (old[scsc + lastoffset] == new[scsc]))
353 oldscore++;
354 }
355
356 /* Choose this as our match if it contains more than eight
357 * characters that would be wrong if matched with a forward
358 * extension of the previous match instead. */
359 if (((len == oldscore) && (len != 0)) ||
360 (len > oldscore + 8))
361 break;
362
363 /* Since we're advancing 'scan' by 1, remove the character under it
364 * from 'oldscore' if it matches. */
365 if ((scan + lastoffset < oldsize) &&
366 (old[scan + lastoffset] == new[scan]))
367 oldscore--;
368 }
369
370 /* Skip this section if we found an exact match that would be
371 * better serviced by a forward extension of the previous match. */
372 if ((len != oldscore) || (scan == newsize)) {
373 /* Figure out how far forward the previous match should be
374 * extended... */
375 s = 0;
376 Sf = 0;
377 lenf = 0;
378 for (i = 0; (lastscan + i < scan) && (lastpos + i < oldsize);) {
379 if (old[lastpos + i] == new[lastscan + i])
380 s++;
381 i++;
382 if (s * 2 - i > Sf * 2 - lenf) {
383 Sf = s;
384 lenf = i;
385 }
386 }
387
388 /* ... and how far backwards the next match should be extended. */
389 lenb = 0;
390 if (scan < newsize) {
391 s = 0;
392 Sb = 0;
393 for (i = 1; (scan >= lastscan + i) && (pos >= i); i++) {
394 if (old[pos - i] == new[scan - i])
395 s++;
396 if (s * 2 - i > Sb * 2 - lenb) {
397 Sb = s;
398 lenb = i;
399 }
400 }
401 }
402
403 /* If there is an overlap between the extensions, find the best
404 * dividing point in the middle and reset 'lenf' and 'lenb'
405 * accordingly. */
406 if (lastscan + lenf > scan - lenb) {
407 overlap = (lastscan + lenf) - (scan - lenb);
408 s = 0;
409 Ss = 0;
410 lens = 0;
411 for (i = 0; i < overlap; i++) {
412 if (new[lastscan + lenf - overlap + i] ==
413 old[lastpos + lenf - overlap + i])
414 s++;
415 if (new[scan - lenb + i] == old[pos - lenb + i])
416 s--;
417 if (s > Ss) {
418 Ss = s;
419 lens = i + 1;
420 }
421 }
422
423 lenf += lens - overlap;
424 lenb -= lens;
425 }
426
427 /* Write the diff data for the last match to the diff section... */
428 for (i = 0; i < lenf; i++)
429 db[dblen + i] = new[lastscan + i] - old[lastpos + i];
430 /* ... and, if there's a gap between the extensions just
431 * calculated, write the data in that gap to the extra section. */
432 for (i = 0; i< (scan - lenb) - (lastscan + lenf); i++)
433 eb[eblen + i] = new[lastscan + lenf + i];
434
435 /* Update the diff and extra section lengths accordingly. */
436 dblen += lenf;
437 eblen += (scan - lenb) - (lastscan + lenf);
438
439 /* Write the following triple of integers to the control section:
440 * - length of the diff
441 * - length of the extra section
442 * - offset between the end of the diff and the start of the next
443 * diff, in the old file
444 */
445 offtout(lenf, buf);
39bb82b @vslavik Don't compress in bsdiff.
vslavik authored
446 if (fwrite(buf, 8, 1, pf) != 1)
447 errx(1, "fwrite");
c3d8254 Add a command-line tool that can generate and apply a binary delta betwe...
Mark Rowe authored
448
449 offtout((scan - lenb) - (lastscan + lenf), buf);
39bb82b @vslavik Don't compress in bsdiff.
vslavik authored
450 if (fwrite(buf, 8, 1, pf) != 1)
451 err(1, "fwrite");
c3d8254 Add a command-line tool that can generate and apply a binary delta betwe...
Mark Rowe authored
452
453 offtout((pos - lenb) - (lastpos + lenf), buf);
39bb82b @vslavik Don't compress in bsdiff.
vslavik authored
454 if (fwrite(buf, 8, 1, pf) != 1)
455 err(1, "fwrite");
c3d8254 Add a command-line tool that can generate and apply a binary delta betwe...
Mark Rowe authored
456
457 /* Update the variables describing the last match. Note that
458 * 'lastscan' is set to the start of the current match _after_ the
459 * backwards extension; the data in that extension will be written
460 * in the next pass. */
461 lastscan = scan - lenb;
462 lastpos = pos - lenb;
463 lastoffset = pos - scan;
464 }
465 }
466
467 /* Compute size of compressed ctrl data */
468 if ((len = ftello(pf)) == -1)
469 err(1, "ftello");
470 offtout(len - 32, header + 8);
471
39bb82b @vslavik Don't compress in bsdiff.
vslavik authored
472 /* Write diff data */
473 if (dblen && fwrite(db, dblen, 1, pf) != 1)
474 err(1, "fwrite");
c3d8254 Add a command-line tool that can generate and apply a binary delta betwe...
Mark Rowe authored
475
476 /* Compute size of compressed diff data */
477 if ((newsize = ftello(pf)) == -1)
478 err(1, "ftello");
479 offtout(newsize - len, header + 16);
480
39bb82b @vslavik Don't compress in bsdiff.
vslavik authored
481 /* Write extra data */
482 if (eblen && fwrite(eb, eblen, 1, pf) != 1)
483 err(1, "fwrite");
c3d8254 Add a command-line tool that can generate and apply a binary delta betwe...
Mark Rowe authored
484
485 /* Seek to the beginning, write the header, and close the file */
486 if (fseeko(pf, 0, SEEK_SET))
487 err(1, "fseeko");
488 if (fwrite(header, 32, 1, pf) != 1)
489 err(1, "fwrite(%s)", argv[3]);
490 if (fclose(pf))
491 err(1, "fclose");
492
493 /* Free the memory we used */
494 free(db);
495 free(eb);
496 free(I);
497 free(old);
498 free(new);
499
500 return 0;
501 }
Something went wrong with that request. Please try again.