Permalink
Newer
Older
100644 502 lines (440 sloc) 16.1 KB
1
/*-
2
* Copyright 2003 - 2005 Colin Percival
3
* All rights reserved
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted providing that the following conditions
7
* are met:
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
13
*
14
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24
* POSSIBILITY OF SUCH DAMAGE.
25
*/
26
27
#if 0
28
__FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c, v 1.1 2005/08/06 01:59:05 cperciva Exp $");
29
#endif
30
31
#include <sys/types.h>
32
33
#include <err.h>
34
#include <fcntl.h>
35
#include <stdio.h>
36
#include <stdlib.h>
37
#include <string.h>
38
#include <unistd.h>
39
40
#define MIN(x, y) (((x)<(y)) ? (x) : (y))
41
42
static void split(off_t *I, off_t *V, off_t start, off_t len, off_t h)
43
{
44
off_t i, j, k, x, tmp, jj, kk;
45
46
if (len < 16) {
47
for (k = start; k < start + len; k += j) {
48
j = 1; x = V[I[k] + h];
49
for (i = 1; k + i < start + len; i++) {
50
if (V[I[k + i] + h] < x) {
51
x = V[I[k + i] + h];
52
j = 0;
53
};
54
if (V[I[k + i] + h] == x) {
55
tmp = I[k + j]; I[k + j] = I[k + i]; I[k + i] = tmp;
56
j++;
57
};
58
};
59
for (i = 0; i < j; i++)
60
V[I[k + i]] = k + j - 1;
61
if (j == 1)
62
I[k] = -1;
63
};
64
return;
65
};
66
67
x = V[I[start + len/2] + h];
68
jj = 0; kk = 0;
69
for (i = start; i < start + len; i++) {
70
if (V[I[i] + h] < x)
71
jj++;
72
if (V[I[i] + h] == x)
73
kk++;
74
};
75
jj += start; kk += jj;
76
77
i = start; j = 0; k = 0;
78
while (i < jj) {
79
if (V[I[i] + h] < x) {
80
i++;
81
} else if (V[I[i] + h] == x) {
82
tmp = I[i]; I[i] = I[jj + j]; I[jj + j] = tmp;
83
j++;
84
} else {
85
tmp = I[i]; I[i] = I[kk + k]; I[kk + k] = tmp;
86
k++;
87
};
88
};
89
90
while (jj + j < kk) {
91
if (V[I[jj + j] + h] == x) {
92
j++;
93
} else {
94
tmp = I[jj + j]; I[jj + j] = I[kk + k]; I[kk + k] = tmp;
95
k++;
96
};
97
};
98
99
if (jj > start)
100
split(I, V, start, jj - start, h);
101
102
for (i = 0; i < kk - jj; i++)
103
V[I[jj + i]] = kk - 1;
104
if (jj == kk - 1)
105
I[jj] = -1;
106
107
if (start + len > kk)
108
split(I, V, kk, start + len - kk, h);
109
}
110
111
/* qsufsort(I, V, old, oldsize)
112
*
113
* Computes the suffix sort of the string at 'old' and stores the resulting
114
* indices in 'I', using 'V' as a temporary array for the computation. */
115
static void qsufsort(off_t *I, off_t *V, u_char *old, off_t oldsize)
116
{
117
off_t buckets[256];
118
off_t i, h, len;
119
120
/* count number of each byte */
121
for (i = 0; i < 256; i++)
122
buckets[i] = 0;
123
for (i = 0; i < oldsize; i++)
124
buckets[old[i]]++;
125
/* make buckets cumulative */
126
for (i = 1; i < 256; i++)
127
buckets[i] += buckets[i - 1];
128
/* shift right by one */
129
for (i = 255; i > 0; i--)
130
buckets[i] = buckets[i - 1];
131
buckets[0] = 0;
132
/* at this point, buckets[c] is the number of bytes in the old file with
133
* value less than c. */
134
135
/* set up the sort order of the suffixes based solely on the first
136
* character */
137
for (i = 0; i < oldsize; i++)
138
I[++buckets[old[i]]] = i;
139
I[0] = oldsize;
140
/* ? */
141
for (i = 0; i < oldsize; i++)
142
V[i] = buckets[old[i]];
143
V[oldsize] = 0;
144
/* forward any entries in the ordering which have the same initial
145
* character */
146
for (i = 1; i < 256; i++) {
147
if (buckets[i] == buckets[i - 1] + 1)
148
I[buckets[i]] = -1;
149
}
150
I[0] = -1;
151
152
for (h = 1; I[0] != -(oldsize + 1); h += h) {
153
len = 0;
154
for (i = 0; i < oldsize + 1;) {
155
if (I[i] < 0) {
156
len -= I[i];
157
i -= I[i];
158
} else {
159
if (len)
160
I[i - len] = -len;
161
len = V[I[i]] + 1 - i;
162
split(I, V, i, len, h);
163
i += len;
164
len = 0;
165
}
166
}
167
if (len)
168
I[i - len] = -len;
169
};
170
171
for (i = 0; i < oldsize + 1; i++) I[V[i]] = i;
172
}
173
174
/* matchlen(old, oldsize, new, newsize)
175
*
176
* Returns the length of the longest common prefix between 'old' and 'new'. */
177
static off_t matchlen(u_char *old, off_t oldsize, u_char *new, off_t newsize)
178
{
179
off_t i;
180
181
for (i = 0; (i < oldsize) && (i < newsize); i++)
182
{
183
if (old[i] != new[i])
184
break;
185
}
186
187
return i;
188
}
189
190
/* search(I, old, oldsize, new, newsize, st, en, pos)
191
*
192
* Searches for the longest prefix of 'new' that occurs in 'old', stores its
193
* offset in '*pos', and returns its length. 'I' should be the suffix sort of
194
* 'old', and 'st' and 'en' are the lowest and highest indices in the suffix
195
* sort to consider. If you're searching all suffixes, 'st = 0' and 'en =
196
* oldsize - 1'. */
197
static off_t search(off_t *I, u_char *old, off_t oldsize,
198
u_char *new, off_t newsize, off_t st, off_t en, off_t *pos)
199
{
200
off_t x, y;
201
202
if (en - st < 2) {
203
x = matchlen(old + I[st], oldsize - I[st], new, newsize);
204
y = matchlen(old + I[en], oldsize - I[en], new, newsize);
205
206
if (x > y) {
207
*pos = I[st];
208
return x;
209
} else {
210
*pos = I[en];
211
return y;
212
}
213
}
214
215
x = st + (en - st)/2;
216
if (memcmp(old + I[x], new, MIN(oldsize - I[x], newsize)) < 0) {
217
return search(I, old, oldsize, new, newsize, x, en, pos);
218
} else {
219
return search(I, old, oldsize, new, newsize, st, x, pos);
220
};
221
}
222
223
/* offtout(x, buf)
224
*
225
* Writes the off_t 'x' portably to the array 'buf'. */
226
static void offtout(off_t x, u_char *buf)
227
{
228
off_t y;
229
230
if (x < 0)
231
y = -x;
232
else
233
y = x;
234
235
buf[0] = y % 256;
236
y -= buf[0];
237
y = y/256; buf[1] = y%256; y -= buf[1];
238
y = y/256; buf[2] = y%256; y -= buf[2];
239
y = y/256; buf[3] = y%256; y -= buf[3];
240
y = y/256; buf[4] = y%256; y -= buf[4];
241
y = y/256; buf[5] = y%256; y -= buf[5];
242
y = y/256; buf[6] = y%256; y -= buf[6];
243
y = y/256; buf[7] = y%256;
244
245
if (x < 0)
246
buf[7] |= 0x80;
247
}
248
249
int bsdiff(int argc, char *argv[]); // Added by AMM: suppresses a warning about the following not having a prototype.
250
251
int bsdiff(int argc, char *argv[])
252
{
253
int fd;
254
u_char *old,*new; /* contents of old, new files */
255
off_t oldsize, newsize; /* length of old, new files */
256
off_t *I,*V; /* arrays used for suffix sort; I is ordering */
257
off_t scan; /* position of current match in old file */
258
off_t pos; /* position of current match in new file */
259
off_t len; /* length of current match */
260
off_t lastscan; /* position of previous match in old file */
261
off_t lastpos; /* position of previous match in new file */
262
off_t lastoffset; /* lastpos - lastscan */
263
off_t oldscore, scsc; /* temp variables in match search */
264
off_t s, Sf, lenf, Sb, lenb; /* temp vars in match extension */
265
off_t overlap, Ss, lens;
266
off_t i;
267
off_t dblen, eblen; /* length of diff, extra sections */
268
u_char *db,*eb; /* contents of diff, extra sections */
269
u_char buf[8];
270
u_char header[32];
271
FILE * pf;
272
273
if (argc != 4)
274
errx(1,"usage: %s oldfile newfile patchfile\n", argv[0]);
275
276
/* Allocate oldsize + 1 bytes instead of oldsize bytes to ensure
277
that we never try to malloc(0) and get a NULL pointer */
278
if (((fd = open(argv[1], O_RDONLY, 0)) < 0) ||
279
((oldsize = lseek(fd, 0, SEEK_END)) == -1) ||
280
((old = malloc(oldsize + 1)) == NULL) ||
281
(lseek(fd, 0, SEEK_SET) != 0) ||
282
(read(fd, old, oldsize) != oldsize) ||
283
(close(fd) == -1))
284
err(1,"%s", argv[1]);
285
286
if (((I = malloc((oldsize + 1) * sizeof(off_t))) == NULL) ||
287
((V = malloc((oldsize + 1) * sizeof(off_t))) == NULL))
288
err(1, NULL);
289
290
/* Do a suffix sort on the old file. */
291
qsufsort(I, V, old, oldsize);
292
293
free(V);
294
295
/* Allocate newsize + 1 bytes instead of newsize bytes to ensure
296
that we never try to malloc(0) and get a NULL pointer */
297
if (((fd = open(argv[2], O_RDONLY, 0)) < 0) ||
298
((newsize = lseek(fd, 0, SEEK_END)) == -1) ||
299
((new = malloc(newsize + 1)) == NULL) ||
300
(lseek(fd, 0, SEEK_SET) != 0) ||
301
(read(fd, new, newsize) != newsize) ||
302
(close(fd) == -1))
303
err(1,"%s", argv[2]);
304
305
if (((db = malloc(newsize + 1)) == NULL) ||
306
((eb = malloc(newsize + 1)) == NULL))
307
err(1, NULL);
308
dblen = 0;
309
eblen = 0;
310
311
/* Create the patch file */
312
if ((pf = fopen(argv[3], "w")) == NULL)
313
err(1, "%s", argv[3]);
314
315
/* Header is
316
0 8 "BSDIFN40"
317
8 8 length of ctrl block
318
16 8 length of diff block
319
24 8 length of new file */
320
/* File is
321
0 32 Header
322
32 ?? ctrl block
323
?? ?? diff block
324
?? ?? extra block */
325
memcpy(header, "BSDIFN40", 8);
326
offtout(0, header + 8);
327
offtout(0, header + 16);
328
offtout(newsize, header + 24);
329
if (fwrite(header, 32, 1, pf) != 1)
330
err(1, "fwrite(%s)", argv[3]);
331
332
/* Compute the differences, writing ctrl as we go */
333
scan = 0;
334
len = 0;
335
lastscan = 0;
336
lastpos = 0;
337
lastoffset = 0;
338
while (scan < newsize) {
339
oldscore = 0;
340
341
for (scsc = scan += len; scan < newsize; scan++) {
342
/* 'oldscore' is the number of characters that match between the
343
* substrings 'old[lastoffset + scan:lastoffset + scsc]' and
344
* 'new[scan:scsc]'. */
345
len = search(I, old, oldsize, new + scan, newsize - scan,
346
0, oldsize, &pos);
347
348
/* If this match extends further than the last one, add any new
349
* matching characters to 'oldscore'. */
350
for (; scsc < scan + len; scsc++) {
351
if ((scsc + lastoffset < oldsize) &&
352
(old[scsc + lastoffset] == new[scsc]))
353
oldscore++;
354
}
355
356
/* Choose this as our match if it contains more than eight
357
* characters that would be wrong if matched with a forward
358
* extension of the previous match instead. */
359
if (((len == oldscore) && (len != 0)) ||
360
(len > oldscore + 8))
361
break;
362
363
/* Since we're advancing 'scan' by 1, remove the character under it
364
* from 'oldscore' if it matches. */
365
if ((scan + lastoffset < oldsize) &&
366
(old[scan + lastoffset] == new[scan]))
367
oldscore--;
368
}
369
370
/* Skip this section if we found an exact match that would be
371
* better serviced by a forward extension of the previous match. */
372
if ((len != oldscore) || (scan == newsize)) {
373
/* Figure out how far forward the previous match should be
374
* extended... */
375
s = 0;
376
Sf = 0;
377
lenf = 0;
378
for (i = 0; (lastscan + i < scan) && (lastpos + i < oldsize);) {
379
if (old[lastpos + i] == new[lastscan + i])
380
s++;
381
i++;
382
if (s * 2 - i > Sf * 2 - lenf) {
383
Sf = s;
384
lenf = i;
385
}
386
}
387
388
/* ... and how far backwards the next match should be extended. */
389
lenb = 0;
390
if (scan < newsize) {
391
s = 0;
392
Sb = 0;
393
for (i = 1; (scan >= lastscan + i) && (pos >= i); i++) {
394
if (old[pos - i] == new[scan - i])
395
s++;
396
if (s * 2 - i > Sb * 2 - lenb) {
397
Sb = s;
398
lenb = i;
399
}
400
}
401
}
402
403
/* If there is an overlap between the extensions, find the best
404
* dividing point in the middle and reset 'lenf' and 'lenb'
405
* accordingly. */
406
if (lastscan + lenf > scan - lenb) {
407
overlap = (lastscan + lenf) - (scan - lenb);
408
s = 0;
409
Ss = 0;
410
lens = 0;
411
for (i = 0; i < overlap; i++) {
412
if (new[lastscan + lenf - overlap + i] ==
413
old[lastpos + lenf - overlap + i])
414
s++;
415
if (new[scan - lenb + i] == old[pos - lenb + i])
416
s--;
417
if (s > Ss) {
418
Ss = s;
419
lens = i + 1;
420
}
421
}
422
423
lenf += lens - overlap;
424
lenb -= lens;
425
}
426
427
/* Write the diff data for the last match to the diff section... */
428
for (i = 0; i < lenf; i++)
429
db[dblen + i] = new[lastscan + i] - old[lastpos + i];
430
/* ... and, if there's a gap between the extensions just
431
* calculated, write the data in that gap to the extra section. */
432
for (i = 0; i< (scan - lenb) - (lastscan + lenf); i++)
433
eb[eblen + i] = new[lastscan + lenf + i];
434
435
/* Update the diff and extra section lengths accordingly. */
436
dblen += lenf;
437
eblen += (scan - lenb) - (lastscan + lenf);
438
439
/* Write the following triple of integers to the control section:
440
* - length of the diff
441
* - length of the extra section
442
* - offset between the end of the diff and the start of the next
443
* diff, in the old file
444
*/
445
offtout(lenf, buf);
446
if (fwrite(buf, 8, 1, pf) != 1)
447
errx(1, "fwrite");
448
449
offtout((scan - lenb) - (lastscan + lenf), buf);
450
if (fwrite(buf, 8, 1, pf) != 1)
451
err(1, "fwrite");
452
453
offtout((pos - lenb) - (lastpos + lenf), buf);
454
if (fwrite(buf, 8, 1, pf) != 1)
455
err(1, "fwrite");
456
457
/* Update the variables describing the last match. Note that
458
* 'lastscan' is set to the start of the current match _after_ the
459
* backwards extension; the data in that extension will be written
460
* in the next pass. */
461
lastscan = scan - lenb;
462
lastpos = pos - lenb;
463
lastoffset = pos - scan;
464
}
465
}
466
467
/* Compute size of compressed ctrl data */
468
if ((len = ftello(pf)) == -1)
469
err(1, "ftello");
470
offtout(len - 32, header + 8);
471
472
/* Write diff data */
473
if (dblen && fwrite(db, dblen, 1, pf) != 1)
474
err(1, "fwrite");
475
476
/* Compute size of compressed diff data */
477
if ((newsize = ftello(pf)) == -1)
478
err(1, "ftello");
479
offtout(newsize - len, header + 16);
480
481
/* Write extra data */
482
if (eblen && fwrite(eb, eblen, 1, pf) != 1)
483
err(1, "fwrite");
484
485
/* Seek to the beginning, write the header, and close the file */
486
if (fseeko(pf, 0, SEEK_SET))
487
err(1, "fseeko");
488
if (fwrite(header, 32, 1, pf) != 1)
489
err(1, "fwrite(%s)", argv[3]);
490
if (fclose(pf))
491
err(1, "fclose");
492
493
/* Free the memory we used */
494
free(db);
495
free(eb);
496
free(I);
497
free(old);
498
free(new);
499
500
return 0;
501
}