Skip to content

Commit

Permalink
optimize diffcore-delta by sorting hash entries.
Browse files Browse the repository at this point in the history
Here's a test-patch. I don't guarantee anything, except that when I did
the timings I also did a "wc" on the result, and they matched..

Before:
	[torvalds@woody linux]$ time git diff -l0 --stat -C v2.6.22.. | wc
	   7104   28574  438020

	real    0m10.526s
	user    0m10.401s
	sys     0m0.136s

After:
	[torvalds@woody linux]$ time ~/git/git diff -l0 --stat -C v2.6.22.. | wc
	   7104   28574  438020

	real    0m8.876s
	user    0m8.761s
	sys     0m0.128s

but the diff is fairly simple, so if somebody will go over it and say
whether it's likely to be *correct* too, that 15% may well be worth it.

[ Side note, without rename detection, that diff takes just under three
  seconds for me, so in that sense the improvement to the rename detection
  itself is larger than the overall 15% - it brings the cost of just
  rename detection from 7.5s to 5.9s, which would be on the order of just
  over a 20% performance improvement. ]

Hmm. The patch depends on half-way subtle issues like the fact that the
hashtables are guaranteed to not be full => we're guaranteed to have zero
counts at the end => we don't need to do any steenking iterator count in
the loop. A few comments might in order.

		Linus
  • Loading branch information
torvalds authored and gitster committed Oct 4, 2007
1 parent 4c75136 commit eb4d0e3
Showing 1 changed file with 30 additions and 24 deletions.
54 changes: 30 additions & 24 deletions diffcore-delta.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,22 +46,6 @@ struct spanhash_top {
struct spanhash data[FLEX_ARRAY];
};

static struct spanhash *spanhash_find(struct spanhash_top *top,
unsigned int hashval)
{
int sz = 1 << top->alloc_log2;
int bucket = hashval & (sz - 1);
while (1) {
struct spanhash *h = &(top->data[bucket++]);
if (!h->cnt)
return NULL;
if (h->hashval == hashval)
return h;
if (sz <= bucket)
bucket = 0;
}
}

static struct spanhash_top *spanhash_rehash(struct spanhash_top *orig)
{
struct spanhash_top *new;
Expand Down Expand Up @@ -122,6 +106,20 @@ static struct spanhash_top *add_spanhash(struct spanhash_top *top,
}
}

static int spanhash_cmp(const void *a_, const void *b_)
{
const struct spanhash *a = a_;
const struct spanhash *b = b_;

/* A count of zero compares at the end.. */
if (!a->cnt)
return !b->cnt ? 0 : 1;
if (!b->cnt)
return -1;
return a->hashval < b->hashval ? -1 :
a->hashval > b->hashval ? 1 : 0;
}

static struct spanhash_top *hash_chars(struct diff_filespec *one)
{
int i, n;
Expand Down Expand Up @@ -158,6 +156,10 @@ static struct spanhash_top *hash_chars(struct diff_filespec *one)
n = 0;
accum1 = accum2 = 0;
}
qsort(hash->data,
1ul << hash->alloc_log2,
sizeof(hash->data[0]),
spanhash_cmp);
return hash;
}

Expand All @@ -169,7 +171,7 @@ int diffcore_count_changes(struct diff_filespec *src,
unsigned long *src_copied,
unsigned long *literal_added)
{
int i, ssz;
struct spanhash *s, *d;
struct spanhash_top *src_count, *dst_count;
unsigned long sc, la;

Expand All @@ -190,22 +192,26 @@ int diffcore_count_changes(struct diff_filespec *src,
}
sc = la = 0;

ssz = 1 << src_count->alloc_log2;
for (i = 0; i < ssz; i++) {
struct spanhash *s = &(src_count->data[i]);
struct spanhash *d;
s = src_count->data;
d = dst_count->data;
for (;;) {
unsigned dst_cnt, src_cnt;
if (!s->cnt)
continue;
break; /* we checked all in src */
while (d->cnt) {
if (d->hashval >= s->hashval)
break;
d++;
}
src_cnt = s->cnt;
d = spanhash_find(dst_count, s->hashval);
dst_cnt = d ? d->cnt : 0;
dst_cnt = d->hashval == s->hashval ? d->cnt : 0;
if (src_cnt < dst_cnt) {
la += dst_cnt - src_cnt;
sc += src_cnt;
}
else
sc += dst_cnt;
s++;
}

if (!src_count_p)
Expand Down

0 comments on commit eb4d0e3

Please sign in to comment.