Skip to content

Commit

Permalink
dir.c: unify is_excluded and is_path_excluded APIs
Browse files Browse the repository at this point in the history
The is_excluded and is_path_excluded APIs are very similar, except for a
few noteworthy differences:

is_excluded doesn't handle ignored directories, so results for paths within
ignored directories are incorrect. This is probably based on the premise
that recursive directory scans should stop at ignored directories, which
is no longer true (in certain cases, read_directory_recursive currently
calls is_excluded *and* is_path_excluded to get the correct ignored state).

is_excluded caches parsed .gitignore files of the last directory in struct
dir_struct. If the directory changes, it finds a common parent directory
and is very careful to drop only as much state as necessary. On the other
hand, is_excluded will also read and parse .gitignore files in already
ignored directories, which are completely irrelevant.

is_path_excluded correctly handles ignored directories by checking if any
component in the path is excluded. As it uses is_excluded internally, this
unfortunately forces is_excluded to drop and re-read all .gitignore files,
as there is no common parent directory for the root dir.

is_path_excluded tracks state in a separate struct path_exclude_check,
which is essentially a wrapper of dir_struct with two more fields. However,
as is_path_excluded also modifies dir_struct, it is not possible, for
example, to use multiple path_exclude_check structures with the same
dir_struct in parallel. The additional structure just unnecessarily
complicates the API.

Teach is_excluded / prep_exclude about ignored directories: whenever
entering a new directory, first check if the entire directory is excluded.
Remember the excluded state in dir_struct. Don't traverse into already
ignored directories (i.e. don't read irrelevant .gitignore files).

Directories could also be excluded by exclude patterns specified on the
command line or .git/info/exclude, so we cannot simply skip prep_exclude
entirely if there's no .gitignore file name (dir_struct.exclude_per_dir).
Move this check to just before actually reading the file.

is_path_excluded is now equivalent to is_excluded, so we can simply
redirect to it (the public API is cleaned up in the next patch).

The performance impact of the additional ignored check per directory is
hardly noticeable when reading directories recursively (e.g. 'git status').
However, performance of git commands using the is_path_excluded API (e.g.
'git ls-files --cached --ignored --exclude-standard') is greatly improved
as this no longer re-reads .gitignore files on each call.

Here's some performance data from the linux and WebKit repos (best of 10
runs on a Debian Linux on SSD, core.preloadIndex=true):

       | ls-files -ci   |    status      | status --ignored
       | linux | WebKit | linux | WebKit | linux | WebKit
-------+-------+--------+-------+--------+-------+---------
before | 0.506 |  6.539 | 0.212 |  1.555 | 0.323 |  2.541
after  | 0.080 |  1.191 | 0.218 |  1.583 | 0.321 |  2.579
gain   | 6.325 |  5.490 | 0.972 |  0.982 | 1.006 |  0.985

Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
kblees authored and gitster committed Apr 15, 2013
1 parent 6cd5c58 commit 95c6f27
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 67 deletions.
107 changes: 43 additions & 64 deletions dir.c
Expand Up @@ -754,10 +754,6 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
struct exclude_stack *stk = NULL;
int current;

if ((!dir->exclude_per_dir) ||
(baselen + strlen(dir->exclude_per_dir) >= PATH_MAX))
return; /* too long a path -- ignore */

group = &dir->exclude_list_group[EXC_DIRS];

/* Pop the exclude lists from the EXC_DIRS exclude_list_group
Expand All @@ -769,12 +765,17 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
break;
el = &group->el[dir->exclude_stack->exclude_ix];
dir->exclude_stack = stk->prev;
dir->exclude = NULL;
free((char *)el->src); /* see strdup() below */
clear_exclude_list(el);
free(stk);
group->nr--;
}

/* Skip traversing into sub directories if the parent is excluded */
if (dir->exclude)
return;

/* Read from the parent directories and push them down. */
current = stk ? stk->baselen : -1;
while (current < baselen) {
Expand All @@ -793,22 +794,43 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
}
stk->prev = dir->exclude_stack;
stk->baselen = cp - base;
stk->exclude_ix = group->nr;
el = add_exclude_list(dir, EXC_DIRS, NULL);
memcpy(dir->basebuf + current, base + current,
stk->baselen - current);
strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir);
/*
* dir->basebuf gets reused by the traversal, but we
* need fname to remain unchanged to ensure the src
* member of each struct exclude correctly
* back-references its source file. Other invocations
* of add_exclude_list provide stable strings, so we
* strdup() and free() here in the caller.
*/
el = add_exclude_list(dir, EXC_DIRS, strdup(dir->basebuf));
stk->exclude_ix = group->nr - 1;
add_excludes_from_file_to_list(dir->basebuf,
dir->basebuf, stk->baselen,
el, 1);

/* Abort if the directory is excluded */
if (stk->baselen) {
int dt = DT_DIR;
dir->basebuf[stk->baselen - 1] = 0;
dir->exclude = last_exclude_matching_from_lists(dir,
dir->basebuf, stk->baselen - 1,
dir->basebuf + current, &dt);
dir->basebuf[stk->baselen - 1] = '/';
if (dir->exclude) {
dir->basebuf[stk->baselen] = 0;
dir->exclude_stack = stk;
return;
}
}

/* Try to read per-directory file unless path is too long */
if (dir->exclude_per_dir &&
stk->baselen + strlen(dir->exclude_per_dir) < PATH_MAX) {
strcpy(dir->basebuf + stk->baselen,
dir->exclude_per_dir);
/*
* dir->basebuf gets reused by the traversal, but we
* need fname to remain unchanged to ensure the src
* member of each struct exclude correctly
* back-references its source file. Other invocations
* of add_exclude_list provide stable strings, so we
* strdup() and free() here in the caller.
*/
el->src = strdup(dir->basebuf);
add_excludes_from_file_to_list(dir->basebuf,
dir->basebuf, stk->baselen, el, 1);
}
dir->exclude_stack = stk;
current = stk->baselen;
}
Expand All @@ -831,6 +853,9 @@ static struct exclude *last_exclude_matching(struct dir_struct *dir,

prep_exclude(dir, pathname, basename-pathname);

if (dir->exclude)
return dir->exclude;

return last_exclude_matching_from_lists(dir, pathname, pathlen,
basename, dtype_p);
}
Expand All @@ -853,13 +878,10 @@ void path_exclude_check_init(struct path_exclude_check *check,
struct dir_struct *dir)
{
check->dir = dir;
check->exclude = NULL;
strbuf_init(&check->path, 256);
}

void path_exclude_check_clear(struct path_exclude_check *check)
{
strbuf_release(&check->path);
}

/*
Expand All @@ -875,49 +897,6 @@ struct exclude *last_exclude_matching_path(struct path_exclude_check *check,
const char *name, int namelen,
int *dtype)
{
int i;
struct strbuf *path = &check->path;
struct exclude *exclude;

/*
* we allow the caller to pass namelen as an optimization; it
* must match the length of the name, as we eventually call
* is_excluded() on the whole name string.
*/
if (namelen < 0)
namelen = strlen(name);

/*
* If path is non-empty, and name is equal to path or a
* subdirectory of path, name should be excluded, because
* it's inside a directory which is already known to be
* excluded and was previously left in check->path.
*/
if (path->len &&
path->len <= namelen &&
!memcmp(name, path->buf, path->len) &&
(!name[path->len] || name[path->len] == '/'))
return check->exclude;

strbuf_setlen(path, 0);
for (i = 0; name[i]; i++) {
int ch = name[i];

if (ch == '/') {
int dt = DT_DIR;
exclude = last_exclude_matching(check->dir,
path->buf, &dt);
if (exclude) {
check->exclude = exclude;
return exclude;
}
}
strbuf_addch(path, ch);
}

/* An entry in the index; cannot be a directory with subentries */
strbuf_setlen(path, 0);

return last_exclude_matching(check->dir, name, dtype);
}

Expand Down
6 changes: 3 additions & 3 deletions dir.h
Expand Up @@ -110,9 +110,11 @@ struct dir_struct {
*
* exclude_stack points to the top of the exclude_stack, and
* basebuf contains the full path to the current
* (sub)directory in the traversal.
* (sub)directory in the traversal. Exclude points to the
* matching exclude struct if the directory is excluded.
*/
struct exclude_stack *exclude_stack;
struct exclude *exclude;
char basebuf[PATH_MAX];
};

Expand Down Expand Up @@ -156,8 +158,6 @@ extern int match_pathname(const char *, int,
*/
struct path_exclude_check {
struct dir_struct *dir;
struct exclude *exclude;
struct strbuf path;
};
extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *);
extern void path_exclude_check_clear(struct path_exclude_check *);
Expand Down

0 comments on commit 95c6f27

Please sign in to comment.