Skip to content

Commit

Permalink
gvfs: use a hashmap to back gitignore/sparse checkout
Browse files Browse the repository at this point in the history
The always_exclude mechanism is used both to implement the excludes
defined in, say, .gitignore, and the sparse checkout feature.

Due to its use of wildcards, any sizable list of entries will slow down
every path-related operation dramatically.

This is particularly noticeable with GVFS, where the sparse-checkout
file is populated and updated frequently by the backend.

In GVFS' case, though, the use of wildcards is limited to same-directory
wildcards (i.e. something of the form `/this/directory/*`). Therefore, we
can speed up everything dramatically by using exact matches backed by a
hashmap.

Note: the use of a hashmap disallows more free-form sparse-checkout
entries such as `*.txt`. Using a hashmap restricts us to only allow
exact file names, same-directory, and negative entries in the
sparse-checkout file, but with GVFS that is all we need.

This patch introduces the option to make it so.

Signed-off-by: Eric Mecklenburg <ermeckle@microsoft.com>
  • Loading branch information
ermeckle authored and dscho committed May 12, 2017
1 parent 4f2ed6d commit 75fb913
Show file tree
Hide file tree
Showing 5 changed files with 240 additions and 9 deletions.
6 changes: 6 additions & 0 deletions Documentation/config.txt
Expand Up @@ -920,6 +920,12 @@ core.gvfs::
is first accessed and brought down to the client. Git.exe can't
currently tell the first access vs subsequent accesses so this
flag just blocks them from occurring at all.
GVFS_ALWAYS_EXCLUDE_HASHMAP::
Bit value 512
Changes always_exclude to use a hashmap to speed up lookups.
Only works with exact matches (`/pa/th/file`) and same-folder
wildcards (`/pa/th/*`), both with a leading slash, and supports
negative (`!`) entries. Behavior depends on GVFS's usage pattern
and is unlikely to work correctly otherwise.
--

core.sparseCheckout::
Expand Down
103 changes: 94 additions & 9 deletions dir.c
Expand Up @@ -16,6 +16,7 @@
#include "utf8.h"
#include "varint.h"
#include "ewah/ewok.h"
#include "gvfs.h"

/*
* Tells read_directory_recursive how a file or directory should be treated.
Expand Down Expand Up @@ -49,6 +50,22 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
int check_only, const struct pathspec *pathspec);
static int get_dtype(struct dirent *de, const char *path, int len);

/*
 * Case-sensitive equality callback for the exclude pattern hashmap.
 *
 * `a` and `b` point to `struct exclude` entries; `key` is unused.
 * Returns 0 when both patterns have the same length and the same bytes,
 * and non-zero otherwise.
 *
 * The explicit length check is required: comparing only the first
 * e1->patternlen bytes would report "/a/b" and "/a/bc" as equal whenever
 * their hashes happen to collide.
 */
static int path_hashmap_cmp(const void *a, const void *b, const void *key)
{
	const struct exclude *e1 = a;
	const struct exclude *e2 = b;

	if (e1->patternlen != e2->patternlen)
		return 1;

	return strncmp(e1->pattern, e2->pattern, e1->patternlen);
}

/*
 * Case-insensitive equality callback for the exclude pattern hashmap.
 *
 * `a` and `b` point to `struct exclude` entries; `key` is unused.
 * Returns 0 when both patterns have the same length and compare equal
 * ignoring case, and non-zero otherwise.
 *
 * Uses strncasecmp() (the bounded counterpart of the strcasecmp() call in
 * fspathcmp()) rather than the non-standard strnicmp(). The explicit
 * length check prevents a pattern that is merely a prefix of the other
 * from being treated as equal when the hashes collide.
 */
static int path_hashmap_icmp(const void *a, const void *b, const void *key)
{
	const struct exclude *e1 = a;
	const struct exclude *e2 = b;

	if (e1->patternlen != e2->patternlen)
		return 1;

	return strncasecmp(e1->pattern, e2->pattern, e1->patternlen);
}

int fspathcmp(const char *a, const char *b)
{
return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
Expand Down Expand Up @@ -844,9 +861,11 @@ struct exclude_list *add_exclude_list(struct dir_struct *dir,
* Used to set up core.excludesfile and .git/info/exclude lists.
*/
static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
int setup_hashmap,
struct sha1_stat *sha1_stat)
{
struct exclude_list *el;
int i;
/*
* catch setup_standard_excludes() that's called before
* dir->untracked is assigned. That function behaves
Expand All @@ -857,12 +876,27 @@ static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
el = add_exclude_list(dir, EXC_FILE, fname);
if (add_excludes(fname, "", 0, el, 0, sha1_stat) < 0)
die("cannot use %s as an exclude file", fname);

if (setup_hashmap && el->nr) {
hashmap_init(&el->pattern_hash,
ignore_case ? path_hashmap_icmp : path_hashmap_cmp,
el->nr);

for (i = el->nr - 1; 0 <= i; i--) {
struct exclude *x = el->excludes[i];
hashmap_entry_init(&x->ent,
ignore_case ?
strihash(x->pattern) :
strhash(x->pattern));
hashmap_add(&el->pattern_hash, &x->ent);
}
}
}

void add_excludes_from_file(struct dir_struct *dir, const char *fname)
{
dir->unmanaged_exclude_files++; /* see validate_untracked_cache() */
add_excludes_from_file_1(dir, fname, NULL);
add_excludes_from_file_1(dir, fname, 0, NULL);
}

int match_basename(const char *basename, int basenamelen,
Expand Down Expand Up @@ -948,6 +982,19 @@ int match_pathname(const char *pathname, int pathlen,
WM_PATHNAME) == 0;
}

static struct exclude *find_exclude_matching_hash(const char *pattern,
int pattern_len,
struct exclude_list *el)
{
struct exclude search;

hashmap_entry_init(&search,
ignore_case ? strihash(pattern) : strhash(pattern));
search.pattern = pattern;
search.patternlen = pattern_len;
return hashmap_get(&el->pattern_hash, &search, NULL);
}

/*
* Scan the given exclude list in reverse to see whether pathname
* should be ignored. The first match (i.e. the last on the list), if
Expand All @@ -963,6 +1010,42 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname,
struct exclude *exc = NULL; /* undecided */
int i;

if (el->pattern_hash.size) {
/*
* We cannot search for every possible rule that matches the
* current path because there are countless odd permutations
* with wildcards. Instead we search for most common cases
* and fall through to the old logic if we fail.
*/
static struct strbuf sb = STRBUF_INIT;
const char *slash;
struct exclude *match;

/* Check exact match with leading slash "/a/b/c" */
strbuf_reset(&sb);
strbuf_addch(&sb, '/');
strbuf_add(&sb, pathname, pathlen);
match = find_exclude_matching_hash(sb.buf, sb.len, el);
if (match)
return match;

/* Check wildcard match with leading slash "/a/b/ *" */
slash = strrchr(pathname, '/');
strbuf_reset(&sb);
strbuf_addch(&sb, '/');
if (slash)
strbuf_add(&sb, pathname, slash - pathname + 1);
strbuf_addch(&sb, '*');
match = find_exclude_matching_hash(sb.buf, sb.len, el);
if (match)
return match;

/* Check general wildcard "*" */
match = find_exclude_matching_hash("*", 1, el);
if (match)
return match;
}

if (!el->nr)
return NULL; /* undefined */

Expand All @@ -980,9 +1063,9 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname,

if (x->flags & EXC_FLAG_NODIR) {
if (match_basename(basename,
pathlen - (basename - pathname),
exclude, prefix, x->patternlen,
x->flags)) {
pathlen - (basename - pathname),
exclude, prefix, x->patternlen,
x->flags)) {
exc = x;
break;
}
Expand All @@ -991,8 +1074,8 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname,

assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
if (match_pathname(pathname, pathlen,
x->base, x->baselen ? x->baselen - 1 : 0,
exclude, prefix, x->patternlen, x->flags)) {
x->base, x->baselen ? x->baselen - 1 : 0,
exclude, prefix, x->patternlen, x->flags)) {
exc = x;
break;
}
Expand Down Expand Up @@ -2284,27 +2367,29 @@ static GIT_PATH_FUNC(git_path_info_exclude, "info/exclude")

void setup_standard_excludes(struct dir_struct *dir)
{
int always_exclude_hashmap = gvfs_config_is_set(GVFS_ALWAYS_EXCLUDE_HASHMAP);

dir->exclude_per_dir = ".gitignore";

/* always_exclude */
if (startup_info->have_repository) {
const char *path = git_path_info_always_exclude();
if (!access_or_warn(path, R_OK, 0))
add_excludes_from_file_1(dir, path, NULL);
add_excludes_from_file_1(dir, path, always_exclude_hashmap, NULL );
}

/* core.excludesfile defaulting to $XDG_HOME/git/ignore */
if (!excludes_file)
excludes_file = xdg_config_home("ignore");
if (excludes_file && !access_or_warn(excludes_file, R_OK, 0))
add_excludes_from_file_1(dir, excludes_file,
add_excludes_from_file_1(dir, excludes_file, 0,
dir->untracked ? &dir->ss_excludes_file : NULL);

/* per repository user preference */
if (startup_info->have_repository) {
const char *path = git_path_info_exclude();
if (!access_or_warn(path, R_OK, 0))
add_excludes_from_file_1(dir, path,
add_excludes_from_file_1(dir, path, 0,
dir->untracked ? &dir->ss_info_exclude : NULL);
}
}
Expand Down
1 change: 1 addition & 0 deletions dir.h
Expand Up @@ -17,6 +17,7 @@ struct dir_entry {
#define EXC_FLAG_NEGATIVE 16

struct exclude {
/* This must be the first element for hashmaps to work */
struct hashmap_entry ent;
/*
* This allows callers of last_exclude_matching() etc.
Expand Down
1 change: 1 addition & 0 deletions gvfs.h
Expand Up @@ -20,6 +20,7 @@
#define GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS (1 << 6)
#define GVFS_SKIP_MERGE_IN_CHECKOUT (1 << 7)
#define GVFS_DEFAULT_MERGE_OPTIONS (1 << 8)
#define GVFS_ALWAYS_EXCLUDE_HASHMAP (1 << 9)

void gvfs_load_config_value(const char *value);
int gvfs_config_is_set(int mask);
Expand Down
138 changes: 138 additions & 0 deletions t/t3009-ls-files-always-exclude-hashmap.sh
@@ -0,0 +1,138 @@
#!/bin/sh

test_description='always_exclude hashmap tests'

. ./test-lib.sh

# Run "git status -u" once with the regular always_exclude matching and
# once with GVFS_ALWAYS_EXCLUDE_HASHMAP (core.gvfs bit 512) enabled, and
# require both outputs to be identical.
#
# The caller writes .git/info/always_exclude beforehand. core.gvfs is reset
# to 0 via test_when_finished so that a failure in the middle of a test
# cannot leak the hashmap setting into subsequent tests.
compare_status_with_and_without_hashmap () {
	test_when_finished "git config --local core.gvfs 0" &&
	git status -u >standard &&
	git config --local core.gvfs 512 &&
	git status -u >with_hashmap &&
	test_cmp standard with_hashmap
}

# The two output files are listed in .gitignore so that they do not show
# up in the status output being compared.
test_expect_success 'setup' '
	mkdir -p dir1/dir2 &&
	>a &&
	>dir1/a &&
	>dir1/b &&
	>dir1/dir2/a &&
	cat >.gitignore <<\EOF
standard
with_hashmap
EOF
'

test_expect_success 'status with positive simple exact match' '
	cat >.git/info/always_exclude <<\EOF &&
a
EOF
	compare_status_with_and_without_hashmap
'

test_expect_success 'status with negative simple exact match' '
	cat >.git/info/always_exclude <<\EOF &&
!a
EOF
	compare_status_with_and_without_hashmap
'

test_expect_success 'status with matching exact match' '
	cat >.git/info/always_exclude <<\EOF &&
/dir1/a
EOF
	compare_status_with_and_without_hashmap
'

test_expect_success 'status with non-matching exact match' '
	cat >.git/info/always_exclude <<\EOF &&
/dir1/dir
EOF
	compare_status_with_and_without_hashmap
'

test_expect_success 'status with matching wildcard match' '
	cat >.git/info/always_exclude <<\EOF &&
/dir1/*
EOF
	compare_status_with_and_without_hashmap
'

test_expect_success 'status with non-matching wildcard match' '
	cat >.git/info/always_exclude <<\EOF &&
/dir1/dir3/*
EOF
	compare_status_with_and_without_hashmap
'

test_expect_success 'status with everything excluded' '
	cat >.git/info/always_exclude <<\EOF &&
*
EOF
	compare_status_with_and_without_hashmap
'

test_expect_success 'status with some excluded' '
	cat >.git/info/always_exclude <<\EOF &&
*
!/*
EOF
	compare_status_with_and_without_hashmap
'

test_expect_success 'status with less excluded' '
	cat >.git/info/always_exclude <<\EOF &&
*
!/*
!/dir1
!/dir1/*
EOF
	compare_status_with_and_without_hashmap
'

test_expect_success 'status with nothing excluded' '
	cat >.git/info/always_exclude <<\EOF &&
*
!/*
!/dir1
!/dir1/*
!/dir1/dir2
!/dir1/dir2/*
EOF
	compare_status_with_and_without_hashmap
'

test_done

0 comments on commit 75fb913

Please sign in to comment.