Skip to content

Commit

Permalink
read-cache: load cache extensions on a worker thread
Browse files Browse the repository at this point in the history
This patch helps address the CPU cost of loading the index by loading
the cache extensions on a worker thread in parallel with loading the cache
entries.

In some cases, loading the extensions takes longer than loading the
cache entries so this patch utilizes the new EOIE to start the thread to
load the extensions before loading all the cache entries in parallel.

This is possible because the current extensions don't access the cache
entries in the index_state structure so are OK that they don't all exist
yet.

The CACHE_EXT_TREE, CACHE_EXT_RESOLVE_UNDO, and CACHE_EXT_UNTRACKED
extensions don't even get a pointer to the index so don't have access to the
cache entries.

CACHE_EXT_LINK only uses the index_state to initialize the split index.
CACHE_EXT_FSMONITOR only uses the index_state to save the fsmonitor last
update and dirty flags.

I used p0002-read-cache.sh to generate some performance data:

	Test w/100,000 files reduced the time by 0.53%
	Test w/1,000,000 files reduced the time by 27.78%

Signed-off-by: Ben Peart <benpeart@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
benpeart authored and gitster committed Oct 11, 2018
1 parent c780b9c commit abb4bb8
Showing 1 changed file with 79 additions and 16 deletions.
95 changes: 79 additions & 16 deletions read-cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "split-index.h"
#include "utf8.h"
#include "fsmonitor.h"
#include "thread-utils.h"

/* Mask for the name length in ce_flags in the on-disk index */

Expand Down Expand Up @@ -1890,6 +1891,44 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset);

struct load_index_extensions
{
#ifndef NO_PTHREADS
pthread_t pthread;
#endif
struct index_state *istate;
const char *mmap;
size_t mmap_size;
unsigned long src_offset;
};

static void *load_index_extensions(void *_data)
{
struct load_index_extensions *p = _data;
unsigned long src_offset = p->src_offset;

while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
/* After an array of active_nr index entries,
* there can be arbitrary number of extended
* sections, each of which is prefixed with
* extension name (4-byte) and section length
* in 4-byte network byte order.
*/
uint32_t extsize = get_be32(p->mmap + src_offset + 4);
if (read_index_extension(p->istate,
p->mmap + src_offset,
p->mmap + src_offset + 8,
extsize) < 0) {
munmap((void *)p->mmap, p->mmap_size);
die(_("index file corrupt"));
}
src_offset += 8;
src_offset += extsize;
}

return NULL;
}

/* remember to discard_cache() before reading a different cache! */
int do_read_index(struct index_state *istate, const char *path, int must_exist)
{
Expand All @@ -1900,6 +1939,11 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
const char *mmap;
size_t mmap_size;
const struct cache_entry *previous_ce = NULL;
struct load_index_extensions p;
size_t extension_offset = 0;
#ifndef NO_PTHREADS
int nr_threads;
#endif

if (istate->initialized)
return istate->cache_nr;
Expand Down Expand Up @@ -1936,6 +1980,30 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
istate->cache = xcalloc(istate->cache_alloc, sizeof(*istate->cache));
istate->initialized = 1;

p.istate = istate;
p.mmap = mmap;
p.mmap_size = mmap_size;

#ifndef NO_PTHREADS
nr_threads = git_config_get_index_threads();
if (!nr_threads)
nr_threads = online_cpus();

if (nr_threads > 1) {
extension_offset = read_eoie_extension(mmap, mmap_size);
if (extension_offset) {
int err;

p.src_offset = extension_offset;
err = pthread_create(&p.pthread, NULL, load_index_extensions, &p);
if (err)
die(_("unable to create load_index_extensions thread: %s"), strerror(err));

nr_threads--;
}
}
#endif

if (istate->version == 4) {
mem_pool_init(&istate->ce_mem_pool,
estimate_cache_size_from_compressed(istate->cache_nr));
Expand All @@ -1960,22 +2028,17 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
istate->timestamp.sec = st.st_mtime;
istate->timestamp.nsec = ST_MTIME_NSEC(st);

while (src_offset <= mmap_size - the_hash_algo->rawsz - 8) {
/* After an array of active_nr index entries,
* there can be arbitrary number of extended
* sections, each of which is prefixed with
* extension name (4-byte) and section length
* in 4-byte network byte order.
*/
uint32_t extsize;
extsize = get_be32(mmap + src_offset + 4);
if (read_index_extension(istate,
mmap + src_offset,
mmap + src_offset + 8,
extsize) < 0)
goto unmap;
src_offset += 8;
src_offset += extsize;
/* if we created a thread, join it otherwise load the extensions on the primary thread */
#ifndef NO_PTHREADS
if (extension_offset) {
int ret = pthread_join(p.pthread, NULL);
if (ret)
die(_("unable to join load_index_extensions thread: %s"), strerror(ret));
}
#endif
if (!extension_offset) {
p.src_offset = src_offset;
load_index_extensions(&p);
}
munmap((void *)mmap, mmap_size);
return istate->cache_nr;
Expand Down

0 comments on commit abb4bb8

Please sign in to comment.