Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a streaming indexer when fetching #628

Closed
wants to merge 12 commits into from
138 changes: 66 additions & 72 deletions examples/network/fetch.c
Expand Up @@ -3,95 +3,89 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>

static int rename_packfile(char *packname, git_indexer *idx)
{
char path[GIT_PATH_MAX], oid[GIT_OID_HEXSZ + 1], *slash;
int ret;

strcpy(path, packname);
slash = strrchr(path, '/');

if (!slash)
return GIT_EINVALIDARGS;

memset(oid, 0x0, sizeof(oid));
// The name of the packfile is given by it's hash which you can get
// with git_indexer_hash after the index has been written out to
// disk. Rename the packfile to its "real" name in the same
// directory as it was originally (libgit2 stores it in the folder
// where the packs go, so a rename in place is the right thing to do here
git_oid_fmt(oid, git_indexer_hash(idx));
ret = sprintf(slash + 1, "pack-%s.pack", oid);
if(ret < 0)
return GIT_EOSERR;
// State handed to the download() worker thread; the worker reports its
// outcome back through the ret and finished fields.
struct dl_data {
// Remote the worker connects to and downloads from
git_remote *remote;
// Passed to git_remote_download, which updates the amount of received data
git_off_t *bytes;
// Passed to git_remote_download, which updates the indexer stats
git_indexer_stats *stats;
// Outcome written by the worker: 0 on success, -1 on failure
int ret;
// Set to 1 by the worker right before it exits (on success or failure)
int finished;
};

printf("Renaming pack to %s\n", path);
return rename(packname, path);
// Worker thread entry point. ptr is the struct dl_data describing the
// job: the remote to talk to and where to report progress. The outcome
// is stored in the structure's ret field (0 on success, -1 when either
// connecting or downloading fails) and a pointer to that field is the
// thread's exit value.
static void *download(void *ptr)
{
	struct dl_data *job = ptr;
	int status = -1;

	// Connect for fetching first; only if that works, download the
	// packfile and index it. The download call updates the amount of
	// received data and the indexer stats, which lets the caller
	// inform the user about progress.
	if (git_remote_connect(job->remote, GIT_DIR_FETCH) >= 0 &&
	    git_remote_download(job->remote, job->bytes, job->stats) >= 0)
		status = 0;

	// Record the result, then flag the worker as done.
	job->ret = status;
	job->finished = 1;
	pthread_exit(&job->ret);
}

int fetch(git_repository *repo, int argc, char **argv)
{
git_remote *remote = NULL;
git_indexer *idx = NULL;
git_off_t bytes = 0;
git_indexer_stats stats;
int error;
char *packname = NULL;
pthread_t worker;
struct dl_data data;

// Get the remote and connect to it
// Figure out whether it's a named remote or a URL
printf("Fetching %s\n", argv[1]);
error = git_remote_new(&remote, repo, argv[1], NULL);
if (error < GIT_SUCCESS)
return error;

error = git_remote_connect(remote, GIT_DIR_FETCH);
if (error < GIT_SUCCESS)
return error;

// Download the packfile from the server. As we don't know its hash
// yet, it will get a temporary filename
error = git_remote_download(&packname, remote);
if (error < GIT_SUCCESS)
return error;

// No error and a NULL packname means no packfile was needed
if (packname != NULL) {
printf("The packname is %s\n", packname);

// Create a new instance indexer
error = git_indexer_new(&idx, packname);
if (error < GIT_SUCCESS)
return error;

// This should be run in paralel, but it'd be too complicated for the example
error = git_indexer_run(idx, &stats);
if (error < GIT_SUCCESS)
return error;

printf("Received %d objects\n", stats.total);

// Write the index file. The index will be stored with the
// correct filename
error = git_indexer_write(idx);
if (error < GIT_SUCCESS)
return error;

error = rename_packfile(packname, idx);
if (error < GIT_SUCCESS)
return error;
if (git_remote_load(&remote, repo, argv[1]) < 0) {
if (git_remote_new(&remote, repo, argv[1], NULL) < 0)
return -1;
}

// Set up the information for the background worker thread
data.remote = remote;
data.bytes = &bytes;
data.stats = &stats;
data.ret = 0;
data.finished = 0;
memset(&stats, 0, sizeof(stats));

pthread_create(&worker, NULL, download, &data);

// Loop while the worker thread is still running. Here we show processed
// and total objects in the pack and the amount of received
// data. Most frontends will probably want to show a percentage and
// the download rate.
do {
usleep(10000);
printf("\rReceived %d/%d objects in %d bytes", stats.processed, stats.total, bytes);
} while (!data.finished);
printf("\rReceived %d/%d objects in %d bytes\n", stats.processed, stats.total, bytes);

// Update the references in the remote's namespace to point to the
// right commits. This may be needed even if there was no packfile
// to download, which can happen e.g. when the branches have been
// changed but all the needed objects are available locally.
error = git_remote_update_tips(remote);
if (error < GIT_SUCCESS)
return error;
if (git_remote_update_tips(remote) < 0)
return -1;

free(packname);
git_indexer_free(idx);
git_remote_free(remote);

return GIT_SUCCESS;
return 0;

on_error:
git_remote_free(remote);
return -1;
}
55 changes: 55 additions & 0 deletions examples/network/index-pack.c
Expand Up @@ -12,6 +12,61 @@ int index_cb(const git_indexer_stats *stats, void *data)
}

/**
 * Index a packfile by feeding it to the streaming indexer in small chunks.
 *
 * The file named by argv[1] is read 512 bytes at a time and each chunk is
 * handed to the indexer, printing the progress after every chunk. Once the
 * whole file has been consumed the indexer is finalized and the packfile's
 * hash is printed.
 *
 * @param repo not used by this function
 * @param argc number of entries in argv; must be at least 2
 * @param argv argv[1] is the path of the packfile to index
 * @return EXIT_FAILURE when no packfile was given, -1 when the indexer
 * cannot be created or the file cannot be opened or read, otherwise the
 * result of the last indexer call (negative on failure)
 */
int index_pack(git_repository *repo, int argc, char **argv)
{
	git_indexer_stream *idx;
	git_indexer_stats stats = {0, 0};
	int error, fd;
	char hash[GIT_OID_HEXSZ + 1] = {0};
	ssize_t read_bytes;
	char buf[512];

	if (argc < 2) {
		fprintf(stderr, "I need a packfile\n");
		return EXIT_FAILURE;
	}

	if (git_indexer_stream_new(&idx, ".git") < 0) {
		puts("bad idx");
		return -1;
	}

	if ((fd = open(argv[1], 0)) < 0) {
		// Report first so errno is still the one set by open(), then
		// release the indexer created above instead of leaking it.
		perror("open");
		git_indexer_stream_free(idx);
		return -1;
	}

	do {
		read_bytes = read(fd, buf, sizeof(buf));
		if (read_bytes < 0)
			break;

		// Note that the final, zero-length read at end of file is
		// passed on to the indexer as well.
		if ((error = git_indexer_stream_add(idx, buf, read_bytes, &stats)) < 0)
			goto cleanup;

		printf("\rIndexing %d of %d", stats.processed, stats.total);
	} while (read_bytes > 0);

	if (read_bytes < 0) {
		error = -1;
		perror("failed reading");
		goto cleanup;
	}

	if ((error = git_indexer_stream_finalize(idx, &stats)) < 0)
		goto cleanup;

	printf("\rIndexing %d of %d\n", stats.processed, stats.total);

	// The hash is only asked for after the indexer has been finalized.
	git_oid_fmt(hash, git_indexer_stream_hash(idx));
	puts(hash);

cleanup:
	close(fd);
	git_indexer_stream_free(idx);
	return error;
}

int index_pack_old(git_repository *repo, int argc, char **argv)
{
git_indexer *indexer;
git_indexer_stats stats;
Expand Down
46 changes: 39 additions & 7 deletions include/git2/indexer.h
Expand Up @@ -13,16 +13,48 @@
GIT_BEGIN_DECL

/**
* This is passed as the first argument to the callback to allow the
* user to see the progress.
* Create a new streaming indexer instance
*
* @param out where to store the indexer instance
* @param gitdir path to the gitdir (metadata directory)
*/
GIT_EXTERN(int) git_indexer_stream_new(git_indexer_stream **out, const char *gitdir);

/**
* Add data to the indexer
*
* @param idx the indexer
* @param data the data to add
* @param size the size of the data
* @param stats stat storage
*/
typedef struct git_indexer_stats {
unsigned int total;
unsigned int processed;
} git_indexer_stats;
GIT_EXTERN(int) git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t size, git_indexer_stats *stats);

/**
* Finalize the pack and index
*
* Resolve any pending deltas and write out the index file
*
* @param idx the indexer
* @param stats stat storage
*/
GIT_EXTERN(int) git_indexer_stream_finalize(git_indexer_stream *idx, git_indexer_stats *stats);

/**
* Get the packfile's hash
*
* A packfile's name is derived from the sorted hashing of all object
* names. This is only correct after the index has been finalized.
*
* @param idx the indexer instance
*/
GIT_EXTERN(const git_oid *) git_indexer_stream_hash(git_indexer_stream *idx);

typedef struct git_indexer git_indexer;
/**
* Free the indexer and its resources
*
* @param idx the indexer to free
*/
GIT_EXTERN(void) git_indexer_stream_free(git_indexer_stream *idx);

/**
* Create a new indexer instance
Expand Down
2 changes: 1 addition & 1 deletion include/git2/remote.h
Expand Up @@ -150,7 +150,7 @@ GIT_EXTERN(int) git_remote_ls(git_remote *remote, git_headlist_cb list_cb, void
* @param filename where to store the temporary filename
* @return GIT_SUCCESS or an error code
*/
GIT_EXTERN(int) git_remote_download(char **filename, git_remote *remote);
GIT_EXTERN(int) git_remote_download(git_remote *remote, git_off_t *bytes, git_indexer_stats *stats);

/**
* Check whether the remote is connected
Expand Down
9 changes: 9 additions & 0 deletions include/git2/types.h
Expand Up @@ -171,6 +171,15 @@ typedef struct git_remote git_remote;

typedef struct git_remote_head git_remote_head;

/** Information about the progress of the indexer */
typedef struct git_indexer_stats {
/** Total number of objects in the pack */
unsigned int total;
/** Number of objects processed so far */
unsigned int processed;
} git_indexer_stats;

typedef struct git_indexer git_indexer;
typedef struct git_indexer_stream git_indexer_stream;

/** @} */
GIT_END_DECL

Expand Down