Permalink
Browse files

remote-curl: auto-detect bundles when fetching refs

You can't currently fetch from a network bundle, like:

  git fetch http://example.com/foo.bundle

This patch takes the first (and biggest) step towards that
working: it auto-detects when fetching refs results in a
bundle, and automatically spools the bundle to disk and
fetches from it.

There are a few important design decisions to note:

  1. We auto-detect the bundle based on content, not based
     on a special token in the URL (like ending in
     ".bundle"). This lets the server side be flexible with
     its URLs (e.g., "http://example.com/bundle?repo=foo").

  2. When fetching refs, we don't actually fetch $URL, but
     start with $URL/info/refs, looking for smart or dumb
     http. Some servers, when file "foo.bundle" exists, will
     serve it to the client when "foo.bundle/info/refs" is
     requested. Therefore we may be "surprised" to receive a
     bundle when we thought we were just getting the list of
     refs, and need to handle it appropriately.

  3. We spool the bundle to disk, and then run "index-pack
     --fix-thin" to create a packfile. That means we will
     momentarily use twice the size of the bundle in local
     disk space. Avoiding this would mean piping directly to
     "index-pack --fix-thin".  However, if we want to be
     able to resume the transfer of the bundle after an
     interruption, then we need to save the bundle's pack.

     In theory a smart index-pack that was interrupted could
     write out its partial results along with a count of how
     many bytes it actually consumed (i.e., where to resume
     next time), and then pick up where it left off when fed
     the rest of the data. But index-pack isn't that smart
     yet, so let's start off with spooling.

No tests yet, as apache is not one of the "surprising"
servers from (2), and our test harness is based around that
(though just with this patch, you can fetch from surprising
servers like lighttpd).
  • Loading branch information...
peff committed Nov 2, 2011
1 parent 64751cf commit 17e2409df37edd0c49ef7d35f47a7695f9608900
Showing with 114 additions and 4 deletions.
  1. +114 −4 remote-curl.c
@@ -12,6 +12,7 @@
#include "credential.h"
#include "sha1-array.h"
#include "send-pack.h"
#include "bundle.h"

static struct remote *remote;
/* always ends with a trailing slash */
@@ -165,6 +166,10 @@ struct discovery {
struct ref *refs;
struct sha1_array shallow;
unsigned proto_git : 1;

char *bundle_filename;
int bundle_fd;
struct bundle_header bundle_header;
};
static struct discovery *last_discovery;

@@ -225,6 +230,22 @@ static struct ref *parse_info_refs(struct discovery *heads)
return refs;
}

static void ensure_bundle_open(struct discovery *heads)
{
if (heads->bundle_fd >= 0)
return;
heads->bundle_fd = read_bundle_header(heads->bundle_filename,
&heads->bundle_header);
if (heads->bundle_fd < 0)
die("could not read bundle from %s", url.buf);
}

static struct ref *parse_bundle_refs(struct discovery *heads)
{
ensure_bundle_open(heads);
return bundle_header_to_refs(&heads->bundle_header);
}

static void free_discovery(struct discovery *d)
{
if (d) {
@@ -233,6 +254,12 @@ static void free_discovery(struct discovery *d)
free(d->shallow.sha1);
free(d->buf_alloc);
free_refs(d->refs);
if (d->bundle_fd >= 0)
close(d->bundle_fd);
if (d->bundle_filename) {
unlink(d->bundle_filename);
free(d->bundle_filename);
}
free(d);
}
}
@@ -266,21 +293,82 @@ static int show_http_message(struct strbuf *type, struct strbuf *charset,

struct get_refs_cb_data {
struct strbuf *out;

int is_bundle;
const char *tmpname;
FILE *fh;
};

static size_t get_refs_callback(char *buf, size_t sz, size_t n, void *vdata)
{
struct get_refs_cb_data *data = vdata;
strbuf_add(data->out, buf, sz * n);
struct strbuf *out = data->out;

if (data->is_bundle > 0)
return fwrite(buf, sz, n, data->fh);

strbuf_add(out, buf, sz * n);

if (data->is_bundle == 0)
return sz * n;

data->is_bundle = is_bundle_buf(out->buf, out->len);
if (data->is_bundle > 0) {
data->fh = fopen(data->tmpname, "wb");
if (!data->fh)
die_errno("unable to open %s", data->tmpname);
if (fwrite(out->buf, 1, out->len, data->fh) < out->len)
die_errno("unable to write to %s", data->tmpname);
}
return sz * n;
}

static int get_refs_from_url(const char *url, struct strbuf *out,
struct http_get_options *options)
struct http_get_options *options,
const char *tmpname, int *is_bundle)
{
struct get_refs_cb_data data;
int ret;

data.out = out;
return http_get_callback(url, get_refs_callback, &data, 0, options);
data.is_bundle = -1;
data.tmpname = tmpname;
data.fh = NULL;

ret = http_get_callback(url, get_refs_callback, &data, 0, options);

if (data.fh) {
if (fclose(data.fh))
die_errno("unable to write to %s", data.tmpname);
}

*is_bundle = data.is_bundle > 0;
return ret;
}

static const char *url_to_bundle_tmpfile(const char *url)
{
struct strbuf buf = STRBUF_INIT;
int last_was_quoted = 1;
const char *ret;

strbuf_addstr(&buf, "tmp_bundle_");
for (; *url; url++) {
if (isalpha(*url) || isdigit(*url)) {
strbuf_addch(&buf, *url);
last_was_quoted = 0;
}
else if (!last_was_quoted) {
strbuf_addch(&buf, '_');
last_was_quoted = 1;
}
}
if (last_was_quoted)
strbuf_setlen(&buf, buf.len - 1);

ret = git_path("objects/%s", buf.buf);
strbuf_release(&buf);
return ret;
}

static struct discovery *discover_refs(const char *service, int for_push)
@@ -294,11 +382,15 @@ static struct discovery *discover_refs(const char *service, int for_push)
struct discovery *last = last_discovery;
int http_ret, maybe_smart = 0;
struct http_get_options http_options;
const char *filename;
int is_bundle;

if (last && !strcmp(service, last->service))
return last;
free_discovery(last);

filename = url_to_bundle_tmpfile(url.buf);

strbuf_addf(&refs_url, "%sinfo/refs", url.buf);
if ((starts_with(url.buf, "http://") || starts_with(url.buf, "https://")) &&
git_env_bool("GIT_SMART_HTTP", 1)) {
@@ -319,7 +411,8 @@ static struct discovery *discover_refs(const char *service, int for_push)
http_options.no_cache = 1;
http_options.keep_error = 1;

http_ret = get_refs_from_url(refs_url.buf, &buffer, &http_options);
http_ret = get_refs_from_url(refs_url.buf, &buffer, &http_options,
filename, &is_bundle);
switch (http_ret) {
case HTTP_OK:
break;
@@ -341,6 +434,7 @@ static struct discovery *discover_refs(const char *service, int for_push)
last->service = service;
last->buf_alloc = strbuf_detach(&buffer, &last->len);
last->buf = last->buf_alloc;
last->bundle_fd = -1;

strbuf_addf(&exp, "application/x-%s-advertisement", service);
if (maybe_smart &&
@@ -372,6 +466,12 @@ static struct discovery *discover_refs(const char *service, int for_push)

if (last->proto_git)
last->refs = parse_git_refs(last, for_push);
else if (is_bundle) {
if (for_push)
die("cannot push into a remote bundle");
last->bundle_filename = xstrdup(filename);
last->refs = parse_bundle_refs(last);
}
else
last->refs = parse_info_refs(last);

@@ -848,11 +948,21 @@ static int fetch_git(struct discovery *heads,
return err;
}

static int fetch_bundle(struct discovery *d,
int nr_heads, struct ref **to_fetch)
{
ensure_bundle_open(d);
return unbundle(&d->bundle_header, d->bundle_fd,
options.progress ? BUNDLE_VERBOSE : 0);
}

static int fetch(int nr_heads, struct ref **to_fetch)
{
struct discovery *d = discover_refs("git-upload-pack", 0);
if (d->proto_git)
return fetch_git(d, nr_heads, to_fetch);
else if (d->bundle_filename)
return fetch_bundle(d, nr_heads, to_fetch);
else
return fetch_dumb(nr_heads, to_fetch);
}

0 comments on commit 17e2409

Please sign in to comment.