Permalink
Browse files

Stream pkg_summary downloads via libarchive.

This splits download_summary() into an initialise function (sum_open())
and three archive_read_open() callbacks (sum_start(), sum_read(),
sum_close()).  libarchive can handle EOF and read failures, so this also
simplifies things a little.

One side-effect of this change is that we no longer need individual
progress meters for the download and the database updates (in fact they
would clash), so we now only use the libfetch progress meter.

As we are now updating the database while the file is being fetched, if
the fetch is incomplete we delete the remote summary database rather
than leaving it in an inconsistent state.

With these changes applied on 64-bit SmartOS we see a 2MB reduction in
RSS usage, and a 1.5 second faster runtime for a fresh "pkgin update".
  • Loading branch information...
jperkin committed Aug 12, 2015
1 parent 0f5cdd1 commit 72cd8345953c083cb4b79a180e8be1b38db9d8d9
Showing with 111 additions and 104 deletions.
  1. +80 −49 download.c
  2. +0 −2 messages.h
  3. +16 −4 pkgin.h
  4. +15 −49 summary.c
View
@@ -36,27 +36,26 @@
int fetchTimeout = 15; /* wait 15 seconds before timeout */
size_t fetch_buffer = 1024;
-Dlfile *
-download_summary(char *str_url, time_t *db_mtime)
+/*
+ * Open a pkg_summary and if newer than local return an open libfetch
+ * connection to it.
+ */
+Sumfile *
+sum_open(char *str_url, time_t *db_mtime)
{
- /* from pkg_install/files/admin/audit.c */
- Dlfile *file;
- char *p;
- size_t buf_len, buf_fetched;
- ssize_t cur_fetched;
- off_t statsize;
- struct url_stat st;
- struct url *url;
- fetchIO *f = NULL;
+ Sumfile *sum = NULL;
+ fetchIO *f = NULL;
+ struct url *url;
+ struct url_stat st;
url = fetchParseURL(str_url);
if (url == NULL || (f = fetchXGet(url, &st, "")) == NULL)
- return NULL;
+ goto nofetch;
if (st.size == -1) { /* could not obtain file size */
*db_mtime = 0; /* not -1, don't force update */
- return NULL;
+ goto nofetch;
}
if (st.mtime <= *db_mtime) {
@@ -65,67 +64,99 @@ download_summary(char *str_url, time_t *db_mtime)
* local summary up-to-date
*/
*db_mtime = -1;
-
- fetchIO_close(f);
-
- return NULL;
+ goto nofetch;
}
*db_mtime = st.mtime;
- if ((p = strrchr(str_url, '/')) != NULL)
- p++;
- else
- p = (char *)str_url; /* should not happen */
-
#ifndef _MINIX /* XXX: SSIZE_MAX fails under MINIX */
/* st.size is an off_t, it will be > SSIZE_MAX on 32 bits systems */
if (sizeof(st.size) == sizeof(SSIZE_MAX) && st.size > SSIZE_MAX - 1)
err(EXIT_FAILURE, "file is too large");
#endif
- buf_len = st.size;
- XMALLOC(file, sizeof(Dlfile));
- XMALLOC(file->buf, buf_len + 1);
+ XMALLOC(sum, sizeof(Sumfile));
+
+ sum->fd = f;
+ sum->url = url;
+ sum->size = st.size;
+ goto out;
+nofetch:
+ if (url)
+ fetchFreeURL(url);
+ if (f)
+ fetchIO_close(f);
+out:
+ return sum;
+}
- buf_fetched = 0;
- statsize = 0;
+/*
+ * archive_read_open open callback. As we already have an open
+ * libfetch handler all we need to do is print the download messages.
+ */
+int
+sum_start(struct archive *a, void *data)
+{
+ Sumfile *sum = data;
+ char *p;
if (!parsable) { /* human readable output */
+ if ((p = strrchr(sum->url->doc, '/')) != NULL)
+ p++;
+ else
+ p = (char *)sum->url->doc; /* should not happen */
+
printf(MSG_DOWNLOADING, p);
fflush(stdout);
-
- start_progress_meter(p, buf_len, &statsize);
+ start_progress_meter(p, sum->size, &sum->pos);
} else
printf(MSG_DOWNLOAD_START);
- while (buf_fetched < buf_len) {
- cur_fetched = fetchIO_read(f, file->buf + buf_fetched,
- fetch_buffer);
- if (cur_fetched == 0)
- errx(EXIT_FAILURE, "truncated file");
- else if (cur_fetched == -1)
- errx(EXIT_FAILURE, "failure during fetch of file: %s",
- fetchLastErrString);
-
- buf_fetched += cur_fetched;
- statsize += cur_fetched;
- }
+ return ARCHIVE_OK;
+}
+
+/*
+ * archive_read_open read callback. Read the next chunk of data from libfetch
+ * and update the read position for the progress meter.
+ */
+ssize_t
+sum_read(struct archive *a, void *data, const void **buf)
+{
+ Sumfile *sum = data;
+ ssize_t fetched;
+
+ *buf = sum->buf;
+
+ fetched = fetchIO_read(sum->fd, sum->buf, sizeof(sum->buf));
+
+ if (fetched == -1)
+ errx(EXIT_FAILURE, "failure during fetch of file: %s",
+ fetchLastErrString);
+
+ sum->pos += fetched;
+
+ return fetched;
+}
+
+/*
+ * archive_read_open close callback. Stop the progress meter and close the
+ * libfetch handler.
+ */
+int
+sum_close(struct archive *a, void *data)
+{
+ Sumfile *sum = data;
if (!parsable)
stop_progress_meter();
else
printf(MSG_DOWNLOAD_END);
- file->buf[buf_len] = '\0';
- file->size = buf_len;
-
- if (file->buf[0] == '\0')
- errx(EXIT_FAILURE, "empty download, exiting.\n");
-
- fetchIO_close(f);
+ fetchIO_close(sum->fd);
+ fetchFreeURL(sum->url);
+ XFREE(sum);
- return file;
+ return ARCHIVE_OK;
}
/*
View
@@ -121,8 +121,6 @@ in order to remove packages from the autoremove list, flag those with the `keep'
/* summary.c */
#define MSG_READING_LOCAL_SUMMARY "reading local summary...\n"
-#define MSG_UPDATING_DB "updating database: 0%%"
-#define MSG_UPDATING_DB_PCT "\b\b\b\b%3d%%"
#define MSG_CLEANING_DB_FROM_REPO "cleaning database from %s entries...\n"
#define MSG_PROCESSING_LOCAL_SUMMARY "processing local summary...\n"
#define MSG_DB_IS_UP_TO_DATE "database for %s is up-to-date\n"
View
20 pkgin.h
@@ -40,6 +40,8 @@
#include <sys/queue.h>
#endif
+#include <archive.h>
+#include <archive_entry.h>
#include <fetch.h>
#include <errno.h>
#include "messages.h"
@@ -127,10 +129,17 @@
#define TRACE(fmt...) if (tracefp != NULL) fprintf(tracefp, fmt)
-typedef struct Dlfile {
- char *buf;
+/**
+ * \struct Sumfile
+ * \brief Remote pkg_summary information
+ */
+typedef struct Sumfile {
+ fetchIO *fd;
+ struct url *url;
+ char buf[65536];
size_t size;
-} Dlfile;
+ off_t pos;
+} Sumfile;
/**
* \struct Deptree
@@ -227,7 +236,10 @@ extern FILE *tracefp;
extern Preflist **preflist;
/* download.c*/
-Dlfile *download_summary(char *, time_t *);
+Sumfile *sum_open(char *, time_t *);
+int sum_start(struct archive *, void *);
+ssize_t sum_read(struct archive *, void *, const void **);
+int sum_close(struct archive *, void *);
ssize_t download_pkg(char *, FILE *);
/* summary.c */
int update_db(int, char **);
View
@@ -36,8 +36,6 @@
#include "tools.h"
#include "pkgin.h"
-#include <archive.h>
-#include <archive_entry.h>
static const struct Summary {
const int type;
@@ -86,6 +84,7 @@ static void freecols(void);
static void free_insertlist(void);
static void insert_local_summary(FILE *);
static void insert_remote_summary(struct archive *, char *);
+static void delete_remote_tbl(struct Summary, char *);
static void prepare_insert(int, struct Summary);
int colnames(void *, int, char **, char **);
@@ -98,15 +97,14 @@ int force_fetch = 0;
static const char *const sumexts[] = { "bz2", "gz", NULL };
/*
- * Download a remote summary into memory and return an open
- * libarchive handler to it.
+ * Open a remote summary and return an open libarchive handler to it.
*/
static struct archive *
fetch_summary(char *cur_repo)
{
struct archive *a;
struct archive_entry *ae;
- Dlfile *file = NULL;
+ Sumfile *sum = NULL;
time_t sum_mtime;
int i;
char buf[BUFSIZ];
@@ -120,14 +118,14 @@ fetch_summary(char *cur_repo)
snprintf(buf, BUFSIZ, "%s/%s.%s",
cur_repo, PKG_SUMMARY, sumexts[i]);
- if ((file = download_summary(buf, &sum_mtime)) != NULL)
+ if ((sum = sum_open(buf, &sum_mtime)) != NULL)
break; /* pkg_summary found and not up-to-date */
if (sum_mtime < 0) /* pkg_summary found, but up-to-date */
return NULL;
}
- if (file == NULL)
+ if (sum == NULL)
errx(EXIT_FAILURE, MSG_COULDNT_FETCH, buf);
pkgindb_dovaquery(UPDATE_REPO_MTIME, (long long)sum_mtime, cur_repo);
@@ -137,35 +135,17 @@ fetch_summary(char *cur_repo)
if (archive_read_support_filter_all(a) != ARCHIVE_OK ||
archive_read_support_format_raw(a) != ARCHIVE_OK ||
- archive_read_open_memory(a, file->buf, file->size) != ARCHIVE_OK)
- errx(EXIT_FAILURE, "Cannot open in-memory pkg_summary: %s",
+ archive_read_open(a, sum, sum_start, sum_read, sum_close) != ARCHIVE_OK)
+ errx(EXIT_FAILURE, "Cannot open pkg_summary: %s",
archive_error_string(a));
if (archive_read_next_header(a, &ae) != ARCHIVE_OK)
- errx(EXIT_FAILURE, "Cannot read in-memory pkg_summary: %s",
+ errx(EXIT_FAILURE, "Cannot read pkg_summary: %s",
archive_error_string(a));
return a;
}
-/**
- * progress percentage
- */
-static void
-progress(char c)
-{
- const char *alnum = ALNUM;
- int i, alnumlen = strlen(alnum);
- float percent = 0;
-
- for (i = 0; i < alnumlen; i++)
- if (c == alnum[i])
- percent = ((float)(i + 1)/ (float)alnumlen) * 100;
-
- printf(MSG_UPDATING_DB_PCT, (int)percent);
- fflush(stdout);
-}
-
static void
freecols()
{
@@ -278,11 +258,12 @@ parse_entry(struct Summary sum, int pkgid, char *line)
if (check_machine_arch && strncmp(line, "MACHINE_ARCH=", 13) == 0) {
if (strncmp(CHECK_MACHINE_ARCH, val,
strlen(CHECK_MACHINE_ARCH))) {
+ alarm(0); /* Stop the progress meter */
printf(MSG_ARCH_DONT_MATCH, val, CHECK_MACHINE_ARCH);
if (!check_yesno(DEFAULT_NO))
exit(EXIT_FAILURE);
check_machine_arch = 0;
- printf("\r"MSG_UPDATING_DB);
+ alarm(1); /* Restart progress XXX: UPDATE_INTERVAL */
}
return;
}
@@ -364,13 +345,6 @@ parse_entry(struct Summary sum, int pkgid, char *line)
*v++ = '\0';
add_to_slist("PKGNAME", val);
add_to_slist("PKGVERS", v);
-
- /*
- * Update progress counter based on position
- * in alphabet of first PKGNAME character.
- */
- if (!parsable)
- progress(val[0]);
} else
add_to_slist(cols.name[i], val);
@@ -451,11 +425,6 @@ insert_remote_summary(struct archive *a, char *cur_repo)
pkgindb_doquery("BEGIN;", NULL, NULL);
- if (!parsable) {
- printf(MSG_UPDATING_DB);
- fflush(stdout);
- }
-
/*
* Main loop. Read in archive, split into package records and parse
* each entry, then insert packge. If we are in the middle of a
@@ -527,14 +496,11 @@ insert_remote_summary(struct archive *a, char *cur_repo)
pkgindb_doquery("COMMIT;", NULL, NULL);
- if (!parsable) {
- printf("\n");
- fflush(stdout);
- }
-
- if (r != ARCHIVE_OK)
+ if (r != ARCHIVE_OK) {
+ delete_remote_tbl(sumsw[REMOTE_SUMMARY], cur_repo);
errx(EXIT_FAILURE, "Short read of pkg_summary: %s",
archive_error_string(a));
+ }
archive_read_close(a);
}
@@ -685,6 +651,8 @@ update_remotedb(void)
/* loop through PKG_REPOS */
for (prepos = pkg_repos; *prepos != NULL; prepos++) {
+ printf(MSG_PROCESSING_REMOTE_SUMMARY, *prepos);
+
/* load remote pkg_summary */
if ((a = fetch_summary(*prepos)) == NULL) {
printf(MSG_DB_IS_UP_TO_DATE, *prepos);
@@ -701,8 +669,6 @@ update_remotedb(void)
cleaned = 1;
}
- printf(MSG_PROCESSING_REMOTE_SUMMARY, *prepos);
-
/* delete remote* associated to this repository */
delete_remote_tbl(sumsw[REMOTE_SUMMARY], *prepos);
/* update remote* table for this repository */

0 comments on commit 72cd834

Please sign in to comment.