From 94e570861f5ff9a8096c1b56c9fca2e61cb54be5 Mon Sep 17 00:00:00 2001 From: Panu Matilainen Date: Wed, 8 May 2019 17:06:21 +0300 Subject: [PATCH] Run file classification in parallel threads Our file classification is not exactly in large quantities. Fedora's kernel-debuginfo-common has circa 25000 files, and classifying them serially takes about a minute on my rusty old T520. With parallelization this goes down to ~24s. It's all remarkably simple, except for the fact that libmagic is not thread-safe so we need separate magic handles for each of our threads. This will leak those libmagic handles on error situations, I don't see any obvious, nice way to handle that. --- build/rpmfc.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/build/rpmfc.c b/build/rpmfc.c index f36da59849..5f3f56872f 100644 --- a/build/rpmfc.c +++ b/build/rpmfc.c @@ -677,6 +677,7 @@ static void rpmfcAttributes(rpmfc fc, int ix, const char *ftype, const char *ful /* Add attributes on libmagic type & path pattern matches */ if (matches(&(*attr)->incl, ftype, path, is_executable)) { argvAddTokens(&fc->fattrs[ix], (*attr)->name); + #pragma omp critical(fahash) fattrHashAddEntry(fc->fahash, attr-fc->atypes, ix); } } @@ -1063,7 +1064,7 @@ static int initAttrs(rpmfc fc) rpmRC rpmfcClassify(rpmfc fc, ARGV_t argv, rpm_mode_t * fmode) { int msflags = MAGIC_CHECK | MAGIC_COMPRESS | MAGIC_NO_CHECK_TOKENS; - magic_t ms = NULL; + int nerrors = 0; rpmRC rc = RPMRC_FAIL; if (fc == NULL) { @@ -1094,18 +1095,23 @@ rpmRC rpmfcClassify(rpmfc fc, ARGV_t argv, rpm_mode_t * fmode) /* Build (sorted) file class dictionary. */ fc->cdict = rpmstrPoolCreate(); - ms = magic_open(msflags); + #pragma omp parallel + { + /* libmagic is not thread-safe, each thread needs to a private handle */ + magic_t ms = magic_open(msflags); + if (ms == NULL) { rpmlog(RPMLOG_ERR, _("magic_open(0x%x) failed: %s\n"), msflags, strerror(errno)); - goto exit; + #pragma omp cancel parallel } if (magic_load(ms, NULL) == -1) { rpmlog(RPMLOG_ERR, _("magic_load failed: %s\n"), magic_error(ms)); - goto exit; + #pragma omp cancel parallel } + #pragma omp for reduction(+:nerrors) for (int ix = 0; ix < fc->nfiles; ix++) { rpmsid ftypeId; const char * ftype; @@ -1148,7 +1154,8 @@ rpmRC rpmfcClassify(rpmfc fc, ARGV_t argv, rpm_mode_t * fmode) s, mode, magic_error(ms)); /* only executable files are critical to dep extraction */ if (is_executable) { - goto exit; + nerrors++; + #pragma omp cancel for } /* unrecognized non-executables get treated as "data" */ ftype = "data"; @@ -1171,21 +1178,28 @@ rpmRC rpmfcClassify(rpmfc fc, ARGV_t argv, rpm_mode_t * fmode) /* Add to file class dictionary and index array */ if (fcolor != RPMFC_WHITE && (fcolor & RPMFC_INCLUDE)) { ftypeId = rpmstrPoolId(fc->cdict, ftype, 1); + #pragma omp atomic fc->fknown++; } else { ftypeId = rpmstrPoolId(fc->cdict, "", 1); + #pragma omp atomic fc->fwhite++; } /* Pool id's start from 1, for headers we want it from 0 */ fc->fcdictx[ix] = ftypeId - 1; } - rc = RPMRC_OK; + + if (ms != NULL) + magic_close(ms); + + } /* omp parallel */ + + if (nerrors == 0) + rc = RPMRC_OK; exit: /* No more additions after this, freeze pool to minimize memory use */ rpmstrPoolFreeze(fc->cdict, 0); - if (ms != NULL) - magic_close(ms); return rc; }