Skip to content

Commit

Permalink
Replace hardlink discovery rpmhash with STL
Browse files Browse the repository at this point in the history
This was another tricky one due to various things: the behavior here
depends on an undocumented rpmhash implementation detail, namely that
multiple values per key are preserved in the insertion order. This is
not true for unordered_multimap, the order is implementation defined.
Also unlike rpmhash, unordered_multimap does not have a method for
retrieving the number of keys, so whether hardlinks were discovered
needs to be tracked differently.

We avoid both of these problems by realizing that the arrays generated
in the second step are exactly the same as the ones we already calculated
in the first round. So we collect the indexes into the smart pointer vectors
already in the discovery stage, utilizing .emplace() to avoid unnecessary
instantiation/destruction or extra lookup. With that, we know there are
hardlinks in the file set if any key has more than one index associated.
A vector obviously keeps its order when pushing back to it. Finally,
we save a round of data structure copying by just transferring the
relevant vectors to the file-index-keyed hash we use for hardlink lookups
elsewhere, and the compiler takes care of all the bookkeeping.
  • Loading branch information
pmatilai committed May 21, 2024
1 parent 815f917 commit 080fa89
Showing 1 changed file with 32 additions and 38 deletions.
70 changes: 32 additions & 38 deletions lib/rpmfi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1364,35 +1364,25 @@ static int indexSane(rpmtd xd, rpmtd yd, rpmtd zd)
struct fileid_s {
rpm_dev_t id_dev;
rpm_ino_t id_ino;
};

#define HASHTYPE fileidHash
#define HTKEYTYPE struct fileid_s
#define HTDATATYPE int
#include "rpmhash.H"
#include "rpmhash.C"
#undef HASHTYPE
#undef HTKEYTYPE
#undef HTDATATYPE

/* Hash a dev:ino pair: the inode number plus the device number shifted 16 bits each way */
static unsigned int fidHashFunc(struct fileid_s a)
{
    const auto devUp = a.id_dev << 16;
    const auto devDown = a.id_dev >> 16;

    return a.id_ino + devUp + devDown;
}
bool operator == (const fileid_s & other) const {
return (id_dev == other.id_dev && id_ino == other.id_ino);
}
};

/* Compare two dev:ino pairs: 0 when both fields are equal, 1 otherwise */
static int fidCmp(struct fileid_s a, struct fileid_s b)
{
    return (a.id_dev != b.id_dev) || (a.id_ino != b.id_ino);
}
struct fidHash {
size_t operator() (const fileid_s & fid) const {
return fid.id_ino + (fid.id_dev<<16) + (fid.id_dev>>16);
}
};

static void rpmfilesBuildNLink(rpmfiles fi, Header h)
{
struct fileid_s f_id;
fileidHash files;
std::unordered_map<fileid_s,std::shared_ptr<hardlinks>,fidHash> files;
rpm_dev_t * fdevs = NULL;
struct rpmtd_s td;
int fc = 0;
int totalfc = rpmfilesFC(fi);
bool havelinks = false;

if (!fi->finodes)
return;
Expand All @@ -1401,40 +1391,44 @@ static void rpmfilesBuildNLink(rpmfiles fi, Header h)
if (!fdevs)
return;

files = fileidHashCreate(totalfc, fidHashFunc, fidCmp, NULL, NULL);
/* Collect file indexes sharing the same dev:ino pair (if any) */
for (int i=0; i < totalfc; i++) {
if (!S_ISREG(rpmfilesFMode(fi, i)) ||
(rpmfilesFFlags(fi, i) & RPMFILE_GHOST) ||
fi->finodes[i] <= 0) {
continue;
}
fc++;
f_id.id_dev = fdevs[i];
f_id.id_ino = fi->finodes[i];
fileidHashAddEntry(files, f_id, i);
fileid_s f_id = { fdevs[i], fi->finodes[i] };
/* Shared pointer vector, this will be reused below */
auto entry = files.emplace(
std::make_pair(f_id, std::make_shared<hardlinks>())).first;
entry->second->push_back(i);
if (entry->second->size() > 1)
havelinks = true;
}
if (fileidHashNumKeys(files) != fc) {
/* Hard links */

/* Collect the hardlink sets into a hash keyed by file index */
if (havelinks) {
fi->nlinks = new nlinkHash {};
for (int i=0; i < totalfc; i++) {
int fcnt;
int * data;
if (!S_ISREG(rpmfilesFMode(fi, i)) ||
(rpmfilesFFlags(fi, i) & RPMFILE_GHOST)) {
continue;
}
f_id.id_dev = fdevs[i];
f_id.id_ino = fi->finodes[i];
fileidHashGetEntry(files, f_id, &data, &fcnt, NULL);
if (fcnt > 1 && fi->nlinks->find(i) == fi->nlinks->end()) {
auto ixs = std::make_shared<hardlinks> (data, data+fcnt);
for (int j = 0; j < fcnt; j++)
fi->nlinks->insert({data[j], ixs});

/* Transfer the hardlink sets from above to the new hash */
fileid_s f_id = { fdevs[i], fi->finodes[i] };
auto entry = files.find(f_id);
if (entry != files.end()) {
auto const & links = entry->second;
if (links->size() > 1) {
for (int j : (*links))
fi->nlinks->insert({j, links});
}
}
}
}
_free(fdevs);
files = fileidHashFree(files);
err:
return;
}
Expand Down

0 comments on commit 080fa89

Please sign in to comment.