Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce addAlias method. #833

Merged
merged 7 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/zim/item.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ namespace zim

#ifdef ZIM_PRIVATE
cluster_index_type getClusterIndex() const;
blob_index_type getBlobIndex() const;
#endif

private: // functions
Expand Down
30 changes: 30 additions & 0 deletions include/zim/writer/creator.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,36 @@ namespace zim
const std::string& targetpath,
const Hints& hints = Hints());


/**
* Add a alias of a existing entry.
*
* The existing entry pointed by `targetPath` is cloned and updated with
* `path` and `title`.
*
* The alias entry will shared the same type (redirection or item)
* and namespace than `targetPath`.
*
* If the `targetPath` is a item, the new entry will be item pointing
* to the same data than `targetPath` item. (Not a redirection to `targetPath`).
* However, the alias entry is not counted in the media type counter
* and it is not fulltext indexed (only title indexed).
mgautierfr marked this conversation as resolved.
Show resolved Hide resolved
*
* Hints can be given to influence creator handling (front article, ...)
* as it is done for redirection.
*
* @param path the path of the alias
* @param title the title of the alias
* @param targetPath the path of the aliased entry.
* @param hints hints associated to the alias.
*/
void addAlias(
const std::string& path,
const std::string& title,
const std::string& targetPath,
const Hints& hints = Hints()
);

/**
* Finalize the zim creation.
*/
Expand Down
5 changes: 5 additions & 0 deletions src/item.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,8 @@ cluster_index_type Item::getClusterIndex() const
{
return m_dirent->getClusterNumber().v;
}

blob_index_type Item::getBlobIndex() const
{
return m_dirent->getBlobNumber().v;
}
19 changes: 19 additions & 0 deletions src/writer/_dirent.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ namespace zim
ns(ns)
{};
Redirect(Redirect&& r) = default;
Redirect(const Redirect& r) = default;
~Redirect() {};
TinyString targetPath;
NS ns;
Expand Down Expand Up @@ -96,6 +97,21 @@ namespace zim
resolved(std::move(r)),
tag(DirentInfo::RESOLVED)
{}
DirentInfo(const DirentInfo& other):
tag(other.tag)
{
switch (tag) {
case DIRECT:
new(&direct) Direct(other.direct);
break;
case REDIRECT:
new(&redirect) Redirect(other.redirect);
break;
case RESOLVED:
new(&resolved) Resolved(other.resolved);
break;
}
}
DirentInfo::Direct& getDirect() {
ASSERT(tag, ==, DIRECT);
return direct;
Expand Down Expand Up @@ -153,6 +169,9 @@ namespace zim
// Creator for a "redirection" dirent
Dirent(NS ns, const std::string& path, const std::string& title, NS targetNs, const std::string& targetPath);

// Creator for a "alias" dirent. Reuse the namespace of the targeted Dirent.
Dirent(const std::string& path, const std::string& title, const Dirent& target);

// Creator for "temporary" dirent, used to search for dirent in container.
// We use them in url ordered container so we only need to set the namespace and the path.
// Other value are irrelevant.
Expand Down
24 changes: 24 additions & 0 deletions src/writer/creator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,23 @@ namespace zim
data->handle(dirent, hints);
}

void Creator::addAlias(const std::string& path, const std::string& title, const std::string& targetPath, const Hints& hints)
{
checkError();
Dirent tmpDirent(NS::C, targetPath);
auto existing_dirent_it = data->dirents.find(&tmpDirent);

if (existing_dirent_it == data->dirents.end()) {
std::ostringstream ss;
ss << "Impossible to alias C/" << targetPath << " as C/" << path << std::endl;
ss << "C/" << targetPath << " doesn't exist." << std::endl;
throw InvalidEntry(ss.str());
}

auto dirent = data->createAliasDirent(path, title, **existing_dirent_it);
data->handle(dirent, hints);
}

void Creator::finishZimCreation()
{
checkError();
Expand Down Expand Up @@ -598,6 +615,13 @@ namespace zim
return dirent;
}

Dirent* CreatorData::createAliasDirent(const std::string& path, const std::string& title, const Dirent& target)
{
auto dirent = pool.getAliasDirent(path, title, target);
addDirent(dirent);
return dirent;
}

Cluster* CreatorData::closeCluster(bool compressed)
{
Cluster *cluster;
Expand Down
1 change: 1 addition & 0 deletions src/writer/creatordata.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ namespace zim
Dirent* createDirent(NS ns, const std::string& path, const std::string& mimetype, const std::string& title);
Dirent* createItemDirent(const Item* item);
Dirent* createRedirectDirent(NS ns, const std::string& path, const std::string& title, NS targetNs, const std::string& targetPath);
Dirent* createAliasDirent(const std::string& path, const std::string& title, const Dirent& target);
Cluster* closeCluster(bool compressed);

void setEntryIndexes();
Expand Down
11 changes: 11 additions & 0 deletions src/writer/dirent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,17 @@ Dirent::Dirent(NS ns, const std::string& path, const std::string& title, NS targ
frontArticle(false)
{}

Dirent::Dirent(const std::string& path, const std::string& title, const Dirent& target)
: pathTitle(path, title),
mimeType(target.mimeType),
idx(0),
info(target.info),
offset(0),
_ns(target._ns),
removed(false),
frontArticle(false)
{}

NS Dirent::getRedirectNs() const {
return info.getRedirect().ns;
}
Expand Down
24 changes: 16 additions & 8 deletions src/writer/direntPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ namespace zim
delete [] (reinterpret_cast<char*>(pool));
}

/* Return a *NOT constructed* pointer to a dirent */
Dirent* getDirentSlot() {
if (direntIndex == 0xFFFF) {
allocate_new_pool();
}
auto dirent = pools.back() + direntIndex++;
return dirent;
}

public:
DirentPool() :
Expand All @@ -65,22 +73,22 @@ namespace zim
}

Dirent* getClassicDirent(NS ns, const std::string& path, const std::string& title, uint16_t mimetype) {
if (direntIndex == 0xFFFF) {
allocate_new_pool();
}
auto dirent = pools.back() + direntIndex++;
auto dirent = getDirentSlot();
new (dirent) Dirent(ns, path, title, mimetype);
return dirent;
}

Dirent* getRedirectDirent(NS ns, const std::string& path, const std::string& title, NS targetNs, const std::string& targetPath) {
if (direntIndex == 0xFFFF) {
allocate_new_pool();
}
auto dirent = pools.back() + direntIndex++;
auto dirent = getDirentSlot();
new (dirent) Dirent(ns, path, title, targetNs, targetPath);
return dirent;
}

Dirent* getAliasDirent(const std::string& path, const std::string& title, const Dirent& target) {
auto dirent = getDirentSlot();
new (dirent) Dirent(path, title, target);
return dirent;
}
};
}
}
Expand Down
8 changes: 7 additions & 1 deletion src/writer/tinyString.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,13 @@ namespace zim
t.m_data = nullptr;
t.m_size = 0;
};
TinyString(const TinyString& t) = delete;
TinyString(const TinyString& t) :
m_data(new char[(uint16_t)t.m_size]),
m_size(t.m_size)
{
std::memcpy(m_data, t.m_data, m_size);
}

~TinyString() {
if (m_data) {
delete[] m_data;
Expand Down
29 changes: 20 additions & 9 deletions test/creator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,11 +185,13 @@ TEST(ZimCreator, createZim)
// Be sure that title order is not the same that url order
item = std::make_shared<TestItem>("foo2", "AFoo", "Foo2Content");
creator.addItem(item);
creator.addAlias("foo_bis", "The same Foo", "foo2");
creator.addMetadata("Title", "This is a title");
creator.addIllustration(48, "PNGBinaryContent48");
creator.addIllustration(96, "PNGBinaryContent96");
creator.setMainPath("foo");
creator.addRedirection("foo3", "FooRedirection", "foo"); // No a front article.
creator.addRedirection("foo3", "FooRedirection", "foo"); // Not a front article.
creator.addAlias("foo_ter", "The same redirection", "foo3", {{ zim::writer::FRONT_ARTICLE, true}}); // a clone of the previous redirect, but as a front article.
creator.addRedirection("foo4", "FooRedirection", "NoExistant", {{zim::writer::FRONT_ARTICLE, true}}); // Invalid redirection, must be removed by creator
creator.finishZimCreation();

Expand All @@ -200,15 +202,15 @@ TEST(ZimCreator, createZim)
header.read(*reader);
ASSERT_TRUE(header.hasMainPage());
#if defined(ENABLE_XAPIAN)
entry_index_type nb_entry = 12; // counter + 2*illustration + xapiantitleIndex + xapianfulltextIndex + foo + foo2 + foo3 + Title + mainPage + titleListIndexes*2
entry_index_type nb_entry = 14; // counter + 2*illustration + xapiantitleIndex + xapianfulltextIndex + foo + foo2 + foo_bis + foo3 + foo_ter + Title + mainPage + titleListIndexes*2
int xapian_mimetype = 0;
int listing_mimetype = 1;
int png_mimetype = 2;
int html_mimetype = 3;
int plain_mimetype = 4;
int plainutf8_mimetype = 5;
#else
entry_index_type nb_entry = 10; // counter + 2*illustration + foo + foo2 + foo3 + Title + mainPage + titleListIndexes*2
entry_index_type nb_entry = 12; // counter + 2*illustration + foo + foo_bis + foo2 + foo3 + foo_ter + Title + mainPage + titleListIndexes*2
int listing_mimetype = 0;
int png_mimetype = 1;
int html_mimetype = 2;
Expand All @@ -235,6 +237,12 @@ TEST(ZimCreator, createZim)
dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
test_redirect_dirent(dirent, 'C', "foo3", "FooRedirection", entry_index_t(0));

dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
test_article_dirent(dirent, 'C', "foo_bis", "The same Foo", html_mimetype, cluster_index_t(0), foo2BlobIndex);

dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
test_redirect_dirent(dirent, 'C', "foo_ter", "The same redirection", entry_index_t(0));

dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
test_article_dirent(dirent, 'M', "Counter", None, plain_mimetype, cluster_index_t(0), None);
auto counterBlobIndex = dirent->getBlobNumber();
Expand Down Expand Up @@ -297,7 +305,7 @@ TEST(ZimCreator, createZim)
clusterOffset = offset_t(reader->read_uint<offset_type>(offset_t(clusterPtrPos + 8)));
cluster = Cluster::read(*reader, clusterOffset);
ASSERT_EQ(cluster->getCompression(), Cluster::Compression::None);
ASSERT_EQ(cluster->count(), blob_index_t(nb_entry-6)); // 6 entries are either compressed or redirections
ASSERT_EQ(cluster->count(), blob_index_t(nb_entry-8)); // 7 entries are either compressed or redirections + 1 entry is a clone of content

ASSERT_EQ(header.getTitleIdxPos(), (clusterOffset+cluster->getBlobOffset(v0BlobIndex)).v);

Expand All @@ -314,20 +322,23 @@ TEST(ZimCreator, createZim)
6, 0, 0, 0,
7, 0, 0, 0,
8, 0, 0, 0,
9, 0, 0, 0
9, 0, 0, 0,
10, 0, 0, 0,
11, 0, 0, 0
#if defined(ENABLE_XAPIAN)
,10, 0, 0, 0
,11, 0, 0, 0
,12, 0, 0, 0
,13, 0, 0, 0
#endif
};
ASSERT_EQ(blob0Data, expectedBlob0Data);

blob = cluster->getBlob(v1BlobIndex);
ASSERT_EQ(blob.size(), 2*sizeof(title_index_t));
ASSERT_EQ(blob.size(), 3*sizeof(title_index_t));
std::vector<char> blob1Data(blob.data(), blob.end());
std::vector<char> expectedBlob1Data = {
1, 0, 0, 0,
0, 0, 0, 0
0, 0, 0, 0,
4, 0, 0, 0 // "The same redirection" is the 5th entry "by title order"
};
ASSERT_EQ(blob1Data, expectedBlob1Data);

Expand Down
15 changes: 11 additions & 4 deletions test/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ TEST(Search, indexFullPath)

auto item = std::make_shared<TestItem>("testPath", "text/html", "Test Article", "This is a test article");
creator.addItem(item);
creator.addAlias("anotherPath", "Another Test Article", "testPath");

creator.setMainPath("testPath");
creator.addMetadata("Title", "Test zim");
Expand All @@ -76,9 +77,14 @@ TEST(Search, indexFullPath)
auto search = searcher.search(query);

ASSERT_NE(0, search.getEstimatedMatches());
auto result = search.getResults(0, archive.getEntryCount());
ASSERT_EQ(result.begin().getPath(), "testPath");
ASSERT_EQ(result.begin().getDbData().substr(0, 2), "C/");
auto results = search.getResults(0, archive.getEntryCount());

auto result = results.begin();
ASSERT_EQ(result.getPath(), "testPath");
ASSERT_EQ(result.getDbData().substr(0, 2), "C/");

result++;
ASSERT_EQ(result, results.end());
}

TEST(Search, fulltextSnippet)
Expand Down Expand Up @@ -118,6 +124,7 @@ TEST(Search, multiSearch)
creator.addItem(std::make_shared<TestItem>("path2", "text/html", "Test Article001", "This is a test article. Super. temp0"));
creator.addItem(std::make_shared<TestItem>("path3", "text/html", "Test Article2", "This is a test article. Super."));
creator.addItem(std::make_shared<TestItem>("path4", "text/html", "Test Article23", "This is a test article. bis."));
creator.addAlias("path5", "Test Article5", "path0"); // Should not be fulltext indexed

creator.setMainPath("path0");
creator.finishZimCreation();
Expand All @@ -133,7 +140,7 @@ TEST(Search, multiSearch)
zim::Query query("test article");
auto search0 = searcher.search(query);

ASSERT_EQ(archive.getEntryCount(), (unsigned int)search0.getEstimatedMatches());
ASSERT_EQ(5U, (unsigned int)search0.getEstimatedMatches());
auto result0 = search0.getResults(0, 2);
ASSERT_EQ(result0.size(), 2);
auto it0 = result0.begin();
Expand Down
Loading