Skip to content

Commit

Permalink
Merge pull request #802 from kiwix/include_tags_in_free_text_library_…
Browse files Browse the repository at this point in the history
…search

Included tags in free text catalog search
  • Loading branch information
mgautierfr committed Aug 10, 2022
2 parents 8e6d893 + cff143b commit d737db6
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 5 deletions.
7 changes: 6 additions & 1 deletion src/library.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,13 @@ void Library::updateBookDB(const Book& book)
indexer.index_text(normalizeText(book.getName()), 1, "XN");
indexer.index_text(normalizeText(book.getCategory()), 1, "XC");

for ( const auto& tag : split(normalizeText(book.getTags()), ";") )
for ( const auto& tag : split(normalizeText(book.getTags()), ";") ) {
doc.add_boolean_term("XT" + tag);
if ( tag[0] != '_' ) {
indexer.increase_termpos();
indexer.index_text(tag);
}
}

const std::string idterm = "Q" + book.getId();
doc.add_boolean_term(idterm);
Expand Down
4 changes: 2 additions & 2 deletions test/data/library.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
publisher="Kiwix"
date="2020-03-31"
name="wikipedia_en_ray_charles"
tags="unittest;wikipedia;_category:wikipedia;_pictures:no;_videos:no;_details:no;_ftindex:yes"
tags="public_tag_without_a_value;_private_tag_without_a_value;wikipedia;_category:wikipedia;_pictures:no;_videos:no;_details:no;_ftindex:yes"
articleCount="284"
mediaCount="2"
size="556"
Expand All @@ -28,7 +28,7 @@
publisher="Kiwix"
date="2020-03-31"
name="wikipedia_ru_ray_charles"
tags="unittest;wikipedia;_pictures:no;_videos:no;_details:no"
tags="public_tag_with_a_value:value_of_a_public_tag;_private_tag_with_a_value:value_of_a_private_tag;wikipedia;_pictures:no;_videos:no;_details:no"
articleCount="284"
mediaCount="2"
size="123"
Expand Down
3 changes: 3 additions & 0 deletions test/library.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -550,9 +550,11 @@ TEST_F(LibraryTest, filterByQuery)

// by default, filtering by query assumes partial query
EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki"),
"An example ZIM archive", // due to the "wikibooks" tag
"Encyclopédie de la Tunisie",
"Granblue Fantasy Wiki",
"Géographie par Wikipédia",
"Mathématiques", // due to the "wikipedia" tag
"Ray Charles",
"Wikiquote"
);
Expand Down Expand Up @@ -714,6 +716,7 @@ TEST_F(LibraryTest, filterByMultipleCriteria)
EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki").creator("Wikipedia"),
"Encyclopédie de la Tunisie",
"Géographie par Wikipédia",
"Mathématiques", // due to the "wikipedia" tag
"Ray Charles"
);

Expand Down
74 changes: 72 additions & 2 deletions test/library_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ std::string maskVariableOPDSFeedData(std::string s)
" <name>wikipedia_en_ray_charles</name>\n" \
" <flavour></flavour>\n" \
" <category>wikipedia</category>\n" \
" <tags>unittest;wikipedia;_category:wikipedia;_pictures:no;_videos:no;_details:no;_ftindex:yes</tags>\n" \
" <tags>public_tag_without_a_value;_private_tag_without_a_value;wikipedia;_category:wikipedia;_pictures:no;_videos:no;_details:no;_ftindex:yes</tags>\n" \
" <articleCount>284</articleCount>\n" \
" <mediaCount>2</mediaCount>\n" \
" <link rel=\"http://opds-spec.org/image/thumbnail\"\n" \
Expand All @@ -131,7 +131,7 @@ std::string maskVariableOPDSFeedData(std::string s)
" <name>wikipedia_ru_ray_charles</name>\n" \
" <flavour></flavour>\n" \
" <category></category>\n" \
" <tags>unittest;wikipedia;_pictures:no;_videos:no;_details:no</tags>\n" \
" <tags>public_tag_with_a_value:value_of_a_public_tag;_private_tag_with_a_value:value_of_a_private_tag;wikipedia;_pictures:no;_videos:no;_details:no</tags>\n" \
" <articleCount>284</articleCount>\n" \
" <mediaCount>2</mediaCount>\n" \
" <link type=\"text/html\" href=\"/ROOT/zimfile\" />\n" \
Expand Down Expand Up @@ -711,3 +711,73 @@ TEST_F(LibraryServerTest, catalog_v2_partial_entries)
"</feed>\n"
);
}

// Test helper: requests /ROOT/catalog/search?q=SEARCH_TERM through zfs1_ and
// expects an HTTP 200 response whose body, after being passed through
// maskVariableOPDSFeedData(), equals an OPDS feed announcing RESULT_COUNT
// results and containing exactly OPDS_ENTRIES.
//
//   SEARCH_TERM  - must be a string literal: it is joined by adjacent-literal
//                  concatenation into both the request URL and the expected
//                  <title> element.
//   RESULT_COUNT - stringified with '#' into <totalResults> and <itemsPerPage>.
//   OPDS_ENTRIES - string literal(s) with the expected feed entries; may be
//                  left empty, in which case no entries are expected.
//
// The macro expands to a braced block, so it can be used with or without a
// trailing semicolon. Keep comments out of the continuation lines below: a
// '//' comment placed before a trailing backslash would absorb the next line.
#define EXPECT_SEARCH_RESULTS(SEARCH_TERM, RESULT_COUNT, OPDS_ENTRIES) \
{ \
const auto r = zfs1_->GET("/ROOT/catalog/search?q=" SEARCH_TERM); \
EXPECT_EQ(r->status, 200); \
EXPECT_EQ(maskVariableOPDSFeedData(r->body), \
OPDS_FEED_TAG \
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n" \
" <title>Filtered zims (q=" SEARCH_TERM ")</title>\n" \
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n" \
" <totalResults>" #RESULT_COUNT "</totalResults>\n" \
" <startIndex>0</startIndex>\n" \
" <itemsPerPage>" #RESULT_COUNT "</itemsPerPage>\n" \
CATALOG_LINK_TAGS \
\
OPDS_ENTRIES \
\
"</feed>\n" \
); \
}

// Free-text catalog search must find books through their public tags,
// matching either the tag name or the tag value, in full or by prefix.
TEST_F(LibraryServerTest, catalog_search_includes_public_tags)
{
  // Full tag name, tag has no value
  EXPECT_SEARCH_RESULTS("public_tag_without_a_value", 1, RAY_CHARLES_CATALOG_ENTRY);

  // Full tag name, tag has a value
  EXPECT_SEARCH_RESULTS("public_tag_with_a_value", 1, UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY);

  // A prefix of the tag name is enough: both books are returned
  EXPECT_SEARCH_RESULTS("public_tag", 2, RAY_CHARLES_CATALOG_ENTRY UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY);

  // Full tag value
  EXPECT_SEARCH_RESULTS("value_of_a_public_tag", 1, UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY);

  // A prefix of the tag value is enough as well
  EXPECT_SEARCH_RESULTS("value_of", 1, UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY);
}

// Shorthand for a catalog search expected to come back empty: a result count
// of 0 and no OPDS entries (the third macro argument is deliberately empty).
#define EXPECT_ZERO_RESULTS(SEARCH_TERM) EXPECT_SEARCH_RESULTS(SEARCH_TERM, 0, )

// Tag matching is not a generic substring search: text taken from the middle
// or the tail of a tag name/value must not produce any result.
TEST_F(LibraryServerTest, catalog_search_on_tags_is_not_an_any_substring_match)
{
  EXPECT_ZERO_RESULTS("tag_with");              // inner part of a tag name
  EXPECT_ZERO_RESULTS("alue_of_a_public_tag");  // tag value minus its first letter
}

// Tags whose name begins with an underscore are hidden: searching for such a
// tag's name (with or without the underscore) or its value must find nothing.
TEST_F(LibraryServerTest, catalog_search_excludes_hidden_tags)
{
  EXPECT_ZERO_RESULTS("_private_tag_without_a_value");
  EXPECT_ZERO_RESULTS("private_tag_without_a_value");
  EXPECT_ZERO_RESULTS("value_of_a_private_tag");
}

// Last user of the shorthand macro is above; drop it here.
#undef EXPECT_ZERO_RESULTS

// The search-result helper macro is only meant for the catalog search tests
// above; undefine it so it does not remain defined past this point.
#undef EXPECT_SEARCH_RESULTS

0 comments on commit d737db6

Please sign in to comment.