Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Full URI-encoding of URLs returned by kiwix-serve #890

Merged
merged 11 commits into from
Feb 9, 2023
32 changes: 24 additions & 8 deletions src/server/internalServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,17 @@ inline std::string normalizeRootUrl(std::string rootUrl)
return rootUrl.empty() ? rootUrl : "/" + rootUrl;
}

std::string
fullURL2LocalURL(const std::string& fullUrl, const std::string& rootLocation)
{
assert(rootLocation.size() > 0 && rootLocation.back() == '/');
if ( kiwix::startsWith(fullUrl, rootLocation) ) {
return fullUrl.substr(rootLocation.size() - 1);
} else {
return "";
}
}

Filter get_search_filter(const RequestContext& request, const std::string& prefix="")
{
auto filter = kiwix::Filter().valid(true).local(true);
Expand Down Expand Up @@ -404,6 +415,7 @@ InternalServer::InternalServer(Library* library,
m_addr(addr),
m_port(port),
m_root(normalizeRootUrl(root)),
m_rootPrefixOfDecodedURL(m_root + "/"),
m_nbThreads(nbThreads),
m_multizimSearchLimit(multizimSearchLimit),
m_verbose(verbose),
Expand All @@ -418,7 +430,9 @@ InternalServer::InternalServer(Library* library,
searchCache(getEnvVar<int>("KIWIX_SEARCH_CACHE_SIZE", DEFAULT_CACHE_SIZE)),
suggestionSearcherCache(getEnvVar<int>("KIWIX_SUGGESTION_SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U))),
m_customizedResources(new CustomizedResources)
{}
{
m_root = urlEncode(m_root);
}

InternalServer::~InternalServer() = default;

Expand Down Expand Up @@ -494,7 +508,7 @@ static MHD_Result staticHandlerCallback(void* cls,
}

MHD_Result InternalServer::handlerCallback(struct MHD_Connection* connection,
const char* url,
const char* fullUrl,
const char* method,
const char* version,
const char* upload_data,
Expand All @@ -505,8 +519,10 @@ MHD_Result InternalServer::handlerCallback(struct MHD_Connection* connection,
if (m_verbose.load() ) {
printf("======================\n");
printf("Requesting : \n");
printf("full_url : %s\n", url);
printf("full_url : %s\n", fullUrl);
}

const auto url = fullURL2LocalURL(fullUrl, m_rootPrefixOfDecodedURL);
RequestContext request(connection, m_root, url, method, version);

if (m_verbose.load() ) {
Expand All @@ -527,7 +543,7 @@ MHD_Result InternalServer::handlerCallback(struct MHD_Connection* connection,
printf("========== INTERNAL ERROR !! ============\n");
if (!m_verbose.load()) {
printf("Requesting : \n");
printf("full_url : %s\n", url);
printf("full_url : %s\n", fullUrl);
request.print_debug_info();
}
}
Expand Down Expand Up @@ -607,7 +623,7 @@ std::unique_ptr<Response> InternalServer::handle_request(const RequestContext& r
if (isEndpointUrl(url, "catch"))
return handle_catch(request);

std::string contentUrl = m_root + "/content" + url;
std::string contentUrl = m_root + "/content" + urlEncode(url);
const std::string query = request.get_query();
if ( ! query.empty() )
contentUrl += "?" + query;
Expand Down Expand Up @@ -1030,9 +1046,9 @@ ParameterizedMessage suggestSearchMsg(const std::string& searchURL, const std::s
std::unique_ptr<Response>
InternalServer::build_redirect(const std::string& bookName, const zim::Item& item) const
{
const auto path = kiwix::urlEncode(item.getPath());
const auto redirectUrl = m_root + "/content/" + bookName + "/" + path;
return Response::build_redirect(*this, redirectUrl);
const auto contentPath = "/content/" + bookName + "/" + item.getPath();
const auto url = m_root + kiwix::urlEncode(contentPath);
return Response::build_redirect(*this, url);
}

std::unique_ptr<Response> InternalServer::handle_content(const RequestContext& request)
Expand Down
3 changes: 2 additions & 1 deletion src/server/internalServer.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ class InternalServer {
private: // data
std::string m_addr;
int m_port;
std::string m_root;
std::string m_root; // URI-encoded
std::string m_rootPrefixOfDecodedURL; // URI-decoded
int m_nbThreads;
unsigned int m_multizimSearchLimit;
std::atomic_bool m_verbose;
Expand Down
26 changes: 4 additions & 22 deletions src/server/request_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,32 +49,15 @@ RequestMethod str2RequestMethod(const std::string& method) {
else return RequestMethod::OTHER;
}

std::string
fullURL2LocalURL(const std::string& full_url, const std::string& rootLocation)
{
if (rootLocation.empty()) {
// nothing special to handle.
return full_url;
} else if (full_url == rootLocation) {
return "/";
} else if (full_url.size() > rootLocation.size() &&
full_url.substr(0, rootLocation.size()+1) == rootLocation + "/") {
return full_url.substr(rootLocation.size());
} else {
return "";
}
}

} // unnamed namespace

RequestContext::RequestContext(struct MHD_Connection* connection,
std::string _rootLocation,
const std::string& _url,
const std::string& _rootLocation, // URI-encoded
const std::string& unrootedUrl, // URI-decoded
const std::string& _method,
const std::string& version) :
rootLocation(_rootLocation),
full_url(_url),
url(fullURL2LocalURL(_url, _rootLocation)),
url(unrootedUrl),
method(str2RequestMethod(_method)),
version(version),
requestIndex(s_requestIndex++),
Expand Down Expand Up @@ -153,7 +136,6 @@ void RequestContext::print_debug_info() const {
printf("\n");
}
printf("Parsed : \n");
printf("full_url: %s\n", full_url.c_str());
printf("url : %s\n", url.c_str());
printf("acceptEncodingGzip : %d\n", acceptEncodingGzip);
printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE);
Expand Down Expand Up @@ -191,7 +173,7 @@ std::string RequestContext::get_url_part(int number) const {
}

std::string RequestContext::get_full_url() const {
return full_url;
return rootLocation + urlEncode(url);
}

std::string RequestContext::get_root_path() const {
Expand Down
5 changes: 2 additions & 3 deletions src/server/request_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ class IndexError: public std::runtime_error {};
class RequestContext {
public: // functions
RequestContext(struct MHD_Connection* connection,
std::string rootLocation,
const std::string& url,
const std::string& rootLocation, // URI-encoded
const std::string& unrootedUrl, // URI-decoded
const std::string& method,
const std::string& version);
~RequestContext();
Expand Down Expand Up @@ -138,7 +138,6 @@ class RequestContext {

private: // data
std::string rootLocation;
std::string full_url;
std::string url;
RequestMethod method;
std::string version;
Expand Down
4 changes: 2 additions & 2 deletions src/server/response.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ HTTP404Response::HTTP404Response(const InternalServer& server,

HTTPErrorResponse& HTTP404Response::operator+(UrlNotFoundMsg /*unused*/)
{
const std::string requestUrl = m_request.get_full_url();
const std::string requestUrl = urlDecode(m_request.get_full_url(), false);
return *this + ParameterizedMessage("url-not-found", {{"url", requestUrl}});
}

Expand Down Expand Up @@ -234,7 +234,7 @@ HTTP400Response::HTTP400Response(const InternalServer& server,

HTTPErrorResponse& HTTP400Response::operator+(InvalidUrlMsg /*unused*/)
{
std::string requestUrl = m_request.get_full_url();
std::string requestUrl = urlDecode(m_request.get_full_url(), false);
const auto query = m_request.get_query();
if (!query.empty()) {
requestUrl += "?" + encodeDiples(query);
Expand Down
File renamed without changes.
25 changes: 21 additions & 4 deletions test/data/create_corner_cases_zim_file
Original file line number Diff line number Diff line change
@@ -1,7 +1,24 @@
#!/usr/bin/env bash

cd "$(dirname "$0")"
rm -f corner_cases.zim

# The following symbols (that would be nice to include in testing) are not
# allowed under NTFS and/or FAT32 filesystems, and would result in the
# impossibility to git clone (or rather checkout) the libkiwix repository under
# Windows:
#
# ?
# =
# + (that's a pity, since the + symbol in a ZIM filename is replaced with the
# text 'plus' when the ZIM file is added to kiwix-serve's library and it
# would be nice to test that functionality)
#
# Assuming that tests are NOT run under Windows, above symbols can be included
# in testing if the file is renamed while copying to the build directory (see
# test/meson.build), though that would make maintenance slightly more confusing.
Comment on lines +16 to +18
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will compile libkiwix on windows and it would be a good thing to test it also.

One thing will could do is rename the file only on linux and don't rename it on windows.
(And adapt the code accordingly). But in this case, that would make maintenance more than slightly more confusing.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we do anything about it now/in this PR?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, we can leave it at it is.

zimfilename='corner_cases#&.zim'

rm -f "$zimfilename"
zimwriterfs --withoutFTIndex --dont-check-arguments \
-w empty.html \
-I empty.png \
Expand All @@ -11,6 +28,6 @@ zimwriterfs --withoutFTIndex --dont-check-arguments \
-c "" \
-p "" \
corner_cases \
corner_cases.zim \
&& echo 'corner_cases.zim was successfully created' \
|| echo '!!! Failed to create corner_cases.zim !!!' >&2
"$zimfilename" \
&& echo "$zimfilename was successfully created" \
|| echo '!!! Failed to create' "$zimfilename" '!!!' >&2
Loading