From 29aa1f63c7e0c5448d61188b013896c5d3ff4f74 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 3 Nov 2020 15:59:47 +0100 Subject: [PATCH 1/6] Implement `to_string` in tests for android. `std::to_string` is not available on android. We used to not compile tests on android as we are crosscompiling. But if gtest is available tests will be compiled anyway. --- test/tools.h | 9 +++++++++ test/zimfile.cpp | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/test/tools.h b/test/tools.h index b13506ad4..fb6f4f873 100644 --- a/test/tools.h +++ b/test/tools.h @@ -84,6 +84,15 @@ class TempFile std::string path() const { return path_; } }; +template +std::string to_string(const T& value) +{ + std::ostringstream ss; + ss << value; + return ss.str(); +} + + template zim::Buffer write_to_buffer(const T& object) { diff --git a/test/zimfile.cpp b/test/zimfile.cpp index 53203e468..f8e3dd256 100644 --- a/test/zimfile.cpp +++ b/test/zimfile.cpp @@ -85,8 +85,8 @@ TEST(ZimFile, openingAnInvalidZimFileFails) for ( int count = 0; count < 100; count += 10 ) { const TestContext ctx{ {"prefix", prefix.size() ? "yes" : "no" }, - {"byte", std::to_string(byte) }, - {"count", std::to_string(count) } + {"byte", zim::unittests::to_string(byte) }, + {"count", zim::unittests::to_string(count) } }; const std::string zimfileContent = prefix + std::string(count, byte); const auto tmpfile = makeTempFile("invalid_zim_file", zimfileContent); @@ -120,7 +120,7 @@ TEST(ZimFile, nastyEmptyZimFile) const std::string correctContent = emptyZimFileContent(); for ( int offset = 0; offset < 80; ++offset ) { if ( isNastyOffset(offset) ) { - const TestContext ctx{ {"offset", std::to_string(offset) } }; + const TestContext ctx{ {"offset", zim::unittests::to_string(offset) } }; std::string nastyContent(correctContent); nastyContent[offset] = '\xff'; const auto tmpfile = makeTempFile("wrong_checksum_empty_zim_file", nastyContent); From 88360d32202e8c5febcf748f1d7cad9d57988101 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 3 Nov 2020 16:00:02 +0100 Subject: [PATCH 2/6] Add missing include in debug.h --- src/debug.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/debug.h b/src/debug.h index a73dd161a..3c4f1db99 100644 --- a/src/debug.h +++ b/src/debug.h @@ -22,6 +22,7 @@ #include #include +#include #include #if defined (NDEBUG) From a23ea9c0a7fd3ccbe52ce9eeec4a6633b9b5e5dc Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 3 Nov 2020 16:02:05 +0100 Subject: [PATCH 3/6] Use c++11 std::mutex instead of pthread mutex. --- src/concurrent_cache.h | 9 +++-- src/fileimpl.cpp | 80 ++++++++++++++++++++--------------------- src/fileimpl.h | 5 ++- src/writer/cluster.cpp | 12 ++----- src/writer/cluster.h | 4 +-- src/writer/queue.h | 45 +++++++++-------------- src/writer/workers.cpp | 6 ++-- src/xapian/htmlparse.cc | 7 ++-- 8 files changed, 72 insertions(+), 96 deletions(-) diff --git a/src/concurrent_cache.h b/src/concurrent_cache.h index 980f9ad98..501e079ed 100644 --- a/src/concurrent_cache.h +++ b/src/concurrent_cache.h @@ -23,7 +23,7 @@ #include "lrucache.h" #include -#include +#include namespace zim { @@ -47,7 +47,6 @@ class ConcurrentCache public: // types explicit ConcurrentCache(size_t maxEntries) : impl_(maxEntries) - , lock_(PTHREAD_MUTEX_INITIALIZER) {} // Gets the entry corresponding to the given key. If the entry is not in the @@ -63,9 +62,9 @@ class ConcurrentCache Value getOrPut(const Key& key, F f) { std::promise valuePromise; - pthread_mutex_lock(&lock_); + std::unique_lock l(lock_); const auto x = impl_.getOrPut(key, valuePromise.get_future().share()); - pthread_mutex_unlock(&lock_); + l.unlock(); if ( x.miss() ) { valuePromise.set_value(f()); } @@ -75,7 +74,7 @@ class ConcurrentCache private: // data Impl impl_; - pthread_mutex_t lock_; + std::mutex lock_; }; } // namespace zim diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp index 52f145d3a..cb3bb3170 100644 --- a/src/fileimpl.cpp +++ b/src/fileimpl.cpp @@ -22,7 +22,6 @@ #include "_dirent.h" #include "file_compound.h" #include "buffer_reader.h" -#include #include #include #include @@ -72,10 +71,8 @@ sectionSubReader(const FileReader& zimReader, const std::string& sectionName, : zimFile(new FileCompound(fname)), zimReader(new FileReader(zimFile)), bufferDirentZone(256), - bufferDirentLock(PTHREAD_MUTEX_INITIALIZER), filename(fname), direntCache(envValue("ZIM_DIRENTCACHE", DIRENT_CACHE_SIZE)), - direntCacheLock(PTHREAD_MUTEX_INITIALIZER), clusterCache(envValue("ZIM_CLUSTERCACHE", CLUSTER_CACHE_SIZE)), cacheUncompressedCluster(envValue("ZIM_CACHEUNCOMPRESSEDCLUSTER", false)) { @@ -284,25 +281,25 @@ sectionSubReader(const FileReader& zimReader, const std::string& sectionName, if (idx >= getCountArticles()) throw ZimFileFormatError("article index out of range"); - pthread_mutex_lock(&direntCacheLock); - auto v = direntCache.get(idx.v); - if (v.hit()) { - log_debug("dirent " << idx << " found in cache; hits " - << direntCache.getHits() << " misses " - << direntCache.getMisses() << " ratio " - << direntCache.hitRatio() * 100 << "% fillfactor " + std::lock_guard l(direntCacheLock); + auto v = direntCache.get(idx.v); + if (v.hit()) + { + log_debug("dirent " << idx << " found in cache; hits " + << direntCache.getHits() << " misses " + << direntCache.getMisses() << " ratio " + << direntCache.hitRatio() * 100 << "% fillfactor " + << direntCache.fillfactor()); + return v.value(); + } + + log_debug("dirent " << idx << " not found in cache; hits " + << direntCache.getHits() << " misses " << direntCache.getMisses() + << " ratio " << direntCache.hitRatio() * 100 << "% fillfactor " << direntCache.fillfactor()); - pthread_mutex_unlock(&direntCacheLock); - return v.value(); } - log_debug("dirent " << idx << " not found in cache; hits " - << direntCache.getHits() << " misses " << direntCache.getMisses() - << " ratio " << direntCache.hitRatio() * 100 << "% fillfactor " - << direntCache.fillfactor()); - pthread_mutex_unlock(&direntCacheLock); - offset_t indexOffset = readOffset(*urlPtrOffsetReader, idx.v); // We don't know the size of the dirent because it depends of the size of // the title, url and extra parameters. @@ -311,35 +308,34 @@ sectionSubReader(const FileReader& zimReader, const std::string& sectionName, // Let's do try, catch and retry while chosing a smart value for the buffer size. // Most dirent will be "Article" entry (header's size == 16) without extra parameters. // Let's hope that url + title size will be < 256 and if not try again with a bigger size. - - pthread_mutex_lock(&bufferDirentLock); - zsize_t bufferSize = zsize_t(256); - // On very small file, the offset + 256 is higher than the size of the file, - // even if the file is valid. - // So read only to the end of the file. - auto totalSize = zimReader->size(); - if (indexOffset.v + 256 > totalSize.v) bufferSize = zsize_t(totalSize.v-indexOffset.v); std::shared_ptr dirent; - while (true) { - bufferDirentZone.reserve(size_type(bufferSize)); - zimReader->read(bufferDirentZone.data(), indexOffset, bufferSize); - auto direntBuffer = Buffer::makeBuffer(bufferDirentZone.data(), bufferSize); - try { - dirent = std::make_shared(direntBuffer); - } catch (InvalidSize&) { - // buffer size is not enougth, try again : - bufferSize += 256; - continue; - } - // Success ! - break; + { + std::lock_guard l(bufferDirentLock); + zsize_t bufferSize = zsize_t(256); + // On very small file, the offset + 256 is higher than the size of the file, + // even if the file is valid. + // So read only to the end of the file. + auto totalSize = zimReader->size(); + if (indexOffset.v + 256 > totalSize.v) bufferSize = zsize_t(totalSize.v-indexOffset.v); + while (true) { + bufferDirentZone.reserve(size_type(bufferSize)); + zimReader->read(bufferDirentZone.data(), indexOffset, bufferSize); + auto direntBuffer = Buffer::makeBuffer(bufferDirentZone.data(), bufferSize); + try { + dirent = std::make_shared(direntBuffer); + } catch (InvalidSize&) { + // buffer size is not enougth, try again : + bufferSize += 256; + continue; + } + // Success ! + break; + } } - pthread_mutex_unlock(&bufferDirentLock); log_debug("dirent read from " << indexOffset); - pthread_mutex_lock(&direntCacheLock); + std::lock_guard l(direntCacheLock); direntCache.put(idx.v, dirent); - pthread_mutex_unlock(&direntCacheLock); return dirent; } diff --git a/src/fileimpl.h b/src/fileimpl.h index 5dbfd0bb4..787921e8d 100644 --- a/src/fileimpl.h +++ b/src/fileimpl.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -45,7 +44,7 @@ namespace zim std::shared_ptr zimFile; std::shared_ptr zimReader; std::vector bufferDirentZone; - pthread_mutex_t bufferDirentLock; + std::mutex bufferDirentLock; Fileheader header; std::string filename; @@ -54,7 +53,7 @@ namespace zim std::unique_ptr clusterOffsetReader; lru_cache> direntCache; - pthread_mutex_t direntCacheLock; + std::mutex direntCacheLock; typedef std::shared_ptr ClusterHandle; ConcurrentCache clusterCache; diff --git a/src/writer/cluster.cpp b/src/writer/cluster.cpp index 4b983a4c3..2158fd8c9 100644 --- a/src/writer/cluster.cpp +++ b/src/writer/cluster.cpp @@ -45,11 +45,9 @@ Cluster::Cluster(CompressionType compression) _size(0) { blobOffsets.push_back(offset_t(0)); - pthread_mutex_init(&m_closedMutex,NULL); } Cluster::~Cluster() { - pthread_mutex_destroy(&m_closedMutex); if (compressed_data.data()) { delete[] compressed_data.data(); } @@ -80,17 +78,13 @@ void Cluster::close() { compress(); clear_raw_data(); } - pthread_mutex_lock(&m_closedMutex); + std::lock_guard l(m_closedMutex); closed = true; - pthread_mutex_unlock(&m_closedMutex); } bool Cluster::isClosed() const{ - bool v; - pthread_mutex_lock(&m_closedMutex); - v = closed; - pthread_mutex_unlock(&m_closedMutex); - return v; + std::lock_guard l(m_closedMutex); + return closed; } zsize_t Cluster::size() const diff --git a/src/writer/cluster.h b/src/writer/cluster.h index 9808e3093..1868664f2 100644 --- a/src/writer/cluster.h +++ b/src/writer/cluster.h @@ -24,8 +24,8 @@ #include #include #include -#include #include +#include #include #include "../zim_types.h" @@ -88,7 +88,7 @@ class Cluster { ClusterData _data; mutable Blob compressed_data; std::string tmp_filename; - mutable pthread_mutex_t m_closedMutex; + mutable std::mutex m_closedMutex; bool closed = false; private: diff --git a/src/writer/queue.h b/src/writer/queue.h index c191bbf0c..048ce7dbb 100644 --- a/src/writer/queue.h +++ b/src/writer/queue.h @@ -22,15 +22,15 @@ #define MAX_QUEUE_SIZE 10 -#include +#include #include #include "../tools.h" template class Queue { public: - Queue() {pthread_mutex_init(&m_queueMutex,NULL);}; - virtual ~Queue() {pthread_mutex_destroy(&m_queueMutex);}; + Queue() = default; + virtual ~Queue() = default; virtual bool isEmpty(); virtual size_t size(); virtual void pushToQueue(const T& element); @@ -39,7 +39,7 @@ class Queue { protected: std::queue m_realQueue; - pthread_mutex_t m_queueMutex; + std::mutex m_queueMutex; private: // Make this queue non copyable @@ -49,18 +49,14 @@ class Queue { template bool Queue::isEmpty() { - pthread_mutex_lock(&m_queueMutex); - bool retVal = m_realQueue.empty(); - pthread_mutex_unlock(&m_queueMutex); - return retVal; + std::lock_guard l(m_queueMutex); + return m_realQueue.empty(); } template size_t Queue::size() { - pthread_mutex_lock(&m_queueMutex); - size_t retVal = m_realQueue.size(); - pthread_mutex_unlock(&m_queueMutex); - return retVal; + std::lock_guard l(m_queueMutex); + return m_realQueue.size(); } template @@ -70,40 +66,33 @@ void Queue::pushToQueue(const T &element) { do { zim::microsleep(wait); - pthread_mutex_lock(&m_queueMutex); - queueSize = m_realQueue.size(); - pthread_mutex_unlock(&m_queueMutex); + queueSize = size(); wait += 10; } while (queueSize > MAX_QUEUE_SIZE); - pthread_mutex_lock(&m_queueMutex); + std::lock_guard l(m_queueMutex); m_realQueue.push(element); - pthread_mutex_unlock(&m_queueMutex); } template bool Queue::getHead(T &element) { - pthread_mutex_lock(&m_queueMutex); - if (m_realQueue.empty()) { - pthread_mutex_unlock(&m_queueMutex); - return false; - } - element = m_realQueue.front(); - pthread_mutex_unlock(&m_queueMutex); - return true; + std::lock_guard l(m_queueMutex); + if (m_realQueue.empty()) { + return false; + } + element = m_realQueue.front(); + return true; } template bool Queue::popFromQueue(T &element) { - pthread_mutex_lock(&m_queueMutex); + std::lock_guard l(m_queueMutex); if (m_realQueue.empty()) { - pthread_mutex_unlock(&m_queueMutex); return false; } element = m_realQueue.front(); m_realQueue.pop(); - pthread_mutex_unlock(&m_queueMutex); return true; } diff --git a/src/writer/workers.cpp b/src/writer/workers.cpp index c475f46ff..629629ff5 100644 --- a/src/writer/workers.cpp +++ b/src/writer/workers.cpp @@ -41,11 +41,12 @@ #include #include #include +#include #include "log.h" #include "../fs.h" #include "../tools.h" -static pthread_mutex_t s_dbaccessLock = PTHREAD_MUTEX_INITIALIZER; +static std::mutex s_dbaccessLock; std::atomic zim::writer::ClusterTask::waiting_task(0); std::atomic zim::writer::IndexTask::waiting_task(0); @@ -140,9 +141,8 @@ namespace zim indexer.index_text_without_positions(content); } - pthread_mutex_lock(&s_dbaccessLock); + std::lock_guard l(s_dbaccessLock); data->indexer->writableDatabase.add_document(document); - pthread_mutex_unlock(&s_dbaccessLock); } #endif diff --git a/src/xapian/htmlparse.cc b/src/xapian/htmlparse.cc index 0f3316da8..447023fc5 100644 --- a/src/xapian/htmlparse.cc +++ b/src/xapian/htmlparse.cc @@ -29,12 +29,12 @@ // #include "utf8convert.h" #include +#include #include #include #include #include -#include using namespace std; @@ -47,7 +47,7 @@ lowercase_string(string &str) } map zim::HtmlParser::named_ents; -static pthread_mutex_t sInitLock = PTHREAD_MUTEX_INITIALIZER; +static std::mutex sInitLock; inline static bool p_notdigit(char c) @@ -107,7 +107,7 @@ zim::HtmlParser::HtmlParser() #include "namedentities.h" { NULL, 0 } }; - pthread_mutex_lock(&sInitLock); + std::lock_guard l(sInitLock); if (named_ents.empty()) { const struct ent *i = ents; while (i->n) { @@ -115,7 +115,6 @@ zim::HtmlParser::HtmlParser() ++i; } } - pthread_mutex_unlock(&sInitLock); } void From 678d1207838f75d4b5d019bf90c9cbc3c54ac48b Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 3 Nov 2020 17:18:26 +0100 Subject: [PATCH 4/6] Use std::thread to manage threads instead of pthread. --- src/file_reader.cpp | 1 - src/writer/creator.cpp | 11 +++++------ src/writer/creatordata.h | 5 +++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/file_reader.cpp b/src/file_reader.cpp index fbc41951e..80d68cbb3 100644 --- a/src/file_reader.cpp +++ b/src/file_reader.cpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/src/writer/creator.cpp b/src/writer/creator.cpp index 237ad5dfb..43a582238 100644 --- a/src/writer/creator.cpp +++ b/src/writer/creator.cpp @@ -88,12 +88,11 @@ namespace zim for(unsigned i=0; idata.get()); - data->workerThreads.push_back(thread); + std::thread thread(taskRunner, this->data.get()); + data->workerThreads.push_back(std::move(thread)); } - pthread_create(&data->writerThread, NULL, clusterWriter, this->data.get()); + data->writerThread = std::thread(clusterWriter, this->data.get()); } void Creator::addArticle(std::shared_ptr
article) @@ -209,12 +208,12 @@ namespace zim data->taskList.pushToQueue(nullptr); } for(auto& thread: data->workerThreads) { - pthread_join(thread, nullptr); + thread.join(); } // Wait for writerThread to finish. data->clusterToWrite.pushToQueue(nullptr); - pthread_join(data->writerThread, nullptr); + data->writerThread.join(); TINFO("ResolveRedirectIndexes"); data->resolveRedirectIndexes(); diff --git a/src/writer/creatordata.h b/src/writer/creatordata.h index c366149b9..f37b9b406 100644 --- a/src/writer/creatordata.h +++ b/src/writer/creatordata.h @@ -29,6 +29,7 @@ #include #include #include +#include #include "config.h" #include "direntPool.h" @@ -67,7 +68,7 @@ namespace zim typedef std::vector ClusterList; typedef Queue ClusterQueue; typedef Queue TaskQueue; - typedef std::vector ThreadList; + typedef std::vector ThreadList; CreatorData(const std::string& fname, bool verbose, bool withIndex, std::string language, @@ -103,7 +104,7 @@ namespace zim ClusterQueue clusterToWrite; TaskQueue taskList; ThreadList workerThreads; - pthread_t writerThread; + std::thread writerThread; const CompressionType compression; std::string basename; bool isEmpty = true; From e4c92b3c86f483fe9fff51620669e0e6d003666a Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 3 Nov 2020 17:18:49 +0100 Subject: [PATCH 5/6] Remove use of pthread dependency when this is not mandatory. --- meson.build | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/meson.build b/meson.build index 3f45ec42c..c60700f64 100644 --- a/meson.build +++ b/meson.build @@ -42,16 +42,21 @@ conf.set('ENABLE_XAPIAN', xapian_dep.found()) pkg_requires = ['liblzma', 'libzstd'] if build_machine.system() == 'windows' - thread_dep = dependency('libpthreadVC2') - pkg_requires += ['libpthreadVC2'] extra_link_args = ['-lRpcrt4', '-lWs2_32', '-lwinmm', '-licuuc', '-licuin'] extra_cpp_args = ['-DSORTPP_PASS'] else - thread_dep = dependency('threads') extra_link_args = [] extra_cpp_args = [] endif +compiler = meson.get_compiler('cpp') +if compiler.get_id() == 'gcc' and build_machine.system() == 'linux' + # C++ std::thread is implemented using pthread on linux by gcc + thread_dep = dependency('threads') +else + thread_dep = dependency('', required:false) +endif + if xapian_dep.found() pkg_requires += ['xapian-core'] icu_dep = dependency('icu-i18n', static:static_linkage) From 7a64761d6da61fc121c367dd5ee0c7e742b5de40 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 18 Nov 2020 14:28:19 +0100 Subject: [PATCH 6/6] Use std::atomic instead of a mutex to protect the `closed` variable. --- src/writer/cluster.cpp | 2 -- src/writer/cluster.h | 5 ++--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/writer/cluster.cpp b/src/writer/cluster.cpp index 2158fd8c9..1cbf16a36 100644 --- a/src/writer/cluster.cpp +++ b/src/writer/cluster.cpp @@ -78,12 +78,10 @@ void Cluster::close() { compress(); clear_raw_data(); } - std::lock_guard l(m_closedMutex); closed = true; } bool Cluster::isClosed() const{ - std::lock_guard l(m_closedMutex); return closed; } diff --git a/src/writer/cluster.h b/src/writer/cluster.h index 1868664f2..31e779567 100644 --- a/src/writer/cluster.h +++ b/src/writer/cluster.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include "../zim_types.h" @@ -88,8 +88,7 @@ class Cluster { ClusterData _data; mutable Blob compressed_data; std::string tmp_filename; - mutable std::mutex m_closedMutex; - bool closed = false; + std::atomic closed { false }; private: void write_content(writer_t writer) const;