diff --git a/loadgen/CMakeLists.txt b/loadgen/CMakeLists.txt
index 624dd8b19c..a48841b1f8 100644
--- a/loadgen/CMakeLists.txt
+++ b/loadgen/CMakeLists.txt
@@ -9,7 +9,7 @@ message("mlperf_loadgen v${mlperf_loadgen_VERSION_MAJOR}.${mlperf_loadgen_VERSIO
 # Set build options. NB: CXX_STANDARD is supported since CMake 3.1.
 if (NOT MSVC)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -W -Wall")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -W -Wall")
 endif()
 message(STATUS "Using C++ compiler flags: ${CMAKE_CXX_FLAGS}")
 set(CMAKE_CXX_STANDARD "14")
diff --git a/loadgen/benchmark/.gitignore b/loadgen/benchmark/.gitignore
new file mode 100644
index 0000000000..e792c8e55f
--- /dev/null
+++ b/loadgen/benchmark/.gitignore
@@ -0,0 +1,2 @@
+loadgen_build
+build
\ No newline at end of file
diff --git a/loadgen/benchmark/README.md b/loadgen/benchmark/README.md
new file mode 100644
index 0000000000..f4e1f2824b
--- /dev/null
+++ b/loadgen/benchmark/README.md
@@ -0,0 +1,9 @@
+Note: please install jemalloc first. See: http://jemalloc.net/
+Command: bash run.sh <target_qps> <0=Basic,1=Queue> <num_complete_threads> <max_size> <server_coalesce_queries>
+
+Experiments:
+- On Intel(R) Xeon(R) CPU E5-1650 v4 @ 3.60GHz
+- Basic SUT: 500-600k i/s
+- Basic SUT + jemalloc: 800-900k i/s (`bash run.sh 800000 0`)
+- Queued SUT (2 complete threads) + jemalloc: 1.2-1.3M i/s (`bash run.sh 1200000 1 2 2048`)
+- Queued SUT (2 complete threads) + jemalloc + server_coalesce_queries: 1.4-1.5M i/s (`bash run.sh 1400000 1 2 512 1`)
diff --git a/loadgen/benchmark/repro.cpp b/loadgen/benchmark/repro.cpp
new file mode 100644
index 0000000000..d5bc93bb2b
--- /dev/null
+++ b/loadgen/benchmark/repro.cpp
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <algorithm>
+#include <cassert>
+#include <condition_variable>
+#include <deque>
+#include <iostream>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include "loadgen.h"
+#include "query_sample_library.h"
+#include "system_under_test.h"
+#include "test_settings.h"
+
+class QSL : public mlperf::QuerySampleLibrary {
+ public:
+  ~QSL() override{};
+  const std::string& Name() const override { return mName; }
+  size_t TotalSampleCount() override { return 1000000; }
+  size_t PerformanceSampleCount() override { return TotalSampleCount(); }
+  void LoadSamplesToRam(
+      const std::vector<mlperf::QuerySampleIndex>& samples) override {}
+  void UnloadSamplesFromRam(
+      const std::vector<mlperf::QuerySampleIndex>& samples) override {}
+
+ private:
+  std::string mName{"Dummy QSL"};
+};
+
+class BasicSUT : public mlperf::SystemUnderTest {
+ public:
+  BasicSUT() {
+    // Start with some large value so that we don't reallocate memory.
+    initResponse(10000);
+  }
+  ~BasicSUT() override {}
+  const std::string& Name() const override { return mName; }
+  void IssueQuery(const std::vector<mlperf::QuerySample>& samples) override {
+    int n = samples.size();
+    if (n > mResponses.size()) {
+      std::cout << "Warning: reallocating response buffer in BasicSUT. Maybe "
+                   "you should initResponse with larger value!?"
+                << std::endl;
+      initResponse(samples.size());
+    }
+    for (int i = 0; i < n; i++) {
+      mResponses[i].id = samples[i].id;
+    }
+    mlperf::QuerySamplesComplete(mResponses.data(), n);
+  }
+  void FlushQueries() override {}
+  void ReportLatencyResults(
+      const std::vector<mlperf::QuerySampleLatency>& latencies_ns) override{};
+
+ private:
+  void initResponse(int size) {
+    mResponses.resize(size,
+                      {0, reinterpret_cast<uintptr_t>(&mBuf), sizeof(int)});
+  }
+  int mBuf{0};
+  std::string mName{"BasicSUT"};
+  std::vector<mlperf::QuerySampleResponse> mResponses;
+};
+
+class QueueSUT : public mlperf::SystemUnderTest {
+ public:
+  QueueSUT(int numCompleteThreads, int maxSize) {
+    // Each thread handles at most maxSize responses at a time.
+    std::cout << "QueueSUT: maxSize = " << maxSize << std::endl;
+    initResponse(numCompleteThreads, maxSize);
+    // Launch complete threads
+    for (int i = 0; i < numCompleteThreads; i++) {
+      mThreads.emplace_back(&QueueSUT::CompleteThread, this, i);
+    }
+  }
+  ~QueueSUT() override {
+    {
+      std::unique_lock<std::mutex> lck(mMtx);
+      mDone = true;
+      mCondVar.notify_all();
+    }
+    for (auto& thread : mThreads) {
+      thread.join();
+    }
+  }
+  const std::string& Name() const override { return mName; }
+  void IssueQuery(const std::vector<mlperf::QuerySample>& samples) override {
+    std::unique_lock<std::mutex> lck(mMtx);
+    for (const auto& sample : samples) {
+      mIdQueue.push_back(sample.id);
+    }
+    // Let some worker thread consume the tasks.
+    mCondVar.notify_one();
+  }
+  void FlushQueries() override {}
+  void ReportLatencyResults(
+      const std::vector<mlperf::QuerySampleLatency>& latencies_ns) override{};
+
+ private:
+  void CompleteThread(int threadIdx) {
+    auto& responses = mResponses[threadIdx];
+    size_t maxSize{responses.size()};
+    size_t actualSize{0};
+    while (true) {
+      {
+        std::unique_lock<std::mutex> lck(mMtx);
+        mCondVar.wait(lck, [&]() { return !mIdQueue.empty() || mDone; });
+
+        if (mDone) {
+          break;
+        }
+
+        actualSize = std::min(maxSize, mIdQueue.size());
+        for (size_t i = 0; i < actualSize; i++) {
+          responses[i].id = mIdQueue.front();
+          mIdQueue.pop_front();
+        }
+        mCondVar.notify_one();
+      }
+      mlperf::QuerySamplesComplete(responses.data(), actualSize);
+    }
+  }
+  void initResponse(int numCompleteThreads, int size) {
+    mResponses.resize(numCompleteThreads);
+    for (auto& responses : mResponses) {
+      responses.resize(size,
+                       {0, reinterpret_cast<uintptr_t>(&mBuf), sizeof(int)});
+    }
+  }
+  int mBuf{0};
+  std::string mName{"QueueSUT"};
+  std::vector<std::vector<mlperf::QuerySampleResponse>> mResponses;
+  std::vector<std::thread> mThreads;
+  std::deque<mlperf::ResponseId> mIdQueue;
+  std::mutex mMtx;
+  std::condition_variable mCondVar;
+  bool mDone{false};
+};
+
+int main(int argc, char** argv) {
+  assert(argc >= 2 && "Need to pass in at least one argument: target_qps");
+  int target_qps = std::stoi(argv[1]);
+  std::cout << "target_qps = " << target_qps << std::endl;
+
+  bool useQueue{false};
+  int numCompleteThreads{4};
+  int maxSize{1};
+  bool server_coalesce_queries{false};
+  if (argc >= 3) {
+    useQueue = std::stoi(argv[2]) != 0;
+  }
+  if (argc >= 4) {
+    numCompleteThreads = std::stoi(argv[3]);
+  }
+  if (argc >= 5) {
+    maxSize = std::stoi(argv[4]);
+  }
+  if (argc >= 6) {
+    server_coalesce_queries = std::stoi(argv[5]) != 0;
+  }
+
+  QSL qsl;
+  std::unique_ptr<mlperf::SystemUnderTest> sut;
+
+  // Configure the test settings
+  mlperf::TestSettings testSettings;
+  testSettings.scenario = mlperf::TestScenario::Server;
+  testSettings.mode = mlperf::TestMode::PerformanceOnly;
+  testSettings.server_target_qps = target_qps;
+  testSettings.server_target_latency_ns = 10000000;  // 10ms
+  testSettings.server_target_latency_percentile = 0.99;
+  testSettings.min_duration_ms = 60000;
+  testSettings.min_query_count = 270000;
+  testSettings.server_coalesce_queries =
+      server_coalesce_queries;
+  std::cout << "testSettings.server_coalesce_queries = "
+            << (server_coalesce_queries ? "True" : "False") << std::endl;
+
+  // Configure the logging settings
+  mlperf::LogSettings logSettings;
+  logSettings.log_output.outdir = "build";
+  logSettings.log_output.prefix = "mlperf_log_";
+  logSettings.log_output.suffix = "";
+  logSettings.log_output.prefix_with_datetime = false;
+  logSettings.log_output.copy_detail_to_stdout = false;
+  logSettings.log_output.copy_summary_to_stdout = true;
+  logSettings.log_mode = mlperf::LoggingMode::AsyncPoll;
+  logSettings.log_mode_async_poll_interval_ms = 1000;
+  logSettings.enable_trace = false;
+
+  // Choose SUT
+  if (useQueue) {
+    std::cout << "Using QueueSUT with " << numCompleteThreads
+              << " complete threads" << std::endl;
+    sut.reset(new QueueSUT(numCompleteThreads, maxSize));
+  } else {
+    std::cout << "Using BasicSUT" << std::endl;
+    sut.reset(new BasicSUT());
+  }
+
+  // Start test
+  std::cout << "Start test..." << std::endl;
+  mlperf::StartTest(sut.get(), &qsl, testSettings, logSettings);
+  std::cout << "Test done. Clean up SUT..." << std::endl;
+  sut.reset();
+  std::cout << "Done!" << std::endl;
+  return 0;
+}
diff --git a/loadgen/benchmark/run.sh b/loadgen/benchmark/run.sh
new file mode 100644
index 0000000000..08a34529e3
--- /dev/null
+++ b/loadgen/benchmark/run.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/bash
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+echo "Building loadgen..."
+if [ ! -e loadgen_build ]; then mkdir loadgen_build; fi;
+cd loadgen_build && cmake ../.. && make -j && cd ..
+echo "Building test program..."
+if [ ! -e build ]; then mkdir build; fi;
+g++ --std=c++11 -O3 -I.. -o build/repro.exe repro.cpp -Lloadgen_build -lmlperf_loadgen -lpthread && \
+LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.2 build/repro.exe $1 $2 $3 $4 $5
diff --git a/loadgen/benchmark/run_debug.sh b/loadgen/benchmark/run_debug.sh
new file mode 100644
index 0000000000..341850eee0
--- /dev/null
+++ b/loadgen/benchmark/run_debug.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/bash
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+echo "Building loadgen in Debug mode..."
+if [ ! -e loadgen_build ]; then mkdir loadgen_build; fi;
+cd loadgen_build && cmake -DCMAKE_BUILD_TYPE=Debug ../.. && make -j && cd ..
+echo "Building test program in Debug mode..."
+if [ ! -e build ]; then mkdir build; fi;
+g++ --std=c++11 -O0 -g -I.. -o build/repro.exe repro.cpp -Lloadgen_build -lmlperf_loadgen -lpthread && \
+gdb --args build/repro.exe $1 $2 $3 $4 $5
diff --git a/loadgen/loadgen.cc b/loadgen/loadgen.cc
index a334523b08..fee8a42d1f 100644
--- a/loadgen/loadgen.cc
+++ b/loadgen/loadgen.cc
@@ -151,6 +151,29 @@ class QueryMetadata {
     return all_samples_done_time;
   }
 
+  // When server_coalesce_queries is set to true in Server scenario, we
+  // sometimes coalesce multiple queries into one query. This is done by moving
+  // the other query's sample into current query, while maintaining their
+  // original scheduled_time.
+  void CoalesceQueries(QueryMetadata* queries, size_t first, size_t last) {
+    // Copy sample data over to current query, boldly assuming that each query
+    // only has one sample.
+    auto prev_scheduled_time = scheduled_time;
+    query_to_send.reserve(last - first +
+                          2);  // Extra one for the current query.
+    for (size_t i = first; i <= last; ++i) {
+      auto& q = queries[i];
+      auto& s = q.samples_[0];
+      query_to_send.push_back(
+          {reinterpret_cast<ResponseId>(&s), s.sample_index});
+      q.scheduled_time = prev_scheduled_time + q.scheduled_delta;
+      q.issued_start_time = issued_start_time;
+      prev_scheduled_time = q.scheduled_time;
+    }
+  }
+
+  void Decoalesce() { query_to_send.resize(1); }
+
  public:
   std::vector<QuerySample> query_to_send;
   const std::chrono::nanoseconds scheduled_delta;
@@ -280,7 +303,8 @@ auto SampleDistribution(size_t sample_count, size_t stride, std::mt19937* rng) {
     indices.push_back(i);
   }
   std::shuffle(indices.begin(), indices.end(), *rng);
-  return [indices = std::move(indices), i = size_t(0)](auto& /*gen*/) mutable {
+  return
+      [ indices = std::move(indices), i = size_t(0) ](auto& /*gen*/) mutable {
     return indices.at(i++);
   };
 }
@@ -291,7 +315,9 @@ auto SampleDistribution(size_t sample_count, size_t /*stride*/,
                         std::mt19937* /*rng*/) {
   return [dist = std::uniform_int_distribution<>(0, sample_count - 1)](
-             auto& gen) mutable { return dist(gen); };
+             auto& gen) mutable {
+    return dist(gen);
+  };
 }
@@ -429,8 +455,10 @@ std::vector<QueryMetadata> GenerateQueries(
     }
   }
 
-  LogDetail([count = queries.size(), spq = settings.samples_per_query,
-             duration = timestamp.count()](AsyncDetail& detail) {
+  LogDetail([
+    count = queries.size(), spq = settings.samples_per_query,
+    duration = timestamp.count()
+  ](AsyncDetail & detail) {
     detail("GeneratedQueries: ", "queries", count, "samples per query", spq,
            "duration", duration);
   });
@@ -561,16 +589,18 @@ struct QueryScheduler<TestScenario::Server> {
                  const PerfClock::time_point start)
       : start(start) {}
 
-  // TODO: Coalesce all queries whose scheduled timestamps have passed.
   PerfClock::time_point Wait(QueryMetadata* next_query) {
     auto tracer =
         MakeScopedTracer([](AsyncTrace& trace) { trace("Scheduling"); });
 
     auto scheduled_time = start + next_query->scheduled_delta;
     next_query->scheduled_time = scheduled_time;
-    std::this_thread::sleep_until(scheduled_time);
 
     auto now = PerfClock::now();
+    if (now < scheduled_time) {
+      std::this_thread::sleep_until(scheduled_time);
+      now = PerfClock::now();
+    }
     next_query->issued_start_time = now;
     return now;
   }
@@ -636,6 +666,7 @@ PerformanceResult IssueQueries(SystemUnderTest* sut,
                                       max_latencies_to_record);
 
   size_t queries_issued = 0;
+  size_t queries_count = queries.size();
 
   auto start_for_power = std::chrono::system_clock::now();
   const PerfClock::time_point start = PerfClock::now();
@@ -647,11 +678,34 @@ PerformanceResult IssueQueries(SystemUnderTest* sut,
   // the actual issue time.
   bool ran_out_of_generated_queries = scenario != TestScenario::Server;
   size_t expected_latencies = 0;
-  for (auto& query : queries) {
+  while (queries_issued < queries_count) {
+    auto& query = queries[queries_issued];
     auto tracer1 =
         MakeScopedTracer([](AsyncTrace& trace) { trace("SampleLoop"); });
 
     last_now = query_scheduler.Wait(&query);
 
+    // If in Server scenario and server_coalesce_queries is enabled, multiple
+    // queries are coalesced into one big query if the current time has already
+    // passed the scheduled time of multiple queries.
+    if (scenario == TestScenario::Server &&
+        settings.requested.server_coalesce_queries) {
+      auto current_query_idx = queries_issued;
+      auto scheduled_time = query.scheduled_time;
+      while (queries_issued < queries_count - 1) {
+        auto next_scheduled_time =
+            scheduled_time + queries[queries_issued + 1].scheduled_delta;
+        if (last_now < next_scheduled_time) {
+          break;
+        }
+        scheduled_time = next_scheduled_time;
+        queries_issued++;
+      }
+      if (queries_issued > current_query_idx) {
+        query.CoalesceQueries(queries.data(), current_query_idx + 1,
+                              queries_issued);
+      }
+    }
+
     // Issue the query to the SUT.
     {
       auto tracer3 =
@@ -662,6 +716,12 @@ PerformanceResult IssueQueries(SystemUnderTest* sut,
 
     expected_latencies += query.query_to_send.size();
     queries_issued++;
 
+    if (scenario == TestScenario::Server &&
+        settings.requested.server_coalesce_queries) {
+      // Set the query back to its clean state.
+      query.Decoalesce();
+    }
+
     if (mode == TestMode::AccuracyOnly) {
       // TODO: Rate limit in accuracy mode so accuracy mode works even
       // if the expected/target performance is way off.
@@ -830,9 +890,9 @@ struct PerformanceSummary {
 #if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64)
   // MSVC complains if there is no explicit constructor.
   // (target_latency_percentile above depends on construction with settings)
-  PerformanceSummary(
-      const std::string& sut_name_arg, const TestSettingsInternal& settings_arg,
-      const PerformanceResult& pr_arg)
+  PerformanceSummary(const std::string& sut_name_arg,
+                     const TestSettingsInternal& settings_arg,
+                     const PerformanceResult& pr_arg)
       : sut_name(sut_name_arg), settings(settings_arg), pr(pr_arg){};
 #endif
   void ProcessLatencies();
@@ -1268,14 +1328,15 @@ std::pair<PerformanceSummary, PerformanceSummary> FindBoundaries(
   TestSettingsInternal u_settings = l_perf_summary.settings;
   find_peak_performance::WidenPerformanceField(&u_settings);
 
-  LogDetail(
-      [l_field = find_peak_performance::ToStringPerformanceField(
-           l_perf_summary.settings),
-       u_field = find_peak_performance::ToStringPerformanceField(
-           u_settings)](AsyncDetail& detail) {
-        detail("FindBoundaries: Checking fields [" + l_field + ", " + u_field +
-               ")");
-      });
+  LogDetail([
+    l_field = find_peak_performance::ToStringPerformanceField(
+        l_perf_summary.settings),
+    u_field =
+        find_peak_performance::ToStringPerformanceField(u_settings)
+  ](AsyncDetail & detail) {
+    detail("FindBoundaries: Checking fields [" + l_field + ", " + u_field +
+           ")");
+  });
 
   std::vector<loadgen::LoadableSampleSet> loadable_sets(
       loadgen::GenerateLoadableSets(qsl, u_settings));
@@ -1313,15 +1374,14 @@ PerformanceSummary FindPeakPerformanceBinarySearch(
       find_peak_performance::MidOfBoundaries(l_perf_summary.settings,
                                              u_perf_summary.settings);
 
-  LogDetail([l_field =
-                 find_peak_performance::ToStringPerformanceField(
-                     l_perf_summary.settings),
-             u_field =
-                 find_peak_performance::ToStringPerformanceField(
-                     u_perf_summary.settings),
-             m_field =
-                 find_peak_performance::ToStringPerformanceField(
-                     m_settings)](AsyncDetail& detail) {
+  LogDetail([
+    l_field = find_peak_performance::ToStringPerformanceField(
+        l_perf_summary.settings),
+    u_field = find_peak_performance::ToStringPerformanceField(
+        u_perf_summary.settings),
+    m_field =
+        find_peak_performance::ToStringPerformanceField(m_settings)
+  ](AsyncDetail & detail) {
     detail(
         "FindPeakPerformanceBinarySearch: Testing the mid value of bounds [" +
         l_field + ", " + u_field + "): " + m_field);
@@ -1409,9 +1469,9 @@ void RunPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
 
   sut->ReportLatencyResults(pr.sample_latencies);
 
-  LogSummary(
-      [perf_summary = PerformanceSummary{sut->Name(), settings, std::move(pr)}](
-          AsyncSummary& summary) mutable { perf_summary.Log(summary); });
+  LogSummary([perf_summary =
+                  PerformanceSummary{sut->Name(), settings, std::move(pr)}](
+      AsyncSummary & summary) mutable { perf_summary.Log(summary); });
 
   qsl->UnloadSamplesFromRam(performance_set.set);
 }
@@ -1436,7 +1496,8 @@ void FindPeakPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
   if (scenario != TestScenario::MultiStream &&
       scenario != TestScenario::MultiStreamFree &&
       scenario != TestScenario::Server) {
-    LogDetail([unsupported_scenario = ToString(scenario)](AsyncDetail& detail) {
+    LogDetail([unsupported_scenario =
+                   ToString(scenario)](AsyncDetail & detail) {
       detail.Error(find_peak_performance::kNotSupportedMsg);
     });
     return;
@@ -1444,7 +1505,7 @@
 
   LogDetail([base_field = find_peak_performance::ToStringPerformanceField(
-                 base_settings)](AsyncDetail& detail) {
+                 base_settings)](AsyncDetail & detail) {
     detail("FindPeakPerformance: Check validity of the base settings field: " +
            base_field);
   });
@@ -1475,10 +1536,10 @@ void FindPeakPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
   sut->ReportLatencyResults(base_perf_summary.pr.sample_latencies);
 
-  LogSummary(
-      [perf_summary = PerformanceSummary{sut->Name(), base_settings,
-                                         std::move(base_perf_summary.pr)}](
-          AsyncSummary& summary) mutable { perf_summary.Log(summary); });
+  LogSummary([perf_summary =
+                  PerformanceSummary{sut->Name(), base_settings,
+                                     std::move(base_perf_summary.pr)}](
+      AsyncSummary & summary) mutable { perf_summary.Log(summary); });
 
   qsl->UnloadSamplesFromRam(base_performance_set.set);
 
@@ -1494,14 +1555,15 @@ void FindPeakPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
   PerformanceSummary l_perf_summary = boundaries.first;
   PerformanceSummary u_perf_summary = boundaries.second;
 
-  LogDetail(
-      [l_field = find_peak_performance::ToStringPerformanceField(
-           l_perf_summary.settings),
-       u_field = find_peak_performance::ToStringPerformanceField(
-           u_perf_summary.settings)](AsyncDetail& detail) {
-        detail("FindPeakPerformance: Found boundaries: [" + l_field + ", " +
-               u_field + ")");
-      });
+  LogDetail([
+    l_field = find_peak_performance::ToStringPerformanceField(
+        l_perf_summary.settings),
+    u_field = find_peak_performance::ToStringPerformanceField(
+        u_perf_summary.settings)
+  ](AsyncDetail & detail) {
+    detail("FindPeakPerformance: Found boundaries: [" + l_field + ", " +
+           u_field + ")");
+  });
 
   // Reuse performance_set, u_perf_summary has the largest 'samples_per_query'.
   std::vector<loadgen::LoadableSampleSet> loadable_sets(
@@ -1515,16 +1577,16 @@ void FindPeakPerformanceMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
   // Print-out the peak performance test setting.
   LogDetail([field = find_peak_performance::ToStringPerformanceField(
-                 perf_summary.settings)](AsyncDetail& detail) {
+                 perf_summary.settings)](AsyncDetail & detail) {
     detail("FindPeakPerformance: Found peak performance field: " + field);
   });
 
   sut->ReportLatencyResults(perf_summary.pr.sample_latencies);
 
-  LogSummary(
-      [perf_summary = PerformanceSummary{sut->Name(), perf_summary.settings,
-                                         std::move(perf_summary.pr)}](
-          AsyncSummary& summary) mutable { perf_summary.Log(summary); });
+  LogSummary([perf_summary =
+                  PerformanceSummary{sut->Name(), perf_summary.settings,
+                                     std::move(perf_summary.pr)}](
+      AsyncSummary & summary) mutable { perf_summary.Log(summary); });
 
   qsl->UnloadSamplesFromRam(performance_set.set);
 }
@@ -1541,10 +1603,8 @@ void RunAccuracyMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
 
   for (auto& loadable_set : loadable_sets) {
     {
-      auto tracer = MakeScopedTracer(
-          [count = loadable_set.set.size()](AsyncTrace& trace) {
-            trace("LoadSamples", "count", count);
-          });
+      auto tracer = MakeScopedTracer([count = loadable_set.set.size()](
+          AsyncTrace & trace) { trace("LoadSamples", "count", count); });
       LoadSamplesToRam(qsl, loadable_set.set);
     }
 
@@ -1552,10 +1612,8 @@ void RunAccuracyMode(SystemUnderTest* sut, QuerySampleLibrary* qsl,
         sut, settings, loadable_set, sequence_gen));
 
     {
-      auto tracer = MakeScopedTracer(
-          [count = loadable_set.set.size()](AsyncTrace& trace) {
-            trace("UnloadSampes", "count", count);
-          });
+      auto tracer = MakeScopedTracer([count = loadable_set.set.size()](
+          AsyncTrace & trace) { trace("UnloadSampes", "count", count); });
       qsl->UnloadSamplesFromRam(loadable_set.set);
     }
   }
diff --git a/loadgen/test_settings.h b/loadgen/test_settings.h
index 49cffb922f..c0f01cb0ce 100644
--- a/loadgen/test_settings.h
+++ b/loadgen/test_settings.h
@@ -197,8 +197,9 @@ struct TestSettings {
   /// should be set to 0.97 (97%) in v0.5. (As always, check the policy page
   /// for updated values for the benchmark you are running.)
   double server_target_latency_percentile = 0.99;
-  /// \brief TODO: Implement this. Would combine samples from multiple queries
-  /// into a single query if their scheduled issue times have passed.
+  /// \brief If this flag is set to true, LoadGen will combine samples from
+  /// multiple queries into a single query if their scheduled issue times have
+  /// passed.
   bool server_coalesce_queries = false;
   /// \brief The decimal places of QPS precision used to terminate
   /// FindPeakPerformance mode.
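
---

For readers who want to reason about the coalescing window added to `IssueQueries` without stepping through the whole issue loop, the standalone sketch below mirrors the arithmetic of that loop on plain integers: starting from the query being issued, it keeps absorbing following queries while their scheduled time (previous scheduled time plus the next query's `scheduled_delta`, exactly as written in the loop above) has already passed "now". This is an illustrative sketch only; `CountCoalesced` and the `Ns` alias are made-up names, not part of the LoadGen API.

```cpp
// Illustrative sketch only -- not LoadGen code. It reproduces the coalescing
// decision from IssueQueries(): keep absorbing the following queries while
// their scheduled times have already passed `now`.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

using Ns = int64_t;  // nanoseconds

// Returns the number of *extra* queries that would be coalesced into the
// query at index `first`.
size_t CountCoalesced(const std::vector<Ns>& scheduled_delta, size_t first,
                      Ns first_scheduled_time, Ns now) {
  size_t last = first;
  Ns scheduled_time = first_scheduled_time;
  while (last + 1 < scheduled_delta.size()) {
    Ns next_scheduled_time = scheduled_time + scheduled_delta[last + 1];
    if (now < next_scheduled_time) break;  // next query is not overdue yet
    scheduled_time = next_scheduled_time;
    ++last;
  }
  return last - first;
}

int main() {
  // Query i is treated as scheduled delta[i] after query i-1.
  std::vector<Ns> delta = {0, 100, 100, 100, 100};
  // The first query was scheduled at t=0 but only issued at t=250, so the
  // queries scheduled at t=100 and t=200 are also overdue and get merged.
  std::cout << CountCoalesced(delta, /*first=*/0, /*first_scheduled_time=*/0,
                              /*now=*/250)
            << " extra queries coalesced\n";  // prints 2
  return 0;
}
```

In the real loop, those extra overdue queries are then handed to `QueryMetadata::CoalesceQueries`, issued as one SUT call, and the issuing query is restored with `Decoalesce()` afterwards.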