Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion loadgen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ message("mlperf_loadgen v${mlperf_loadgen_VERSION_MAJOR}.${mlperf_loadgen_VERSIO

# Set build options. NB: CXX_STANDARD is supported since CMake 3.1.
if (NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -W -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -W -Wall")
endif()
message(STATUS "Using C++ compiler flags: ${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_STANDARD "14")
Expand Down
2 changes: 2 additions & 0 deletions loadgen/benchmark/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
loadgen_build
build
9 changes: 9 additions & 0 deletions loadgen/benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Note: please install jemalloc first. See: http://jemalloc.net/
Command: bash run.sh <target_qps> <0=Basic,1=Queue> <numCompleteThreads> <maxSizeInComplete> <server_coalesce_queries=0or1>

Experiments:
- On Intel(R) Xeon(R) CPU E5-1650 v4 @ 3.60GHz
- Basic SUT : 500-600k i/s
- Basic SUT + jemalloc: 800-900k i/s (`bash run.sh 800000 0`)
- Queued SUT (2 complete threads) + jemalloc: 1.2-1.3M i/s (`bash run.sh 1200000 1 2 2048`)
- Queued SUT (2 complete threads) + jemalloc + server_coalesce_queries: 1.4-1.5M i/s (`bash run.sh 1400000 1 2 512 1`)
221 changes: 221 additions & 0 deletions loadgen/benchmark/repro.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <cassert>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <exception>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

#include "loadgen.h"
#include "query_sample_library.h"
#include "system_under_test.h"
#include "test_settings.h"

class QSL : public mlperf::QuerySampleLibrary {
public:
~QSL() override{};
const std::string& Name() const override { return mName; }
size_t TotalSampleCount() override { return 1000000; }
size_t PerformanceSampleCount() override { return TotalSampleCount(); }
void LoadSamplesToRam(
const std::vector<mlperf::QuerySampleIndex>& samples) override {}
void UnloadSamplesFromRam(
const std::vector<mlperf::QuerySampleIndex>& samples) override {}

private:
std::string mName{"Dummy QSL"};
};

// Minimal system-under-test: every IssueQuery() call is answered immediately,
// on the calling thread, by copying the sample ids into a preallocated
// response buffer and handing that buffer to mlperf::QuerySamplesComplete().
class BasicSUT : public mlperf::SystemUnderTest {
 public:
  BasicSUT() {
    // Start with some large value so that we don't reallocate memory.
    initResponse(10000);
  }
  ~BasicSUT() override {}

  // Human-readable name of this SUT.
  const std::string& Name() const override { return mName; }

  // Completes all of `samples` in a single QuerySamplesComplete() call.
  // The response buffer is grown (with a warning) if the query holds more
  // samples than it currently has room for.
  void IssueQuery(const std::vector<mlperf::QuerySample>& samples) override {
    // Keep the count unsigned: it is compared against, and used to index,
    // containers whose size type is size_t.
    const size_t n = samples.size();
    if (n > mResponses.size()) {
      std::cout << "Warning: reallocating response buffer in BasicSUT. Maybe "
                   "you should initResponse with larger value!?"
                << std::endl;
      initResponse(n);
    }
    for (size_t i = 0; i < n; i++) {
      mResponses[i].id = samples[i].id;
    }
    mlperf::QuerySamplesComplete(mResponses.data(), n);
  }

  // Nothing is buffered, so there is nothing to flush.
  void FlushQueries() override {}

  // Latency results are ignored by this benchmark.
  void ReportLatencyResults(
      const std::vector<mlperf::QuerySampleLatency>& latencies_ns) override {}

 private:
  // Resizes the response buffer to `size` entries. Newly created entries are
  // initialised with id 0 and -- presumably as (data, size); confirm against
  // mlperf::QuerySampleResponse -- the address of mBuf and sizeof(int).
  void initResponse(size_t size) {
    mResponses.resize(size,
                      {0, reinterpret_cast<uintptr_t>(&mBuf), sizeof(int)});
  }

  int mBuf{0};  // Dummy payload whose address is stored in every response.
  std::string mName{"BasicSUT"};
  std::vector<mlperf::QuerySampleResponse> mResponses;
};

// Queue-based system-under-test: IssueQuery() only appends the sample ids to
// a mutex-protected queue; a set of worker ("complete") threads drains the
// queue in batches and reports each batch via mlperf::QuerySamplesComplete().
class QueueSUT : public mlperf::SystemUnderTest {
 public:
  // numCompleteThreads: number of worker threads to launch.
  // maxSize: maximum number of samples a worker completes per batch.
  // Both are expected to be >= 1: with no workers nothing consumes the queue,
  // and with maxSize == 0 a worker has no response slots and never pops.
  QueueSUT(int numCompleteThreads, int maxSize) {
    // Each thread handle at most maxSize at a time.
    std::cout << "QueueSUT: maxSize = " << maxSize << std::endl;
    initResponse(numCompleteThreads, maxSize);
    // Launch complete threads
    for (int i = 0; i < numCompleteThreads; i++) {
      mThreads.emplace_back(&QueueSUT::CompleteThread, this, i);
    }
  }

  // Signals every worker to stop and waits for all of them to exit.
  ~QueueSUT() override {
    {
      std::unique_lock<std::mutex> lck(mMtx);
      mDone = true;
      mCondVar.notify_all();
    }
    for (auto& thread : mThreads) {
      thread.join();
    }
  }

  // Human-readable name of this SUT.
  const std::string& Name() const override { return mName; }

  // Enqueues the ids of `samples` and wakes one worker to process them.
  void IssueQuery(const std::vector<mlperf::QuerySample>& samples) override {
    std::unique_lock<std::mutex> lck(mMtx);
    for (const auto& sample : samples) {
      mIdQueue.push_back(sample.id);
    }
    // Let some worker thread to consume tasks
    mCondVar.notify_one();
  }

  // Nothing to do here; queued ids are drained by the worker threads.
  void FlushQueries() override {}

  // Latency results are ignored by this benchmark.
  void ReportLatencyResults(
      const std::vector<mlperf::QuerySampleLatency>& latencies_ns) override {}

 private:
  // Worker loop for thread `threadIdx`. Repeatedly takes up to
  // mResponses[threadIdx].size() ids from the queue while holding the lock,
  // then completes them after releasing it. Exits as soon as mDone is seen.
  void CompleteThread(int threadIdx) {
    auto& responses = mResponses[threadIdx];
    const size_t maxSize{responses.size()};
    size_t actualSize{0};
    while (true) {
      {
        std::unique_lock<std::mutex> lck(mMtx);
        mCondVar.wait(lck, [&]() { return !mIdQueue.empty() || mDone; });

        if (mDone) {
          break;
        }

        actualSize = std::min(maxSize, mIdQueue.size());
        for (size_t i = 0; i < actualSize; i++) {
          responses[i].id = mIdQueue.front();
          mIdQueue.pop_front();
        }
        // Hand the remaining work to another worker. When the queue is now
        // empty there is nothing to hand over, so skip the wake-up.
        if (!mIdQueue.empty()) {
          mCondVar.notify_one();
        }
      }
      // Report completion outside the critical section so other threads can
      // use the queue meanwhile.
      mlperf::QuerySamplesComplete(responses.data(), actualSize);
    }
  }

  // Allocates one response buffer of `size` entries per worker thread. Each
  // entry starts with id 0 and -- presumably as (data, size); confirm against
  // mlperf::QuerySampleResponse -- the address of mBuf and sizeof(int).
  void initResponse(int numCompleteThreads, int size) {
    mResponses.resize(numCompleteThreads);
    for (auto& responses : mResponses) {
      responses.resize(size,
                       {0, reinterpret_cast<uintptr_t>(&mBuf), sizeof(int)});
    }
  }

  int mBuf{0};  // Dummy payload whose address is stored in every response.
  std::string mName{"QueueSUT"};
  std::vector<std::vector<mlperf::QuerySampleResponse>> mResponses;
  std::vector<std::thread> mThreads;
  std::deque<mlperf::ResponseId> mIdQueue;  // Pending ids; guarded by mMtx.
  std::mutex mMtx;
  std::condition_variable mCondVar;
  bool mDone{false};  // Set under mMtx by the destructor to stop the workers.
};

// Entry point of the loadgen overhead benchmark.
//
// Arguments:
//   argv[1] target_qps               (required) server target QPS
//   argv[2] useQueue                 0 = BasicSUT (default), non-zero = QueueSUT
//   argv[3] numCompleteThreads       QueueSUT worker threads (default 4)
//   argv[4] maxSize                  QueueSUT max samples per batch (default 1)
//   argv[5] server_coalesce_queries  0 (default) or non-zero
//
// Returns 0 after the test has run, or 1 if the arguments are missing or
// invalid.
int main(int argc, char** argv) {
  // Validate explicitly instead of via assert(): assert compiles out under
  // NDEBUG, which would let a null argv[1] reach std::stoi.
  if (argc < 2) {
    std::cerr << "Need to pass in at least one argument: target_qps\n"
              << "Usage: " << (argc > 0 ? argv[0] : "repro")
              << " <target_qps> [<0=Basic,1=Queue> [<numCompleteThreads>"
                 " [<maxSizeInComplete> [<server_coalesce_queries=0or1>]]]]"
              << std::endl;
    return 1;
  }

  int target_qps{0};
  bool useQueue{false};
  int numCompleteThreads{4};
  int maxSize{1};
  bool server_coalesce_queries{false};
  try {
    // std::stoi throws on non-numeric or out-of-range input.
    target_qps = std::stoi(argv[1]);
    if (argc >= 3) {
      useQueue = std::stoi(argv[2]) != 0;
    }
    if (argc >= 4) {
      numCompleteThreads = std::stoi(argv[3]);
    }
    if (argc >= 5) {
      maxSize = std::stoi(argv[4]);
    }
    if (argc >= 6) {
      server_coalesce_queries = std::stoi(argv[5]) != 0;
    }
  } catch (const std::exception& e) {
    std::cerr << "Failed to parse command-line arguments as integers: "
              << e.what() << std::endl;
    return 1;
  }
  std::cout << "target_qps = " << target_qps << std::endl;

  // QueueSUT needs at least one worker and one response slot per worker;
  // otherwise queued samples are never completed.
  if (useQueue && (numCompleteThreads < 1 || maxSize < 1)) {
    std::cerr << "numCompleteThreads and maxSizeInComplete must both be >= 1"
              << std::endl;
    return 1;
  }

  QSL qsl;
  std::unique_ptr<mlperf::SystemUnderTest> sut;

  // Configure the test settings
  mlperf::TestSettings testSettings;
  testSettings.scenario = mlperf::TestScenario::Server;
  testSettings.mode = mlperf::TestMode::PerformanceOnly;
  testSettings.server_target_qps = target_qps;
  testSettings.server_target_latency_ns = 10000000;  // 10ms
  testSettings.server_target_latency_percentile = 0.99;
  testSettings.min_duration_ms = 60000;
  testSettings.min_query_count = 270000;
  testSettings.server_coalesce_queries = server_coalesce_queries;
  std::cout << "testSettings.server_coalesce_queries = "
            << (server_coalesce_queries ? "True" : "False") << std::endl;

  // Configure the logging settings
  mlperf::LogSettings logSettings;
  logSettings.log_output.outdir = "build";
  logSettings.log_output.prefix = "mlperf_log_";
  logSettings.log_output.suffix = "";
  logSettings.log_output.prefix_with_datetime = false;
  logSettings.log_output.copy_detail_to_stdout = false;
  logSettings.log_output.copy_summary_to_stdout = true;
  logSettings.log_mode = mlperf::LoggingMode::AsyncPoll;
  logSettings.log_mode_async_poll_interval_ms = 1000;
  logSettings.enable_trace = false;

  // Choose SUT
  if (useQueue) {
    std::cout << "Using QueueSUT with " << numCompleteThreads
              << " complete threads" << std::endl;
    sut.reset(new QueueSUT(numCompleteThreads, maxSize));
  } else {
    std::cout << "Using BasicSUT" << std::endl;
    sut.reset(new BasicSUT());
  }

  // Start test
  std::cout << "Start test..." << std::endl;
  mlperf::StartTest(sut.get(), &qsl, testSettings, logSettings);
  std::cout << "Test done. Clean up SUT..." << std::endl;
  // Destroy the SUT explicitly so its teardown happens before "Done!".
  sut.reset();
  std::cout << "Done!" << std::endl;
  return 0;
}
21 changes: 21 additions & 0 deletions loadgen/benchmark/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Builds loadgen and the repro benchmark, then runs the benchmark with
# jemalloc preloaded. All script arguments are forwarded to build/repro.exe.
# Set JEMALLOC_LIB to override the default jemalloc shared-library path.

# Stop at the first failing step instead of continuing with a stale or
# missing build.
set -e

# The relative paths below assume the script's own directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo "Building loadgen..."
mkdir -p loadgen_build
# Build in a subshell so the working directory is restored even on failure.
(cd loadgen_build && cmake ../.. && make -j)

echo "Building test program..."
mkdir -p build
g++ --std=c++11 -O3 -I.. -o build/repro.exe repro.cpp -Lloadgen_build -lmlperf_loadgen -lpthread

LD_PRELOAD="${JEMALLOC_LIB:-/usr/lib/x86_64-linux-gnu/libjemalloc.so.2}" build/repro.exe "$@"
21 changes: 21 additions & 0 deletions loadgen/benchmark/run_debug.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Builds loadgen and the repro benchmark in Debug mode, then starts the
# benchmark under gdb. All script arguments are forwarded to build/repro.exe.

# Stop at the first failing step instead of debugging a stale or missing
# binary.
set -e

# The relative paths below assume the script's own directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo "Building loadgen in Debug mode..."
mkdir -p loadgen_build
# Build in a subshell so the working directory is restored even on failure.
(cd loadgen_build && cmake -DCMAKE_BUILD_TYPE=Debug ../.. && make -j)

echo "Building test program in Debug mode..."
mkdir -p build
g++ --std=c++11 -O0 -g -I.. -o build/repro.exe repro.cpp -Lloadgen_build -lmlperf_loadgen -lpthread

gdb --args build/repro.exe "$@"
Loading