Skip to content

Commit

Permalink
Fix full tuner for heterogeneous GPUs and auto precision detection.
Browse files Browse the repository at this point in the history
Fix full tuner for heterogeneous GPUs and auto precision detection.

--full-tuner implies --tune-only
--full-tuner requires an explicit precision

Fixes #1973.

Pull request #1986.
  • Loading branch information
ihavnoid authored and gcp committed Nov 5, 2018
1 parent 1fe59c6 commit 2cfc8d1
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 2 deletions.
13 changes: 13 additions & 0 deletions src/Leela.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,11 @@ static void parse_commandline(int argc, char *argv[]) {

if (vm.count("full-tuner")) {
cfg_sgemm_exhaustive = true;

// --full-tuner auto-implies --tune-only. The full tuner is so slow
// that nobody will wait for it to finish before running a game.
// This simply prevents some edge cases from confusing other people. #1973
cfg_tune_only = true;
}

if (vm.count("tune-only")) {
Expand All @@ -238,6 +243,14 @@ static void parse_commandline(int argc, char *argv[]) {
exit(EXIT_FAILURE);
}
}
if (cfg_precision == precision_t::AUTO) {
// Auto precision is not supported for tune only cases. #1973
if (cfg_tune_only) {
printf("Automatic precision not supported when tuning only\n");
printf("Please add '--precision single' or '--precision half'\n");
exit(EXIT_FAILURE);
}
}
#endif
#endif

Expand Down
5 changes: 4 additions & 1 deletion src/OpenCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -792,7 +792,10 @@ void OpenCL<net_t>::initialize(const int channels) {
// and will fail to compile the rest of the kernels after a tuning
// run. See #729.
if (cfg_tune_only) {
exit(EXIT_SUCCESS);
// Originally this was an exit(), but that would make the tuner
// tune only the first GPU. Return instead. The exit will be called
// after all GPUs are created.
return;
}

// Build program for these specific devices
Expand Down
7 changes: 7 additions & 0 deletions src/OpenCLScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,13 @@ void OpenCLScheduler<net_t>::initialize(const int channels) {
}
gnum++;
}

// Exit immediately after tuning. We should exit here because we skipped
// initializing the rest of the kernels due to some NVIDIA drivers crashing.
// (#729)
if (cfg_tune_only) {
exit(EXIT_SUCCESS);
}
}

template<typename net_t>
Expand Down
22 changes: 21 additions & 1 deletion src/Tuner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@

const auto TUNER_FILE_LOCAL = std::string("leelaz_opencl_tuning");

// Out-of-class definition of the static Tuner<net_t>::tuned_devices list
// (one list per net_t instantiation). load_sgemm_tuners() appends the
// current device name to it and, when exhaustive tuning is requested,
// consults it to decide whether previously stored tuning results may be reused.
template <typename net_t>
std::list<std::string> Tuner<net_t>::tuned_devices;

#ifndef USE_BLAS
// Eigen helpers
template <typename T>
Expand Down Expand Up @@ -579,7 +582,24 @@ std::string Tuner<net_t>::load_sgemm_tuners(const int m, const int n, const int
const int batch_size) {
auto tuner_file = leelaz_file(TUNER_FILE_LOCAL);
auto file = std::ifstream{tuner_file};
if (!cfg_sgemm_exhaustive && file.good()) {

auto try_prior_tuning = file.good();

// If we want full tuning, don't reuse previously tuned results
// unless the tuning was created during this run by a different GPU instance
// with the same name. This prevents the tuner from running multiple times if
// the system has multiple identical GPUs.
if (try_prior_tuning && cfg_sgemm_exhaustive) {
auto dev = m_opencl.get_device_name();
try_prior_tuning = std::any_of(
begin(tuned_devices),
end(tuned_devices),
[&dev](const std::string & x) { return dev == x; }
);
}
tuned_devices.push_back(m_opencl.get_device_name());

if (try_prior_tuning) {
auto line = std::string{};
while (std::getline(file, line)) {
auto tuners = sgemm_tuners_from_line(line, m, n, k, batch_size);
Expand Down
4 changes: 4 additions & 0 deletions src/Tuner.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ class Tuner {
std::string load_sgemm_tuners(const int m, const int n, const int k,
const int batch_size);

// List of device types that were tuned in this run.
// This is to prevent the same device from being tuned multiple times.
static std::list<std::string> tuned_devices;

static constexpr auto TUNER_VERSION = 0;
Tuner(OpenCL<net_t> & opencl, cl::Context context, cl::Device device) :
m_opencl(opencl), m_context(context), m_device(device) {}
Expand Down

0 comments on commit 2cfc8d1

Please sign in to comment.