Skip to content

Commit

Permalink
Disable passing heuristics during self-play.
Browse files Browse the repository at this point in the history
The heuristics aren't needed during self-play, and might
interfere by not letting the network see the consequences of passing or
not passing, so disable them during self-play.
  • Loading branch information
gcp committed Nov 15, 2017
1 parent 59d257d commit 7f4d8d2
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 5 deletions.
2 changes: 1 addition & 1 deletion autogtp/Game.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Game::Game(const QString& weights, QTextStream& out) :
#ifdef WIN32
cmdLine.append(".exe");
#endif
cmdLine.append(" -g -q -n -m 30 -r 0 -w ");
cmdLine.append(" -g -q -n -d -m 30 -r 0 -w ");
cmdLine.append(weights);
cmdLine.append(" -p 1000 --noponder");
fileName = QUuid::createUuid().toRfc4122().toHex();
Expand Down
2 changes: 2 additions & 0 deletions src/GTP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ int cfg_lagbuffer_cs;
int cfg_resignpct;
int cfg_noise;
int cfg_random_cnt;
bool cfg_dumbpass;
#ifdef USE_OPENCL
std::vector<int> cfg_gpus;
int cfg_rowtiles;
Expand Down Expand Up @@ -77,6 +78,7 @@ void GTP::setup_default_parameters() {
cfg_resignpct = 10;
cfg_noise = false;
cfg_random_cnt = 0;
cfg_dumbpass = false;
cfg_logfile_handle = nullptr;
cfg_quiet = false;
}
Expand Down
1 change: 1 addition & 0 deletions src/GTP.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ extern int cfg_lagbuffer_cs;
extern int cfg_resignpct;
extern int cfg_noise;
extern int cfg_random_cnt;
extern bool cfg_dumbpass;
#ifdef USE_OPENCL
extern std::vector<int> cfg_gpus;
extern int cfg_rowtiles;
Expand Down
5 changes: 5 additions & 0 deletions src/Leela.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ void parse_commandline(int argc, char *argv[], bool & gtp_mode) {
("randomcnt,m", po::value<int>()->default_value(cfg_random_cnt),
"Play more randomly the first x moves.")
("noise,n", "Enable policy network randomization.")
("dumbpass,d", "Don't use heuristics for smarter passing.")
("weights,w", po::value<std::string>(), "File with network weights.")
("logfile,l", po::value<std::string>(), "File to log input/output to.")
("quiet,q", "Disable all diagnostic output.")
Expand Down Expand Up @@ -173,6 +174,10 @@ void parse_commandline(int argc, char *argv[], bool & gtp_mode) {
cfg_noise = true;
}

if (vm.count("dumbpass")) {
cfg_dumbpass = true;
}

if (vm.count("playouts")) {
cfg_max_playouts = vm["playouts"].as<int>();
if (!vm.count("noponder")) {
Expand Down
10 changes: 6 additions & 4 deletions src/UCTSearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ int UCTSearch::get_best_move(passflag_t passflag) {
}
}
} else {
if (bestmove == FastBoard::PASS) {
if (!cfg_dumbpass && bestmove == FastBoard::PASS) {
// Either by forcing or coincidence passing is
// on top...check whether passing loses instantly
// do full count including dead stones.
Expand All @@ -189,7 +189,8 @@ int UCTSearch::get_best_move(passflag_t passflag) {
// positions are identical, and this means the position is only won
// if there are no dead stones in our own territory (because we use
// Tromp-Taylor scoring there). So strictly speaking, the next
// heuristic isn't required for a pure RL network.
// heuristic isn't required for a pure RL network, and we have
// a commandline option to disable the behavior during learning.
// On the other hand, with a supervised learning setup, we fully
// expect that the engine will pass out anything that looks like
// a finished game even with dead stones on the board (because the
Expand Down Expand Up @@ -220,8 +221,9 @@ int UCTSearch::get_best_move(passflag_t passflag) {
} else {
myprintf("Passing wins :-)\n");
}
} else if (m_rootstate.get_last_move() == FastBoard::PASS) {
// Opponents last move was passing
} else if (!cfg_dumbpass
&& m_rootstate.get_last_move() == FastBoard::PASS) {
// Opponent's last move was passing.
// We didn't consider passing. Should we have and
// end the game immediately?
float score = m_rootstate.final_score();
Expand Down

0 comments on commit 7f4d8d2

Please sign in to comment.