Skip to content

Commit

Permalink
Symmetries exploited!
Browse files Browse the repository at this point in the history
New command-line option --symm modifies the agent's behaviour in the following way.

For every state, the policy probabilities of moves which are equivalent by symmetry are
collapsed and summed onto a single one of them, which is randomly chosen. The other
moves are not even included in the UCT tree.

The total number of visits is then split evenly again among equivalent moves at the
moment of writing the training information.
  • Loading branch information
Vandertic committed Oct 13, 2018
1 parent 212ff2c commit 86b8324
Show file tree
Hide file tree
Showing 11 changed files with 107 additions and 19 deletions.
19 changes: 19 additions & 0 deletions src/FastBoard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <sstream>
#include <string>

#include "Network.h"
#include "Utils.h"
#include "config.h"

Expand Down Expand Up @@ -575,3 +576,21 @@ std::string FastBoard::get_stone_list() const {

return result;
}

int FastBoard::get_sym_move(const int vertex,
const int symmetry) const {
const auto idx = get_index(vertex);
if (idx < 0 || idx >= BOARD_SQUARES)
return vertex;

return get_vertex(symmetry_nn_idx_table[symmetry][idx]);
}

// Flatten a vertex into a row-major board index: row * BOARD_SIZE + column,
// where get_xy() supplies (column, row) as (first, second).
int FastBoard::get_index(const int vertex) const {
    const auto coords = get_xy(vertex);
    const auto column = coords.first;
    const auto row = coords.second;
    return row * BOARD_SIZE + column;
}

// Inverse of get_index(): split a row-major board index into its column and
// row and hand them to the two-argument get_vertex().
int FastBoard::get_vertex(const int index) const {
    const auto column = index % BOARD_SIZE;
    const auto row = index / BOARD_SIZE;
    return get_vertex(column, row);
}
5 changes: 4 additions & 1 deletion src/FastBoard.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ class FastBoard {
square_t get_square(int x, int y) const;
square_t get_square(int vertex) const ;
int get_vertex(int i, int j) const;
// Vertex for a row-major board index (index % BOARD_SIZE is the column,
// index / BOARD_SIZE the row).
int get_vertex(const int index) const;
// Row-major board index of a vertex: y * BOARD_SIZE + x, with (x, y) taken
// from get_xy().
int get_index(const int vertex) const;
void set_square(int x, int y, square_t content);
void set_square(int vertex, square_t content);
std::pair<int, int> get_xy(int vertex) const;
Expand Down Expand Up @@ -101,7 +103,8 @@ class FastBoard {
static bool starpoint(int size, int x, int y);

int liberties_to_capture(int vtx) const;

// Image of a vertex under the given symmetry, looked up in
// symmetry_nn_idx_table. A vertex whose board index falls outside
// [0, BOARD_SQUARES) is returned unchanged.
int get_sym_move(const int vertex, const int symmetry) const;

protected:
/*
bit masks to detect eyes on neighbors
Expand Down
19 changes: 19 additions & 0 deletions src/FastState.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <vector>

#include "FastBoard.h"
#include "Network.h"
#include "Utils.h"
#include "Zobrist.h"

Expand Down Expand Up @@ -215,3 +216,21 @@ void FastState::set_blunder_state(bool state) {
bool FastState::is_blunder() {
return m_blunder_chosen;
}

// Tell whether the current position is unchanged by a board symmetry.
//
// symmetry - selects the row of symmetry_nn_idx_table to test.
//
// Returns true when every intersection holds the same content as its image
// under the symmetry and, if a ko point is set (m_komove != 0), that point
// maps onto itself. Returns false for an id that does not select a row of
// the table: a symmetry that does not exist cannot leave the position
// invariant, and indexing the table with it would read out of bounds.
//
// Only the current stones and the ko point are compared; nothing else in
// the state is inspected here.
bool FastState::is_symmetry_invariant(const int symmetry) const {
    const auto num_symmetries =
        static_cast<int>(symmetry_nn_idx_table.size());
    if (symmetry < 0 || symmetry >= num_symmetries) {
        return false;
    }

    const auto& permutation = symmetry_nn_idx_table[symmetry];

    for (auto y = 0; y < BOARD_SIZE; y++) {
        for (auto x = 0; x < BOARD_SIZE; x++) {
            // Vertex that (x, y) is sent to by the symmetry.
            const auto sym_vertex =
                board.get_vertex(permutation[y * BOARD_SIZE + x]);
            if (board.get_square(x, y) != board.get_square(sym_vertex)) {
                return false;
            }
        }
    }

    // A ko point breaks the symmetry unless it is a fixed point of it.
    if (m_komove != 0) {
        if (m_komove != board.get_sym_move(m_komove, symmetry)) {
            return false;
        }
    }

    return true;
}
4 changes: 1 addition & 3 deletions src/FastState.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@ class FastState {

void set_blunder_state(bool state);
bool is_blunder();

// size_t get_last_rnd_move_num();
// void set_last_rnd_move_num(size_t num);
// True when the board contents, and the ko point if one is set, are left
// unchanged by the given symmetry (an index into symmetry_nn_idx_table).
bool is_symmetry_invariant(const int symmetry) const;

FullBoard board;

Expand Down
2 changes: 2 additions & 0 deletions src/GTP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ int cfg_resignpct;
int cfg_noise;
bool cfg_fpuzero;
bool cfg_adv_features;
bool cfg_exploit_symmetries;
float cfg_noise_value;
float cfg_lambda;
float cfg_mu;
Expand Down Expand Up @@ -116,6 +117,7 @@ void GTP::setup_default_parameters() {
cfg_resignpct = -1;
cfg_fpuzero = false;
cfg_adv_features = false;
cfg_exploit_symmetries = false;
cfg_noise = false;
cfg_noise_value = 0.03;
cfg_random_cnt = 0;
Expand Down
1 change: 1 addition & 0 deletions src/GTP.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ extern int cfg_resignpct;
extern int cfg_noise;
extern bool cfg_fpuzero;
extern bool cfg_adv_features;
extern bool cfg_exploit_symmetries;
extern float cfg_noise_value;
extern float cfg_komi;
extern float cfg_lambda;
Expand Down
14 changes: 9 additions & 5 deletions src/Leela.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ static void parse_commandline(int argc, char *argv[]) {
po::value<float>()->default_value(cfg_blunder_thr),
"If visits ratio with best is less than this, it's a blunder. "
"Don't save training data for moves before last blunder.")
("symm", "Exploit symmetries by collapsing policy values of "
"equivalent moves to a single one, chosen randomly. When writing "
"training data, split the visit count evenly among equivalent moves.")
;
#ifdef USE_TUNER
po::options_description tuner_desc("Tuning options");
Expand All @@ -123,8 +126,9 @@ static void parse_commandline(int argc, char *argv[]) {
("fpu_reduction", po::value<float>())
("fpu_zero", "Use constant fpu=0.5 (AlphaGoZero). "
"The default is reduced parent's value (LeelaZero).")
("adv_features", "Include advanced features (legal moves, liberties to capture) "
"when saving training data, but lower history from 8 to 4 moves.")
("adv_features", "Include advanced features (legal moves, "
"last liberty intersections) when saving training data. Shorten "
"history from 8 past moves to last 4.")
;
#endif
// These won't be shown, we use them to catch incorrect usage of the
Expand Down Expand Up @@ -308,9 +312,9 @@ static void parse_commandline(int argc, char *argv[]) {
if (vm.count("blunderthr")) {
cfg_blunder_thr = vm["blunderthr"].as<float>();
}



if (vm.count("symm")) {
cfg_exploit_symmetries = true;
}

if (vm.count("timemanage")) {
auto tm = vm["timemanage"].as<std::string>();
Expand Down
2 changes: 1 addition & 1 deletion src/Network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ static std::vector<float> ip2_val_b; // 1 (2 in SINGLE head type)
static bool value_head_not_stm;

// Symmetry helper
static std::array<std::array<int, BOARD_SQUARES>, 8> symmetry_nn_idx_table;
std::array<std::array<int, BOARD_SQUARES>, 8> symmetry_nn_idx_table;

// Value head beta (vbe=Value BEta)
static std::vector<float> conv_vbe_w; // channels*vbe_outputs
Expand Down
3 changes: 3 additions & 0 deletions src/Network.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@
std::pair<float,float> sigmoid(float alpha, float beta, float bonus);

extern bool is_mult_komi_net;
// Symmetry lookup table shared with the board/state code: indexed as
// [symmetry][board index], each entry is the board index that the given
// index is mapped to by that symmetry. There are 8 rows of BOARD_SQUARES
// entries each.
extern std::array<std::array<int, BOARD_SQUARES>, 8>
symmetry_nn_idx_table;



class Network {
Expand Down
16 changes: 14 additions & 2 deletions src/Training.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,12 +200,24 @@ void Training::record(GameState& state, UCTNode& root) {
return;
}

std::vector<int> stabilizer_subgroup;

for (auto i = 0; i < 8; i++) {
if(i == 0 || (cfg_exploit_symmetries && state.is_symmetry_invariant(i))) {
stabilizer_subgroup.emplace_back(i);
}
}

for (const auto& child : root.get_children()) {
auto prob = static_cast<float>(child->get_visits() / sum_visits);
auto move = child->get_move();
if (move != FastBoard::PASS) {
auto xy = state.board.get_xy(move);
step.probabilities[xy.second * BOARD_SIZE + xy.first] = prob;
const auto frac_prob = prob / stabilizer_subgroup.size();
for (auto sym : stabilizer_subgroup) {
const auto sym_move = state.board.get_sym_move(move, sym);
const auto sym_idx = state.board.get_index(sym_move);
step.probabilities[sym_idx] += frac_prob;
}
} else {
step.probabilities[BOARD_SQUARES] = prob;
}
Expand Down
41 changes: 34 additions & 7 deletions src/UCTNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "GTP.h"
#include "GameState.h"
#include "Network.h"
#include "Random.h"
#include "Utils.h"

using namespace Utils;
Expand Down Expand Up @@ -117,16 +118,42 @@ bool UCTNode::create_children(std::atomic<int>& nodecount,
m_agent_eval = value;
}

std::vector<Network::ScoreVertexPair> nodelist;

std::vector<int> stabilizer_subgroup;

for (auto i = 0; i < 8; i++) {
if(i == 0 || (cfg_exploit_symmetries && state.is_symmetry_invariant(i))) {
stabilizer_subgroup.emplace_back(i);
}
}

std::vector<Network::ScoreVertexPair> nodelist;
std::array<bool, BOARD_SQUARES> taken_already{};
auto unif_law = std::uniform_real_distribution<float>{0.0, 1.0};

auto legal_sum = 0.0f;
for (auto i = 0; i < BOARD_SQUARES; i++) {
const auto x = i % BOARD_SIZE;
const auto y = i / BOARD_SIZE;
const auto vertex = state.board.get_vertex(x, y);
if (state.is_move_legal(to_move, vertex)) {
nodelist.emplace_back(raw_netlist.policy[i], vertex);
legal_sum += raw_netlist.policy[i];
const auto vertex = state.board.get_vertex(i);
if (state.is_move_legal(to_move, vertex) && !taken_already[i]) {
auto taken_policy = 0.0f;
auto max_u = 0.0f;
auto rnd_vertex = vertex;
for (auto sym : stabilizer_subgroup) {
const auto j_vertex = state.board.get_sym_move(vertex, sym);
const auto j = state.board.get_index(j_vertex);
if (!taken_already[j]) {
taken_already[j] = true;
taken_policy += raw_netlist.policy[j];

const auto u = unif_law(Random::get_Rng());
if (u > max_u) {
max_u = u;
rnd_vertex = j_vertex;
}
}
}
nodelist.emplace_back(taken_policy, rnd_vertex);
legal_sum += taken_policy;
}
}
nodelist.emplace_back(raw_netlist.policy_pass, FastBoard::PASS);
Expand Down

0 comments on commit 86b8324

Please sign in to comment.