Skip to content

Commit

Permalink
Refactor Network Usage
Browse files Browse the repository at this point in the history
Continuing from PR #4968, this update improves how Stockfish handles network
usage, making it easier to manage and modify networks in the future.

With the introduction of a dedicated Network class, creating networks has become
straightforward. See uci.cpp:
```cpp
NN::NetworkBig({EvalFileDefaultNameBig, "None", ""}, NN::embeddedNNUEBig)
```

The new `Network` encapsulates all network-related logic, significantly reducing
the complexity previously required to support multiple network types, such as
the distinction between small and big networks #4915.

Non-Regression STC:
https://tests.stockfishchess.org/tests/view/65edd26c0ec64f0526c43584
LLR: 2.94 (-2.94,2.94) <-1.75,0.25>
Total: 33760 W: 8887 L: 8661 D: 16212
Ptnml(0-2): 143, 3795, 8808, 3961, 173

Non-Regression SMP STC:
https://tests.stockfishchess.org/tests/view/65ed71970ec64f0526c42fdd
LLR: 2.96 (-2.94,2.94) <-1.75,0.25>
Total: 59088 W: 15121 L: 14931 D: 29036
Ptnml(0-2): 110, 6640, 15829, 6880, 85

Compiled with `make -j profile-build`
```
bash ./bench_parallel.sh ./stockfish ./stockfish-nnue 13 50

sf_base =  1568540 +/-   7637 (95%)
sf_test =  1573129 +/-   7301 (95%)
diff    =     4589 +/-   8720 (95%)
speedup = 0.29260% +/- 0.556% (95%)
```

Compiled with `make -j build`
```
bash ./bench_parallel.sh ./stockfish ./stockfish-nnue 13 50

sf_base =  1472653 +/-   7293 (95%)
sf_test =  1491928 +/-   7661 (95%)
diff    =    19275 +/-   7154 (95%)
speedup = 1.30886% +/- 0.486% (95%)
```

closes #5100

No functional change
  • Loading branch information
Disservin committed Mar 12, 2024
1 parent f072634 commit 1a26d69
Show file tree
Hide file tree
Showing 18 changed files with 948 additions and 826 deletions.
8 changes: 4 additions & 4 deletions src/Makefile
Expand Up @@ -55,15 +55,15 @@ PGOBENCH = $(WINE_PATH) ./$(EXE) bench
SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \
misc.cpp movegen.cpp movepick.cpp position.cpp \
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
nnue/evaluate_nnue.cpp nnue/features/half_ka_v2_hm.cpp
nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp

# Project header files. Every entry must be a header (.h), not a .cpp source.
HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h \
nnue/evaluate_nnue.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \
nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \
nnue/layers/affine_transform_sparse_input.h nnue/layers/clipped_relu.h nnue/layers/simd.h \
nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h nnue/nnue_architecture.h \
nnue/nnue_common.h nnue/nnue_feature_transformer.h position.h \
search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \
tt.h tune.h types.h uci.h ucioption.h perft.h
tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h

OBJS = $(notdir $(SRCS:.cpp=.o))

Expand Down Expand Up @@ -502,7 +502,7 @@ endif
# In earlier NDK versions, you'll need to pass -fno-addrsig if using GNU binutils.
# Currently we don't know how to make PGO builds with the NDK yet.
ifeq ($(COMP),ndk)
CXXFLAGS += -stdlib=libc++ -fPIE
CXXFLAGS += -stdlib=libc++ -fPIE -mcmodel=large
comp=clang
ifeq ($(arch),armv7)
CXX=armv7a-linux-androideabi16-clang++
Expand Down
164 changes: 10 additions & 154 deletions src/evaluate.cpp
Expand Up @@ -22,161 +22,18 @@
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <optional>
#include <sstream>
#include <unordered_map>
#include <vector>

#include "incbin/incbin.h"
#include "misc.h"
#include "nnue/evaluate_nnue.h"
#include "nnue/nnue_architecture.h"
#include "nnue/network.h"
#include "nnue/nnue_misc.h"
#include "position.h"
#include "types.h"
#include "uci.h"
#include "ucioption.h"

// Macro to embed the default efficiently updatable neural network (NNUE) file
// data in the engine binary (using incbin.h, by Dale Weiler).
// There is one invocation per network. Each invocation declares three
// variables, where <Net> is Big or Small:
//     const unsigned char        gEmbeddedNNUE<Net>Data[];  // the embedded data
//     const unsigned char *const gEmbeddedNNUE<Net>End;     // a marker to the end
//     const unsigned int         gEmbeddedNNUE<Net>Size;    // the size of the embedded file
// Note that this does not work in Microsoft Visual Studio.
#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF)
INCBIN(EmbeddedNNUEBig, EvalFileDefaultNameBig);
INCBIN(EmbeddedNNUESmall, EvalFileDefaultNameSmall);
#else
// Embedding is unavailable (MSVC) or switched off (NNUE_EMBEDDING_OFF): define
// one-byte placeholders under the same names so that code referring to these
// symbols still builds.
const unsigned char gEmbeddedNNUEBigData[1] = {0x0};
const unsigned char* const gEmbeddedNNUEBigEnd = &gEmbeddedNNUEBigData[1];
const unsigned int gEmbeddedNNUEBigSize = 1;
const unsigned char gEmbeddedNNUESmallData[1] = {0x0};
const unsigned char* const gEmbeddedNNUESmallEnd = &gEmbeddedNNUESmallData[1];
const unsigned int gEmbeddedNNUESmallSize = 1;
#endif


namespace Stockfish {

namespace Eval {


// Loads the NNUE networks, either at startup time or when the engine receives
// a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue".
// For every entry of evalFiles the file name is read from the entry's UCI
// option; an empty option selects the entry's default name. The file is then
// searched in three locations, in this order: internally (the default network
// may be embedded in the binary), in the active working directory and in the
// engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY
// variable to have the engine search in a special directory in their distro.
// On success the entry records the loaded file name and the net description.
// Returns the (possibly updated) evalFiles.
NNUE::EvalFiles NNUE::load_networks(const std::string& rootDirectory,
                                    const OptionsMap&  options,
                                    NNUE::EvalFiles    evalFiles) {

    // C++ way to prepare a buffer for a memory stream
    class MemoryBuffer: public std::basic_streambuf<char> {
       public:
        MemoryBuffer(char* p, size_t n) {
            setg(p, p, p + n);
            setp(p, p + n);
        }
    };

    // Search order; "<internal>" denotes the network embedded in the binary
    std::vector<std::string> searchDirs = {"<internal>", "", rootDirectory};
#if defined(DEFAULT_NNUE_DIRECTORY)
    searchDirs.push_back(stringify(DEFAULT_NNUE_DIRECTORY));
#endif

    for (auto& [netSize, evalFile] : evalFiles)
    {
        std::string requested = options[evalFile.optionName];

        if (requested.empty())
            requested = evalFile.defaultName;

        for (const std::string& dir : searchDirs)
        {
            // The requested net is already the current one: nothing left to try
            if (evalFile.current == requested)
                break;

            if (dir == "<internal>")
            {
                // Only the default net can come from the embedded data
                if (requested != evalFile.defaultName)
                    continue;

                const bool useSmall = netSize == Small;

                MemoryBuffer buffer(
                  const_cast<char*>(reinterpret_cast<const char*>(
                    useSmall ? gEmbeddedNNUESmallData : gEmbeddedNNUEBigData)),
                  size_t(useSmall ? gEmbeddedNNUESmallSize : gEmbeddedNNUEBigSize));
                (void) gEmbeddedNNUEBigEnd;  // Silence warning on unused variable
                (void) gEmbeddedNNUESmallEnd;

                std::istream stream(&buffer);
                auto         description = NNUE::load_eval(stream, netSize);

                if (description.has_value())
                {
                    evalFile.current        = requested;
                    evalFile.netDescription = description.value();
                }
            }
            else
            {
                std::ifstream stream(dir + requested, std::ios::binary);
                auto          description = NNUE::load_eval(stream, netSize);

                if (description.has_value())
                {
                    evalFile.current        = requested;
                    evalFile.netDescription = description.value();
                }
            }
        }
    }

    return evalFiles;
}

// Verifies that the last net used was loaded successfully
void NNUE::verify(const OptionsMap& options,
const std::unordered_map<Eval::NNUE::NetSize, EvalFile>& evalFiles) {

for (const auto& [netSize, evalFile] : evalFiles)
{
std::string user_eval_file = options[evalFile.optionName];

if (user_eval_file.empty())
user_eval_file = evalFile.defaultName;

if (evalFile.current != user_eval_file)
{
std::string msg1 =
"Network evaluation parameters compatible with the engine must be available.";
std::string msg2 =
"The network file " + user_eval_file + " was not loaded successfully.";
std::string msg3 = "The UCI option EvalFile might need to specify the full path, "
"including the directory name, to the network file.";
std::string msg4 = "The default net can be downloaded from: "
"https://tests.stockfishchess.org/api/nn/"
+ evalFile.defaultName;
std::string msg5 = "The engine will be terminated now.";

sync_cout << "info string ERROR: " << msg1 << sync_endl;
sync_cout << "info string ERROR: " << msg2 << sync_endl;
sync_cout << "info string ERROR: " << msg3 << sync_endl;
sync_cout << "info string ERROR: " << msg4 << sync_endl;
sync_cout << "info string ERROR: " << msg5 << sync_endl;

exit(EXIT_FAILURE);
}

sync_cout << "info string NNUE evaluation using " << user_eval_file << sync_endl;
}
}
}

// Returns a static, purely materialistic evaluation of the position from
// the point of view of the given color. It can be divided by PawnValue to get
// an approximation of the material advantage on the board in terms of pawns.
Expand All @@ -188,7 +45,7 @@ int Eval::simple_eval(const Position& pos, Color c) {

// Evaluate is the evaluator for the outer world. It returns a static evaluation
// of the position from the point of view of the side to move.
Value Eval::evaluate(const Position& pos, int optimism) {
Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos, int optimism) {

assert(!pos.checkers());

Expand All @@ -198,8 +55,8 @@ Value Eval::evaluate(const Position& pos, int optimism) {

int nnueComplexity;

Value nnue = smallNet ? NNUE::evaluate<NNUE::Small>(pos, true, &nnueComplexity, psqtOnly)
: NNUE::evaluate<NNUE::Big>(pos, true, &nnueComplexity, false);
Value nnue = smallNet ? networks.small.evaluate(pos, true, &nnueComplexity, psqtOnly)
: networks.big.evaluate(pos, true, &nnueComplexity, false);

// Blend optimism and eval with nnue complexity and material imbalance
optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 512;
Expand All @@ -222,23 +79,22 @@ Value Eval::evaluate(const Position& pos, int optimism) {
// a string (suitable for outputting to stdout) that contains the detailed
// descriptions and values of each evaluation term. Useful for debugging.
// Trace scores are from white's point of view
std::string Eval::trace(Position& pos) {
std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {

if (pos.checkers())
return "Final evaluation: none (in check)";

std::stringstream ss;
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
ss << '\n' << NNUE::trace(pos) << '\n';
ss << '\n' << NNUE::trace(pos, networks) << '\n';

ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);

Value v;
v = NNUE::evaluate<NNUE::Big>(pos, false);
v = pos.side_to_move() == WHITE ? v : -v;
Value v = networks.big.evaluate(pos, false);
v = pos.side_to_move() == WHITE ? v : -v;
ss << "NNUE evaluation " << 0.01 * UCI::to_cp(v) << " (white side)\n";

v = evaluate(pos, VALUE_ZERO);
v = evaluate(networks, pos, VALUE_ZERO);
v = pos.side_to_move() == WHITE ? v : -v;
ss << "Final evaluation " << 0.01 * UCI::to_cp(v) << " (white side)";
ss << " [with scaled NNUE, ...]";
Expand Down
32 changes: 7 additions & 25 deletions src/evaluate.h
Expand Up @@ -20,51 +20,33 @@
#define EVALUATE_H_INCLUDED

#include <string>
#include <unordered_map>

#include "types.h"

namespace Stockfish {

class Position;
class OptionsMap;

namespace Eval {

constexpr inline int SmallNetThreshold = 1139, PsqtOnlyThreshold = 2500;

std::string trace(Position& pos);

int simple_eval(const Position& pos, Color c);
Value evaluate(const Position& pos, int optimism);

// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
// for the build process (profile-build and fishtest) to work. Do not change the
// name of the macro, as it is used in the Makefile.
// name of the macro or the location where this macro is defined, as it is used
// in the Makefile/Fishtest.
#define EvalFileDefaultNameBig "nn-1ceb1ade0001.nnue"
#define EvalFileDefaultNameSmall "nn-baff1ede1f90.nnue"

// Per-network record: which UCI option selects the net file, which file is
// the default, which file is currently selected and its description.
struct EvalFile {
// UCI option name
std::string optionName;
// Default net name, will use one of the macros above
std::string defaultName;
// Selected net name, either via uci option or default
std::string current;
// Net description extracted from the net file
std::string netDescription;
};

namespace NNUE {
struct Networks;
}

enum NetSize : int;

using EvalFiles = std::unordered_map<Eval::NNUE::NetSize, EvalFile>;
std::string trace(Position& pos, const Eval::NNUE::Networks& networks);

EvalFiles load_networks(const std::string&, const OptionsMap&, EvalFiles);
void verify(const OptionsMap&, const EvalFiles&);
int simple_eval(const Position& pos, Color c);
Value evaluate(const NNUE::Networks& networks, const Position& pos, int optimism);

} // namespace NNUE

} // namespace Eval

Expand Down
3 changes: 0 additions & 3 deletions src/main.cpp
Expand Up @@ -19,7 +19,6 @@
#include <iostream>

#include "bitboard.h"
#include "evaluate.h"
#include "misc.h"
#include "position.h"
#include "tune.h"
Expand All @@ -39,8 +38,6 @@ int main(int argc, char* argv[]) {

Tune::init(uci.options);

uci.evalFiles = Eval::NNUE::load_networks(uci.working_directory(), uci.options, uci.evalFiles);

uci.loop();

return 0;
Expand Down
25 changes: 25 additions & 0 deletions src/misc.h
Expand Up @@ -25,6 +25,7 @@
#include <cstddef>
#include <cstdint>
#include <iosfwd>
#include <memory>
#include <string>
#include <vector>

Expand All @@ -49,6 +50,30 @@ void* aligned_large_pages_alloc(size_t size);
// nop if mem == nullptr
void aligned_large_pages_free(void* mem);

// Deleter for automating release of memory area: destroys the pointed-to
// object in place, then passes its storage to std_aligned_free().
template<typename T>
struct AlignedDeleter {
    void operator()(T* ptr) const {
        std::destroy_at(ptr);
        std_aligned_free(ptr);
    }
};

// Deleter that destroys the pointed-to object in place, then passes its
// storage to aligned_large_pages_free().
template<typename T>
struct LargePageDeleter {
    void operator()(T* ptr) const {
        std::destroy_at(ptr);
        aligned_large_pages_free(ptr);
    }
};

// std::unique_ptr that releases its object through AlignedDeleter
template<typename T>
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;

// std::unique_ptr that releases its object through LargePageDeleter
template<typename T>
using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;


void dbg_hit_on(bool cond, int slot = 0);
void dbg_mean_of(int64_t value, int slot = 0);
void dbg_stdev_of(int64_t value, int slot = 0);
Expand Down

0 comments on commit 1a26d69

Please sign in to comment.