From a901474bf9579ba259179eb09618d8401a156f64 Mon Sep 17 00:00:00 2001
From: "Robert Nurnberg @ elitebook" <robert.nurnberg@gmx.de>
Date: Sat, 20 Jan 2024 19:15:20 +0100
Subject: [PATCH] Update the WDL model

Update the internal WDL model. After the dual net merge, the internal
evaluations have drifted upwards a bit. With this PR
`NormalizeToPawnValue` changes from `328` to `345`.

The new model was fitted based on about 200M positions extracted from
3.4M fishtest LTC games from the last two weeks, involving SF versions
from 6deb88728fb141e853243c2873ad0cda4dd19320 to current master.

Apart from the WDL model parameter update, this PR implements the
following changes:

WDL Model:
- an incorrect 8-move shift in master's WDL model has been fixed
- the polynomials `p_a` and `p_b` are fitted over the move range [8, 120]
- the coefficients for `p_a` and `p_b` are optimized by maximizing the
  probability of predicting the observed outcome (credits to @vondele)

SF code:
- for wdl values, move will be clamped to `max(8, min(120, move))`
- no longer clamp the internal eval to [-4000,4000]
- compute `NormalizeToPawnValue` with `round`, not `trunc`

The PR only affects displayed `cp` and `wdl` values.

closes https://github.com/official-stockfish/Stockfish/pull/5002

No functional change
---
 src/uci.cpp | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/src/uci.cpp b/src/uci.cpp
index 789f345481d..2a55fbfab0a 100644
--- a/src/uci.cpp
+++ b/src/uci.cpp
@@ -43,7 +43,7 @@
 namespace Stockfish {
 
 constexpr auto StartFEN             = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
-constexpr int  NormalizeToPawnValue = 328;
+constexpr int  NormalizeToPawnValue = 345;
 constexpr int  MaxHashMB            = Is64Bit ? 33554432 : 2048;
 
 UCI::UCI(int argc, char** argv) :
@@ -421,26 +421,23 @@ namespace {
 // eval and a game ply. It fits the LTC fishtest statistics rather accurately.
 int win_rate_model(Value v, int ply) {
 
-    // The model only captures up to 240 plies, so limit the input and then rescale
-    double m = std::min(240, ply) / 64.0;
+    // The fitted model only uses data for moves in [8, 120], and is anchored at move 32.
+    double m = std::clamp(ply / 2 + 1, 8, 120) / 32.0;
 
     // The coefficients of a third-order polynomial fit is based on the fishtest data
     // for two parameters that need to transform eval to the argument of a logistic
     // function.
-    constexpr double as[] = {0.38036525, -2.82015070, 23.17882135, 307.36768407};
-    constexpr double bs[] = {-2.29434733, 13.27689788, -14.26828904, 63.45318330};
+    constexpr double as[] = {-2.00568292, 10.45906746, 1.67438883, 334.45864705};
+    constexpr double bs[] = {-4.97134419, 36.15096345, -82.25513499, 117.35186805};
 
-    // Enforce that NormalizeToPawnValue corresponds to a 50% win rate at ply 64
-    static_assert(NormalizeToPawnValue == int(as[0] + as[1] + as[2] + as[3]));
+    // Enforce that NormalizeToPawnValue corresponds to a 50% win rate at move 32.
+    static_assert(NormalizeToPawnValue == int(0.5 + as[0] + as[1] + as[2] + as[3]));
 
     double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
     double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
 
-    // Transform the eval to centipawns with limited range
-    double x = std::clamp(double(v), -4000.0, 4000.0);
-
-    // Return the win rate in per mille units, rounded to the nearest integer
-    return int(0.5 + 1000 / (1 + std::exp((a - x) / b)));
+    // Return the win rate in per mille units, rounded to the nearest integer.
+    return int(0.5 + 1000 / (1 + std::exp((a - double(v)) / b)));
 }
 }