Merge pull request #140 from kiudee/139_noise_estimation

Add pentanomial noise estimation to tuning server
kiudee · Jun 28, 2021 · e189f90 · e189f90
2 parents 04672db + ac516bf
commit e189f90
Show file tree

Hide file tree

Showing 3 changed files with 81 additions and 27 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -13,6 +13,8 @@ History
   for both engines.
 * Tuning server of the distributed tuning framework will now also save the
   optimizer object.
+* Tuning server now also uses the updated pentanomial model including
+  noise estimation.
 * Fix the match parser producing incorrect results, when concurrency > 1 is
   used for playing matches.
 * Fix the server for distributed tuning trying to compute the current optimum

diff --git a/tune/db_workers/tuning_server.py b/tune/db_workers/tuning_server.py
@@ -42,9 +42,9 @@
     TimeControl,
     create_sqlalchemy_engine,
     get_session_maker,
-    simple_penta_to_score,
 )
 from tune.io import InitStrings
+from tune.local import counts_to_penta
 from tune.priors import roundflat
 from tune.utils import expected_ucb
 
@@ -251,29 +251,30 @@ def query_data(self, session, include_active=False):
             .values
         )
         y = {tc: [] for tc in self.time_controls}
+        variances = {tc: [] for tc in self.time_controls}
         for job in jobs:
             for result in job.results:
                 tc = result.time_control.to_tuple()
                 if tc not in self.time_controls:
                     continue
-                draw_rate = float(result.time_control.draw_rate)
                 counts = np.array(
                     [
-                        result.ll_count,
-                        result.dl_count,
-                        result.wl_count + result.dd_count,
-                        result.wd_count,
                         result.ww_count,
+                        result.wd_count,
+                        result.wl_count + result.dd_count,
+                        result.dl_count,
+                        result.ll_count,
                     ]
                 )
-                score = simple_penta_to_score(
-                    draw_rate=draw_rate,
-                    counts=counts,
-                    prior_games=self.experiment.get("prior_games", 1),
-                    prior_elo=self.experiment.get("prior_elo", 0.0),
-                )
-                y[tc].append(-score)
-        return X, np.array(list(y.values())).mean(axis=0), samplesize_reached
+                score, variance = counts_to_penta(counts=counts, random_state=0)
+                y[tc].append(score)
+                variances[tc].append(variance)
+        return (
+            X,
+            np.array(list(y.values())).mean(axis=0),
+            np.array(list(variances.values())).mean(axis=0),
+            samplesize_reached,
+        )
 
     @staticmethod
     def change_engine_config(engine_config, params):
@@ -362,7 +363,9 @@ def run(self):
             # Check if minimum sample size and minimum wait time are reached, then query
             # data and update model:
             with self.sessionmaker() as session:
-                X, y, samplesize_reached = self.query_data(session, include_active=True)
+                X, y, variances, samplesize_reached = self.query_data(
+                    session, include_active=True
+                )
                 self.logger.debug(
                     f"Queried the database for data and got (last 5):\n"
                     f"{X[-5:]}\n{y[-5:]}"
@@ -391,6 +394,7 @@ def run(self):
             self.opt.tell(
                 X.tolist(),
                 y.tolist(),
+                noise_vector=variances.tolist(),
                 fit=True,
                 replace=True,
                 n_samples=self.tunecfg["n_samples"],

diff --git a/tune/local.py b/tune/local.py
@@ -1,14 +1,17 @@
 import pathlib
 import re
 import subprocess
+from typing import Optional, Tuple, Union
 
 import numpy as np
+from numpy.random import RandomState
 from scipy.stats import dirichlet
 from skopt.space import Categorical, Integer, Real
 
 from tune.utils import TimeControl
 
 __all__ = [
+    "counts_to_penta",
     "run_match",
     "parse_experiment_result",
     "reduce_ranges",
@@ -69,6 +72,55 @@ def prob_to_elo(p, k=4.0):
     return k * np.log10(-p / (p - 1))
 
 
+def counts_to_penta(
+    counts: np.ndarray,
+    prior_counts: Optional[np.ndarray] = None,
+    n_dirichlet_samples: int = 1000000,
+    score_scale: float = 4.0,
+    random_state: Union[int, RandomState, None] = None,
+    **kwargs,
+) -> Tuple[float, float]:
+    """Compute mean Elo score and variance of the pentanomial model for a count array.
+
+    Parameters
+    ----------
+    counts : np.ndarray
+        Array of counts for WW, WD, WL/DD, LD and LL
+    prior_counts : np.ndarray or None, default=None
+        Pseudo counts to use for WW, WD, WL/DD, LD and LL in the
+        pentanomial model.
+    n_dirichlet_samples : int, default = 1 000 000
+        Number of samples to draw from the Dirichlet distribution in order to
+        estimate the variance of the score.
+    score_scale : float, optional (default=4.0)
+        Scale of the logistic distribution used to calculate the score. Has to be a
+        positive real number.
+    random_state : int, RandomState instance or None, optional (default: None)
+        The generator used to draw the Dirichlet samples. If int, random_state is
+        the seed used by the random number generator; if RandomState instance,
+        random_state is the random number generator; if None, the random number
+        generator is the RandomState instance used by `np.random`.
+    kwargs : dict
+        Additional keyword arguments
+    Returns
+    -------
+    tuple (float, float)
+        Mean Elo score and corresponding variance
+    """
+    if prior_counts is None:
+        prior_counts = np.array([0.14, 0.19, 0.34, 0.19, 0.14]) * 2.5
+    elif len(prior_counts) != 5:
+        raise ValueError("Argument prior_counts should contain 5 elements.")
+    dist = dirichlet(alpha=counts + prior_counts)
+    scores = [0.0, 0.25, 0.5, 0.75, 1.0]
+    score = prob_to_elo(dist.mean().dot(scores), k=score_scale)
+    error = prob_to_elo(
+        dist.rvs(n_dirichlet_samples, random_state=random_state).dot(scores),
+        k=score_scale,
+    ).var()
+    return score, error
+
+
 def parse_experiment_result(
     outstr,
     prior_counts=None,
@@ -149,18 +201,14 @@ def parse_experiment_result(
         else:
             counts["LL"] += 1
     counts_array = np.array(list(counts.values()))
-    if prior_counts is None:
-        prior_counts = np.array([0.14, 0.19, 0.34, 0.19, 0.14]) * 2.5
-    elif len(prior_counts) != 5:
-        raise ValueError("Argument prior_counts should contain 5 elements.")
-    dist = dirichlet(alpha=counts_array + prior_counts)
-    scores = [0.0, 0.25, 0.5, 0.75, 1.0]
-    score = prob_to_elo(dist.mean().dot(scores), k=score_scale)
-    error = prob_to_elo(
-        dist.rvs(n_dirichlet_samples, random_state=random_state).dot(scores),
-        k=score_scale,
-    ).var()
-    return score, error
+    return counts_to_penta(
+        counts=counts_array,
+        prior_counts=prior_counts,
+        n_dirichlet_samples=n_dirichlet_samples,
+        score_scale=score_scale,
+        random_state=random_state,
+        **kwargs,
+    )
 
 
 def _construct_engine_conf(