Merge pull request #155 from kiudee/38_priors

Support changing the Gaussian process prior parameters using the CLI
kiudee · Aug 15, 2021 · 2f48924 · 2f48924
2 parents 6a9c623 + aeaef08
commit 2f48924
Show file tree

Hide file tree

Showing 7 changed files with 243 additions and 8 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -2,6 +2,21 @@
 History
 =======
 
+0.8.0 (2021-08-15)
+------------------
+
+Local tuner
+~~~~~~~~~~~
+
+* Replace default lengthscale priors by inverse-gamma distributions.
+* Add the following command line flags, which allow the user to override the
+  prior parameters:
+
+  - ``--gp-signal-prior-scale`` for the scale of the signal prior.
+  - ``--gp-noise-prior-scale`` for the scale of the noise prior.
+  - ``--gp-lengthscale-prior-lb`` for the lower bound of the lengthscale prior.
+  - ``--gp-lengthscale-prior-ub`` for the upper bound of the lengthscale prior.
+
 0.7.3 (2021-06-27)
 ------------------
 

diff --git a/docs/parameters.myst b/docs/parameters.myst
@@ -114,6 +114,32 @@ fitting process:
   - Number of model parameters to sample for the initial model. This is only
     used when resuming or for the first model. Should be a multiple of 100.
     [default: 300]
+* - `"gp_signal_prior_scale"`
+  - `--gp-signal-prior-scale FLOAT`
+  - Prior scale of the signal (standard deviation) magnitude which is used to
+    parametrize a half-normal distribution. Needs to be a number strictly
+    greater than 0.0.
+    [default: 4.0]
+* - `"gp_noise_prior_scale"`
+  - `--gp-noise-prior-scale FLOAT`
+  - Prior scale of the residual noise (standard deviation) which is used to
+    parametrize a half-normal distribution. Needs to be a number strictly
+    greater than 0.0.
+    [default: 0.0006]
+* - `"gp_lengthscale_prior_lb"`
+  - `--gp-lengthscale-prior-lb FLOAT`
+  - Lower bound for the inverse-gamma lengthscale prior.
+    It marks the point where the prior reaches 1% of the cumulative density.
+    Lower values favor non-smooth landscapes and higher values smooth ones.
+    Needs to be a number strictly greater than 0.0.
+    [default: 0.1]
+* - `"gp_lengthscale_prior_ub"`
+  - `--gp-lengthscale-prior-ub FLOAT`
+  - Upper bound for the inverse-gamma lengthscale prior.
+    It marks the point where the prior reaches 99% of the cumulative density.
+    Lower values favor non-smooth landscapes and higher values smooth ones.
+    Needs to be a number strictly greater than both 0.0 and the lower bound.
+    [default: 0.5]
 * - `"n_initial_points"`
   - `--n-initial-points INTEGER`
   - Size of initial dense set of points to try before using the GP model to

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "chess-tuning-tools"
-version = "0.7.4"
+version = "0.8.0"
 description = "A collection of tools for local and distributed tuning of chess engines."
 authors = ["Karlson Pfannschmidt <kiudee@mail.upb.de>"]
 license = "Apache-2.0"

diff --git a/tests/test_priors.py b/tests/test_priors.py
@@ -1,11 +1,38 @@
 import numpy as np
-from numpy.testing import assert_almost_equal
+from pytest import approx, raises
 
-from tune.priors import roundflat
+from tune.priors import create_priors, make_invgamma_prior, roundflat
 
 
 def test_roundflat():
+    """Check roundflat is ~0 at x=0.3 and -inf for zero and negative inputs."""
-    assert_almost_equal(roundflat(0.3), 0.0, decimal=0.1)
-
+    assert roundflat(0.3) == approx(0.0, abs=1e-6)
     assert roundflat(0.0) == -np.inf
     assert roundflat(-1.0) == -np.inf
+
+
+def test_make_invgamma_prior():
+    """Check the fitted default parameters and the rejection of invalid bounds."""
+    default_prior = make_invgamma_prior()
+    expected_kwds = {"a": 8.919240823584246, "scale": 1.7290248731437994}
+    for key, value in expected_kwds.items():
+        assert default_prior.kwds[key] == approx(value)
+
+    # Negative lower bound, negative upper bound, and lower bound above upper bound.
+    invalid_bounds = (
+        dict(lower_bound=-1e-10),
+        dict(upper_bound=-1e-10),
+        dict(lower_bound=0.5, upper_bound=0.1),
+    )
+    for kwargs in invalid_bounds:
+        with raises(ValueError):
+            make_invgamma_prior(**kwargs)
+
+
+def test_create_priors():
+    """Check the number and values of the default priors and the scale validation."""
+    priors = create_priors(n_parameters=3)
+    # Order: signal prior, one lengthscale prior per parameter, noise prior.
+    expected_at_two = [
+        -1.536140897416146,
+        -23.620792572134874,
+        -23.620792572134874,
+        -23.620792572134874,
+        -10262570.41553909,
+    ]
+    assert len(priors) == 5
+    for prior, expected in zip(priors, expected_at_two):
+        assert prior(2.0) == approx(expected)
+
+    for invalid_scale in (dict(signal_scale=0.0), dict(noise_scale=0.0)):
+        with raises(ValueError):
+            create_priors(n_parameters=3, **invalid_scale)
diff --git a/tune/cli.py b/tune/cli.py
@@ -22,6 +22,7 @@
     setup_logger,
     update_model,
 )
+from tune.priors import create_priors
 
 
 @click.group()
@@ -195,6 +196,42 @@ def run_server(verbose, logfile, command, experiment_file, dbconfig):
     "Should be a multiple of 100.",
     show_default=True,
 )
+# NOTE: FloatRange(min=0.0) has an inclusive lower bound, so 0.0 passes the CLI
+# check although the help texts require strictly positive values; such a value
+# is only rejected later with a ValueError when the priors are created.
+@click.option(
+    "--gp-signal-prior-scale",
+    default=4.0,
+    type=click.FloatRange(min=0.0),
+    help="Prior scale of the signal (standard deviation) magnitude which is used "
+    "to parametrize a half-normal distribution. "
+    "Needs to be a number strictly greater than 0.0.",
+    show_default=True,
+)
+@click.option(
+    "--gp-noise-prior-scale",
+    default=0.0006,
+    type=click.FloatRange(min=0.0),
+    help="Prior scale of the noise (standard deviation) which is used to parametrize a "
+    "half-normal distribution. "
+    "Needs to be a number strictly greater than 0.0.",
+    show_default=True,
+)
+@click.option(
+    "--gp-lengthscale-prior-lb",
+    default=0.1,
+    type=click.FloatRange(min=0.0),
+    help="Lower bound for the inverse-gamma lengthscale prior. "
+    "It marks the point where the prior reaches 1% of the cumulative density. "
+    "Needs to be a number strictly greater than 0.0.",
+    show_default=True,
+)
+@click.option(
+    "--gp-lengthscale-prior-ub",
+    default=0.5,
+    type=click.FloatRange(min=0.0),
+    help="Upper bound for the inverse-gamma lengthscale prior. "
+    "It marks the point where the prior reaches 99% of the cumulative density. "
+    "Needs to be a number strictly greater than 0.0 and the lower bound.",
+    show_default=True,
+)
 @click.option(
     "-l",
     "--logfile",
@@ -285,6 +322,10 @@ def local(  # noqa: C901
     gp_samples=300,
     gp_initial_burnin=100,
     gp_initial_samples=300,
+    gp_signal_prior_scale=4.0,
+    gp_noise_prior_scale=0.0006,
+    gp_lengthscale_prior_lb=0.1,
+    gp_lengthscale_prior_ub=0.5,
     logfile="log.txt",
     n_initial_points=16,
     n_points=500,
@@ -328,6 +369,17 @@ def local(  # noqa: C901
 
     # Initialize Optimizer object and if applicable, resume from existing
     # data/optimizer:
+    gp_priors = create_priors(
+        n_parameters=len(param_ranges),
+        signal_scale=settings.get("gp_signal_prior_scale", gp_signal_prior_scale),
+        lengthscale_lower_bound=settings.get(
+            "gp_lengthscale_prior_lb", gp_lengthscale_prior_lb
+        ),
+        lengthscale_upper_bound=settings.get(
+            "gp_lengthscale_prior_ub", gp_lengthscale_prior_ub
+        ),
+        noise_scale=settings.get("gp_noise_prior_scale", gp_noise_prior_scale),
+    )
     opt = initialize_optimizer(
         X=X,
         y=y,
@@ -344,6 +396,7 @@ def local(  # noqa: C901
         model_path=model_path,
         gp_initial_burnin=settings.get("gp_initial_burnin", gp_initial_burnin),
         gp_initial_samples=settings.get("gp_initial_samples", gp_initial_samples),
+        gp_priors=gp_priors,
     )
 
     # Main optimization loop:

diff --git a/tune/local.py b/tune/local.py
@@ -6,7 +6,7 @@
 import time
 from datetime import datetime
 from logging import Logger
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import Callable, List, Optional, Sequence, Tuple, Union
 
 import dill
 import matplotlib.pyplot as plt
@@ -321,6 +321,7 @@ def initialize_optimizer(
     model_path: Optional[str] = None,
     gp_initial_burnin: int = 100,
     gp_initial_samples: int = 300,
+    gp_priors: Optional[List[Callable[[float], float]]] = None,
 ) -> Optimizer:
     """Create an Optimizer object and if needed resume and/or reinitialize.
 
@@ -361,6 +362,12 @@ def initialize_optimizer(
         Number of burnin samples to use for reinitialization.
     gp_initial_samples : int, default=300
         Number of samples to use for reinitialization.
+    gp_priors : list of callables, default=None
+        List of priors to be used for the kernel hyperparameters. Specified in the
+        following order:
+
+        - signal magnitude prior
+        - lengthscale prior (one for each parameter)
+        - noise magnitude prior
 
     Returns
     -------
@@ -382,7 +389,7 @@ def initialize_optimizer(
         n_initial_points=n_initial_points,
         # gp_kernel=kernel,  # TODO: Let user pass in different kernels
         gp_kwargs=gp_kwargs,
-        # gp_priors=priors,  # TODO: Let user pass in priors
+        gp_priors=gp_priors,
         acq_func=acq_function,
         acq_func_kwargs=dict(alpha=1.96, n_thompson=500),
         random_state=random_state,
@@ -409,6 +416,8 @@ def initialize_optimizer(
                     "existing optimizer instance is no longer "
                     "valid. Reinitializing now."
                 )
+            if gp_priors is not None:
+                opt.gp_priors = gp_priors
 
     if reinitialize and len(X) > 0:
         logger.info(

diff --git a/tune/priors.py b/tune/priors.py
@@ -1,6 +1,12 @@
+import warnings
+from typing import Callable, List
+
 import numpy as np
+from scipy.optimize import curve_fit
+from scipy.stats import halfnorm, invgamma
+from scipy.stats._distn_infrastructure import rv_frozen  # noqa
 
-__all__ = ["roundflat"]
+__all__ = ["make_invgamma_prior", "roundflat", "create_priors"]
 
 
 def roundflat(x, a_low=2.0, a_high=8.0, d_low=0.005, d_high=1.2):
@@ -24,3 +30,102 @@ def roundflat(x, a_low=2.0, a_high=8.0, d_low=0.005, d_high=1.2):
     if x <= 0:
         return -np.inf
     return -2 * ((x / d_low) ** (-2 * a_low) + (x / d_high) ** (2 * a_high))
+
+
+def make_invgamma_prior(
+    lower_bound: float = 0.1, upper_bound: float = 0.5
+) -> rv_frozen:
+    """Create an inverse gamma distribution prior with 98% density inside the bounds.
+
+    The parameters ``a`` and ``scale`` are obtained by fitting the quantile
+    function of the inverse gamma distribution such that its 1 % and 99 %
+    quantiles match ``lower_bound`` and ``upper_bound``.
+
+    Parameters
+    ----------
+    lower_bound : float, default=0.1
+        Lower bound at which 1 % of the cumulative density is reached.
+    upper_bound : float, default=0.5
+        Upper bound at which 99 % of the cumulative density is reached.
+
+    Returns
+    -------
+    scipy.stats._distn_infrastructure.rv_frozen
+        The frozen distribution with shape parameters already set.
+
+    Raises
+    ------
+    ValueError
+        Either if any of the bounds is 0 or negative, or if the upper bound is equal or
+        smaller than the lower bound.
+    RuntimeError
+        Not all combinations of (lower_bound, upper_bound) are feasible and the
+        fit could fail for some of them.
+    """
+    if lower_bound <= 0 or upper_bound <= 0:
+        raise ValueError("The bounds cannot be equal to or smaller than 0.")
+    if lower_bound >= upper_bound:
+        raise ValueError(
+            "Lower bound needs to be strictly smaller than the upper bound."
+        )
+    with warnings.catch_warnings():
+        # All warnings raised during the fit are silenced on purpose. Presumably
+        # these are the covariance warning of curve_fit (two points, two free
+        # parameters) and numerical warnings for intermediate parameter values.
+        warnings.simplefilter("ignore")
+        # Only the fitted parameters are needed; the covariance is discarded.
+        (a_out, scale_out), _ = curve_fit(
+            lambda xdata, a, scale: invgamma.ppf(xdata, a=a, scale=scale),
+            [0.01, 0.99],
+            [lower_bound, upper_bound],
+        )
+    return invgamma(a=a_out, scale=scale_out)
+
+
+def create_priors(
+    n_parameters: int,
+    signal_scale: float = 4.0,
+    lengthscale_lower_bound: float = 0.1,
+    lengthscale_upper_bound: float = 0.5,
+    noise_scale: float = 0.0006,
+) -> List[Callable[[float], float]]:
+    """Build the log-prior callables for the hyperparameters of the tuning process.
+
+    Every returned callable takes a value ``x`` and evaluates the log density of
+    its distribution at ``sqrt(exp(x))`` (signal and noise) or ``exp(x)``
+    (lengthscales), plus a correction term in ``x``. Presumably ``x`` is the
+    hyperparameter in log space and the correction is the log-Jacobian of that
+    transformation -- confirm against the optimizer consuming the priors.
+
+    Parameters
+    ----------
+    n_parameters : int
+        Number of parameters to be optimized. One lengthscale prior is created
+        for each of them.
+    signal_scale : float
+        Scale of the half-normal prior on the signal standard deviation.
+    lengthscale_lower_bound : float
+        Point at which the inverse-gamma lengthscale prior reaches 1 % of its
+        cumulative density.
+    lengthscale_upper_bound : float
+        Point at which the inverse-gamma lengthscale prior reaches 99 % of its
+        cumulative density.
+    noise_scale : float
+        Scale of the half-normal prior on the noise standard deviation.
+
+    Returns
+    -------
+    list of callables
+        The priors in the following order:
+
+        - signal prior
+        - lengthscale prior (n_parameters times)
+        - noise prior
+
+    Raises
+    ------
+    ValueError
+        If ``signal_scale`` or ``noise_scale`` is not strictly positive.
+    """
+    if signal_scale <= 0.0:
+        raise ValueError(
+            f"The signal scale needs to be strictly positive. Got {signal_scale}."
+        )
+    if noise_scale <= 0.0:
+        raise ValueError(
+            f"The noise scale needs to be strictly positive. Got {noise_scale}."
+        )
+
+    def half_normal_log_prior(scale):
+        """Return the log-prior callable for a half-normal with the given scale."""
+        std_dist = halfnorm(scale=scale)
+
+        def log_density(x):
+            return std_dist.logpdf(np.sqrt(np.exp(x))) + x / 2.0 - np.log(2.0)
+
+        return log_density
+
+    signal_log_prior = half_normal_log_prior(signal_scale)
+    lengthscale_dist = make_invgamma_prior(
+        lower_bound=lengthscale_lower_bound, upper_bound=lengthscale_upper_bound
+    )
+    noise_log_prior = half_normal_log_prior(noise_scale)
+
+    def lengthscale_log_prior():
+        """Return a new log-prior callable sharing the lengthscale distribution."""
+
+        def log_density(x):
+            return lengthscale_dist.logpdf(np.exp(x)) + x
+
+        return log_density
+
+    return [
+        signal_log_prior,
+        *(lengthscale_log_prior() for _ in range(n_parameters)),
+        noise_log_prior,
+    ]