Merge pull request #160 from kiudee/153_raise_error

Parse cutechess-cli log and check for common errors
kiudee · Sep 11, 2021 · 4835608 · 4835608
2 parents 2f48924 + ba7b1a9
commit 4835608
Show file tree

Hide file tree

Showing 3 changed files with 156 additions and 10 deletions.
diff --git a/tests/test_local.py b/tests/test_local.py
@@ -6,8 +6,10 @@
 from skopt.utils import normalize_dimensions
 
 from tune.local import (
+    check_log_for_errors,
     initialize_data,
     initialize_optimizer,
+    is_debug_log,
     parse_experiment_result,
     reduce_ranges,
     update_model,
@@ -260,3 +262,72 @@ def test_update_model():
     assert np.allclose(opt.Xi, points)
     assert np.allclose(opt.yi, scores)
     assert np.allclose(opt.noisei, variances)
+
+
+@pytest.mark.parametrize(
+    "log_line,expected",
+    [
+        (
+            "3287 <engine1(0): info depth 1 seldepth 1 time 2 nodes 1 score cp -8502 "
+            "tbhits 0 pv a7a8n",
+            True,
+        ),
+        ("2018 >lc0(1): ucinewgame", True),
+        ("Finished game 1 (engine1 vs engine2): 0-1 {White loses on time}", False),
+        ("Finished game 3287 <engine1(0): ", False),
+        ("...      White vs Black: 0 - 1 - 0  [0.000] 1", False),
+    ],
+)
+def test_is_debug_log(log_line, expected):
+    assert is_debug_log(log_line) == expected
+
+
+def test_check_log_for_errors(caplog):
+
+    # Test loss on time:
+    teststr = """Started game 1 of 2 (SFDev vs Lc0.11198)
+    Finished game 1 (SFDev vs Lc0.11198): 1-0 {Black loses on time}
+    Score of SFDev vs Lc0.11198: 1 - 0 - 0  [1.000] 1
+    Started game 2 of 2 (Lc0.11198 vs SFDev)
+    Finished game 2 (Lc0.11198 vs SFDev): 0-1 {White loses on time}
+    Score of SFDev vs Lc0.11198: 2 - 0 - 0  [1.000] 2
+    Elo difference: inf +/- nan
+    Finished match"""
+
+    check_log_for_errors(teststr.split("\n"))
+    assert "Engine Lc0.11198 lost on time as Black." in caplog.text
+
+    # Test crash/termination of engine:
+    teststr = """Terminating process of engine lc0(0)
+    16994 >lc0(1): isready
+    16994 <lc0(1): readyok
+    ...      lc0 playing White: 0 - 1 - 0  [0.000] 1
+    ...      White vs Black: 0 - 1 - 0  [0.000] 1
+    Elo difference: -inf +/- nan, LOS: 15.9 %, DrawRatio: 0.0 %
+    Finished match
+    Finished game 1 (lc0 vs lc0): 0-1 {White's connection stalls}
+    Score of lc0 vs lc0: 0 - 1 - 0  [0.000] 1
+    ...      lc0 playing White: 0 - 1 - 0  [0.000] 1
+    ...      White vs Black: 0 - 1 - 0  [0.000] 1
+    Elo difference: -inf +/- nan, LOS: 15.9 %, DrawRatio: 0.0 %
+    Finished match
+    16995 >lc0(1): quit"""
+    check_log_for_errors(teststr.split("\n"))
+    assert (
+        "lc0's connection stalled as White. Game result is unreliable." in caplog.text
+    )
+
+    # Test correct forward of error:
+    teststr = "797 <lc0(0): error The cuda backend requires a network file."
+    check_log_for_errors([teststr])
+    assert (
+        "cutechess-cli error: The cuda backend requires a network file." in caplog.text
+    )
+
+    # Test unknown UCI option:
+    teststr = "424 <lc0(1): error Unknown option: UnknownOption"
+    check_log_for_errors([teststr])
+    assert (
+        "UCI option UnknownOption was unknown to the engine. Check if the spelling "
+        "is correct." in caplog.text
+    )
diff --git a/tune/cli.py b/tune/cli.py
@@ -13,8 +13,10 @@
 from tune.db_workers import TuningClient, TuningServer
 from tune.io import load_tuning_config, prepare_engines_json, write_engines_json
 from tune.local import (
+    check_log_for_errors,
     initialize_data,
     initialize_optimizer,
+    is_debug_log,
     parse_experiment_result,
     plot_results,
     print_results,
@@ -436,14 +438,18 @@ def local(  # noqa: C901
         # Run experiment:
         root_logger.info("Start experiment")
         now = datetime.now()
-        settings["debug_mode"] = settings.get(
-            "debug_mode", False if verbose <= 1 else True
-        )
         out_exp = []
+        out_all = []
         for output_line in run_match(**settings):
-            root_logger.debug(output_line.rstrip())
-            out_exp.append(output_line)
-        out_exp = "".join(out_exp)
+            line = output_line.rstrip()
+            is_debug = is_debug_log(line)
+            if is_debug and verbose > 2:
+                root_logger.debug(line)
+            if not is_debug:
+                out_exp.append(line)
+            out_all.append(line)
+        check_log_for_errors(cutechess_output=out_all)
+        out_exp = "\n".join(out_exp)
         later = datetime.now()
         difference = (later - now).total_seconds()
         root_logger.info(f"Experiment finished ({difference}s elapsed).")

diff --git a/tune/local.py b/tune/local.py
@@ -27,6 +27,8 @@
     "counts_to_penta",
     "initialize_optimizer",
     "run_match",
+    "is_debug_log",
+    "check_log_for_errors",
     "parse_experiment_result",
     "print_results",
     "plot_results",
@@ -641,8 +643,6 @@ def run_match(
         Number of games to run in parallel. Be careful when running time control
         games, since the engines can negatively impact each other when running
         in parallel.
-    debug_mode : bool, default=False
-        If True, pass ``-debug`` to cutechess-cli.
 
     Yields
     -------
@@ -726,8 +726,7 @@ def run_match(
     string_array.extend(("-games", "2"))
     string_array.append("-repeat")
     string_array.append("-recover")
-    if debug_mode:
-        string_array.append("-debug")
+    string_array.append("-debug")
     string_array.extend(("-pgnout", "out.pgn"))
 
     with subprocess.Popen(
@@ -737,6 +736,76 @@ def run_match(
             yield line
 
 
+def is_debug_log(cutechess_line: str,) -> bool:
+    """Check if the provided cutechess log line is a debug mode line.
+
+    Parameters
+    ----------
+    cutechess_line : str
+        One line from a cutechess log.
+
+    Returns
+    -------
+    bool
+        True, if the given line is a debug mode line, False otherwise.
+    """
+    if re.match(r"[0-9]+ [<>]", cutechess_line) is not None:
+        return True
+    return False
+
+
+def check_log_for_errors(cutechess_output: List[str],) -> None:
+    """Parse the log output produced by cutechess-cli and scan for important errors.
+
+    Parameters
+    ----------
+    cutechess_output : list of str
+        String containing the log output produced by cutechess-cli.
+    """
+    logger = logging.getLogger(LOGGER)
+    for line in cutechess_output:
+
+        # Check for forwarded errors:
+        pattern = r"[0-9]+ [<>].+: error (.+)"
+        match = re.search(pattern=pattern, string=line)
+        if match is not None:
+            logger.warning(f"cutechess-cli error: {match.group(1)}")
+
+        # Check for unknown UCI option
+        pattern = r"Unknown (?:option|command): (.+)"
+        match = re.search(pattern=pattern, string=line)
+        if match is not None:
+            logger.error(
+                f"UCI option {match.group(1)} was unknown to the engine. "
+                f"Check if the spelling is correct."
+            )
+            continue
+
+        # Check for loss on time
+        pattern = (
+            r"Finished game [0-9]+ \((.+) vs (.+)\): [0-9]-[0-9] {(\S+) "
+            r"(?:loses on time)}"
+        )
+        match = re.search(pattern=pattern, string=line)
+        if match is not None:
+            engine = match.group(1) if match.group(3) == "White" else match.group(2)
+            logger.warning(f"Engine {engine} lost on time as {match.group(3)}.")
+            continue
+
+        # Check for connection stall:
+        pattern = (
+            r"Finished game [0-9]+ \((.+) vs (.+)\): [0-9]-[0-9] {(\S+)'s "
+            r"(?:connection stalls)}"
+        )
+        match = re.search(pattern=pattern, string=line)
+        if match is not None:
+            engine = match.group(1) if match.group(3) == "White" else match.group(2)
+            logger.error(
+                f"{engine}'s connection stalled as {match.group(3)}. "
+                f"Game result is unreliable."
+            )
+
+
 def parse_experiment_result(
     outstr,
     prior_counts=None,