Merge pull request #63 from kobanium/develop

Support analyze commands and fix bugs
kobanium · Jun 12, 2023 · 32c86b7 · 32c86b7
2 parents 07d9c7b + a06e271
commit 32c86b7
Show file tree

Hide file tree

Showing 16 changed files with 392 additions and 49 deletions.
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
@@ -0,0 +1,8 @@
+Author
+============
+Yuki Kobayashi ("kobanium")
+
+
+Contributors
+============
+"CGLemon", Chinese translation and GTP analyze commands implementation.
diff --git a/README.md b/README.md
@@ -20,6 +20,8 @@ TamaGo runs on Python 3.6 or higher.
 - [How to execute supervised learning](#how-to-execute-supervised-learning)
 - [How to execute reinforcement learning](#how-to-execute-reinforcement-learning)
 - [GoGui analyze commands](#gogui-analyze-commands)
+- [Analyze commands](#analyze-commands)
+- [CGOS analyze mode](#cgos-analyze-mode)
 - [License](#license)
 
 # Requirements
@@ -56,6 +58,8 @@ TamaGo's command line options are as follows,
 | `--const-time` | Thinking time per move | Real number more than 0 | 10.0 | None | When you use the '--time' option, this option is ignored.|
 | `--time` | Total remaining time for a game | Real number more than 0 | 600.0 | None |
 | `--batch-size` | Mini-batch size for MCTS | Integer number more than 0 | 13 | NN_BATCH_SIZE | NN_BATCH_SIZE is defined in mcts/constant.py. |
+| `--tree-size` | Maximum number of MCTS nodes | Integer number more than 0 | 100000 | MCTS_TREE_SIZE | MCTS_TREE_SIZE is defined in mcts/constant.py. |
+| `--cgos-mode` | Suppress passing until all dead stones are captured. | true or false | true | false | |
 
 ## Examples of TamaGo execution as GTP engine.
 1) Setting board size to 5, using model/model.bin as a trained file, avoiding to use a GPU.
@@ -82,6 +86,11 @@ python main.py --visits 500
 ```
 python main.py --const-time 10.0
 ```
+7) Settings for testing on CGOS.
+```
+python main.py --model model/sl-model.bin --use-gpu true --cgos-mode true --superko true --batch-size 13 --time 600 --komi 7 --tree-size 200000
+```
+
 
 ## Trained neural network parameters file.
 A trained neural network parameters file is available [here](https://github.com/kobanium/TamaGo/releases). When you place a trained file in the "model" directory under the name "model.bin", you can run TamaGo with the trained file without a command line option. If TamaGo's neural network structure and the trained parameters file do not match, TamaGo cannot load the trained file. Please make sure that the version of the trained model file matches the version of TamaGo.
@@ -100,9 +109,20 @@ Value range of Policy is more than or equal 0.0 and less than or equal to 1.0.
 
 ![Display policy value](img/gogui_analyze_policy.png)
 
-
 Redder is higher value, bluer is lower value.
+
 ![Coloring policy value](img/gogui_analyze_policy_color.png)
 
+# Analyze commands
+TamaGo version 0.7.0 supports lz-analyze and lz-genmove_analyze commands. When you use Lizzie or Sabaki, you can analyze positions using these commands.
+![lz-analyze-sample](img/lz_analyze_sample.png)
+
+# CGOS analyze mode
+TamaGo version 0.7.0 supports the cgos-analyze and cgos-genmove_analyze commands. When you connect to [Computer Go Server (CGOS)](http://www.yss-aya.com/cgos/) to test TamaGo, you can view search information by enabling the --cgos-mode option.
+
+![cgos-analyze](img/cgos-analyze.png)
+
+![cgos-analyze-pv](img/cgos-analyze-pv.png)
+
 # License
 You can use TamaGo under [Apache License 2.0](LICENSE).
diff --git a/board/coordinate.py b/board/coordinate.py
@@ -52,10 +52,10 @@ def convert_to_gtp_format(self, pos: int) -> str:
             str: Go Text Protocol形式の座標。
         """
         if pos == PASS:
-            return "PASS"
+            return "pass"
 
         if pos == RESIGN:
-            return "RESIGN"
+            return "resign"
 
         x_coord = pos % self.board_size_with_ob - OB_SIZE + 1
         y_coord = self.board_size - (pos // self.board_size_with_ob - OB_SIZE)

diff --git a/board/go_board.py b/board/go_board.py
@@ -142,7 +142,6 @@ def put_stone(self, pos: int, color: Stone) -> NoReturn:
 
         opponent_color = Stone.get_opponent_color(color)
 
-
         self.board[pos] = color
         self.pattern.put_stone(pos, color)
         self.positional_hash = affect_stone_hash(self.positional_hash, pos, color)
@@ -164,7 +163,7 @@ def put_stone(self, pos: int, color: Stone) -> NoReturn:
                     for removed_pos in removed_stones:
                         self.pattern.remove_stone(removed_pos)
                     self.positional_hash = affect_string_hash(self.positional_hash, \
-                        removed_stones, color)
+                        removed_stones, opponent_color)
 
         if color == Stone.BLACK:
             self.prisoner[0] += prisoner
@@ -238,7 +237,7 @@ def is_legal(self, pos: int, color: Stone) -> bool:
             neighbor4 = self.get_neighbor4(pos)
             neighbor_ids = [self.strings.get_id(neighbor) for neighbor in neighbor4]
             unique_ids = list(set(neighbor_ids))
-            current_hash = self.positional_hash
+            current_hash = self.positional_hash.copy()
 
             # 打ち上げる石があれば打ち上げたと仮定
             for string_id in unique_ids:

diff --git a/board/record.py b/board/record.py
@@ -63,6 +63,14 @@ def get(self, moves: int) -> Tuple[Stone, int, np.array]:
         """
         return (self.color[moves], self.pos[moves], self.hash_value[moves])
 
+    def get_hash_history(self) -> np.array:
+        """ハッシュ値の履歴を取得する。
+
+        Returns:
+            np.array: ハッシュ値の履歴。
+        """
+        return self.hash_value
+
 
 def copy_record(dst: Record, src: Record) -> NoReturn:
     """着手履歴をコピーする。

diff --git a/doc/ja/README.md b/doc/ja/README.md
@@ -43,7 +43,9 @@ python main.py
 | `--visits` | 1手あたりの探索回数 | 1以上の整数 | 1000 | 1000 | --const-timeオプション、または--timeオプションの指定があるときは本オプションを無視します。 |
 | `--const-time` | 1手あたりの探索時間 (秒) | 0より大きい実数 | 10.0 |  | --timeオプションの指定があるときは本オプションを無視します。 |
 | `--time` | 持ち時間 (秒) | 0より大きい実数 | 600.0 | |
-| `--batch-size` | 探索時のニューラルネットワークのミニバッチサイズ | 0より大きい整数 | NN_BATCH_SIZE | NN_BATCH_SIZEはmcts/constant.pyに定義してあります。 |
+| `--batch-size` | 探索時のニューラルネットワークのミニバッチサイズ | 1以上の整数 | 13 | NN_BATCH_SIZE | NN_BATCH_SIZEはmcts/constant.pyに定義してあります。 |
+| `--tree-size` | 探索木を構成するノードの最大数 | 1以上の整数 | 100000| MCTS_TREE_SIZE | MCTS_TREE_SIZEはmcts/constant.pyに定義してあります。 |
+| `--cgos-mode` | 石を打ち上げるまでパスを抑制するフラグ | true または false | true | false | |
 
 ## プログラムの実行例は下記のとおりです
 1) 碁盤のサイズを5、model/model.binを学習済みモデルとして使用し、GPUを使用せずに実行するケース
@@ -70,6 +72,10 @@ python main.py --visits 500
 ```
 python main.py --const-time 10.0
 ```
+7) CGOSの9路盤で動かすケース
+```
+python main.py --model model/sl-model.bin --use-gpu true --cgos-mode true --superko true --batch-size 13 --time 600 --komi 7 --tree-size 200000
+```
 
 ## 学習済みモデルファイルについて
 学習済みのモデルファイルについては[こちら](https://github.com/kobanium/TamaGo/releases)から取得してください。modelフォルダ以下にmodel.binファイルを配置するとコマンドラインオプションの指定無しで動かせます。ニューラルネットワークの構造と学習済みモデルファイルが一致しないとロードできないので、取得したモデルファイルのリリースバージョンとTamaGoのバージョンが一致しているかに注意してください。  
@@ -90,8 +96,22 @@ Policyの値は0.0〜1.0の範囲で表示されます。
 
 
 Policyの値による色付けはPolicyの値が大きいほど赤く、小さいほど青く表示されます。
+
 ![Policyの値で色付け](../../img/gogui_analyze_policy_color.png)
 
+# Analyze commands
+TamaGoはバージョン0.7.0からlz-analyze, lz-genmove_analyzeをサポートしています。
+SabakiやLizzieからご利用ください。
+
+![解析コマンドの例](../../img/lz_analyze_sample.png)
+
+# CGOS analyze mode
+TamaGoはバージョン0.7.0からcgos-analyze, cgos-genmove_analyzeをサポートしています。[Computer Go Server (CGOS)](http://www.yss-aya.com/cgos/)に接続する際は、--cgos-modeオプションをTrueにすることで読み筋などの情報を表示することができます。
+
+![CGOSでの動作例](../../img/cgos-analyze.png)
+
+![CGOSでの読み筋表示](../../img/cgos-analyze-pv.png)
+
 # License
 ライセンスはApache License ver 2.0です。
 
@@ -104,7 +124,7 @@ Policyの値による色付けはPolicyの値が大きいほど赤く、小さ
   - [x] Super Koの判定処理
 - 探索部の実装
   - [x] 木とノードのデータ構造
-  - [ ] モンテカルロ木探索
+  - [x] モンテカルロ木探索
     - ~~クラシックなMCTS~~
       - ~~UCT~~
       - ~~RAVE~~
@@ -113,9 +133,9 @@ Policyの値による色付けはPolicyの値が大きいほど赤く、小さ
       - [x] PUCB値の計算
       - [x] ニューラルネットワークのミニバッチ処理  
     - [x] Sequential Halving applied to tree探索
-    - [ ] CGOS対応
-      - [ ] 死石がなくなるまでパスを抑制
-      - [ ] cgos_genmove対応
+    - [x] CGOS対応
+      - [x] 死石がなくなるまでパスを抑制
+      - [x] cgos_genmove対応
     - [x] 持ち時間による探索時間制御
 - 学習の実装
   - [x] SGFファイルの読み込み処理

diff --git a/gtp/client.py b/gtp/client.py
@@ -10,7 +10,7 @@
 from board.coordinate import Coordinate
 from board.go_board import GoBoard
 from board.stone import Stone
-from common.print_console import print_err
+from common.print_console import print_err, print_out
 from gtp.gogui import GoguiAnalyzeCommand, display_policy_distribution, \
     display_policy_score
 from mcts.time_manager import TimeControl, TimeManager
@@ -28,7 +28,7 @@ class GtpClient: # pylint: disable=R0902,R0903
     def __init__(self, board_size: int, superko: bool, model_file_path: str, \
         use_gpu: bool, policy_move: bool, use_sequential_halving: bool, \
         komi: float, mode: TimeControl, visits: int, const_time: float, \
-        time: float, batch_size: int): # pylint: disable=R0913
+        time: float, batch_size: int, tree_size: int, cgos_mode: bool): # pylint: disable=R0913
         """Go Text Protocolクライアントの初期化をする。
 
         Args:
@@ -44,6 +44,8 @@ def __init__(self, board_size: int, superko: bool, model_file_path: str, \
             const_time (float): 1手あたりの探索時間。
             time (float): 持ち時間。
             batch_size (int): 探索時のニューラルネットワークのミニバッチサイズ。
+            tree_size (int): 探索木を構成するノードの最大数。
+            cgos_mode (bool): 全ての石を打ち上げるまでパスしない設定フラグ。
         """
         self.gtp_commands = [
             "version",
@@ -62,7 +64,11 @@ def __init__(self, board_size: int, superko: bool, model_file_path: str, \
             "komi",
             "showboard",
             "load_sgf",
-            "gogui-analyze_commands"
+            "gogui-analyze_commands",
+            "lz-analyze",
+            "lz-genmove_analyze",
+            "cgos-analyze",
+            "cgos-genmove_analyze"
         ]
         self.superko = superko
         self.board = GoBoard(board_size=board_size, komi=komi, check_superko=superko)
@@ -91,7 +97,8 @@ def __init__(self, board_size: int, superko: bool, model_file_path: str, \
         try:
             self.network = load_network(model_file_path, use_gpu)
             self.use_network = True
-            self.mcts = MCTSTree(network=self.network, batch_size=batch_size)
+            self.mcts = MCTSTree(network=self.network, batch_size=batch_size, \
+                tree_size=tree_size, cgos_mode=cgos_mode)
         except FileNotFoundError:
             print_err(f"Model file {model_file_path} is not found")
         except RuntimeError:
@@ -184,7 +191,8 @@ def _genmove(self, color: str) -> NoReturn:
                     pos = self.mcts.generate_move_with_sequential_halving(self.board, \
                         genmove_color, self.time_manager, False)
                 else:
-                    pos = self.mcts.search_best_move(self.board, genmove_color, self.time_manager)
+                    pos = self.mcts.search_best_move(self.board, \
+                        genmove_color, self.time_manager, {})
         else:
             # ランダムに着手生成
             legal_pos = [pos for pos in self.board.onboard_pos \
@@ -287,6 +295,77 @@ def _load_sgf(self, arg_list: List[str]) -> NoReturn:
 
         respond_success("")
 
+    def _analyze(self, mode: str, arg_list: List[str]) -> NoReturn:
+        """analyzeコマンド（lz-analyze, cgos-analyze）を実行する。
+
+        Args:
+            mode (str): 解析モード。値は"lz"か"cgos"。
+            arg_list (List[str]): コマンドの引数リスト (手番の色, 更新間隔)。
+        """
+        color = arg_list[0]
+        interval = 0
+        if len(arg_list) >= 2:
+            interval = int(arg_list[1])/100
+
+        if color[0][0] in ['B', 'b']:
+            to_move = Stone.BLACK
+        elif color[0][0] == ['B', 'w']:
+            to_move = Stone.WHITE
+        else:
+            respond_failure("lz-analyze color")
+            return
+
+        analysis_query = {
+            "mode" : mode,
+            "interval" : interval,
+            "ponder" : True
+        }
+        self.mcts.ponder(self.board, to_move, analysis_query)
+
+    def _genmove_analyze(self, mode: str, arg_list: List[str]) -> NoReturn:
+        """genmove_analyzeコマンド（lz-genmove_analyze, cgos-genmove_analyze）を実行する。
+
+        Args:
+            mode (str): 解析モード。値は"lz"か"cgos"。
+            arg_list (List[str]): コマンドの引数リスト（手番の色, 更新間隔)。
+        """
+        color = arg_list[0]
+        interval = 0
+        if len(arg_list) >= 2:
+            interval = int(arg_list[1])/100
+
+        if color.lower()[0] == 'b':
+            genmove_color = Stone.BLACK
+        elif color.lower()[0] == 'w':
+            genmove_color = Stone.WHITE
+        else:
+            respond_failure("genmove_analyze color")
+            return
+
+        if self.use_network:
+            # モンテカルロ木探索で着手生成
+            analysis_query = {
+                "mode" : mode,
+                "interval" : interval,
+                "ponder" : False
+            }
+            pos = self.mcts.search_best_move(self.board, genmove_color, \
+                self.time_manager, analysis_query)
+        else:
+            # ランダムに着手生成
+            legal_pos = [pos for pos in self.board.onboard_pos \
+                if self.board.is_legal_not_eye(pos, genmove_color)]
+            if legal_pos:
+                pos = random.choice(legal_pos)
+            else:
+                pos = PASS
+
+        if pos != RESIGN:
+            self.board.put_stone(pos, genmove_color)
+
+        print_out(f"play {self.coordinate.convert_to_gtp_format(pos)}\n")
+
+
     def run(self) -> NoReturn: # pylint: disable=R0912,R0915
         """Go Text Protocolのクライアントの実行処理。
         入力されたコマンドに対応する処理を実行し、応答メッセージを表示する。
@@ -366,10 +445,26 @@ def run(self) -> NoReturn: # pylint: disable=R0912,R0915
                 self.board.display_self_atari(Stone.BLACK)
                 self.board.display_self_atari(Stone.WHITE)
                 respond_success("")
+            elif input_gtp_command == "lz-analyze":
+                print_out("= ")
+                self._analyze("lz", command_list[1:])
+                print("")
+            elif input_gtp_command == "lz-genmove_analyze":
+                print_out("= ")
+                self._genmove_analyze("lz", command_list[1:])
+            elif input_gtp_command == "cgos-analyze":
+                print_out("= ")
+                self._analyze("cgos", command_list[1:])
+                print("")
+            elif input_gtp_command == "cgos-genmove_analyze":
+                print_out("= ")
+                self._genmove_analyze("cgos", command_list[1:])
+            elif input_gtp_command == "hash_record":
+                print_err(self.board.record.get_hash_history())
+                respond_success("")
             else:
                 respond_failure("unknown_command")
 
-
 def respond_success(response: str) -> NoReturn:
     """コマンド処理成功時の応答メッセージを表示する。
 

diff --git a/img/cgos-analyze-pv.png b/img/cgos-analyze-pv.png
diff --git a/img/cgos-analyze.png b/img/cgos-analyze.png
diff --git a/img/lz_analyze_sample.png b/img/lz_analyze_sample.png