-
Notifications
You must be signed in to change notification settings - Fork 1
/
game.py
119 lines (90 loc) · 3.16 KB
/
game.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Game Specific rules
# Imported and used by:
# - Selfplay.py [for self play]
# - Evaluate.py [for evaluating network]
import numpy as np
from typing import List
import chess
'''
Represents the state of a chess game at any given point in time.
'''
class GameState:
    """Chess game state backed by a python-chess ``chess.Board``.

    Actions are integers in ``[0, 64 * 64)`` that encode a move as
    ``from_square * 64 + to_square``. Players are encoded as ``1`` when
    ``board.turn`` is truthy (White in python-chess) and ``-1`` otherwise.
    """

    _SQUARES = 64
    _NUM_ACTIONS = _SQUARES * _SQUARES

    def __init__(self, other=None):
        """Start a fresh game, or copy the game held by ``other``.

        Args:
            other: Optional ``GameState`` to copy. When ``None`` a board in
                the standard starting position is created.
        """
        if other is None:
            self.board = chess.Board()
        else:
            # Board.copy() keeps the move stack. Rebuilding the board from a
            # FEN string drops the history, and python-chess needs that
            # history to detect repetition draws in outcome().
            self.board = other.board.copy()

    def player(self) -> int:
        """Return ``1`` if ``board.turn`` is truthy (White to move), else ``-1``."""
        return 1 if self.board.turn else -1

    def winner(self):
        """Return the result of the game, if it is over.

        Returns:
            ``None`` while ``board.outcome()`` reports no result; ``1`` or
            ``-1`` for a checkmate (``1`` when ``outcome.winner`` is truthy,
            i.e. White); ``0`` for every other kind of termination.
        """
        outcome = self.board.outcome()
        if outcome is None:
            return None
        if outcome.termination == chess.Termination.CHECKMATE:
            return 1 if outcome.winner else -1
        return 0

    def terminated(self) -> bool:
        """Return ``True`` once ``winner()`` reports any result, draws included."""
        return self.winner() is not None

    def legal_actions(self) -> List[bool]:
        """Return a mask of length ``64 * 64`` marking legal actions.

        Entry ``from_square * 64 + to_square`` is ``True`` when at least one
        legal move connects those squares. Promotions to different pieces
        share a single entry because the promotion piece is not encoded.
        """
        mask = [False] * self._NUM_ACTIONS
        for move in self.board.legal_moves:
            mask[move.from_square * self._SQUARES + move.to_square] = True
        return mask

    def get_move(self, action: int):
        """Decode ``action`` into a ``chess.Move`` for the current position.

        A pawn moving onto the far rank of the side to move is always
        promoted to a queen; under-promotion cannot be expressed in this
        action encoding. The returned move is not checked for legality.
        """
        from_square, to_square = divmod(action, self._SQUARES)
        promotion_rank = 7 if self.player() == 1 else 0
        if (
            chess.square_rank(to_square) == promotion_rank
            and self.board.piece_type_at(from_square) == chess.PAWN
        ):
            return chess.Move(from_square, to_square, chess.QUEEN)
        return chess.Move(from_square, to_square)

    def next_state(self, action: int):
        """Return a new ``GameState`` with ``action`` played; ``self`` is untouched.

        Raises:
            RuntimeError: If ``action`` is outside ``[0, 64 * 64)`` or is not
                marked legal by ``legal_actions()``.
        """
        # Reject out-of-range values explicitly: a negative index would
        # otherwise wrap around the mask and pass for a different move.
        if not 0 <= action < self._NUM_ACTIONS:
            raise RuntimeError("Illegal action")
        if not self.legal_actions()[action]:
            raise RuntimeError("Illegal action")
        successor = GameState(self)
        successor.board.push(self.get_move(action))
        return successor

    def to_image(self):
        """Return the state as an array suitable for input to the neural network.

        Returns:
            Integer ``numpy`` array of shape ``(1, 7, 8, 8)``. Planes 0-5
            correspond to piece types 1-6 (pawn through king in python-chess
            numbering) and hold ``+1`` for a piece of colour ``True`` (White),
            ``-1`` for colour ``False`` and ``0`` for an empty square. Plane 6
            is filled with ``player()``. Within a plane, entry ``[a][b]`` is
            square ``a * 8 + b``.
        """
        planes = []
        for piece_type in range(1, 7):
            # Fetch each colour's squares once per plane, not once per cell.
            white = self.board.pieces(piece_type, True)
            black = self.board.pieces(piece_type, False)
            planes.append(
                [
                    [
                        int(square in white) - int(square in black)
                        for square in range(row * 8, row * 8 + 8)
                    ]
                    for row in range(8)
                ]
            )
        side = self.player()
        planes.append([[side] * 8 for _ in range(8)])
        return np.array([planes])

    # for minimax, optimal value is:
    # optimal_value(s) = max(optimal_value(ch) for ch in children_states(s))
    # ... i.e. maximising my action's value
    def leaf_value(self) -> int:
        """Return the value of this state as seen by the player who just moved.

        Returns:
            ``0`` when ``winner()`` reports a draw, ``+1`` otherwise. A
            checkmate is always delivered by the player who made the last
            move, so ``+1`` means "the mover won"; it is not the value for
            the side to move.

        NOTE(review): non-terminal states (``winner()`` is ``None``) also
        return ``+1``. Presumably callers only invoke this on terminal
        states -- TODO confirm.
        """
        return 0 if self.winner() == 0 else +1