In [None]:
%%capture
!pip install creversi

In [None]:
from creversi import *

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from copy import copy, deepcopy

import torch
import torch.nn as nn

In [None]:
class PolicyNetwork(nn.Module):
    """Fully convolutional policy network.

    Takes a batch of 8-channel 8x8 feature planes and returns one score per
    square, flattened to shape (batch, 64). No softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        n_filters = 100
        n_blocks = 11  # number of conv -> batch-norm -> ReLU triples in the trunk

        # 5x5 stem; padding=2 keeps the 8x8 spatial size.
        self.input_layer = nn.Sequential(
            nn.Conv2d(8, n_filters, kernel_size=5, padding=2),
            nn.ReLU(),
        )

        # Trunk: modules are appended in the same order as a hand-written
        # nn.Sequential, so state_dict keys (hidden_layer.0 ... hidden_layer.32)
        # stay the same.
        trunk = []
        for _ in range(n_blocks):
            trunk.append(nn.Conv2d(n_filters, n_filters, kernel_size=3, padding=1))
            trunk.append(nn.BatchNorm2d(n_filters))
            trunk.append(nn.ReLU())
        self.hidden_layer = nn.Sequential(*trunk)

        # 1x1 convolution down to a single plane, then flatten to 64 scores.
        self.output_layer = nn.Sequential(
            nn.Conv2d(n_filters, 1, kernel_size=1),
            nn.Flatten(),
        )

    def forward(self, x):
        """Run stem, trunk and head in sequence and return the flattened scores."""
        return self.output_layer(self.hidden_layer(self.input_layer(x)))

In [None]:
class ValueNetwork(nn.Module):
    """Convolutional value network.

    Takes a batch of 9-channel 8x8 feature planes and returns one value per
    sample in (-1, 1) (tanh output), shape (batch, 1).
    """

    def __init__(self):
        super().__init__()
        n_filters = 10
        n_blocks = 10  # number of conv -> ReLU -> batch-norm triples in the trunk

        def conv3x3():
            # 3x3 convolution that preserves the spatial size.
            return nn.Conv2d(n_filters, n_filters, kernel_size=3, padding=1)

        # 5x5 stem; padding=2 keeps the 8x8 spatial size.
        self.input_layer = nn.Sequential(
            nn.Conv2d(9, n_filters, kernel_size=5, padding=2),
            nn.ReLU(),
        )

        # Trunk: modules are created in the same order as a hand-written
        # nn.Sequential so that state_dict keys (hidden_layer.0 ... .31) match.
        trunk = []
        for _ in range(n_blocks):
            trunk.extend([conv3x3(), nn.ReLU(), nn.BatchNorm2d(n_filters)])
        trunk.append(conv3x3())
        # The 1x1 convolution with padding=1 grows the map from 8x8 to 10x10,
        # which is why the first Linear layer expects n_filters*100 inputs.
        trunk.append(nn.Conv2d(n_filters, n_filters, kernel_size=1, padding=1))
        trunk.append(nn.Flatten())
        self.hidden_layer = nn.Sequential(*trunk)

        self.output_layer = nn.Sequential(
            nn.Linear(n_filters * 100, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
        )

    def forward(self, x):
        """Run stem, trunk and head, then squash the scalar output with tanh."""
        features = self.hidden_layer(self.input_layer(x))
        return self.output_layer(features).tanh()

In [None]:
def board_to_array(board):
    """
    Convert a board object into a float32 ndarray of shape (8, 8, 8) (PolicyNetwork input).

    Channels, in order:
      1. black stones, 2. white stones (the two planes written by
         ``board.piece_planes`` are swapped when ``board.turn`` is falsy),
      3. empty squares,
      4. legal-move squares (squares whose flip count is positive),
      5. number of stones each legal move would flip,
      6. corner mask (the 2x2 square in each corner is 1),
      7. all ones,
      8. all zeros.
    """
    planes = np.zeros((8, 8, 8), dtype=np.float32)
    board.piece_planes(planes)
    if not board.turn:
        # Fancy indexing copies the right-hand side first, so this is a safe swap.
        planes[[0, 1]] = planes[[1, 0]]

    # A square is empty when neither stone plane marks it.
    planes[2] = (planes[0] + planes[1]) != 1

    moves = list(board.legal_moves)
    if moves != [64]:  # [64] means the only option is to pass
        flips = np.zeros(64)
        for mv in moves:
            # Play the move on a copy; the drop in the opponent's stone count
            # is the number of stones flipped.
            trial = copy(board)
            before = trial.opponent_piece_num()
            trial.move(mv)
            flips[mv] = before - trial.piece_num()
        flips = flips.reshape(8, 8)
        planes[3] = flips > 0
        planes[4] = flips

    # Corner mask: rows/columns 0, 1, 6, 7 intersect in four 2x2 squares.
    border = [0, 1, 6, 7]
    corners = np.zeros((8, 8))
    corners[np.ix_(border, border)] = 1.
    planes[5] = corners

    planes[6] = 1
    return planes

In [None]:
def board_to_array2(board):
    """
    Convert a board object into a float32 ndarray of shape (9, 8, 8) (ValueNetwork input).

    Channels, in order:
      1. black stones, 2. white stones (the two planes written by
         ``board.piece_planes`` are swapped when ``board.turn`` is falsy),
      3. empty squares,
      4. legal-move squares (squares whose flip count is positive),
      5. number of stones each legal move would flip,
      6. corner mask (the 2x2 square in each corner is 1),
      7. all ones,
      8. all zeros,
      9. side to move (all zeros on black's turn, all ones on white's turn).
    """
    planes = np.zeros((9, 8, 8), dtype=np.float32)
    board.piece_planes(planes)
    if not board.turn:
        # Fancy indexing copies the right-hand side first, so this is a safe swap.
        planes[[0, 1]] = planes[[1, 0]]
        planes[8] = 1

    # A square is empty when neither stone plane marks it.
    planes[2] = (planes[0] + planes[1]) != 1

    moves = list(board.legal_moves)
    if moves != [64]:  # [64] means the only option is to pass
        flips = np.zeros(64)
        for mv in moves:
            # Play the move on a copy; the drop in the opponent's stone count
            # is the number of stones flipped.
            trial = copy(board)
            before = trial.opponent_piece_num()
            trial.move(mv)
            flips[mv] = before - trial.piece_num()
        flips = flips.reshape(8, 8)
        planes[3] = flips > 0
        planes[4] = flips

    # Corner mask: rows/columns 0, 1, 6, 7 intersect in four 2x2 squares.
    border = [0, 1, 6, 7]
    corners = np.zeros((8, 8))
    corners[np.ix_(border, border)] = 1.
    planes[5] = corners

    planes[6] = 1
    return planes

In [None]:
def board_to_array_aug2(board, return_torch=False):
    """Encode a board with board_to_array2 and return all 8 symmetric variants.

    The result has shape (8, 9, 8, 8), ordered as: original, mirrored original,
    then for each rotation k = 1, 2, 3 (np.rot90 over the two board axes) the
    rotated planes followed by their mirror image (flip along the last axis).

    Parameters
    ----------
    board : board object accepted by board_to_array2.
    return_torch : bool
        If True, return a torch tensor; otherwise a NumPy array.
    """
    base = board_to_array2(board)
    variants = []
    for k in range(4):
        # k == 0 is the unrotated encoding itself.
        rotated = base if k == 0 else np.rot90(base, k=k, axes=(1, 2)).copy()
        variants.append(rotated)
        variants.append(np.flip(rotated, axis=2).copy())
    stacked = np.array(variants)
    return torch.from_numpy(stacked) if return_torch else stacked

In [None]:
def show_board(board,title='',prob=None):
    """Draw the board as a grey-scale heatmap, optionally annotated with numbers.

    Parameters
    ----------
    board : board object accepted by board_to_array.
    title : str
        Figure title.
    prob : array-like of shape (8, 8), optional
        Per-square values (e.g. PolicyNetwork output in percent) printed on top
        of the board as rounded integers.
    """
    planes = board_to_array(board)
    # Plane 0 (black stones per board_to_array's docstring) maps to 1.0, empty
    # squares (plane 2) to 0.5, everything else to 0.0; 'gray_r' renders 1.0 dark.
    shade = planes[0] + planes[2] * 0.5

    plt.figure(figsize=(2, 2))
    sns.heatmap(shade, cmap='gray_r', cbar=False, linewidths=0.5)
    if prob is not None:
        # alpha=0 hides the cell colours so that only the annotations are visible.
        sns.heatmap(prob, cmap='gray_r', annot=True, fmt='.0f', alpha=0, cbar=False)

    cell_centres = np.arange(0.5, 8.5)
    plt.xticks(ticks=cell_centres, labels=list('abcdefgh'))
    plt.yticks(ticks=cell_centres, labels=list('12345678'), rotation=0)
    plt.title(title)
    plt.show()
    
# Smoke test: render the initial position.
board = Board()
show_board(board, title='SAMPLE')

In [None]:
def receive_input(legal_moves):
    """Prompt the user until they enter a legal move or the quit command.

    Parameters
    ----------
    legal_moves : iterable
        Moves that are currently legal; each is rendered with ``move_to_str``
        to obtain the text the user may type.

    Returns
    -------
    The move produced by ``move_from_str`` for the accepted text, or the
    string ``'0'`` when the user typed ``0`` to quit. The quit command is
    honoured on every prompt, including the retry prompts shown after an
    invalid entry.
    """
    legal_moves_str = {move_to_str(move) for move in legal_moves}
    move_str = input('Your turn : ')
    # Keep asking until we get either the quit sentinel or a legal move.
    while move_str != '0' and move_str not in legal_moves_str:
        move_str = input('Invalid input. Try again : ')
    if move_str == '0':
        return '0'
    return move_from_str(move_str)

In [None]:
# Load the trained models.
# The device is chosen first and passed as map_location so that checkpoints
# saved on a GPU can still be loaded on a CPU-only machine.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# NOTE(review): this file is loaded as a whole pickled module (not a
# state_dict), so torch.load executes pickle code -- only use trusted files.
# PyTorch releases whose torch.load defaults to weights_only=True reject such
# files; pass weights_only=False there if loading fails.
model = torch.load('/kaggle/input/reversi-datasets/SL-PolicyNetwork-v3-checkpoint-5epoch-subdata99.pth',
                   map_location=device)
model_v = ValueNetwork()
model_v.load_state_dict(torch.load('/kaggle/input/reversi-datasets/value-network-v2.pth',
                                   map_location=device))
# eval() switches the batch-norm layers to their stored running statistics.
model = model.to(device).eval()
model_v = model_v.to(device).eval()

### AI後手番

In [None]:
# hint = True
# board = Board()
# show_board(board, "Your turn.")
# while not board.is_game_over():
#     legal_moves = list(board.legal_moves)
#     if 64 in legal_moves: # パスの処理
#         print(f"{'You' if board.turn else 'AI'} Passed.")
#         move = 64
#     elif not board.turn: # AIの番
#         with torch.no_grad():
#             p = model(torch.from_numpy(board_to_array(board)).unsqueeze(0).to(device)).cpu()
#         p_legal = p[0][legal_moves]
#         move = legal_moves[p_legal.argmax().item()]
#         prob = p.softmax(dim=1).numpy().reshape(8,8) *100
#         show_board(board,"AI's turn. Now thinking...",prob)
#     else:  # ユーザーの番
#         if hint:
#             with torch.no_grad():
#                 p = model(torch.from_numpy(board_to_array(board)).unsqueeze(0).to(device)).cpu()
#             prob = p.softmax(dim=1).numpy().reshape(8,8) *100
#             show_board(board,"Your turn. Hint...",prob)
            
#         move = receive_input(legal_moves)
#         if move == '0': break
#     board.move(move)
#     show_board(board,f'put [{move_to_str(move)}].')

### AI先手番

In [None]:
# hint = True
# board = Board()
# while not board.is_game_over():
#     legal_moves = list(board.legal_moves)
#     if 64 in legal_moves: # パスの処理
#         print(f"{'You' if not board.turn else 'AI'} Passed.")
#         move = 64
#     elif board.turn: # AIの番
#         with torch.no_grad():
#             p = model(torch.from_numpy(board_to_array(board)).unsqueeze(0).to(device)).cpu()
#         p_legal = p[0][legal_moves]
#         move = legal_moves[p_legal.argmax().item()]
#         prob = p.softmax(dim=1).numpy().reshape(8,8) *100
#         show_board(board,"AI's turn. Now thinking...",prob)
#     else:  # ユーザーの番
#         if hint:
#             with torch.no_grad():
#                 p = model(torch.from_numpy(board_to_array(board)).unsqueeze(0).to(device)).cpu()
#             prob = p.softmax(dim=1).numpy().reshape(8,8) *100
#             show_board(board,"Your turn. Hint...",prob)
#         move = receive_input(legal_moves)
#         if move == '0': break
#     board.move(move)
#     show_board(board,f'put [{move_to_str(move)}].')

### アンサンブルバージョン

In [None]:
# アンサンブルバージョン
# board = Board()
# while not board.is_game_over():
#     legal_moves = list(board.legal_moves)
#     if 64 in legal_moves: # パスの処理
#         print(f"{'You' if not board.turn else 'AI'} Passed.")
#         move = 64
#     elif board.turn: # AIの番
#         # 8パターンの盤面を生成
#         board_array = board_to_array(board)
#         boards = [board_array, np.flip(board_array,axis=2).copy()]
#         for k in range(1,4):
#             board_array_rot = np.rot90(board_array, k=k, axes=(1,2)).copy()
#             boards.append(board_array_rot)
#             boards.append(np.flip(board_array_rot, axis=2).copy())
#         # 各パターンに対する予測
#         probs = model(torch.from_numpy(np.array(boards)).to(device)).softmax(1).cpu().detach().numpy()
#         probs_org = [probs[0], np.fliplr(probs[1].reshape(8,8)).flatten(),
#                      np.rot90(probs[2].reshape(8,8), k=-1).copy().flatten(), np.rot90(np.fliplr(probs[3].reshape(8,8)), k=-1).copy().flatten(),
#                      np.rot90(probs[4].reshape(8,8), k=-2).copy().flatten(), np.rot90(np.fliplr(probs[5].reshape(8,8)), k=-2).copy().flatten(),
#                      np.rot90(probs[6].reshape(8,8), k=-3).copy().flatten(), np.rot90(np.fliplr(probs[7].reshape(8,8)), k=-3).copy().flatten()]
#         probs_org = np.array(probs_org)
#         # 平均を算出
#         p = probs_org.mean(axis=0)
#         p_legal = p[legal_moves]
#         move = legal_moves[p_legal.argmax()]
#         show_board(board,"AI's turn. Now thinking...", p.reshape(8,8) *100)
#     else:  # ユーザーの番
#         move = receive_input(legal_moves)
#         if move == '0': break
#     board.move(move)
#     show_board(board,f'put [{move_to_str(move)}].')

## ValueNetworkの検証

In [None]:
# Play one game and record the ValueNetwork's evaluation after every move.
# The policy-network AI moves when board.turn is falsy; a uniformly random
# player moves otherwise.
board = Board()
if_board = Board()   # scratch board used to try out alternative moves
values = []          # ValueNetwork output * 64 after each move actually played
if_values = []       # [min, max] of the same quantity over all legal alternatives
AI_idx = []          # indices into `values` of positions reached by an AI move
while not board.is_game_over():
    legal_moves = list(board.legal_moves)
    # A truthy board.turn means the AI side made the previous move, so the
    # latest entry of `values` belongs to the AI. The opening position is
    # skipped: `values` is still empty there and the index -1 would later
    # wrap around to the last element of `values`.
    if board.turn and values:
        AI_idx.append(len(values)-1)

    # Pass handling (move 64 is the pass move)
    if 64 in legal_moves:
        move = 64
    # AI's turn: choose the legal move with the highest policy score
    elif not board.turn:
        with torch.no_grad():
            p = model(torch.from_numpy(board_to_array(board)).unsqueeze(0).to(device)).cpu()
        p_legal = p[0][legal_moves]
        move = legal_moves[p_legal.argmax().item()]
    # Random player's turn
    else:
        move = np.random.choice(legal_moves)

    # Evaluate the position reached by the move actually played. The pre-move
    # position is saved first so the alternatives below can be replayed from it.
    # The input is moved to `device` because model_v lives there.
    line,turn = board.to_line(), board.turn
    board.move(move)
    with torch.no_grad():
        v = model_v(board_to_array_aug2(board,True).to(device)).mean().item()*64
    values.append(v)

    # Counterfactual evaluation: range of evaluations over every legal move
    vmin,vmax = np.inf,-np.inf
    for if_move in legal_moves:
        # Restore the pre-move position ------------
        if if_board.turn != turn:
            if_board.move_pass()
        if_board.set_line(line, turn)
        # ------------------------------------------
        if_board.move(if_move)
        with torch.no_grad():
            if_v = model_v(board_to_array_aug2(if_board,True).to(device)).mean().item()*64
        if if_v < vmin:
            vmin = if_v
        if if_v > vmax:
            vmax = if_v
    if_values.append([vmin,vmax])

# Summary: final stone difference, negated when board.turn is falsy
# (presumably so the sign always refers to the same colour -- confirm
# against creversi's diff_num).
z = board.diff_num()
if not board.turn:
    z = -z
values = np.array(values)
if_values = np.array(if_values)
display(board)

# Plot: evaluation after each move (red), band between the worst and best
# alternative, markers on AI moves, and the final result as a dotted line.
plt.figure(figsize=(10, 3))
plt.fill_between(range(len(if_values)), if_values[:, 0], if_values[:, 1], alpha=0.5)
plt.plot(values, color='r', marker='o', markersize=4)
plt.scatter(AI_idx, values[AI_idx], marker='x', zorder=2, label="AI's turn")
plt.axhline(0, color='black', linestyle='--')
plt.axhline(z, color='blue', linestyle=':')
plt.ylim(-64, 64)
plt.yticks(range(-60, 70, 10))
plt.grid()
plt.legend()
plt.show()

## ValueNetworkで引き分けを目指す

In [None]:
# Play one game in which the AI (moving when board.turn is falsy) uses the
# ValueNetwork to steer towards a draw: it picks the move whose resulting
# evaluation is closest to zero. The opponent plays uniformly at random.
board = Board()
if_board = Board()   # scratch board used to try out alternative moves
values = []          # ValueNetwork output * 64 after each move actually played
if_values = []       # [min, max] of the same quantity over all legal alternatives
AI_idx = []          # indices into `values` of positions reached by an AI move
while not board.is_game_over():
    legal_moves = list(board.legal_moves)
    # A truthy board.turn means the AI side made the previous move, so the
    # latest entry of `values` belongs to the AI. The opening position is
    # skipped: `values` is still empty there and the index -1 would later
    # wrap around to the last element of `values`.
    if board.turn and values:
        AI_idx.append(len(values)-1)

    # Pass handling (move 64 is the pass move)
    if 64 in legal_moves:
        move = 64
    # AI's turn: minimise |evaluation| over the legal moves
    elif not board.turn:
        vbest,move = np.inf,None
        line,turn = board.to_line(), board.turn
        for if_move in legal_moves:
            # Restore the pre-move position ------------
            if if_board.turn != turn:
                if_board.move_pass()
            if_board.set_line(line, turn)
            # ------------------------------------------
            if_board.move(if_move)
            # The input is moved to `device` because model_v lives there.
            with torch.no_grad():
                if_v = model_v(board_to_array_aug2(if_board,True).to(device)).mean().item()*64
            if abs(if_v) < vbest:
                vbest = abs(if_v)
                move = if_move
    # Random player's turn
    else:
        move = np.random.choice(legal_moves)

    # Evaluate the position reached by the move actually played. The pre-move
    # position is saved first so the alternatives below can be replayed from it.
    line,turn = board.to_line(), board.turn
    board.move(move)
    with torch.no_grad():
        v = model_v(board_to_array_aug2(board,True).to(device)).mean().item()*64
    values.append(v)

    # Counterfactual evaluation: range of evaluations over every legal move
    vmin,vmax = np.inf,-np.inf
    for if_move in legal_moves:
        # Restore the pre-move position ------------
        if if_board.turn != turn:
            if_board.move_pass()
        if_board.set_line(line, turn)
        # ------------------------------------------
        if_board.move(if_move)
        with torch.no_grad():
            if_v = model_v(board_to_array_aug2(if_board,True).to(device)).mean().item()*64
        if if_v < vmin:
            vmin = if_v
        if if_v > vmax:
            vmax = if_v
    if_values.append([vmin,vmax])

# Summary: final stone difference, negated when board.turn is falsy
# (presumably so the sign always refers to the same colour -- confirm
# against creversi's diff_num).
z = board.diff_num()
if not board.turn:
    z = -z
values = np.array(values)
if_values = np.array(if_values)
print(z)
display(board)

# Plot: evaluation after each move (red), band between the worst and best
# alternative, markers on AI moves, and the final result as a dotted line.
plt.figure(figsize=(10, 3))
plt.fill_between(range(len(if_values)), if_values[:, 0], if_values[:, 1], alpha=0.5)
plt.plot(values, color='r', marker='o', markersize=4)
plt.scatter(AI_idx, values[AI_idx], marker='x', zorder=2, label="AI's turn")
plt.axhline(0, color='black', linestyle='--')
plt.axhline(z, color='blue', linestyle=':')
plt.ylim(-64, 64)
plt.yticks(range(-60, 70, 10))
plt.grid()
plt.legend()
plt.show()

In [None]:
# Repeat the draw-seeking game 10,000 times and collect the final stone
# difference of each game in Z. The AI moves when board.turn is falsy and
# picks the move whose ValueNetwork evaluation is closest to zero; the
# opponent plays uniformly at random.
Z = []
for n in tqdm(range(10000)):
    board = Board()
    if_board = Board()   # scratch board used to try out candidate moves
    while not board.is_game_over():
        legal_moves = list(board.legal_moves)
        # Pass handling (move 64 is the pass move)
        if 64 in legal_moves:
            move = 64
        # AI's turn: minimise |evaluation| over the legal moves
        elif not board.turn:
            vbest,move = np.inf,None
            line,turn = board.to_line(), board.turn
            for if_move in legal_moves:
                # Restore the pre-move position ------------
                if if_board.turn != turn:
                    if_board.move_pass()
                if_board.set_line(line, turn)
                # ------------------------------------------
                if_board.move(if_move)
                # The input is moved to `device` because model_v lives there.
                with torch.no_grad():
                    if_v = model_v(board_to_array_aug2(if_board,True).to(device)).mean().item()*64
                if abs(if_v) < vbest:
                    vbest = abs(if_v)
                    move = if_move
        # Random player's turn
        else:
            move = np.random.choice(legal_moves)
        board.move(move)

    # Summary: final stone difference, negated when board.turn is falsy
    z = board.diff_num() if board.turn else -board.diff_num()
    Z.append(z)
Z = np.array(Z)

In [None]:
# Persist the per-game results so the analysis below can be redone without replaying.
np.save(file='result.npy', arr=Z)

In [None]:
# Histogram of the final stone difference (absolute number of games per value).
diff_range = range(-64, 65)
cnt = [(Z == d).sum() for d in diff_range]
plt.figure(figsize=(15, 2))
plt.bar(diff_range, cnt)
plt.xticks(range(-60, 65, 5))
plt.grid()
plt.show()

# Share of games that ended within |diff| <= d, with a 95% interval from the
# normal approximation to the binomial (half-width 1.96 * sqrt(p(1-p)/N)).
n_games = len(Z)
for d in range(65):
    p = (abs(Z) <= d).sum() / n_games
    w = 1.96 * (p * (1 - p) / n_games) ** 0.5
    print(f'|diff|<={d} : {p*100:.1f}%  ({(p-w)*100:.1f}%,{(p+w)*100:.1f}%)')

In [None]:
# Same histogram as relative frequencies (fraction of games per stone difference).
diff_range = range(-64, 65)
cnt = [(Z == d).sum() / len(Z) for d in diff_range]
plt.figure(figsize=(5, 3))
plt.bar(diff_range, cnt)
plt.grid()
plt.show()
