# policy gradient (part3: 模型评估)

In [None]:
# Environment setup: switch to the project directory and put ./python on the import path
%cd /playground/sgd_deep_learning/sgd_rl/go
import sys
sys.path.append('./python')

In [None]:
import os
import torch
import datetime
from collections import namedtuple

from dlgo import agent
from dlgo import scoring
from dlgo.goboard_fast import GameState, Player, Point
from dlgo.networks import cnn_small, resnet18
from dlgo.encoders import get_encoder_by_name

In [None]:
class args:
    """Evaluation settings, read elsewhere in this file as class attributes."""
    # Checkpoint path for the first agent (relative to the working directory).
    agent1='data/pg/agent_checkpoint_update.pth'
    # Checkpoint path for the second agent; currently the same file as agent1.
    agent2='data/pg/agent_checkpoint_update.pth'
    # Number of games main() simulates.
    num_games=10
    

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Sanity check: report whether each checkpoint path exists (paths are relative
# to the current working directory) and which device was selected.
print("model file exists: ", os.path.exists(args.agent1))
print("model file exists: ", os.path.exists(args.agent2))
print("device:", device)

In [None]:
# Side length of the board used for every simulated game and for printing.
BOARD_SIZE = 9
# Column letters for the board footer; the letter 'I' is not part of the sequence.
COLS = 'ABCDEFGHJKLMNOPQRST'
# Character printed for each point: empty, black stone, white stone.
STONE_TO_CHAR = {
    None: '.',
    Player.black: 'x',
    Player.white: 'o',
}

def avg(items):
    """Return the arithmetic mean of ``items`` as a float, or 0.0 when ``items`` is falsy."""
    return sum(items) / float(len(items)) if items else 0.0

def print_board(board):
    """Print ``board`` as text, top row first, followed by the column letters.

    Each line shows the row number and one character per point, looked up in
    STONE_TO_CHAR from whatever ``board.get`` returns for that point.
    """
    columns = range(1, BOARD_SIZE + 1)
    for row in reversed(range(1, BOARD_SIZE + 1)):
        cells = ''.join(
            STONE_TO_CHAR[board.get(Point(row=row, col=col))]
            for col in columns
        )
        print('%2d %s' % (row, cells))
    # Footer is indented to line up with the cells after the row-number prefix.
    print('   ' + COLS[:BOARD_SIZE])

class GameRecord(namedtuple('GameRecord', ['moves', 'winner', 'margin'])):
    """Outcome of one simulated game.

    Fields:
        moves: the moves in the order they were played.
        winner: the winning side as reported by the scorer.
        margin: the winning margin as reported by the scorer.
    """

def name(player):
    """Return 'B' when ``player`` equals ``Player.black``, otherwise 'W'."""
    return 'B' if player == Player.black else 'W'

In [None]:
def simulate_game(black_player, white_player):
    """Play one full game between two agents and return its GameRecord.

    Starting from a new BOARD_SIZE game, the agent whose colour is to move is
    asked for a move until the game reports it is over. The final board and
    the scored result are printed before the record is returned.
    """
    players = {Player.black: black_player, Player.white: white_player}
    state = GameState.new_game(BOARD_SIZE)
    history = []

    while not state.is_over():
        move = players[state.next_player].select_move(state)
        history.append(move)
        state = state.apply_move(move)

    print_board(state.board)
    result = scoring.compute_game_result(state)
    print(result)

    return GameRecord(
        moves=history,
        winner=result.winner,
        margin=result.winning_margin,
    )

In [None]:
def main():
    """Play ``args.num_games`` games between two policy agents and print agent 1's record.

    Agent 1 is created from a network and encoder without a ``save_path``;
    agent 2 is created with ``save_path=args.agent2``. The agents swap colours
    after every game, and a game counts as a win for agent 1 only when the
    recorded winner matches the colour agent 1 played.
    """
    encoder_name = 'sevenplane'
    # Each agent gets its own network instance. Passing one shared module to
    # both loaders would let the checkpoint restored for agent 2 overwrite the
    # weights agent 1 plays with (assuming load_policy_agent restores weights
    # into the given model in place -- TODO confirm against dlgo.agent).
    model1 = cnn_small(input_channel_num=7, board_size=BOARD_SIZE)
    model2 = cnn_small(input_channel_num=7, board_size=BOARD_SIZE)
    encoder = get_encoder_by_name(name=encoder_name, board_size=BOARD_SIZE)

    agent1 = agent.load_policy_agent(model=model1, encoder=encoder, device=device)
    # To pit two saved checkpoints against each other, create agent1 the same
    # way as agent2 but with save_path=args.agent1.
    agent2 = agent.load_policy_agent(model=model2, save_path=args.agent2, device=device)

    wins = 0
    losses = 0
    color1 = Player.black  # colour agent 1 plays in the upcoming game
    for i in range(args.num_games):
        print('Simulating game %d/%d...' % (i + 1, args.num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player)
        if game_record.winner == color1:
            wins += 1
        else:
            losses += 1
        # Alternate colours so neither agent always moves first.
        color1 = color1.other
    print('Agent 1 record: %d/%d' % (wins, wins + losses))

# Run the evaluation when this cell executes.
main()

In [None]:
from scipy.stats import binomtest

# Exact binomial test (two-sided by default): how surprising are 60 wins out of
# 100 games if each game is a fair coin flip (p = 0.5)?
confidence = binomtest(60, 100, 0.5)
print(confidence)
print(confidence.pvalue)

# p-value for every possible win count out of 100 games; x and y feed the plot.
x = list(range(101))
y = [binomtest(wins, 100, 0.5).pvalue for wins in x]


import matplotlib.pyplot as plt


# Draw the p-values as a line plot with Matplotlib
# NOTE(review): the legend label below is non-ASCII; confirm the active Matplotlib font can render it.
plt.plot(x, y, label='折线图')  # pass the x and y values, plus a label for the legend

# Add the title and axis labels
plt.title('confidence')
plt.xlabel('X axis')
plt.ylabel('Y axis')

# Add the legend
plt.legend()

# Show the line plot
plt.show()

### playground

In [None]:
import glob

def list_experience_files(data_dir):
    """Return the paths of all ``*.pth`` files directly inside ``data_dir``.

    Args:
        data_dir: Directory to search; a trailing path separator is optional.

    Returns:
        A sorted list of matching paths (empty when nothing matches).
    """
    import os  # local import so this cell does not depend on an earlier one

    # Build the pattern with os.path.join rather than string concatenation so a
    # directory given without a trailing separator is not treated as a
    # file-name prefix.
    pattern = os.path.join(data_dir, '*.pth')
    # glob.glob already returns a list; sorting gives a stable order.
    return sorted(glob.glob(pattern))

# Directory searched for saved *.pth files.
dir_path = "/playground/sgd_deep_learning/sgd_rl/go/data/pg/experience/"

# Print every matching path, one per line.
for x in list_experience_files(dir_path):
    print(x)

In [None]:
import hashlib
import uuid

# Generate a random string (a UUID4) to act as the base file name
random_filename = str(uuid.uuid4())

# Compute the MD5 hash of that string with hashlib
hash_object = hashlib.md5(random_filename.encode())
hash_value = hash_object.hexdigest()

print(hash_value, type(hash_value))

# Use the hex digest as the file name
file_name = f"{hash_value}.txt"

print(f"随机文件名: {random_filename}")
print(f"哈希值: {hash_value}")
print(f"文件名: {file_name}")