# AlphaGo Zero (part 1: collecting self-play game data)
 
    agent1 and agent2 are identical.
    The experience collected from both sides is combined and used together for training.

    Each game takes about 2 s to simulate -- is that cost a bit too high?

In [None]:
# Environment setup: change into the project directory (IPython magic) and
# add ./python to the import path (presumably where the `dlgo` package
# lives -- TODO confirm).
%cd /playground/sgd_deep_learning/sgd_rl/go
import sys
sys.path.append('./python')

In [None]:
import os
import time
import torch
from collections import namedtuple

from dlgo import scoring
from dlgo import zero
from dlgo import GameState, Player, Point

### Script input parameters

In [None]:
# Directory where the collected experience data and model checkpoints live.
data_home_path = 'data/agz/'
# exist_ok=True creates the directory tree only when it is missing and avoids
# the check-then-create race of a separate os.path.exists() test.
os.makedirs(data_home_path, exist_ok=True)

# Script input parameters (a plain class used as a settings namespace).
class args:
    """Settings for one self-play data-collection run."""

    # Small board to reduce computation and keep algorithm validation fast.
    board_size = 9
    # Only this many games are collected per iteration.
    num_games = 10
    # Path of the learning agent's checkpoint file.
    learning_agent = '{}agent_checkpoint.pth'.format(data_home_path)
    # Path the collected experience is written to.
    experience_out = '{}experience.pth'.format(data_home_path)
    
# Report whether an agent checkpoint file already exists.
print(os.path.exists(args.learning_agent))

# Module-level board size read by print_board, simulate_game and main_loop.
# No `global` statement is needed: at module scope an assignment already
# binds a global name, so the statement was a no-op.
BOARD_SIZE = args.board_size

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device:", device)

### helper functions

In [None]:
# Column labels printed under the board; the letter 'I' is not part of the set.
COLS = 'ABCDEFGHJKLMNOPQRST'
# Character printed for each value returned by board.get():
# None (presumably an empty point), a black stone, or a white stone.
STONE_TO_CHAR = {None: '.', Player.black: 'x', Player.white: 'o'}

def avg(items):
    """Return the arithmetic mean of *items*, or 0.0 when *items* is empty."""
    if items:
        total = sum(items)
        return total / float(len(items))
    return 0.0


def print_board(board):
    """Print a text rendering of *board*, highest row first.

    Each row is prefixed with its row number and a final line shows the
    column letters. The board is read through ``board.get(Point(...))`` for
    every point of a BOARD_SIZE x BOARD_SIZE grid.
    """
    for row in reversed(range(1, BOARD_SIZE + 1)):
        cells = ''.join(
            STONE_TO_CHAR[board.get(Point(row=row, col=col))]
            for col in range(1, BOARD_SIZE + 1)
        )
        print('%2d %s' % (row, cells))
    # Column header, aligned with the three-character row prefix above.
    print('   ' + COLS[:BOARD_SIZE])


class GameRecord(namedtuple('GameRecord', ['moves', 'winner', 'margin'])):
    """Record of one finished game with fields ``moves``, ``winner`` and ``margin``."""

def name(player):
    """Return 'B' when *player* is Player.black, otherwise 'W'."""
    return 'B' if player == Player.black else 'W'

### Simulating a game

In [None]:
def simulate_game(black_player, white_player):
    """Play one complete game between two agents and return its record.

    Args:
        black_player: agent that moves for black; must offer
            ``select_move(game_state)``.
        white_player: agent that moves for white; same interface.

    Returns:
        A GameRecord holding the moves in the order they were played, plus
        the winner and winning margin reported by
        ``scoring.compute_game_result``.
    """
    players = {Player.black: black_player, Player.white: white_player}
    state = GameState.new_game(BOARD_SIZE)
    history = []

    # Alternate between the two agents until the game state reports it is over.
    while not state.is_over():
        mover = players[state.next_player]
        chosen = mover.select_move(state)
        history.append(chosen)
        state = state.apply_move(chosen)

    # Show the final position and its computed result.
    print_board(state.board)
    result = scoring.compute_game_result(state)
    print(result)

    # TODO: clarify what `moves` and `margin` are used for downstream.
    return GameRecord(
        moves=history,
        winner=result.winner,
        margin=result.winning_margin,
    )

### main loop of self_play

In [None]:
def main_loop():
    """Run one round of self-play and save the collected experience.

    Two agents are built around a single shared model: through
    ``zero.load_agent`` when the file at ``args.learning_agent`` exists,
    otherwise as fresh ``zero.ZeroAgent`` instances. They play
    ``args.num_games`` games against each other; after each game the
    winner's collector is completed with reward +1 and the other with -1.
    Finally both collectors are combined and serialized to
    ``args.experience_out``.
    """
    checkpoint_path = args.learning_agent
    output_path = args.experience_out

    encoder = zero.ZeroEncoder(BOARD_SIZE)
    model = zero.agznet(
        input_channel_num=encoder.num_planes(),
        moves_num=encoder.num_moves(),
    )

    if os.path.exists(checkpoint_path):
        black_agent = zero.load_agent(
            model, encoder, checkpoint_path, rounds_per_move=10, c=2.0, device=device)
        white_agent = zero.load_agent(
            model, encoder, checkpoint_path, rounds_per_move=10, c=2.0, device=device)
    else:
        # TODO: raising rounds_per_move from 10 to 20 did not double the
        # simulation time, it only grew slightly; this needs profiling.
        black_agent = zero.ZeroAgent(
            model, encoder, rounds_per_move=20, c=2.0, device=device)
        white_agent = zero.ZeroAgent(
            model, encoder, rounds_per_move=20, c=2.0, device=device)

    assert black_agent is not None and white_agent is not None

    # One experience collector per side.
    black_collector = zero.ZeroExperienceCollector()
    white_collector = zero.ZeroExperienceCollector()
    black_agent.set_collector(black_collector)
    white_agent.set_collector(white_collector)

    total_games = args.num_games
    started = time.time()
    for game_no in range(1, total_games + 1):
        print('Simulating game %d/%d...' % (game_no, total_games))
        black_collector.begin_episode()
        white_collector.begin_episode()

        record = simulate_game(black_agent, white_agent)
        black_won = record.winner == Player.black
        if black_won:
            black_collector.complete_episode(reward=1)
            white_collector.complete_episode(reward=-1)
        else:
            # Any result other than a black win is credited to white.
            white_collector.complete_episode(reward=1)
            black_collector.complete_episode(reward=-1)

    elapsed = time.time() - started
    print("simulatinon of {} games , cost_time:{:.3f}s.".format(total_games, elapsed))

    # Merge the data gathered by both sides and write it to disk.
    experience = zero.combine_experience([black_collector, white_collector])
    experience.serialize(output_path)

    print("collect {} samples".format(len(experience)))
    
# Run one self-play data-collection round when this cell is executed.
main_loop()