# AlphaGo Zero Demo


    # This script demonstrates all the steps to create and train an
    # AGZ-style bot.
    # For practical purposes, you would separate this script into multiple
    # parts (for initializing, generating self-play games, and training).
    # You'll also need to run for many more rounds.

In [None]:
# Environment setup
%cd /playground/sgd_deep_learning/sgd_rl/go
import sys
sys.path.append('./python')

In [None]:
import torch

from dlgo import scoring
from dlgo import zero
from dlgo.goboard_fast import GameState, Player, Point

In [None]:
def simulate_game(board_size, black_agent, white_agent):
    """Play one complete game between two agents and record the outcome.

    A fresh game is created with ``GameState.new_game(board_size)``. Each
    agent's collector starts a new episode, then the agents alternate
    (driven by ``game.next_player``) until ``is_over()`` reports the game
    has ended. The result from ``scoring.compute_game_result`` is printed,
    and each collector's episode is completed with that agent's reward.

    Args:
        board_size: Size passed to ``GameState.new_game``.
        black_agent: Agent playing ``Player.black``; must expose
            ``select_move(game_state)`` and a ``collector`` attribute.
        white_agent: Agent playing ``Player.white``; same requirements.

    Returns:
        None. The outcome is only reported through the collectors and
        the printed game result.
    """
    print('Starting the game!')
    state = GameState.new_game(board_size)
    players = {Player.black: black_agent, Player.white: white_agent}

    # Open a new experience episode on both sides before any move is made.
    for agent in (black_agent, white_agent):
        agent.collector.begin_episode()

    # Let whoever is to move pick a move until the game ends.
    while True:
        if state.is_over():
            break
        mover = players[state.next_player]
        state = state.apply_move(mover.select_move(state))

    outcome = scoring.compute_game_result(state)
    print(outcome)

    # Reward is +1 for the winner and -1 for the loser. Any result whose
    # winner is not black is treated as a white win.
    black_reward = 1 if outcome.winner == Player.black else -1
    black_agent.collector.complete_episode(black_reward)
    white_agent.collector.complete_episode(-black_reward)

In [None]:
def main():
    """Run a miniature end-to-end AGZ-style training demo.

    Builds an encoder and network for a 9x9 board, creates a black and a
    white ``ZeroAgent`` that share that network, plays a handful of
    self-play games while collecting experience, and finally runs one
    training call on the combined experience.
    """
    # Demo settings, deliberately tiny so the whole pipeline runs quickly.
    board_size = 9
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    encoder = zero.ZeroEncoder(board_size)  # note: the zero-style encoder is used here
    model = zero.agznet(
        input_channel_num=encoder.num_planes(),
        moves_num=encoder.num_moves(),
    )
    learning_rate = 0.01
    batch_size = 2048
    num_games = 5  # number of self-play games collected before training

    # Only 10 search rounds per move are used here. A strong bot needs far
    # more: at least a few hundred (the original note cites 1600 per move).
    agent_options = dict(rounds_per_move=10, c=2.0, device=device)
    black_agent = zero.ZeroAgent(model, encoder, **agent_options)
    white_agent = zero.ZeroAgent(model, encoder, **agent_options)

    # One experience collector per side.
    black_collector = zero.ZeroExperienceCollector()
    white_collector = zero.ZeroExperienceCollector()
    black_agent.set_collector(black_collector)
    white_agent.set_collector(white_collector)

    # In real training, you should simulate thousands of games for each
    # training batch.
    for _ in range(num_games):
        simulate_game(board_size, black_agent, white_agent)

    # Merge both sides' experience. Training goes through black_agent only;
    # both agents were constructed with the same model object.
    experience = zero.combine_experience([black_collector, white_collector])
    black_agent.train(experience, learning_rate, batch_size)

# Run the demo as soon as this cell/script is executed.
main()