## MCTS数据实验
    由于 MCTS bot 的搜索速度比较慢，这里训练一个 NN bot，直接由棋盘数据快速预测下一步落子 (next move)

In [None]:
# Environment setup: change into the working directory (IPython magic) and
# add ./python to the module search path
# (presumably where the dlgo package lives - confirm).
%cd /playground/sgd_deep_learning/sgd_rl/go
import sys
sys.path.append('./python')

In [None]:
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
# from dlgo.networks.resnet import

import torch
from torch import nn
import torch.nn.functional as F
import numpy as np

import os
import time

from dlgo.data import GoDataset
from dlgo.script.generate_mcts_games import generate_game

# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"device: {device}")

# Floating-point type used for the input tensors and the model weights.
# Alternatives that were tried:
#   datatype = torch.bfloat16  # saves memory but computes far too slowly;
#                              # not recommended, at least for the CNN setup
#   datatype = torch.float32
datatype = torch.float16

### step1: 准备mcts bot对局数据
    * 蒙特卡洛搜索树模拟对局实在是太慢了
    * 直接使用项目自带的数据 data/mcts_generated_games (训练集200 games，测试集20 games)

In [None]:
# Settings for Monte Carlo bot self-play, used to collect supervised training data
class args:
    """Plain namespace of self-play generation settings, read as ``args.<name>``."""

    # directories the generated arrays are written to
    board_out = "data/mcts/mini/"
    move_out = "data/mcts/mini/"

    # values handed to generate_game
    board_size = 9
    rounds = 1000
    max_moves = 60
    temperature = 0.8

    # number of games to generate
    num_games = 10

def check_dir_exist(dir_path):
    """Make sure the directory ``dir_path`` exists, creating it if needed.

    Missing parent directories are created as well. ``exist_ok=True`` makes
    the call a no-op when the directory is already present and avoids the
    race between testing ``os.path.exists`` and creating the directory.

    Args:
        dir_path: Path of the directory to ensure.

    Raises:
        FileExistsError: If ``dir_path`` exists but is not a directory.
    """
    os.makedirs(dir_path, exist_ok=True)

# Create the output directories up front.
for _out_dir in (args.board_out, args.move_out):
    check_dir_exist(_out_dir)
    
def main():
    """Generate ``args.num_games`` self-play games and save them as .npy files.

    Every game comes from ``generate_game``, which returns a pair of arrays
    (kept here as features and labels). The per-game arrays are concatenated
    and written to ``features-<num_games>.npy`` under ``args.board_out`` and
    ``labels-<num_games>.npy`` under ``args.move_out``.
    """
    xs = []
    ys = []

    for i in range(args.num_games):
        print('Generating game %d/%d...' % (i + 1, args.num_games))
        x, y = generate_game(args.board_size, args.rounds, args.max_moves, args.temperature)
        xs.append(x)
        ys.append(y)

    x = np.concatenate(xs)
    y = np.concatenate(ys)

    # os.path.join keeps the paths valid even when the configured directory
    # has no trailing separator.
    np.save(os.path.join(args.board_out, 'features-{}.npy'.format(args.num_games)), x)
    np.save(os.path.join(args.move_out, 'labels-{}.npy'.format(args.num_games)), y)


# main()  # MCTS self-play is too slow, so generation is not run for now

In [None]:
# Train directly on the data that has already been generated
train_data_path, test_data_path = (
    "data/mcts_generated_games/features-200.npy",
    "data/mcts_generated_games/features-20.npy",
)

def load_dataset_with_tag(data_path, device):
    """Load a features/labels .npy pair as a ``TensorDataset`` of (input, class index).

    The labels file is located by replacing "features" with "labels" in
    ``data_path``.

    Args:
        data_path: Path of the features ``.npy`` file.
        device: Device both tensors are moved to.

    Returns:
        TensorDataset whose inputs are cast to the global ``datatype`` and
        given a new axis at dim 1 (presumably the channel axis - confirm
        against the stored feature shape), and whose targets are
        ``torch.long`` argmax indices taken over axis 1 of the labels array.
    """
    label_path = data_path.replace("features", "labels")
    boards = np.load(data_path)
    moves = np.load(label_path)

    # numpy -> torch
    inputs = torch.from_numpy(boards).to(datatype).to(device=device).unsqueeze(dim=1)
    targets = torch.tensor(moves.argmax(axis=1), dtype=torch.long).to(device=device)

    return TensorDataset(inputs, targets)

# Class-index targets, as needed by the cross-entropy loss
train_ds, test_ds = (
    load_dataset_with_tag(data_path=path, device=device)
    for path in (train_data_path, test_data_path)
)
print(len(train_ds), len(test_ds))

train_dl = DataLoader(dataset=train_ds, batch_size=64, shuffle=True)
test_dl = DataLoader(dataset=test_ds, batch_size=64, shuffle=True)

# Peek at a single batch to confirm the tensor shapes
for x, label in train_dl:
    print(x.shape, label.shape)
    break

# def load_dataset_with_onehot(data_path, device):
#     features = np.load(data_path)
#     labels = np.load(data_path.replace("features", "labels"))
#     # np to tensor
#     x = torch.from_numpy(features).to(datatype).to(device=device)
#     y = torch.from_numpy(labels).to(torch.long).to(device=device)
#     # wrap by Dataset
#     ds = TensorDataset(x, y)
#     return ds 

# # onehot 用于MSE-loss
# train_ds_oh = load_dataset_with_onehot(data_path=train_data_path, device=device)
# test_ds_oh = load_dataset_with_onehot(data_path=test_data_path, device=device)
# train_dl_oh = DataLoader(train_ds_oh, batch_size=64, shuffle=True)
# test_dl_oh = DataLoader(test_ds_oh, batch_size=64, shuffle=True)
# for x, label in train_dl_oh:
#     print(x.shape, label.shape)
#     break

### step2: FC_NN (move预测模型) with cross-entropy loss（one-hot + MSE loss 的版本已在上面注释掉）

"As you can see, the prediction accuracy of your
experiment is at only around 2.3%, which isn’t satisfying
at first sight. But recall that your baseline of
randomly guessing moves is about 1.2%. This tells
you that although the performance isn’t great,
the model is learning and can predict moves better
than random." -- P129 in book

和实验数据大致吻合（只使用了200games做训练集，20games做测试集，比书中数据要少很多）

        epoch:29, i:50/182, avg_loss:0.041
        epoch:29, i:100/182, avg_loss:0.043
        epoch:29, i:150/182, avg_loss:0.044
        -----------------------------
        Epoch 29: train_pred_succ:36.343%, 11705
        Epoch 29: test_pred_succ:1.565%, 1214

        best_epoch:10, best_test_pred:2.471%


        使用dropout，效果略好
        best_epoch:14, best_test_pred:2.636%

In [None]:
# Fully connected move predictor: flattened 9x9 input -> 81 output scores
fc_model = nn.Sequential(
    nn.Flatten(start_dim=1),
    # hidden layer 1
    nn.Linear(in_features=9 * 9, out_features=1000),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    # hidden layer 2
    nn.Linear(in_features=1000, out_features=500),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    # output layer: one score per board point
    nn.Linear(in_features=500, out_features=9 * 9),
)

In [None]:
def evaluate(model, test_dataloader):
    """Return the fraction of samples whose top-scoring class equals the label.

    The model is put into eval mode while predictions are made, so Dropout is
    disabled and BatchNorm uses (and does not update) its running statistics.
    The mode the model had on entry is restored before returning.

    Args:
        model: ``nn.Module`` mapping a batch ``x`` to per-class scores with
            the classes along dim 1.
        test_dataloader: DataLoader yielding ``(x, label)`` batches, where
            ``label`` holds the class indices.

    Returns:
        Accuracy in [0, 1] as a 0-dim tensor (correct predictions divided by
        ``len(test_dataloader.dataset)``). For reference, uniform random
        guessing over 9 * 9 = 81 moves scores about 1.2%.
    """
    n_test = len(test_dataloader.dataset)
    was_training = model.training
    model.eval()
    succ_num = 0
    try:
        with torch.no_grad():
            for x, label in test_dataloader:
                predict = torch.argmax(model(x), dim=1)
                succ_num += torch.sum(predict == label)
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)
    return succ_num / n_test

In [None]:
def train(train_dataloader, model, loss_fn, optimizer, epochs=3, test_dataloader=None):
    """Train ``model`` and print running loss and per-epoch accuracy.

    Every 50 batches the mean of the per-batch losses seen so far in the
    epoch is printed. After each epoch, accuracy is measured with
    ``evaluate`` on the training data and, when ``test_dataloader`` is given,
    on the test data; the epoch with the best test accuracy is reported at
    the end.

    Args:
        train_dataloader: DataLoader yielding ``(x, label)`` training batches.
        model: Network to optimize; updated in place.
        loss_fn: Callable ``loss_fn(prediction, label)`` returning a scalar
            loss tensor.
        optimizer: Optimizer built over ``model``'s parameters.
        epochs: Number of passes over the training data.
        test_dataloader: Optional DataLoader used for the per-epoch test
            accuracy.

    Returns:
        None. All results are printed.
    """
    n_train = len(train_dataloader.dataset)  # total number of training samples
    n_batches = len(train_dataloader)  # number of batches in one epoch
    best_test_pred = 0
    best_epoch = -1

    for epoch in range(epochs):
        # Dropout / batch-norm layers must be in training mode for the updates.
        model.train()
        acc_loss = 0.0

        for i, (x, label) in enumerate(train_dataloader, start=1):
            y = model(x)
            loss = loss_fn(y, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc_loss += loss.item()

            if i % 50 == 0:
                # Average over batches, not samples: each loss.item() is
                # already one value per batch.
                print("epoch:{}, i:{}/{}, avg_loss:{:.3f}"
                      .format(epoch, i, n_batches, acc_loss / i))

        # ---------- end of epoch ----------
        print("-----------------------------")
        # accuracy on the training set
        train_succ_ratio = evaluate(model, train_dataloader) * 100
        print("Epoch {}: train_pred_succ:{:.3f}%, {}".format(epoch, train_succ_ratio, n_train))

        # Skipped when no test loader is given (or when it has no batches).
        if test_dataloader:
            n_test = len(test_dataloader.dataset)
            test_succ_ratio = evaluate(model, test_dataloader) * 100

            if test_succ_ratio > best_test_pred:
                best_test_pred = test_succ_ratio
                best_epoch = epoch

            print("Epoch {}: test_pred_succ:{:.3f}%, {}".format(epoch, test_succ_ratio, n_test))
        else:
            print("Epoch {0} complete".format(epoch))

        print('\n')

    # training finished
    print("best_epoch:{}, best_test_pred:{:.3f}%".format(best_epoch, best_test_pred))

In [None]:
# Cast the fully connected net to the working dtype and move it to the device
model = fc_model.to(device=device, dtype=datatype)
ce_loss = nn.CrossEntropyLoss()

# Optimizers tried so far:
# optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
# optimizer = torch.optim.Adadelta(model.parameters())  # also acceptable
# optimizer = torch.optim.Adam(params=model.parameters(), lr=1)  # poor results
optimizer = torch.optim.Adagrad(params=model.parameters())  # works fairly well

# Training run, commented out for now
# train(model=model,
#       epochs=30,
#       loss_fn=ce_loss,
#       optimizer=optimizer,
#       train_dataloader=train_dl,
#       test_dataloader=test_dl,)

### step3: 卷积模型

In [None]:
# Convolutional move predictor: (N, 1, 9, 9) input -> 81 output scores
cnn_model = nn.Sequential(
    # conv block 1
    nn.Conv2d(1, 48, kernel_size=3, padding=1, stride=1, bias=False),
    nn.BatchNorm2d(48),
    nn.ReLU(),
    nn.Dropout(p=0.5),

    # conv block 2; the 2x2 max-pool reduces the 9x9 map to 4x4
    nn.Conv2d(48, 48, kernel_size=3, padding=1, stride=1, bias=False),
    nn.BatchNorm2d(48),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Dropout(p=0.5),

    # classifier head
    nn.Flatten(start_dim=1),
    nn.Linear(48 * 4 * 4, 512),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(512, 9 * 9),
)

# Shape probe: print every layer followed by the shape of its output.
# It runs in eval mode under no_grad so the random probe batch neither
# updates the BatchNorm running statistics nor builds an autograd graph.
cnn_model.eval()
with torch.no_grad():
    x = torch.randn(64, 1, 9, 9)
    for layer in cnn_model:
        print(layer)
        x = layer(x)
        print(x.shape)
cnn_model.train()  # back to the default training mode

In [None]:
# Cast the CNN to the working dtype and move it to the device
model = cnn_model.to(device=device, dtype=datatype)
ce_loss = nn.CrossEntropyLoss()

In [None]:
# Plain SGD for the CNN run. Alternatives that were tried:
# optimizer = torch.optim.Adagrad(model.parameters())  # works fairly well
# optimizer = torch.optim.Adadelta(model.parameters())  # also acceptable
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# perf_counter is a monotonic, high-resolution clock, so the elapsed time is
# not affected by system clock adjustments.
t1 = time.perf_counter()
train(model=model,
      epochs=30,
      loss_fn=ce_loss,
      optimizer=optimizer,
      train_dataloader=train_dl,
      test_dataloader=test_dl,)
print('{:.3f}s'.format(time.perf_counter() - t1))

1、完全看不出来数据类型对CNN模型的加速效果，难道是由于计算密集型导致的模型数据类型影响不大？？

    torch.float16  best_epoch:12, best_test_pred:2.883%, 20.8s  | best_epoch:21, best_test_pred:2.554%, 23.579s
    torch.float32  best_epoch:19, best_test_pred:2.554%, 24.335s | best_epoch:15, best_test_pred:2.554%, 22.048s
    torch.bfloat16 best_epoch:22, best_test_pred:2.636%, 75.696s |
    注意 bfloat16 训练非常慢(约慢3倍)，而且效果一般（这里是把整个模型转成 bfloat16，并不是 autocast 式的混合精度训练）

2、 少量的数据集合200，并不能在测试集合上达到书中描述的8%的预测准确性。   
最好的模型在3.6%左右，确实比fc_nn好点（提升1个百分点）。

In [None]:
# Scratch check: inserting an axis at dim 1 turns (10, 9, 9) into (10, 1, 9, 9)
a = torch.randn(10, 9, 9)
b = torch.unsqueeze(a, dim=1)
print(b.shape)