In [17]:
import itertools
import json
import os
import numpy as np
import tensorflow as tf
import keras
from keras.models import Model
from keras.models import load_model
from keras.layers import Dense, Dropout, Flatten, Reshape, Input
from keras.layers import Conv2D, MaxPooling2D, LeakyReLU, Activation
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD
from keras.regularizers import l2

from config import BOARD_SIZE

In [18]:
from keras.models import load_model
import numpy as np
from copy import deepcopy
from random import shuffle
import os
import sys
import random
from config import BOARD_SIZE, GameState, WHITE, BLACK

import numpy as np
from copy import deepcopy, copy
from random import choice

from MCTS import MCTS
from config import GameState, WHITE, BLACK, BOARD_SIZE
from board import Board

BATCH_SIZE = 32
PATH_TO_DATA = './../self_train_models/'
PATH_TO_MODELS = PATH_TO_DATA + '{}.h5'
PATH_TO_HISTORY = PATH_TO_DATA + 'history/{}.h5'
GENERATING_DATA_MCTS_DEPTH = 20
DATASET_SIZE = 75
COMPETING_MCTS_DEPTH = 20

In [19]:
def _load_nn(name):
    print('model ', name, ' was loaded from file ', PATH_TO_MODELS.format(name))
    model = load_model(PATH_TO_MODELS.format(name))
    return model


def _save_old_model(model):
    cnt = 0
    while True:
        if os.path.isfile(PATH_TO_HISTORY.format(str(cnt))):
            cnt += 1
            continue
        model.save(PATH_TO_HISTORY.format(str(cnt)))
        return


class SelfPlayIteration(object):
    def __init__(self):
        self._main_player_nn = _load_nn('main_player')
        self._opponent_nn = _load_nn('main_player')
        self._mcts = MCTS(self._main_player_nn, GENERATING_DATA_MCTS_DEPTH)

    def iterate(self):
        train_data = self._create_self_train_dataset(DATASET_SIZE)
        print('-------------------TRAINING STAGE-------------------\ndata size {}'.format(str(len(train_data))))
        for _ in range(5):
            self._main_player_nn.fit_generator(
                self._train_generator(train_data), epochs=1, verbose=1,
                steps_per_epoch=len(train_data) // BATCH_SIZE, shuffle=True
            )
        # TODO(check if main_player is better than opponent)
        print('-------------------COMPETING STAGE------------------')
        if (self._compete()):
            print('Great training! Updating main model.')
            self._main_player_nn.save(PATH_TO_MODELS.format('main_player'))
            _save_old_model(self._opponent_nn)
            return
        print('Oh... Old model is better. Keeping the old one. Try again.')

    @staticmethod
    def _train_generator(data):
        size, X, y, win = 0, [], [], []
        for item in data:
            X.append(item[0])
            y.append(item[1])
            win.append(item[2])
            size += 1
            if size == BATCH_SIZE:
                yield np.float64(np.array(X)).reshape((size, 15, 15)), \
                      [np.array(y).reshape((size, 225)), np.array(win).reshape((size, 1))]
                size, X, y, win = 0, [], [], []

    @staticmethod
    def _generate_symmetries(board, label):
        label = np.array(label).reshape((15, 15))
        syms = list()
        syms.append((
            deepcopy(board.get_board().tolist()),
            deepcopy(label.ravel().tolist()),
        ))
        syms.append((
            deepcopy(np.rot90(board.get_board()).tolist()),
            deepcopy(np.rot90(label).ravel().tolist()),
        ))
        syms.append((
            deepcopy(np.rot90(board.get_board(), 2).tolist()),
            deepcopy(np.rot90(label, 2).ravel().tolist()),
        ))
        syms.append((
            deepcopy(np.rot90(board.get_board(), 3).tolist()),
            deepcopy(np.rot90(label, 3).ravel().tolist()),
        ))
        return syms

    def _play_game(self, first_move=None):
        print('-', end='')
        game_dataset = []
        board = Board()
        player = BLACK
        is_winner_from_black_perspective = 1
        if first_move is not None:
            board.execute_move((first_move[0], first_move[1]), player)
            player = -player
            is_winner_from_black_perspective = -is_winner_from_black_perspective
        moves_cnt = 0
        while board.get_state() == GameState.InProgress:
            moves_cnt += 1
            #print('g', end='')
            label_probs = self._mcts.get_new_actions_probs(board)
            # TODO (here we push colors as a third train param then don't forget to change it to is_win!)
            syms = SelfPlayIteration._generate_symmetries(board, label_probs)
            for s in syms:
                game_dataset.append(np.array(
                    [s[0], s[1], [np.float64(is_winner_from_black_perspective)]]
                ))
            action = np.random.choice(len(label_probs), p=label_probs)
            #print('({}, {}), '.format(action // 15, action % 15), end='')
            board.execute_move((action // BOARD_SIZE, action % BOARD_SIZE), player)
            player = -player
            is_winner_from_black_perspective = -is_winner_from_black_perspective
        print(moves_cnt, end='')
        if board.get_state() == GameState.Black:
            # BLACK is winner -> third param ok
            return game_dataset
        if board.get_state() == GameState.White:
            for i in range(len(game_dataset)):
                # reverse is_winner label
                game_dataset[i][2][0] = - game_dataset[i][2][0]
            return game_dataset
        if board.get_state() == GameState.Draw:
            for i in range(len(game_dataset)):
                game_dataset[i][2][0] = np.float64(0)
            return game_dataset
        raise ValueError('unsupported state')

    def _create_self_train_dataset(self, size=DATASET_SIZE):
        print('-----------------CREATING-DATASET-----------------')
        dataset = []
        for _ in range(size):
            dataset.extend(self._play_game())
        for _ in range(25):
            i = random.randint(0,225)
            dataset.extend(self._play_game((i // BOARD_SIZE, i % BOARD_SIZE)))
        shuffle(dataset)
        return dataset

    @staticmethod
    def _compete_single_game(black_player, white_player):
        print('-', end='')
        board = Board()
        players = [black_player, white_player]
        colors = [BLACK, WHITE]
        cur_ind = 0
        while board.get_state() == GameState.InProgress:
            action = players[cur_ind](board)
            board.execute_move((action // BOARD_SIZE, action % BOARD_SIZE), colors[cur_ind])
            cur_ind = (cur_ind + 1) % 2
        return board.get_state()

    def _compete(self, games_cnt=30):
        print('--------------------COMPETING------------------')
        main_player_won_cnt = 0
        opponent_won_cnt = 0
        draws = 0

        main_player_mcts = MCTS(self._main_player_nn, COMPETING_MCTS_DEPTH)
        opponent_mcts = MCTS(self._opponent_nn, COMPETING_MCTS_DEPTH)
        # 1 stage: main_player - BLACK, opponent - WHITE
        for _ in range(games_cnt):
            result = SelfPlayIteration._compete_single_game(
                lambda x: np.argmax(main_player_mcts.get_new_actions_probs(x, False)),
                lambda y: np.argmax(opponent_mcts.get_new_actions_probs(y, False))
            )
            if result == GameState.Black:
                main_player_won_cnt += 1
            elif result == GameState.White:
                opponent_won_cnt += 1

        # 2 stage: main_player - WHIRE, opponent - BLACK
        for _ in range(games_cnt):
            result = SelfPlayIteration._compete_single_game(
                lambda x: np.argmax(opponent_mcts.get_new_actions_probs(x, False)),
                lambda y: np.argmax(main_player_mcts.get_new_actions_probs(y, False))
            )
            if result == GameState.Black:
                opponent_won_cnt += 1
            elif result == GameState.White:
                main_player_won_cnt += 1

        print('main player won: {}, opponent won: {}'.format(main_player_won_cnt, opponent_won_cnt))
        if main_player_won_cnt + opponent_won_cnt != 0:
            if main_player_won_cnt / (main_player_won_cnt + opponent_won_cnt) >= 0.6:
                return True
        return False


In [20]:
while True:
    self_train = SelfPlayIteration()
    self_train.iterate()

model  main_player  was loaded from file  ./../self_train_models/main_player.h5
model  main_player  was loaded from file  ./../self_train_models/main_player.h5
-----------------CREATING-DATASET-----------------
-20-12-15-138-32-

KeyboardInterrupt: 