In [None]:
import copy

import numpy as np
from keras.layers import Input, Convolution2D, Dense, Dropout, Flatten, concatenate, BatchNormalization
from keras.models import Model  # basic class for specifying and training a neural network
from keras import losses
from keras.callbacks import EarlyStopping
from keras import backend as K
import keras
import tensorflow as tf
from sortedcontainers import SortedSet

import core
import learner
from learner.pqmind import PQMind
from core.detail_board import Board
import random

import cProfile

# Seeded NumPy generator. Note that the board randomisation in the game-playing
# cells uses the stdlib `random` module, which is not seeded in the visible cells.
random_state = np.random.RandomState(42)

# NOTE(review): presumably the bounds of the value (Q) targets; neither constant
# is referenced in the visible cells — confirm before relying on them.
MIN_Q = -1
MAX_Q = 1

# Re-execute the project packages so that edits made on disk are picked up
# without restarting the kernel. importlib.reload() only re-runs the module it
# is given: submodules, and names already bound with `from ... import`
# (PQMind, Board above), keep pointing at the previously loaded objects.
import importlib
importlib.reload(learner)
importlib.reload(core)

In [None]:
def test_imports():
    """Check that the packages a game-playing task needs can be imported.

    Meant to be executed inside a Spark task: the imports fail with
    ``ImportError`` if ``core`` or ``tensorflow`` is unavailable in the
    process running the function.

    Returns:
        The list produced by ``site.getsitepackages()`` in that process.
    """
    import site
    import core
    import tensorflow

    site_package_dirs = site.getsitepackages()
    return site_package_dirs

# Run test_imports() once inside a Spark task; the collected result is the
# site-packages path list as seen by the process that executed the task.
# NOTE(review): `sc` is not defined in the visible cells — presumably the
# SparkContext injected by the notebook kernel; confirm.
sc.parallelize([1]).map(lambda x : test_imports()).collect()

In [None]:
def play_a_game(i):
    """Play one self-play game and return the training data the mind recorded.

    A new ``PQMind`` is created with ``init=False`` and loaded with the weights
    held by the module-level broadcast variables ``q_model_bc`` and
    ``p_model_bc``, so both must exist before this function runs. The board is
    pre-filled with a random number of random moves (0 up to a third of the
    cells), after which the mind makes every move, with the player sign flipped
    after each one, until ``make_move`` returns a truthy result. The board is
    printed after every move.

    Args:
        i: Training-iteration index. It is forwarded to ``k_function`` and
            ``max_iter_function`` to choose the search settings for each move.

    Returns:
        The tuple ``(mind.train_vectors, mind.train_p, mind.train_q)``.
    """
    mind = PQMind(size=SIZE, alpha=0.2, init=False, channels=CHANNELS)
    mind.value_est.set_weights(q_model_bc.value)
    mind.policy_est.set_weights(p_model_bc.value)

    board = Board(size=SIZE, win_chain_length=WIN_CHAIN_LENGTH)

    # Start from a partially filled board so games do not all open identically.
    opening_moves = random.randint(0, int((SIZE ** 2) / 3.0))
    for _ in range(opening_moves):
        board.make_random_move()

    player = board.player_to_move
    game_over = False
    while not game_over:
        game_over = mind.make_move(board,
                                   as_player=player,
                                   retrain=False,
                                   epsilon=0.1,
                                   max_depth=25,
                                   k=k_function(i),
                                   max_iters=max_iter_function(i),
                                   )
        print(board.pprint())
        player = -player

    return mind.train_vectors, mind.train_p, mind.train_q

In [None]:

def max_iter_function(i):
    """Return the search-iteration budget used as ``max_iters`` for iteration ``i``.

    Schedule: 1 when ``i == 0``; 2 for any other ``i`` below 5; 10 for ``i``
    below 10; 20 otherwise.
    """
    # the first iteration has to teach the model that most positions aren't game ending
    if i == 0:
        return 1
    # (exclusive upper bound on i, budget) pairs, checked in ascending order.
    for upper_bound, budget in ((5, 2), (10, 10)):
        if i < upper_bound:
            return budget
    return 20

def k_function(i):
    """Return the value passed as ``k`` to ``make_move``: the number of board cells.

    Args:
        i: Training-iteration index supplied by the caller; it is ignored.

    Returns:
        ``SIZE ** 2``, read from the module-level ``SIZE`` at call time.
    """
    cell_count = SIZE ** 2
    return cell_count
    

In [None]:
# --- Configuration shared by the cells below ---------------------------------
# Board edge length; passed to Board/PQMind and used to reshape training vectors.
SIZE = 7
# Passed to Board as win_chain_length.
WIN_CHAIN_LENGTH = 5
# Passed to PQMind as `channels`; also the last axis of each reshaped training input.
CHANNELS = 20
# Upper bound on Keras epochs per fit() call (EarlyStopping may stop sooner).
EPOCHS = 100
# Mini-batch size for fit().
BATCH_SIZE = 32
# Number of games (and Spark partitions) played per distributed_play() round.
GAME_BATCH = 500

# EarlyStopping patience used by every fit() call.
PATIENCE = 5

# Driver-side mind whose two networks are trained in place by distributed_play().
# NOTE(review): init=True presumably builds freshly initialised networks — confirm in PQMind.
mind = PQMind(size=SIZE, alpha=0.2, init=True, channels=CHANNELS)
#mind.load_net('gomoku/models/distributed_7_with_19chain_2')
# Aliases of the networks owned by `mind` (the same objects, not copies).
q_model = mind.value_est
p_model = mind.policy_est

def distributed_play(i):
    """Play ``GAME_BATCH`` games on Spark and fit the driver-side networks on them.

    Each game runs ``play_a_game(i)`` in its own partition; the games read the
    weights from the module-level ``q_model_bc`` / ``p_model_bc`` broadcasts, so
    those must be created before calling this function. The collected vectors
    are reshaped to ``(SIZE, SIZE, CHANNELS)`` and used to fit ``q_model``; the
    policy network ``p_model`` is fitted only when ``max_iter_function(i) > 3``.

    During the first two iterations (``i < 2``) every Q target whose absolute
    value is not above 0.999 is replaced by 0.

    Args:
        i: Training-iteration index, forwarded to ``play_a_game`` and used for
            the two iteration-dependent rules described above.

    Returns:
        None. The models are updated in place and progress is printed.
    """

    def _fit_model(model, inputs, targets):
        # A new EarlyStopping instance is created for every fit call.
        model.fit(x=inputs,
                  y=np.array(targets),
                  shuffle=False,
                  callbacks=[EarlyStopping(patience=PATIENCE)],
                  validation_split=0.1,
                  batch_size=BATCH_SIZE,
                  epochs=EPOCHS)

    # One (key, value) pair per game; the identity partitioner places each key
    # in its own partition.
    game_keys = zip(range(GAME_BATCH), range(GAME_BATCH))
    collected = (sc.parallelize(game_keys)
                 .partitionBy(GAME_BATCH, lambda x: x)
                 .map(lambda x: play_a_game(i))
                 .collect())

    train_vectors, train_p, train_q = [], [], []
    for game_vectors, game_p, game_q in collected:
        train_vectors.extend(game_vectors)
        if i < 2:
            # Keep only the (near) +/-1 targets; everything else becomes 0.
            train_q.extend(x if abs(x) > 0.999 else 0 for x in game_q)
        else:
            train_q.extend(game_q)
        train_p.extend(game_p)

    print(train_q[:100])

    # Each entry of train_vectors is a (vector, whose_move) pair; only the
    # vector is fed to the networks.
    train_inputs = np.array([board_vector.reshape(SIZE, SIZE, CHANNELS)
                             for board_vector, _ in train_vectors])

    if train_vectors:
        _fit_model(q_model, train_inputs, train_q)
        # doesn't always need to train P
        if max_iter_function(i) > 3:
            _fit_model(p_model, train_inputs, train_p)

    print('Num Train Vectors', len(train_vectors))




In [None]:
# Outer self-play / training loop: broadcast the current weights, play a batch
# of games on the cluster and fit the networks on them, then (every second
# iteration) play one game locally so its boards are printed in the notebook.
epochs = 5000
# Broadcasts created by the previous iteration. Every iteration publishes two
# new broadcasts holding complete weight sets; without releasing the old ones
# their cached copies accumulate on the executors for the whole run.
stale_broadcasts = []
for i in range(epochs):
    # play_a_game() reads these two module-level names on the workers.
    q_model_bc = sc.broadcast(copy.deepcopy(q_model.get_weights()))
    p_model_bc = sc.broadcast(copy.deepcopy(p_model.get_weights()))

    # The previous iteration's jobs have already been collected, so nothing can
    # still be reading its broadcasts: drop their executor-side copies. The
    # newest pair is deliberately left alone so later cells can keep using
    # q_model_bc / p_model_bc.
    for stale in stale_broadcasts:
        stale.unpersist()
    stale_broadcasts = [q_model_bc, p_model_bc]

    distributed_play(i)
    if i % 2 == 0:
        play_a_game(i)

In [None]:
# Copy the trained weights from q_model / p_model into `mind` and save it.
# NOTE(review): when `mind` is still the object whose networks q_model/p_model
# alias, the two set_weights calls copy each network onto itself; they only
# matter if `mind` has been rebound by another cell in the meantime.
mind.value_est.set_weights(q_model.get_weights())
mind.policy_est.set_weights(p_model.get_weights())
mind.save('gomoku/models/distributed_7_with_19chain_2')

In [None]:
# Play one game locally using the settings for iteration 55
# (max_iter_function(55) == 20). Requires q_model_bc / p_model_bc to exist.
play_a_game(55)

In [None]:
# Inline variant of play_a_game() with fixed search settings (k=SIZE**2,
# max_iters=20) that leaves `mind` and `round_board` in the notebook namespace
# for inspection. This rebinds the global `mind`.
mind = PQMind(size=SIZE, alpha=0.2, init=True, channels=CHANNELS)
# Overwrite the networks' weights with the most recently broadcast ones.
mind.value_est.set_weights(q_model_bc.value)
mind.policy_est.set_weights(p_model_bc.value)

round_board = Board(size=SIZE, win_chain_length=WIN_CHAIN_LENGTH)

# randomize the board a bit
for j in range(random.randint(0, int((SIZE ** 2) / 3.0))):
    round_board.make_random_move()

current_player = round_board.player_to_move
# The same mind moves for both sides; stop once make_move returns a truthy result.
while True:
    result = mind.make_move(round_board,
                            as_player=current_player,
                            retrain=False,
                            epsilon=0.1,
                            max_depth=25,
                            k=SIZE ** 2,
                            max_iters=20,
                            )
    print(round_board.pprint())
    current_player = -current_player
    if result:
        break

# Print the layer summary of the value network.
mind.value_est.summary()

In [None]:
# Display the training vectors recorded by whichever `mind` is currently bound
# (presumably the one from the manual game cell when run in order).
mind.train_vectors

In [None]:
def debug_game():
    """Construct a small PQMind and then fail unconditionally.

    ``assert()`` asserts an empty tuple, which is falsy, so this always raises
    ``AssertionError`` once the PQMind constructor has returned (unless Python
    runs with assertions disabled).
    NOTE(review): presumably a deliberate way to check that PQMind can be built
    inside a Spark task and to surface the worker traceback — confirm intent.
    """
    mind = PQMind(size=7, alpha=0.2, init=True, channels=4)
    assert()

# Run debug_game() in a single Spark task; given the assert above, the job is
# expected to fail rather than return a result.
sc.parallelize(zip(range(1), range(1))).map(lambda x: debug_game()).collect()

In [None]:
# One-off data collection: broadcast the current weights and play 200 games
# (one per partition) with the iteration-0 settings, keeping the raw results in
# `collected` for the cells below.
q_model_bc = sc.broadcast(copy.deepcopy(q_model.get_weights()))
p_model_bc = sc.broadcast(copy.deepcopy(p_model.get_weights()))
collected = sc.parallelize(zip(range(200), range(200))).partitionBy(200, lambda x: x) \
                .map(lambda x : play_a_game(0)).collect()



In [None]:
# Build a new mind and rebind q_model / p_model to its networks. After this
# cell, distributed_play() and the fit cell below operate on these new networks
# instead of the ones trained earlier.
# NOTE(review): init=True presumably means freshly initialised weights — confirm.
mind = PQMind(size=SIZE, alpha=0.2, init=True, channels=CHANNELS)

#mind.load_net('gomoku/models/distributed_7_with_19chain_2')
q_model = mind.value_est
p_model = mind.policy_est

# Flatten the per-game results in `collected` into three parallel lists.
train_vectors = []
train_p = []
train_q = []

# Unlike distributed_play(), no clamping of the Q targets is applied here.
for vector, p, q in collected:
    train_vectors.extend(vector)
    train_q.extend(q)
    train_p.extend(p)
    
# Each train_vectors entry is a (vector, whose_move) pair; only the vector is
# used, reshaped to (SIZE, SIZE, CHANNELS).
train_inputs = []
for vector, whose_move in train_vectors:
    train_inputs.append(vector.reshape(SIZE, SIZE, CHANNELS))

train_inputs = np.array(train_inputs)

In [None]:
# Spot-check: Q target of training sample 232.
train_q[232]

In [None]:
# Spot-check: channel 1 of the same sample, as a SIZE x SIZE grid.
train_inputs[232, :, :, 1]

In [None]:
# Spot-check: the first three channels of the same sample.
train_inputs[232, :, :, :3]

In [None]:


# Fit the value network (and conditionally the policy network) on the arrays
# assembled above; this repeats the fitting code inside distributed_play().
if len(train_vectors) > 0:
    # With shuffle=False the samples are fed in game order. Per the Keras
    # documentation, validation_split holds out the last 10% of the samples.
    q_model.fit(x=train_inputs,
                y=np.array(train_q),
                shuffle=False,
                callbacks=[EarlyStopping(patience=PATIENCE)],
                validation_split=0.1,
                batch_size=BATCH_SIZE,
                epochs=EPOCHS)
    # doesn't always need to train P
    # NOTE(review): `i` is not defined in this cell. It is whatever value the
    # training loop (`for i in range(epochs)`) left behind, and raises NameError
    # on a kernel where that loop never ran. The games in `collected` were
    # played with play_a_game(0), for which max_iter_function(0) == 1 would skip
    # this branch — confirm which value is intended.
    if max_iter_function(i) > 3:
        p_model.fit(x=train_inputs,
                    y=np.array(train_p),
                    shuffle=False,
                    callbacks=[EarlyStopping(patience=PATIENCE)],
                    validation_split=0.1,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS)

print('Num Train Vectors', len(train_vectors))

In [None]:
# Persist the assembled training set as one .npz archive with the keys
# 'train_inputs', 'train_p' and 'train_q' (the two lists are converted to
# arrays by np.savez).
np.savez('gomoku/models/7_20channel.npz', train_inputs=train_inputs, train_p=train_p, train_q=train_q)

In [None]:
from tensorflow.python.client import device_lib

def get_available_gpus():
    """Return the names of the local devices TensorFlow reports as GPUs.

    Returns:
        A list with the ``name`` of every entry of
        ``device_lib.list_local_devices()`` whose ``device_type`` is ``'GPU'``;
        the list is empty when there is none.
    """
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names

In [None]:
# Show the GPU device names visible to TensorFlow in this (driver) process.
get_available_gpus()