In [None]:
import copy

import numpy as np
from keras.layers import Input, Convolution2D, Dense, Dropout, Flatten, concatenate, BatchNormalization
from keras.models import Model  # basic class for specifying and training a neural network
from keras import losses
from keras.callbacks import EarlyStopping
from keras import backend as K
import keras
import tensorflow as tf
from sortedcontainers import SortedSet

import core
import learner
from learner.pexp_mind import PExpMind
from core.board import Board
from core import optimized_minimax
import random

import time
import os

import cProfile

# Seeded NumPy generator (not referenced by any cell visible in this notebook).
random_state = np.random.RandomState(42)

# NOTE(review): presumably the bounds of the Q-value range; not referenced in
# the visible cells — confirm before relying on them.
MIN_Q = -1
MAX_Q = 1

# Re-execute the project modules so on-disk edits are picked up without a
# kernel restart.
import importlib
importlib.reload(learner.pexp_mind)
importlib.reload(core.board)
importlib.reload(core)
importlib.reload(core.optimized_minimax)

# reload() does not rebind names that were created with
# ``from module import name``; without these two lines PExpMind and Board
# would still refer to the classes from before the reload.
from learner.pexp_mind import PExpMind
from core.board import Board

In [None]:
def test_imports():
    """Import the project and its dependencies, then report the site-packages dirs.

    The imports happen inside the function so they are executed by whichever
    interpreter runs the call.

    Returns:
        The value of ``site.getsitepackages()`` in that interpreter.
    """
    import site
    import core
    import tensorflow
    from core.board import Board
    from learner.pexp_mind import PExpMind

    site_dirs = site.getsitepackages()
    return site_dirs

# Run test_imports() through a one-element RDD and collect what it returns.
# NOTE(review): `sc` is not defined in this notebook — presumably the
# SparkContext injected by the notebook environment; confirm.
sc.parallelize([1]).map(lambda x : test_imports()).collect()

In [None]:
def play_a_game(i):
    """Self-play one game and return the training data the mind recorded.

    A fresh PExpMind is built and loaded with the weights held in the
    ``q_model_bc`` / ``p_model_bc`` broadcasts. The board is pre-filled with a
    random number of random moves, then the mind plays both sides until
    ``make_move`` returns a truthy result.

    Args:
        i: training iteration index; selects the search budget through
            ``k_function(i)`` and ``max_iter_function(i)``.

    Returns:
        ``(train_vectors, train_p, train_q)`` as accumulated on the mind.
        If the random opening already finished the game, three empty lists
        are returned so callers can always unpack the result into three
        values (previously this path returned ``None``).
    """
    mind = PExpMind(size=SIZE, alpha=0.2, init=False, channels=CHANNELS)
    mind.value_est.set_weights(q_model_bc.value)
    mind.policy_est.set_weights(p_model_bc.value)

    round_board = Board(size=SIZE, win_chain_length=WIN_CHAIN_LENGTH)

    # randomize the board a bit
    for _ in range(random.randint(0, int((SIZE ** 2) / 5.0))):
        round_board.make_random_move()

    # Nothing to learn from a game that the random opening already ended.
    if round_board.game_over():
        return [], [], []

    current_player = round_board.player_to_move
    while True:
        result = mind.make_move(round_board,
                                as_player=current_player,
                                epsilon=0.1,
                                max_depth=25,
                                k=k_function(i),
                                max_iters=max_iter_function(i),
                                )
        print(round_board.pprint())
        # Sides alternate after every move.
        current_player = -current_player
        if result:
            break

    return mind.train_vectors, mind.train_p, mind.train_q

In [None]:

def max_iter_function(i):
    """Return the ``max_iters`` search budget for training iteration ``i``.

    The budget grows in steps as ``i`` increases and is capped at 30.
    """
    # the first iteration has to teach the model that most positions aren't game ending
    if i == 0:
        return 1

    # (exclusive upper bound on i, budget), checked in ascending order
    schedule = ((2, 5), (4, 10), (7, 15), (10, 20), (15, 25))
    for upper_bound, budget in schedule:
        if i < upper_bound:
            return budget
    return 30

def k_function(i):
    """Return the ``k`` passed to ``make_move``: SIZE squared, independent of ``i``."""
    size_squared = SIZE * SIZE
    return size_squared
    
def wait_until_exists(file_path, timeout=None, poll_interval=1):
    """Block until ``file_path`` exists, then confirm it is a regular file.

    Args:
        file_path: path to poll for.
        timeout: maximum number of seconds to wait. ``None`` (the default)
            waits indefinitely, which is the original behaviour.
        poll_interval: seconds to sleep between existence checks.

    Returns:
        True once the path exists and is a regular file.

    Raises:
        TimeoutError: ``timeout`` elapsed before the path appeared.
        ValueError: the path exists but is not a regular file.
    """
    deadline = None if timeout is None else time.monotonic() + timeout

    while not os.path.exists(file_path):
        if deadline is None:
            time.sleep(poll_interval)
            continue
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError("Timed out after %s seconds waiting for %s"
                               % (timeout, file_path))
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))

    if os.path.isfile(file_path):
        return True
    raise ValueError("%s isn't a file!" % file_path)

In [None]:
# Passed as `size` to Board and PExpMind; also the two leading axes when input
# vectors are reshaped to (SIZE, SIZE, CHANNELS).
SIZE = 7
# Passed to Board(win_chain_length=...).
WIN_CHAIN_LENGTH = 5
# Passed to PExpMind(channels=...) and used as the last axis of the reshape.
CHANNELS = 4
# `epochs` and `batch_size` arguments of the Keras fit() calls.
EPOCHS = 100
BATCH_SIZE = 32
# Number of games (and Spark partitions) per distributed_play() call.
GAME_BATCH = 1000

# distributed_play() writes the training arrays to VECTORS_NPZ and then
# creates the empty VECTORS_COMPLETE marker file.
VECTORS_NPZ = 'gomoku/models/waiting_vectors.npz'
VECTORS_COMPLETE = 'gomoku/models/waiting_vectors_complete'
# Paths the policy (P) and value (Q) Keras models are loaded from / saved to.
P_MODEL = "gomoku/models/waiting_p.model"
Q_MODEL = "gomoku/models/waiting_q.model"
# Marker file the self-play loop waits for before reloading the models, and
# removes afterwards.
# NOTE(review): presumably created by a separate training process — confirm.
MODEL_COMPLETE = 'gomoku/models/waiting_models_complete'

# `patience` argument of the EarlyStopping callbacks.
PATIENCE = 3

In [None]:


# Driver-side mind; both of its estimators are replaced just below by the
# models loaded from disk.
mind = PExpMind(size=SIZE, alpha=0.2, init=True, channels=CHANNELS)

mind.value_est = keras.models.load_model(Q_MODEL)
mind.policy_est = keras.models.load_model(P_MODEL)

# Module-level aliases used by the later training cells.
q_model = mind.value_est
p_model = mind.policy_est

# save base models
# (this writes the just-loaded models back to the same Q_MODEL / P_MODEL paths)
q_model.save(Q_MODEL)
p_model.save(P_MODEL)

# Broadcast the weights; play_a_game() reads them through `.value`.
q_model_bc = sc.broadcast(q_model.get_weights())
p_model_bc = sc.broadcast(p_model.get_weights())

def distributed_play(i):
    """Play GAME_BATCH self-play games on Spark and dump the training arrays.

    Each game runs ``play_a_game(i)`` in its own partition. The collected
    samples are split into a Q training set (every position) and a P training
    set (only positions whose q is non-zero), written to ``VECTORS_NPZ``, and
    then the empty ``VECTORS_COMPLETE`` marker file is created.

    Args:
        i: training iteration index. On iteration 0, any q for which
            ``is_result_q`` is false is replaced by 0 in the Q targets.
    """
    collected = sc.parallelize(zip(range(GAME_BATCH), range(GAME_BATCH))).partitionBy(GAME_BATCH, lambda x: x) \
                    .map(lambda x : play_a_game(i)).collect()

    all_vectors = []
    all_p = []
    all_q = []

    train_q_vectors = []
    train_p_vectors = []
    train_p = []
    train_q = []

    for game in collected:
        # play_a_game yields None when the random opening already ended the
        # game; unpacking that would raise TypeError, so skip it.
        if game is None:
            continue
        vectors, p, q = game
        all_vectors.extend(vectors)
        all_p.extend(p)
        all_q.extend(q)

    for vector, p, q in zip(all_vectors, all_p, all_q):
        train_q_vectors.append(vector)
        # NOTE(review): the ad-hoc cell further down calls
        # PExpNode.is_result_q for the same check — confirm which class is
        # the intended one.
        if optimized_minimax.PVSNode.is_result_q(q) or i > 0:
            train_q.append(q)
        else:
            train_q.append(0)

        # The policy set only keeps positions with a non-zero q.
        if abs(q) > 0:
            train_p_vectors.append(vector)
            train_p.append(p)

    print(train_q[:100])

    np.savez(VECTORS_NPZ,
             train_p_vectors=train_p_vectors,
             train_q_vectors=train_q_vectors,
             train_p=train_p,
             train_q=train_q)

    # Empty marker file, created only after the .npz has been written.
    with open(VECTORS_COMPLETE, 'w') as f:
        f.write('')


In [None]:
# The loop below runs i = start_at .. epochs - 1.
epochs = 5000
start_at = 15

for i in range(start_at, epochs):
    # Always true while start_at is 15.
    if i > 0:
        # Blocks until the MODEL_COMPLETE marker file exists, then rebroadcasts
        # the weights of the models stored at Q_MODEL / P_MODEL.
        if wait_until_exists(MODEL_COMPLETE):
            print("Models Completed!")
            q_model_bc = sc.broadcast(keras.models.load_model(Q_MODEL).get_weights())
            p_model_bc = sc.broadcast(keras.models.load_model(P_MODEL).get_weights())
            # Remove the marker so the next iteration waits for a new one.
            os.remove(MODEL_COMPLETE)
            
    distributed_play(i)
    
    # One more game in this process; its return value is discarded.
    play_a_game(i)
    
    # Every fifth iteration, reload the models into `mind` and save it.
    if i % 5 == 0:
        mind.value_est = keras.models.load_model(Q_MODEL)
        mind.policy_est = keras.models.load_model(P_MODEL)
        mind.save('gomoku/models/7_channel4_exp')

In [None]:
# Ad-hoc run: rebroadcast copies of the in-memory model weights and play 100
# games with iteration index 0, keeping the raw per-game results in `collected`.
q_model_bc = sc.broadcast(copy.deepcopy(q_model.get_weights()))
p_model_bc = sc.broadcast(copy.deepcopy(p_model.get_weights()))
collected = sc.parallelize(zip(range(100), range(100))).partitionBy(100, lambda x: x) \
                .map(lambda x : play_a_game(0)).collect()

In [None]:
# Flatten the per-game results in `collected` into Q and P training lists,
# using the same rules as distributed_play() with i = 0.
all_vectors = []
all_p = []
all_q = []

train_q_vectors = []
train_p_vectors = []
train_p = []
train_q = []

# Module-level `i` is also read by the later training cell
# (max_iter_function(i)).
i=0
for game in collected:
    # play_a_game yields None when the random opening already ended the game;
    # unpacking that would raise TypeError, so skip it.
    if game is None:
        continue
    vectors, p, q = game
    all_vectors.extend(vectors)
    all_p.extend(p)
    all_q.extend(q)

for vector, p, q in zip(all_vectors, all_p, all_q):
    train_q_vectors.append(vector)
    # NOTE(review): distributed_play() calls PVSNode.is_result_q for this same
    # check — confirm which class is the intended one.
    if optimized_minimax.PExpNode.is_result_q(q) or i > 0:
        train_q.append(q)
    else:
        train_q.append(0)

    # The policy set only keeps positions with a non-zero q.
    if abs(q) > 0:
        train_p_vectors.append(vector)
        train_p.append(p)

In [None]:
from collections import Counter
# Tally how often each distinct Q target occurs in train_q.
Counter(train_q)

In [None]:
# Train the value model on the current split.
# NOTE: fit_train_* / fit_valid_* are assigned only in a later cell of this
# notebook, so that cell has to be run before this one.
q_model.fit(x=fit_train_inputs,
            y=np.array(fit_train_q),
            shuffle=True,
            callbacks=[EarlyStopping(patience=PATIENCE)],
            # Validation targets are converted to an array, like the training targets.
            validation_data=(fit_valid_inputs, np.array(fit_valid_q)),
            batch_size=BATCH_SIZE,
            epochs=EPOCHS)


In [None]:
# Play one game in this process with iteration index 0; as the last expression
# of the cell, its return value is displayed.
play_a_game(0)

In [None]:
# Snapshot the current training arrays to disk.
# NOTE: train_inputs is assigned only in a later cell of this notebook, so
# that cell has to be run before this one.
np.savez('gomoku/models/7_20channel.npz', train_inputs=train_inputs, train_p=train_p, train_q=train_q)

In [None]:
from tensorflow.python.client import device_lib

def get_available_gpus():
    """Return the names of the local devices whose ``device_type`` is ``'GPU'``."""
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names

In [None]:
# Display the GPU device names reported for this process.
get_available_gpus()

In [None]:
# Build the model inputs, split them 90/10 into train/validation by position
# (no shuffling before the split), and fit the Q model and, conditionally,
# the P model.
# NOTE(review): `train_vectors` is not assigned in any cell visible in this
# notebook (the cells above build `train_q_vectors` / `train_p_vectors`) —
# confirm where it comes from and that it lines up with train_q / train_p.
train_inputs = []
for vector, whose_move in train_vectors:
    train_inputs.append(vector.reshape(SIZE, SIZE, CHANNELS))

train_inputs = np.array(train_inputs)

fraction = 0.9
# Index of the first validation sample; computed once so every slice agrees.
split_at = int(len(train_inputs) * fraction)

fit_train_inputs = train_inputs[:split_at]
fit_train_q = train_q[:split_at]
fit_train_p = np.array(train_p[:split_at]).reshape(-1, SIZE ** 2)

fit_valid_inputs = train_inputs[split_at:]
fit_valid_q = train_q[split_at:]
fit_valid_p = np.array(train_p[split_at:]).reshape(-1, SIZE ** 2)

if len(train_vectors) > 0:
    q_model.fit(x=fit_train_inputs,
                y=np.array(fit_train_q),
                shuffle=True,
                callbacks=[EarlyStopping(patience=PATIENCE)],
                # Validation targets are converted to an array, like the training targets.
                validation_data=(fit_valid_inputs, np.array(fit_valid_q)),
                batch_size=BATCH_SIZE,
                epochs=EPOCHS)
    # doesn't always need to train P
    # (`i` is the module-level iteration index set by an earlier cell)
    if max_iter_function(i) > 2:
        p_model.fit(x=fit_train_inputs,
                    y=np.array(fit_train_p),
                    shuffle=True,
                    callbacks=[EarlyStopping(patience=PATIENCE)],
                    validation_data=(fit_valid_inputs, fit_valid_p),
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS)