In [None]:
import importlib
import json
import multiprocessing as mp
import pickle
import random
from time import time

import numpy as np
import pandas as pd
import requests
from scipy.stats import norm
from sklearn.gaussian_process import GaussianProcessRegressor, kernels

import mc

In [None]:

def expand_single(root, n_simulations, use_bo=False, gpr=None, parallel=None):
    """
    Expand the search tree below ``root`` by one step.

    The candidate moves come from ``root.depth_first_search_gp()``. How the
    next candidate is chosen depends on the arguments:

    - ``use_bo`` and ``parallel``: batches are scored by
      ``parallel['gpr_worker']`` on ``parallel['pool']``; the moves of the
      best-scoring batch are then passed to ``parallel['mc_worker']``.
      This function itself does not attach a node to the tree on that path.
    - ``use_bo`` without ``parallel``: ``gpr.predict`` scores every candidate
      and the one with the most extreme confidence bound is expanded.
    - otherwise: a candidate is drawn uniformly at random.

    Parameters
    ----------
    root : tree node exposing ``depth_first_search_gp()``
    n_simulations : simulation count forwarded to ``Node.MC`` (sequential
        paths) or sent to the GPR workers as ``'N'`` (parallel path)
    use_bo : choose with the GPR model instead of at random
    gpr : fitted regressor with ``predict(X, return_std=True)``; required when
        ``use_bo`` is true and ``parallel`` is not given
    parallel : dict with the keys ``n_parallel``, ``n_gpr_samples``,
        ``reload``, ``pickle_path``, ``pool``, ``gpr_worker``, ``mc_worker``
        and ``n_simulations``

    Returns
    -------
    Parallel path: the list returned by mapping ``mc_worker`` over the pool.
    Sequential paths: the entry removed from the parent's
    ``possible_next_moves``, or ``None`` when there is no candidate left.

    Raises
    ------
    ValueError
        If ``use_bo`` is true but neither ``parallel`` nor ``gpr`` is given.
    """
    _, _, next_moves, next_move_ndx = root.depth_first_search_gp()

    if use_bo and parallel:
        return _expand_parallel(root, n_simulations, parallel)

    if use_bo and gpr is None:
        raise ValueError('use_bo=True requires a fitted gpr or a parallel configuration')

    # Nothing left to expand: report that instead of crashing in randint()/max().
    if not next_moves:
        return None

    if use_bo:
        new_move_ndx = _confidence_bound_choice(gpr, next_moves)
    else:
        new_move_ndx = random.randint(0, len(next_moves) - 1)

    move = next_moves.pop(new_move_ndx)
    parent = move.parent

    # Attach the chosen move as a real child and score it by simulation.
    child = mc.Node(move.game.copy(), move.x, move.y)
    parent.children.append(child)
    child.scores = child.MC(n_simulations)
    child.calculate_score()

    # Remove the move from its original spot in the parent's candidate list.
    return parent.possible_next_moves.pop(next_move_ndx[new_move_ndx])


def _expand_parallel(root, n_simulations, parallel):
    """Score candidate batches on the worker pool, then simulate the best batch."""
    gpr_url = 'http://localhost:8100/gpr?reload={}&gpr_path={}'.format(
        parallel['reload'], parallel['pickle_path'])
    jobs = [{'N': n_simulations,
             'n_gpr_samples': parallel['n_gpr_samples'],
             'n_parallel': parallel['n_parallel'],
             'root': root, 'id': worker_id,
             'gpr_url': gpr_url,
             'mpi_url': 'http://localhost:8080/mpi'}
            for worker_id in range(parallel['n_parallel'])]
    batch_results = parallel['pool'].map(parallel['gpr_worker'], jobs)

    # Each worker result is indexed as (moves, score); keep the first batch
    # that reaches the maximum score.
    batch_scores = [result[1] for result in batch_results]
    best_moves = batch_results[batch_scores.index(max(batch_scores))][0]

    return parallel['pool'].map(
        parallel['mc_worker'],
        [{'move': move, 'n_simulations': parallel['n_simulations']}
         for move in best_moves])


def _confidence_bound_choice(gpr, next_moves):
    """Return the index of the candidate with the most extreme +/-1.96 sigma bound."""
    next_embedding = [candidate.game_embedding for candidate in next_moves]
    predictions, sigma = gpr.predict(next_embedding, return_std=True)
    ucb = [p + 1.96 * s for p, s in zip(predictions, sigma)]
    lcb = [p - 1.96 * s for p, s in zip(predictions, sigma)]
    mx = max(ucb)
    mn = min(lcb)
    # Follow whichever bound is larger in magnitude.
    if np.abs(mx) > np.abs(mn):
        return ucb.index(mx)
    return lcb.index(mn)


## Game Play
Functions that play the opponent's move and expand nodes in the tree
- when repeating, start over with a clean slate of 'next moves'
- it doesn't help for this to build up 'possible next moves' indefinitely
- we can keep the (embedding, score) pairs though

In [None]:


def learning(master_game, n_children, n_simulations,
             n_expansions, use_bo, gpr, parallelization=None):
    """
    Play a random move for the other player and grow a new tree beneath it.

    - there's no guarantee that the other player's move is already in the
      tree, so a fresh root node is created at the position after that move
    - the root is expanded into ``n_children`` children, each scored with
      ``n_simulations`` simulations
    - ``expand_single`` is then called ``n_expansions`` times on the root

    Returns the root node of the new tree.
    """
    opp_x, opp_y = master_game.get_random_move()  # O moves at random
    print('Other player moves at random {} {}'.format(opp_x, opp_y))
    master_game.place_stone(opp_x, opp_y)

    tree = mc.Node(master_game, opp_x, opp_y)
    tree.expand(n_children)
    for child in tree.children:
        child.scores = child.MC(n_simulations)
        child.calculate_score()

    print('Expanding')
    for step in range(n_expansions):
        # progress marker on every fifth expansion
        if step % 5 == 0:
            print('  ' + str(step))
        expand_single(tree, n_simulations, use_bo, gpr, parallelization)

    return tree

def play_best_move(master_game, MCTS):
    """
    Play the move reported by ``MCTS.depth_first_search_move()`` on ``master_game``.

    Prints the chosen move with its win probability (rounded to 4 decimals),
    the move/capture counters and the board, then returns ``master_game``.
    """
    _, move_x, move_y, p_win = MCTS.depth_first_search_move()
    print('Playing best move {} {} with win probability {}'.format(move_x, move_y, round(p_win, 4)))

    master_game.place_stone(move_x, move_y)

    summary = "N Moves {}\n{} of player 1's pieces captured\n{} of player 2's pieces captured"
    print(summary.format(master_game.n_moves,
                         master_game.n_captured[1],
                         master_game.n_captured[2]))
    master_game.print_board()
    return master_game

In [None]:
# Bayesian Optimization approach
# for next expansion

def probability_integral_transform(embeddings, scores):
    """
    Replace scores by rank-based standard-normal quantiles.

    Rows with a negative score are dropped. The remaining rows are sorted by
    score in ascending order and the i-th of n rows (0-based) receives
    ``norm.ppf(i/n + 1/(2n))``.

    Parameters
    ----------
    embeddings : sequence with one embedding per score
    scores : sequence of numeric scores

    Returns
    -------
    pandas.DataFrame with the columns 'embedding', 'probability' and
    'standard_normal', sorted by 'probability'. The frame is empty (but still
    has all three columns) when no score is >= 0.
    """
    df = pd.DataFrame({'embedding': embeddings, 'probability': scores})
    # mergesort is stable, so tied scores keep their input order and the
    # quantile each of them receives is reproducible.
    df = df[df['probability'] >= 0].sort_values('probability', kind='mergesort')

    n = df.shape[0]
    if n == 0:
        # No usable rows: avoid dividing by zero below.
        df['standard_normal'] = np.empty(0)
        return df

    adj = 1 / (2 * n)
    # One vectorised ppf call over the mid-rank quantiles instead of n scalar calls.
    df['standard_normal'] = norm.ppf(np.arange(n) / n + adj)

    return df

def fit_BO(MCTS, saved_embeddings, saved_scores):
    """
    Fit a Gaussian-process regressor on the tree's data plus the saved history.

    The embeddings/scores returned by ``MCTS.depth_first_search_gp()`` are
    combined with ``saved_embeddings``/``saved_scores``, the scores are mapped
    to standard-normal quantiles by ``probability_integral_transform`` and a
    Matern + white-noise GPR is fitted on the result.

    Parameters
    ----------
    MCTS : tree root exposing ``depth_first_search_gp()``
    saved_embeddings, saved_scores : history from earlier turns (not modified)

    Returns
    -------
    (embeddings, scores, gpr) : the combined lists and the fitted regressor
    """
    tree_embeddings, tree_scores, next_moves, _ = MCTS.depth_first_search_gp()
    print("{} next moves this round.".format(len(next_moves)))

    # Build new lists instead of extending in place, so neither the lists
    # handed out by the tree nor the caller's history are mutated here.
    embeddings = list(tree_embeddings)
    embeddings.extend(saved_embeddings)
    scores = list(tree_scores)
    scores.extend(saved_scores)

    gpr = GaussianProcessRegressor(kernels.Matern() + kernels.WhiteKernel(), copy_X_train=False)
    start = time()
    df = probability_integral_transform(embeddings, scores)

    gpr.fit(list(df['embedding']), df['standard_normal'])
    print("Fitting GPR took {} seconds".format(round(time() - start, 4)))
    # The former predict() over the next moves was removed: its result was
    # discarded, and it raised when there were no next moves.
    print("{} total embeddings for GPR model".format(len(embeddings)))
    return embeddings, scores, gpr

## Initialize with no GPR and no parallelization

In [None]:
# Bootstrap turn: no GPR model exists yet, so the tree is expanded by random
# selection (use_bo=False, gpr=None) and no worker pool is passed to learning().
MasterGame = mc.GoGame(9)  # presumably a 9x9 board -- confirm against mc.GoGame
saved_scores = []        # scores carried over between turns for GPR fitting
saved_embeddings = []    # embeddings carried over between turns for GPR fitting
n_children = 7           # opponent random
n_simulations = 13       # forwarded to Node.MC() for every node that is scored
use_bo = False
n_expansions = 11        # if parallel, you don't need many
gpr = None
MCTS = learning(MasterGame, n_children, n_simulations, n_expansions, use_bo, gpr)
# play_best_move returns the game object it was given, so mg is MasterGame.
mg = play_best_move(MasterGame, MCTS)

In [None]:
# Shut down the pool left over from an earlier run of this cell, if any.
# On a fresh kernel `p` does not exist yet, so an unconditional p.close()
# raises NameError and breaks Restart & Run All.
if 'p' in globals():
    p.close()
p = mp.Pool(4)

# Settings read by expand_single() on its parallel path.
parallel = {'n_gpr_samples': 200,             # sent to every gpr_worker job
            'n_parallel': 4,                  # number of gpr_worker jobs per expansion
            'y_best': np.sqrt(2),             # not read by expand_single in this notebook
            'gpr_worker': mc.get_best_batch,  # mapped over the pool to score batches
            'mc_worker': mc.simulations,      # mapped over the pool to simulate moves
            'n_simulations': n_simulations,   # sent to every mc_worker job
            'reload': 'True',                 # interpolated into the GPR service URL
            'pickle_path': 'gpr.pkl',         # GPR pickle location, also put in the URL
            'pool': p}


In [None]:
# !! TODO: MPI is for player 2 only
# should incorporate different measure for player 1

# Main self-play loop. Every turn: the other player moves at random and a new
# tree is expanded (learning), then the best move found is played. On
# iterations 0 and 4 the GPR is refitted and pickled; on the other iterations
# the tree's embeddings/scores are only appended to the saved history.
# This cell needs `pickle` to be imported before it runs.
print('Simulating')
for iteration in range(6):
    print("Play Turn and Expand {}".format(iteration))
    
    MCTS = learning(MasterGame, n_children, n_simulations, n_expansions, use_bo, gpr, parallel)
    # 'reload' is interpolated into the GPR service URL by expand_single;
    # presumably it tells the service whether to re-read the pickle -- confirm.
    parallel['reload'] = 'False'

    print("Getting Best Move")
    mg = play_best_move(MasterGame, MCTS)
    if not iteration % 4:
        print("Fitting GPR")
        # fit_BO returns the combined (tree + saved) history, which becomes
        # the new saved history.
        saved_embeddings, saved_scores, gpr = fit_BO(MCTS, saved_embeddings, saved_scores)
        
        # Persist the model at the path that is also sent to the GPR service.
        with open(parallel['pickle_path'], 'wb') as m:
            pickle.dump(gpr, m)

        # From now on expansions are chosen with the model.
        use_bo = True
        parallel['reload'] = 'True'
    
    else:
        print("Not refitting. Just saving.")
        # NOTE(review): these names stay behind as kernel globals; the
        # batch-building cell further down reads `next_moves` from here.
        embeddings, scores, next_moves, next_move_ndx = MCTS.depth_first_search_gp()
        saved_embeddings += embeddings
        saved_scores += scores


In [None]:
# Shut the worker pool down once the training loop has finished.
p.close()
# NOTE(review): the training-loop cell above calls pickle.dump, so pickle must
# already be imported before that cell runs; importing it only here is too
# late on a fresh kernel.
import pickle


In [None]:
# Pick up edits made to mc.py without restarting the kernel. Python 3 has no
# builtin reload(); it lives in importlib.
importlib.reload(mc)

In [None]:
# Start a new 4-process pool for the GPR prediction experiments below
# (the previous pool was closed above).
p = mp.Pool(4)

In [None]:
#embeddings, scores, next_moves, next_move_ndx = MCTS.depth_first_search_gp()

In [None]:
# Build 4 work items for the pool. Each item carries 100 batches, and every
# batch holds the embeddings of 4 candidate moves drawn at random (with
# replacement) from next_moves.
batch_of_batches = []
for _work_item in range(4):
    batches = [
        [next_moves[random.randint(0, len(next_moves)-1)].game_embedding
         for _slot in range(4)]
        for _draw in range(100)
    ]
    # hand the work item its own list; `batches` itself is reused by a later cell
    batch_of_batches.append({'gpr_pickle': 'gpr.pkl', 'batches': list(batches)})
    

In [None]:
# Time one parallel prediction pass: every pool task receives one
# {'gpr_pickle', 'batches'} work item built in the previous cell.
start = time()
results = p.map(mc.gpr_predict, batch_of_batches)
print(time()-start)  # elapsed wall-clock seconds

In [None]:
# Sanity check: length of the first element of each worker's result
# (presumably one entry per batch -- confirm against mc.gpr_predict).
for r in results:
    print(len(r[0]))

In [None]:
url = 'http://localhost:8100/gpr'
# Number of batches that will be serialized and posted. string_batches holds
# exactly one string per entry of `batches`, but it is only built in the next
# cell, so referencing it here raised NameError on a top-to-bottom run.
len(batches)

In [None]:
# Serialize every batch for the HTTP service: values of one embedding are
# joined with ',', the embeddings of one batch with ';'.
string_batches = [
    ';'.join(','.join(str(value) for value in vector) for vector in one_batch)
    for one_batch in batches
]


In [None]:
# Time one round trip to the GPR service; the batches are sent '|'-separated.
start = time()
payload = json.dumps({'batches': '|'.join(string_batches)})
http_response = requests.post(url=url, data=payload)
# Stop here with the HTTP status on a 4xx/5xx answer instead of failing later
# with a confusing JSON decode error.
http_response.raise_for_status()
response = json.loads(http_response.content)
print(time()-start)

In [None]:
# Decode the first '|'-separated entry of response['covariances']: rows are
# separated by ';' and the values within a row by ','
# (presumably one covariance matrix per batch -- confirm with the service).
cv = response['covariances'].split('|')[0].split(';')
[[float(x) for x in r.split(',')] for r in cv]


In [None]:
# Display the pool object (quick check that `p` is still defined).
p