In [1]:
from collections import defaultdict
from itertools import repeat
from multiprocessing import Pool

import numpy as np
import pandas as pd

from mcts_sim_functions import simulate_mcts, simulate_mcts_gp

import matplotlib.pyplot as plt
from matplotlib import pylab
pylab.rcParams['figure.figsize'] = (10.0, 8.0)

ModuleNotFoundError: No module named 'mcts_sim_functions'

In [None]:
# TODO: replace the placeholder with your own A-number (one letter followed by digits).
CWID = 'A12345678'

# Personal simulation seed: the ID with its first character dropped, read as an integer.
simul_seed = int(CWID[1:])

# Explore the Exploration

$$v=C\times \sqrt{\frac{\ln(\text{parent}.N+\alpha)}{\text{node}.N+\alpha}}$$

Assume $C=1$ and $\alpha=1$.

$parent.N=0, node.N=0 \Rightarrow v=0$

$parent.N=1, node.N=0 \Rightarrow v \approx 0.83$

$parent.N=2, node.N=0 \Rightarrow v \approx 1.05$

$parent.N=1, node.N=1 \Rightarrow v \approx 0.59$


In [None]:
def ucb_explore(child_N, parent_N, C = 1.4, alpha = 1):
    """Exploration term: C * sqrt(ln(parent_N + alpha) / (child_N + alpha)).

    Args:
        child_N: visit count of the node being scored.
        parent_N: visit count of its parent.
        C: multiplier applied to the square-root term.
        alpha: offset added to both counts before the log and the division.

    Returns:
        The exploration value. The arithmetic is done with NumPy, so array
        arguments are handled element-wise.
    """
    log_parent = np.log(parent_N + alpha)
    shifted_child = child_N + alpha
    return C * np.sqrt(log_parent / shifted_child)

In [None]:
# Show floats in displayed DataFrames with three digits after the decimal point.
pd.options.display.precision = 3

In [None]:
# Explore value (C=1) for every visit-count pair with 0 <= childN <= parentN <= 4.
parent_Ns = range(5)

# Collect the rows first and build the frame once. Growing a DataFrame with
# df.loc[len(df.index)] = ... re-copies the frame on every row and leaves the
# column dtypes to whatever row-wise enlargement infers; building it in one go
# gives integer count columns and a float `v` column.
records = [
    (parent_N, child_N, ucb_explore(child_N, parent_N, C=1))
    for parent_N in parent_Ns
    for child_N in range(parent_N + 1)
]
df = pd.DataFrame(records, columns=['parentN', 'childN', 'v'])

In [None]:
# Explore value for each (parentN, childN) pair built in the previous cell.
df

## Understanding the Log Function

In [None]:
max_parent_N = 5000

# Sample N = 1, 101, 201, ... (below max_parent_N) once and reuse it for both axes.
sampled_N = range(1, max_parent_N, 100)

_, ax = plt.subplots()
ax.plot(sampled_N, np.log(sampled_N), linewidth=3, c='k')
ax.set_ylabel("ln(N)")
ax.set_xlabel("N")

In [None]:
# Explore value (C=1) for a child visited once while the parent count grows
# by powers of ten: 10**0 .. 10**4.
parent_N_exps = range(5)

child_N = 1

# Collect the rows and build the frame once instead of enlarging it row by row
# with df.loc[len(df.index)] = ..., which re-copies the frame each time and
# leaves the column dtypes to enlargement inference.
records = []
for parent_N_exp in parent_N_exps:
    parent_N = np.power(10, parent_N_exp)
    records.append((parent_N, child_N, ucb_explore(child_N, parent_N, C=1)))

df = pd.DataFrame(records, columns=['parentN', 'childN', 'v'])

In [None]:
# Explore value at childN = 1 as parentN grows by powers of ten (previous cell).
df

In [None]:
child_Ns = [0, 1, 2]

max_parent_N = 5000

# The same parent-count samples (1, 101, 201, ...) are used for every panel.
sampled_parent_N = range(1, max_parent_N, 100)

_, axs = plt.subplots(nrows=1, ncols=len(child_Ns), figsize=(15, 3.3))

# One panel per fixed child count: explore value (C=1) against the parent count.
for panel, n_child in zip(axs, child_Ns):
    explore_vals = [ucb_explore(n_child, n_parent, C=1) for n_parent in sampled_parent_N]
    panel.plot(sampled_parent_N, explore_vals, linewidth=3, c='k')
    panel.set_ylabel("Explore value")
    panel.set_xlabel("Parent N")
    panel.set_title("Child N = {}".format(n_child))

## Parent N versus Child N

In [None]:
# Grid resolution and extents: x1 spans [0, 10], x2 spans [0, 20], 200 samples each.
nx1 = nx2 = 200
x1_min = x2_min = 0
x1_max = 10
x2_max = 20

x1_axis = np.linspace(x1_min, x1_max, nx1)
x2_axis = np.linspace(x2_min, x2_max, nx2)
xx1, xx2 = np.meshgrid(x1_axis, x2_axis)

# Flatten the grid into an (nx1 * nx2, 2) array with one (x1, x2) point per row.
D = np.column_stack((xx1.ravel(), xx2.ravel()))

In [None]:
Cs = [0.5, 1, 2]

_, axs = plt.subplots(nrows=1, ncols=len(Cs), figsize=(10, 3.3))

# The y axis is the parent count in every panel (xx2 is passed as parent_N);
# labelling only the leftmost panel keeps the narrow side-by-side plots uncluttered.
axs[0].set_ylabel("Parent N")

for ax, C in zip(axs, Cs):

    # ucb_explore only uses NumPy arithmetic, so the whole grid is evaluated in
    # one element-wise call (xx1 = child count, xx2 = parent count) instead of
    # one Python call per grid point; the result already has the grid's shape.
    v = ucb_explore(xx1, xx2, C=C)

    # Contour levels: v = 0.25 (green), 0.5 (blue), 1 (red).
    ax.contour(xx1, xx2, v, [0.25, 0.5, 1], linewidths=3., colors=['g', 'b', 'r'])
    ax.set_xlabel("Child N")
    ax.set_title("C= {}".format(C))

In [None]:
# Min parent.N needed to match a target v
def min_parentN(target_v, child_N, C, alpha=1):
    """Parent count at which the explore value equals target_v.

    Solves target_v = C * sqrt(ln(parent_N + alpha) / (child_N + alpha))
    for parent_N. C is divided by, so it must be non-zero.
    """
    log_term = (target_v**2 * (child_N + alpha)) / (C**2)
    return np.exp(log_term) - alpha

In [None]:
target_vs = [0.25, 0.5, 0.75, 1]
child_Ns = range(6)
Cs = [0.5, 1, 2, 3]

_, axs = plt.subplots(nrows=len(target_vs), ncols=len(Cs), figsize=(7*len(Cs), 7*len(target_vs)))

# Rows of the grid are target values, columns are values of C.
for row_axes, target_v in zip(axs, target_vs):

    # The y label of the first panel in each row doubles as the row caption.
    row_axes[0].set_ylabel("Target v = {}".format(target_v))

    for ax, C in zip(row_axes, Cs):
        # Parent count needed to reach target_v, for each child count.
        needed_parent_N = [min_parentN(target_v, n_child, C) for n_child in child_Ns]
        ax.plot(child_Ns, needed_parent_N, linewidth=2)
        ax.set_xlabel("Child N")
        ax.set_title("C={}".format(C))


# Tree Stats

In [None]:
def ave_table(results, key):
    """Average one measure over the collected runs.

    Args:
        results: nested mapping results[c][max_iter][measure] -> sequence of values.
        key: the measure to average.

    Returns:
        A nested dict {c: {max_iter: np.mean(results[c][max_iter][key])}}
        with the same key order as `results`.
    """
    return {
        c: {max_iter: np.mean(measures[key]) for max_iter, measures in by_iter.items()}
        for c, by_iter in results.items()
    }

In [None]:
def join_results(mp_res):
    """Merge per-run result dicts into one nested dict of lists.

    Each element of `mp_res` is a three-level mapping res[k1][k2][k3] -> value.
    The merged mapping has the same keys, and merged[k1][k2][k3] is the list of
    that value across the runs, in `mp_res` order.

    Args:
        mp_res: iterable of per-run result mappings (may be empty).

    Returns:
        A dict of dicts whose innermost level is a defaultdict(list).
        An empty `mp_res` gives {}. Keys that appear in only some runs are kept
        (with a shorter list) instead of raising KeyError.
    """
    full_results = {}

    for res in mp_res:
        for k1, by_k2 in res.items():
            # Create each level on first sight, so the layout is not dictated by
            # the first run alone.
            merged_k1 = full_results.setdefault(k1, {})
            for k2, by_k3 in by_k2.items():
                merged_k2 = merged_k1.setdefault(k2, defaultdict(list))
                for k3, value in by_k3.items():
                    merged_k2[k3].append(value)

    return full_results

In [None]:
def print_results(results):
    """Print one table of averages per measure.

    Args:
        results: nested mapping results[C][max_iter][measure] -> list of values,
            e.g. the output of join_results. Must be non-empty.

    For every measure found under the first (C, max_iter) entry, prints the
    measure name, then a table of its means with one row per max_iter and one
    column per C, then a blank line.
    """
    _Cs = list(results)
    _max_iters = list(results[_Cs[0]])
    measures = list(results[_Cs[0]][_max_iters[0]])

    col_labels = ['C={}'.format(i) for i in _Cs]
    row_labels = ['Max Iter={}'.format(i) for i in _max_iters]

    for k in measures:
        res_table = pd.DataFrame(ave_table(results, k))
        # Labels are assigned by position: this relies on the frame's columns
        # and rows coming out in the same order as _Cs and _max_iters.
        res_table.columns = col_labels
        res_table.index = row_labels
        print(k)
        print(res_table)
        print()

In [None]:
# Parameters used for all simulations below.
max_iters = [100, 500, 1000]
# Note: rebinds the `Cs` name that the plotting cells above also assign.
Cs = [0, 0.5, 1, 2]
# Twenty seeds: 0..18 plus the personal seed derived from CWID.
seeds = [*range(19), simul_seed]

## TTT

In [None]:
%%time
n_processes = None  # Uses all available.

# Cleared up front so a failed run cannot leave an earlier cell's results in mp_res.
mp_res = None

# One simulate_mcts("ttt", Cs, seed, max_iters) call per seed.
tasks = zip(repeat("ttt"), repeat(Cs), seeds, repeat(max_iters))

with Pool(n_processes) as pool:
    mp_res = pool.starmap(simulate_mcts, tasks)

In [None]:
# Merge the per-seed "ttt" simulate_mcts outputs currently held in mp_res
# and print the mean of every measure.
results = join_results(mp_res)
print_results(results)

## C4

In [None]:
%%time
n_processes = None  # Uses all available.

# Cleared up front so a failed run cannot leave an earlier cell's results in mp_res.
mp_res = None

# One simulate_mcts("c4", Cs, seed, max_iters) call per seed.
tasks = zip(repeat("c4"), repeat(Cs), seeds, repeat(max_iters))

with Pool(n_processes) as pool:
    mp_res = pool.starmap(simulate_mcts, tasks)

In [None]:
# Merge the per-seed "c4" simulate_mcts outputs currently held in mp_res
# and print the mean of every measure.
results = join_results(mp_res)
print_results(results)

# Game Play

## TTT

In [None]:
%%time

n_processes = None  # Uses all available.

# Cleared up front so a failed run cannot leave an earlier cell's results in mp_res.
mp_res = None

# presumably xp / op name the strategies of the two players - confirm against simulate_mcts_gp
xp = 'mcts'
op = 'random'

# One simulate_mcts_gp call per seed; every other argument is the same for all tasks.
tasks = zip(repeat("ttt"), repeat(xp), repeat(op), repeat(Cs), seeds, repeat(max_iters))

with Pool(n_processes) as pool:
    mp_res = pool.starmap(simulate_mcts_gp, tasks)

In [None]:
# Merge the per-seed outputs of the "ttt" run with xp='mcts', op='random'
# (whatever mp_res currently holds) and print the mean of every measure.
results = join_results(mp_res)
print_results(results)

In [None]:
%%time

n_processes = None  # Uses all available.

# Cleared up front so a failed run cannot leave an earlier cell's results in mp_res.
mp_res = None

# presumably xp / op name the strategies of the two players - confirm against simulate_mcts_gp
xp = 'mcts'
op = 'alpha_beta'

# One simulate_mcts_gp call per seed; every other argument is the same for all tasks.
tasks = zip(repeat("ttt"), repeat(xp), repeat(op), repeat(Cs), seeds, repeat(max_iters))

with Pool(n_processes) as pool:
    mp_res = pool.starmap(simulate_mcts_gp, tasks)


In [None]:
# Merge the per-seed outputs of the "ttt" run with xp='mcts', op='alpha_beta'
# (whatever mp_res currently holds) and print the mean of every measure.
results = join_results(mp_res)
print_results(results)

In [None]:
%%time

n_processes = None  # Uses all available.

# Cleared up front so a failed run cannot leave an earlier cell's results in mp_res.
mp_res = None

# Same matchup as the previous run with the two roles swapped.
xp = 'alpha_beta'
op = 'mcts'

# One simulate_mcts_gp call per seed; every other argument is the same for all tasks.
tasks = zip(repeat("ttt"), repeat(xp), repeat(op), repeat(Cs), seeds, repeat(max_iters))

with Pool(n_processes) as pool:
    mp_res = pool.starmap(simulate_mcts_gp, tasks)


In [None]:
# Merge the per-seed outputs of the "ttt" run with xp='alpha_beta', op='mcts'
# (whatever mp_res currently holds) and print the mean of every measure.
results = join_results(mp_res)
print_results(results)

In [None]:
%%time

n_processes = None  # Uses all available.

# Cleared up front so a failed run cannot leave an earlier cell's results in mp_res.
mp_res = None

xp = 'mcts'
op = 'mcts_base'

# One simulate_mcts_gp call per seed. The trailing dict is the same for every
# task; presumably it holds the fixed settings of the 'mcts_base' player -
# confirm against simulate_mcts_gp.
tasks = zip(repeat("ttt"), repeat(xp), repeat(op), repeat(Cs), seeds, repeat(max_iters),
            repeat({'C':0.5, 'max_iter':500}))

with Pool(n_processes) as pool:
    mp_res = pool.starmap(simulate_mcts_gp, tasks)

In [None]:
# Merge the per-seed outputs of the "ttt" run with xp='mcts', op='mcts_base'
# (whatever mp_res currently holds) and print the mean of every measure.
results = join_results(mp_res)
print_results(results)

In [None]:
%%time

n_processes = None  # Uses all available.

# Cleared up front so a failed run cannot leave an earlier cell's results in mp_res.
mp_res = None

# Same matchup as the previous run with the two roles swapped.
xp = 'mcts_base'
op = 'mcts'

# One simulate_mcts_gp call per seed. The trailing dict is the same for every
# task; presumably it holds the fixed settings of the 'mcts_base' player -
# confirm against simulate_mcts_gp.
tasks = zip(repeat("ttt"), repeat(xp), repeat(op), repeat(Cs), seeds, repeat(max_iters),
            repeat({'C':0.5, 'max_iter':500}))

with Pool(n_processes) as pool:
    mp_res = pool.starmap(simulate_mcts_gp, tasks)

In [None]:
# Merge the per-seed outputs of the "ttt" run with xp='mcts_base', op='mcts'
# (whatever mp_res currently holds) and print the mean of every measure.
results = join_results(mp_res)
print_results(results)

## C4

In [None]:
%%time

n_processes = None  # Uses all available.

# Cleared up front so a failed run cannot leave an earlier cell's results in mp_res.
mp_res = None

# presumably xp / op name the strategies of the two players - confirm against simulate_mcts_gp
xp = 'mcts'
op = 'random'

# One simulate_mcts_gp call per seed; every other argument is the same for all tasks.
tasks = zip(repeat("c4"), repeat(xp), repeat(op), repeat(Cs), seeds, repeat(max_iters))

with Pool(n_processes) as pool:
    mp_res = pool.starmap(simulate_mcts_gp, tasks)

In [None]:
# Merge the per-seed outputs of the "c4" run with xp='mcts', op='random'
# (whatever mp_res currently holds) and print the mean of every measure.
results = join_results(mp_res)
print_results(results)

In [None]:
%%time

n_processes = None  # Uses all available.

# Cleared up front so a failed run cannot leave an earlier cell's results in mp_res.
mp_res = None

xp = 'mcts'
op = 'mcts_base'

# One simulate_mcts_gp call per seed. The trailing dict is the same for every
# task; presumably it holds the fixed settings of the 'mcts_base' player -
# confirm against simulate_mcts_gp.
tasks = zip(repeat("c4"), repeat(xp), repeat(op), repeat(Cs), seeds, repeat(max_iters),
            repeat({'C':0.5, 'max_iter':500}))

with Pool(n_processes) as pool:
    mp_res = pool.starmap(simulate_mcts_gp, tasks)

In [None]:
# Merge the per-seed outputs of the "c4" run with xp='mcts', op='mcts_base'
# (whatever mp_res currently holds) and print the mean of every measure.
results = join_results(mp_res)
print_results(results)

In [None]:
%%time

n_processes = None  # Uses all available.

# Cleared up front so a failed run cannot leave an earlier cell's results in mp_res.
mp_res = None

# Same matchup as the previous run with the two roles swapped.
xp = 'mcts_base'
op = 'mcts'

# One simulate_mcts_gp call per seed. The trailing dict is the same for every
# task; presumably it holds the fixed settings of the 'mcts_base' player -
# confirm against simulate_mcts_gp.
tasks = zip(repeat("c4"), repeat(xp), repeat(op), repeat(Cs), seeds, repeat(max_iters),
            repeat({'C':0.5, 'max_iter':500}))

with Pool(n_processes) as pool:
    mp_res = pool.starmap(simulate_mcts_gp, tasks)

In [None]:
# Merge the per-seed outputs of the "c4" run with xp='mcts_base', op='mcts'
# (whatever mp_res currently holds) and print the mean of every measure.
results = join_results(mp_res)
print_results(results)