In [1]:
def D(x, y):
    """Offset between two flattened square indices for a step of (x, y).

    A square at row r, column c of an 8x8 plane has flattened index r * 8 + c,
    so moving x columns and y rows changes the index by x + y * 8.
    """
    return x + y * 8


# Unit steps (dx, dy) for the eight sliding directions, in the order the
# offsets are emitted for each distance.
_QUEEN_DIRECTIONS = [
    ( 0,  1), ( 1,  0),
    ( 0, -1), (-1,  0),
    ( 1,  1), (-1,  1),
    ( 1, -1), (-1, -1),
]

# Sliding moves of 1..7 squares in each direction (7 * 8 = 56 offsets).
# A generator passed to list() is used (instead of a list comprehension or
# sum(..., [])) so that no loop variable leaks into the notebook namespace and
# the flattening stays linear-time.
queen_moves = list(
    D(dx * distance, dy * distance)
    for distance in range(1, 8)
    for dx, dy in _QUEEN_DIRECTIONS
)

# The eight knight jumps.
knight_moves = [
    D( 1, 2), D( 1, -2),
    D(-1, 2), D(-1, -2),
    D( 2, 1), D( 2, -1),
    D(-2, 1), D(-2, -1),
]

# One output layer per move offset, in ascending order of flattened difference.
all_layers = sorted(queen_moves + knight_moves)
assert len(all_layers) == 64

# Maps a flattened index difference (put_down - pick_up) to its layer index.
# NOTE: the flattened differences are not all distinct -- D(6, 0) == D(-2, 1) == 6,
# D(7, 0) == D(-1, 1) == 7, and likewise for -6 and -7 -- so this dict has 60 keys
# and the first layer of each duplicated pair is never selected. A (layer, pick_up)
# pair still determines the move, because the difference and the pick-up square
# together fix the put-down square.
difference_to_layer_index = {diff: layer for layer, diff in enumerate(all_layers)}

def one_hot_to_large(move):
    """Re-encode a pick-up/put-down move as a single one-hot (layer, 8, 8) array.

    `move` must have shape (2, 8, 8): plane 0 marks the pick-up square and
    plane 1 the put-down square (each located with np.argmax over the
    flattened plane). The result has shape (len(all_layers), 8, 8) and is zero
    everywhere except for a 1 at [layer, pick-up square], where `layer` is the
    entry of difference_to_layer_index for (put_down - pick_up).

    Raises AssertionError on a wrong input shape and KeyError when the
    difference is not one of the known move offsets.
    """
    assert move.shape == (2, 8, 8)
    source_square, target_square = (np.argmax(plane) for plane in move)
    layer = difference_to_layer_index[target_square - source_square]
    encoded = np.zeros((len(all_layers), 64))
    encoded[layer, source_square] = 1
    return encoded.reshape((len(all_layers), 8, 8))

In [2]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import tensorflow as tf
import glob, random, time, os, zlib
import model_dual as model

# Size of the last axis of each feature array (see the reshape in load_chunk).
# NOTE(review): 6 + 6 + 1 presumably means six piece planes per side plus one
# extra plane -- confirm against the data generator.
FEATURE_COUNT = 6 + 6 + 1
# Number of samples drawn from the test files to form the validation set.
CROSS_VAL_SIZE = 3000
# Number of leading samples of each training chunk kept for in-sample loss.
IN_SAMPLE_SIZE = 1000
# Number of samples per training minibatch.
MINIBATCH_SIZE = 512
# Directory holding the compressed features_NNN.z / moves_NNN.z files.
DATA_ROOT = "final_output/"
# Number of training chunks; load_next_chunk wraps around after this many.
TOTAL_CHUNK_COUNT = 22

def to_hms(x):
    """Format a duration of `x` seconds as "H:MM:SS".

    `x` is truncated to an int first. Hours are space-padded to width 2 and
    are not capped at 24; minutes and seconds are zero-padded.
    """
    total_minutes, seconds = divmod(int(x), 60)
    hours, minutes = divmod(total_minutes, 60)
    return "%2i:%02i:%02i" % (hours, minutes, seconds)

# For some reason some Python versions basically explode on .decode("zlib") for large strings.
# We can bypass this by decompressing the data in blocks ourselves and joining the results.
def stream_decompress(s, block_size=2**23):
    """Decompress a zlib-compressed byte string by feeding it to zlib in blocks.

    Args:
        s: the complete compressed data as a byte string.
        block_size: number of compressed bytes handed to the decompressor per
            call (default 2**23, i.e. 8 MiB).

    Returns:
        The decompressed data as a single byte string.

    Raises:
        zlib.error: if `s` is not valid zlib data.
    """
    decomp = zlib.decompressobj()
    pieces = [
        decomp.decompress(s[start:start + block_size])
        for start in range(0, len(s), block_size)
    ]
    # Collect whatever the decompressor is still buffering.
    pieces.append(decomp.flush())
    # Join with a bytes separator: identical to "" on Python 2, and required on
    # Python 3 where the decompressed pieces are bytes rather than str.
    return b"".join(pieces)

def load_chunk(features, moves):
    """Load one chunk of samples from a pair of zlib-compressed files.

    Args:
        features: path of the compressed feature file.
        moves: path of the compressed move file.

    Returns:
        A dict with two int8 arrays of equal length:
        "features" with shape (N, 8, 8, FEATURE_COUNT) and
        "moves" with shape (N, 2, 8, 8).

    Raises:
        AssertionError: if the two files hold a different number of samples.
    """
    def load_flat_array(path, shape):
        # The file holds raw zlib data, so it must be read in binary mode;
        # text mode corrupts it on platforms that translate newlines and
        # fails outright on Python 3.
        with open(path, "rb") as f:
            compressed = f.read()
        raw = stream_decompress(compressed)
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        # It returns a read-only array backed by `raw` instead of a copy,
        # which also avoids holding two copies of the chunk in memory.
        return np.frombuffer(raw, dtype=np.int8).reshape(shape)

    feature_array = load_flat_array(features, (-1, 8, 8, FEATURE_COUNT))
    move_array = load_flat_array(moves, (-1, 2, 8, 8))
    assert len(feature_array) == len(move_array)
    return {"features": feature_array, "moves": move_array}

# Views into the extremely large dataset.
# Index of the chunk file that the next load_next_chunk() call will read.
next_chunk_index = 0
# The currently loaded training chunk (the dict returned by load_chunk), or
# None before the first load.
chunk = None
# The first IN_SAMPLE_SIZE samples of the current chunk, used to measure
# in-sample loss; None before the first load.
in_sample_test = None

def load_next_chunk():
    """Replace the module-level `chunk` and `in_sample_test` with the next chunk.

    Reads features_NNN.z / moves_NNN.z for the current `next_chunk_index` from
    DATA_ROOT, advances the index (wrapping after TOTAL_CHUNK_COUNT), and
    rebuilds `in_sample_test` from the first IN_SAMPLE_SIZE samples of the new
    chunk, with the moves re-encoded by one_hot_to_large. Prints the chunk
    index, the load time in seconds, and the number of samples.
    """
    global next_chunk_index, chunk, in_sample_test
    print("    >>> Loading chunk: %i" % next_chunk_index)
    # Drop the references to the previous chunk FIRST so its memory can be
    # reclaimed before the new chunk is read; this is necessary to avoid
    # running out of memory. Rebinding to None (rather than `del`) keeps both
    # globals defined even if the load below raises, so a retry does not fail
    # with a NameError.
    chunk = None
    in_sample_test = None
    start = time.time()
    chunk = load_chunk(
        os.path.join(DATA_ROOT, "features_%03i.z" % next_chunk_index),
        os.path.join(DATA_ROOT, "moves_%03i.z" % next_chunk_index),
    )
    next_chunk_index = (next_chunk_index + 1) % TOTAL_CHUNK_COUNT
    in_sample_test = {
        "features": chunk["features"][:IN_SAMPLE_SIZE],
        # A list (not a lazy map object) so it can be reused on every evaluation.
        "moves": [one_hot_to_large(move) for move in chunk["moves"][:IN_SAMPLE_SIZE]],
    }
    stop = time.time()
    print("    >>> (In %f) Samples: %i" % (stop - start, len(chunk["features"])))

def get_random_subset(samples, n):
    """Draw `n` distinct samples at random from a {"features", "moves"} dict.

    Returns a dict with the same two keys whose values are lists of length
    `n`: the selected feature arrays, and the selected moves re-encoded with
    one_hot_to_large. Both lists follow the same random order.
    random.sample raises ValueError when `n` exceeds the number of samples.
    """
    feature_rows = samples["features"]
    # xrange keeps the index population lazy; there can be millions of samples.
    picked = random.sample(xrange(len(feature_rows)), n)
    chosen_features = [feature_rows[k] for k in picked]
    move_rows = samples["moves"]
    chosen_moves = [one_hot_to_large(move_rows[k]) for k in picked]
    return {"features": chosen_features, "moves": chosen_moves}

In [3]:
# Load the first training chunk, then build the validation set: a random
# CROSS_VAL_SIZE-sample subset of the held-out test files.
load_next_chunk()
test_features_path = os.path.join(DATA_ROOT, "test_features.z")
test_moves_path = os.path.join(DATA_ROOT, "test_moves.z")
cross_val = get_random_subset(
    load_chunk(test_features_path, test_moves_path),
    CROSS_VAL_SIZE,
)

    >>> Loading chunk: 0
    >>> (In 10.340097) Samples: 7840136


In [4]:
# Build the policy network and a session to run it in.
net = model.ChessPolicyNet("policy/")
print("Total network parameters: %s" % (net.total_parameters,))
sess = tf.InteractiveSession()
# tf.initialize_all_variables() is deprecated; TensorFlow's own warning names
# tf.global_variables_initializer() as the replacement.
sess.run(tf.global_variables_initializer())
# Number of minibatch updates performed so far (drives the learning-rate schedule).
total_training_steps = 0
# (training step, loss) pairs recorded at each evaluation, for plotting.
loss_plot = []
in_sample_loss_plot = []

Total network parameters: 2972352
Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [5]:
# Shape of the network's output tensor ...
print(net.final_output.shape)
# ... and of the placeholder that receives the desired output.
print(net.desired_output_ph.shape)

(?, 64, 64)
(?, 64, 8, 8)


In [5]:
# Number of snapshots written so far; used to number the snapshot files.
model_save_counter = 0

def save_model():
    """Write a numbered snapshot of `net` via model.save_model.

    Increments model_save_counter and saves to
    models/policy-10x128-model-NNN.npy, where NNN is the new counter value.
    The notebook's session is assigned to model.sess before saving.
    """
    global model_save_counter
    model_save_counter = model_save_counter + 1
    snapshot_path = "models/policy-10x128-model-%03i.npy" % model_save_counter
    model.sess = sess
    model.save_model(net, snapshot_path)

In [None]:
# Seconds spent inside net.train so far.
total_work = 0.0
# Wall-clock reference point for the elapsed time shown in the log.
start_time = time.time()
# Lowest validation loss seen so far.
best_loss = float("inf")

def lr_schedule(step):
    """Learning rate after `step` training steps: 0.005, halved every 150,000 steps."""
    halvings = step / 15e4
    return 0.005 * 0.5 ** halvings

for overall_step in range(10000):
    # Evaluate on the in-sample and validation sets before each burst of training.
    lr = lr_schedule(total_training_steps)
    elapsed = time.time() - start_time
    in_sample_loss = net.get_loss(in_sample_test)
    loss = net.get_loss(cross_val)
    # Wrap the log line in ANSI red when the validation loss beats the best so far.
    color_pair = ("\x1b[31m", "\x1b[0m") if loss < best_loss else ("", "")
    message = "%s%6i [%s - %s] Loss: %.6f  In-sample loss: %.6f  Accuracy: %.3f  lr = %f%s" % (
        color_pair[0],
        total_training_steps,
        to_hms(elapsed),
        to_hms(total_work),
        loss,
        in_sample_loss,
        net.get_accuracy(cross_val) * 100,
        lr,
        color_pair[1],
    )
    print(message)
    # Append the same line to the persistent training log.
    with open("/home/snp/chess_training_log", "a+") as f:
        f.write(message + "\n")
    loss_plot.append((total_training_steps, loss))
    in_sample_loss_plot.append((total_training_steps, in_sample_loss))
    if loss < best_loss:
        best_loss = loss

    # 500 minibatch updates per round; only the time inside net.train counts as work.
    for _ in range(500):
        minibatch = get_random_subset(chunk, MINIBATCH_SIZE)
        working = time.time()
        net.train(minibatch, lr)
        total_work += time.time() - working
        # Try really hard to not keep any views around!
        del minibatch
        total_training_steps += 1

    rounds_done = overall_step + 1
    # Periodically swap out the data for fresh training data.
    if rounds_done % 5 == 0:
        load_next_chunk()
    # Snapshot the model every 150 rounds.
    if rounds_done % 150 == 0:
        save_model()

[31m     0 [ 0:00:00 -  0:00:00] Loss: 8.317173  In-sample loss: 8.316814  Accuracy: 0.400  lr = 0.005000[0m


In [None]:
# Manually swap in the next training chunk (the same call the training loop
# makes after every fifth round).
load_next_chunk()

In [None]:
import matplotlib
# Default size (width, height in inches) for figures created after this cell.
matplotlib.rcParams["figure.figsize"] = [12, 8]

In [None]:
# Validation and in-sample loss against the number of training steps.
# plt.hold(True) was dropped: successive plot calls already draw onto the same
# axes, and pyplot.hold was deprecated in Matplotlib 2.0 and removed in 3.0.
plt.plot(*zip(*loss_plot), label="Cross-validation loss")
plt.plot(*zip(*in_sample_loss_plot), label="In-sample loss")
plt.xlabel("Training steps")
plt.ylabel("Loss")
plt.legend()

## Testing.

In [None]:
# One flag per sample among the first 1000: 1 if its feature array sums to
# exactly 96, else 0.
# NOTE(review): the significance of 96 is not evident from this notebook -- confirm.
print([int(chunk["features"][i].sum() == 96) for i in xrange(1000)])

In [None]:
# Sum of each of the first 1000 move arrays, shifted up by one.
print([chunk["moves"][i].sum() + 1 for i in xrange(1000)])

In [None]:
# Sample counts of the two arrays in the current chunk (load_chunk asserts they match).
print(len(chunk["moves"]))
print(len(chunk["features"]))

In [None]:
# Flattened values of the first 1000 move samples, for eyeballing the raw encoding.
chunk["moves"][:1000].flatten()

In [None]:
# Count how often each of -1, 0 and 1 occurs in the first 100000 move samples.
# list(...).count(i) compares whole samples with `i`, which raises ValueError
# ("truth value of an array ... is ambiguous") once a sample holds more than
# one element, as with the (N, 2, 8, 8) arrays produced by load_chunk. Counting
# matching elements gives the same numbers for one-value-per-sample data and
# also works for multi-element samples. The slice is not bound to a name so no
# view of the chunk outlives this cell.
for i in (-1, 0, 1):
    print("%s %s" % (i, np.count_nonzero(chunk["moves"][:100000] == i)))

In [None]:
# Histogram of the first component of each network output, grouped by the first
# component of the corresponding entry of src["moves"], over 20 batches of
# 1000 samples.
# NOTE(review): this cell appears to expect scalar labels in {-1, 0, 1} (see the
# "Loss"/"Draw"/"Win" legend). With moves loaded as (N, 2, 8, 8) arrays by
# load_chunk, outcome[0] is an 8x8 array and cannot be used as a dict key, and
# the fed moves do not match the (?, 64, 8, 8) shape printed for
# net.desired_output_ph -- confirm whether this cell still belongs here.
buckets = {-1: [], 0: [], 1: []}
src = chunk
#src = cross_val
for i in xrange(20):
    # Batch i covers samples [1000 * i, 1000 * (i + 1)).
    s = slice(1000 * i, 1000 * (i + 1))
    result = net.final_output.eval(feed_dict={
        net.input_ph: src["features"][s],
        net.desired_output_ph: src["moves"][s],
        net.is_training_ph: False,
    })
    for output, outcome in zip(result, src["moves"][s]):
        buckets[outcome[0]].append(output[0])
# One step-style histogram per bucket; the return values are discarded.
_ = plt.hist(buckets[-1], bins=100, histtype="step")
_ = plt.hist(buckets[0], bins=100, histtype="step")
_ = plt.hist(buckets[1], bins=100, histtype="step")
plt.legend(["Loss", "Draw", "Win"])

In [None]:
# Shape of the full move array of the current chunk.
chunk["moves"].shape

In [None]:
# cross_val["moves"] is a plain Python list (built by get_random_subset) and has
# no .shape attribute; report the sample count followed by the per-sample shape
# instead, without copying the list into one large array.
(len(cross_val["moves"]),) + tuple(cross_val["moves"][0].shape)

In [None]:
# Shape of a single validation feature array.
cross_val["features"][0].shape

In [None]:
# Evaluate net.W2 in the current session and show its value.
# NOTE(review): W2 is presumably one of the network's weight tensors -- confirm in model_dual.
sess.run(net.W2)

In [None]:
# Show the first sample's features with the last axis moved to the front, i.e.
# as FEATURE_COUNT separate 8x8 planes.
np.moveaxis(chunk["features"][0], -1, 0)

In [None]:
import utils
# Reload the module so that edits to utils are picked up without restarting the kernel.
reload(utils)

In [None]:
# Cursor into cross_val for the sample-by-sample inspection cell that follows.
i = 0

In [None]:
# Step to the next validation sample, print its index and encoded move, and pass
# its features to utils.features_to_board (whose result is the cell's output).
# Re-running this cell advances the cursor each time.
i = i + 1
print("%s %s" % (i, cross_val["moves"][i]))
utils.features_to_board(cross_val["features"][i])

In [None]:
# Sum of all move values divided by the number of samples, for the chosen source.
#src = chunk
src = cross_val
average_outcome = np.array(src["moves"]).sum() / float(len(src["moves"]))
print("Average outcome: %s" % (average_outcome,))

In [None]:
# Sum of squared deviations of every move value from a fixed constant, divided
# by the number of samples in the chunk.
# NOTE(review): 0.052403045049218534 is presumably an "Average outcome" value
# printed by an earlier run -- confirm.
v = ((chunk["moves"].flatten() - 0.052403045049218534) ** 2).sum()
v / len(chunk["moves"])

Evidence:

18 -- obviously won for white
37 -- looks plausibly like a draw
38 -- looks like a continuation of 37, and is still a draw
39 -- same
40 -- same
41 -- same
47 -- looks won for white
59 -- looks won for white, but reported as a draw? Edit: according to the tablebase (TB), it is a draw
70 -- looks won for black
75 -- looks won for black
90 -- looks won for white
91 -- looks won for white

In [None]:
## Padding.

a

a

a

a

a

a

a

a

a

a

a

a
