In [1]:
import bokeh
from bokeh.io import output_notebook
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Slider, Span
from bokeh.plotting import figure, curdoc

from collections import defaultdict
from ipywidgets import interact
import matplotlib.pyplot as plt
import numpy as np
import os
import subprocess
import torch
from natsort import natsorted
import sqlite3

# Print tensors with wider lines (120 columns) and 5 edge items per dimension.
torch.set_printoptions(linewidth=120, edgeitems=5)
# Configure bokeh to render its output inline in the notebook.
output_notebook()

In [71]:
class Stats:
    """Accumulators for policy/value statistics gathered by ``load_dir``.

    Attributes:
        policy4_sum1 / policy4_count1: running 8x8 policy sum and number of
            samples for the group reported under the ``e4:`` heading.
        policy4_sum2 / policy4_count2: the same pair for the group reported
            under the ``!e4:`` heading.
        policy5_num / policy5_den: per-key running value total and sample
            count; ``dump`` reports their ratio for each key.
    """

    def __init__(self):
        board_shape = (8, 8)

        # Each group gets its own zero-initialised sum tensor and a counter.
        self.policy4_sum1, self.policy4_count1 = torch.zeros(board_shape), 0
        self.policy4_sum2, self.policy4_count2 = torch.zeros(board_shape), 0

        # Missing keys start at 0.0 so callers can accumulate with ``+=``.
        self.policy5_num = defaultdict(float)
        self.policy5_den = defaultdict(float)

    def dump(self):
        """Print the mean policy of both groups, then the mean value per key.

        Keys are listed in sorted order, one ``<mean> <key>`` line each.
        """
        groups = (
            ('e4:', self.policy4_sum1, self.policy4_count1),
            ('!e4:', self.policy4_sum2, self.policy4_count2),
        )
        for heading, total, count in groups:
            print(heading)
            print(total / count)

        # Keys are unique, so sorting the items orders them by key alone.
        for key, num in sorted(self.policy5_num.items()):
            print('%.3f %s' % (num / self.policy5_den[key], key))
            
        
def load_dir(directory, stats):
    """Accumulate opening statistics from every data file in ``directory``.

    Every directory entry except ``done.txt`` is opened with
    ``torch.jit.load`` and its ``state_dict()`` is read for the ``'input'``,
    ``'value'`` and ``'policy'`` tensors. Positions are bucketed by the total
    of their input planes (the piece count):

    * 4 pieces: the position's policy is added to ``stats.policy4_sum1`` when
      plane 1 has a piece on e4 (index ``[3, 4]``), otherwise to
      ``stats.policy4_sum2``; the matching counter is incremented.
    * 5 pieces: the occupancy of eight squares of plane 1
      (d3, e3, c4, f4, c5, f5, d6, e6) forms a tuple key; the position's first
      value entry is added to ``stats.policy5_num[key]`` and
      ``stats.policy5_den[key]`` is incremented.

    Args:
        directory: path of the directory holding the data files.
        stats: accumulator object exposing the ``policy4_*`` and ``policy5_*``
            attributes (e.g. a ``Stats`` instance). It is mutated in place.

    Returns:
        None.
    """
    # (row, col) indices of d3, e3, c4, f4, c5, f5, d6, e6, in key order.
    key_squares = (
        (2, 3), (2, 4),
        (3, 2), (3, 5),
        (4, 2), (4, 5),
        (5, 3), (5, 4),
    )

    for filename in os.listdir(directory):
        if filename == 'done.txt':
            continue
        full_filename = os.path.join(directory, filename)

        data = torch.jit.load(full_filename).state_dict()
        data_input = data['input']
        data_value = data['value']
        data_policy = data['policy']

        # Summing over every non-batch axis gives one piece count per position.
        num_pieces = data_input.sum(axis=(1, 2, 3))

        for i in torch.where(num_pieces == 4)[0]:
            plane = data_input[i][1]
            if int(plane[3, 4]):
                stats.policy4_sum1 += data_policy[i]
                stats.policy4_count1 += 1
            else:
                stats.policy4_sum2 += data_policy[i]
                stats.policy4_count2 += 1

        for i in torch.where(num_pieces == 5)[0]:
            plane = data_input[i][1]
            tup = tuple(int(plane[row, col]) for row, col in key_squares)
            stats.policy5_num[tup] += float(data_value[i][0])
            stats.policy5_den[tup] += 1

In [72]:
# Root directory containing one 'gen-<N>' sub-directory per generation.
self_play_dir = '/media/dshin/alphazero/othello/v5/self-play-data'

stats = Stats()

# Fold generations 5000..5099 into `stats`, stopping at the first generation
# whose directory does not exist.
for gen in range(5000, 5100):
    gen_dir = os.path.join(self_play_dir, 'gen-%s' % gen)
    if os.path.isdir(gen_dir):
        load_dir(gen_dir, stats)
    else:
        break

stats.dump()

e4:
tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.2500, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.2500, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.2500, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.2500, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]])
!e4:
tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.2500, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.2500, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.2500, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0

In [74]:
# Path of the serialized model to inspect (presumably the generation-6000
# checkpoint, judging by the file name).
model_filename = '/media/dshin/alphazero/othello/v5/models/gen-6000.ptj'
# torch.jit.load deserializes a TorchScript module saved with torch.jit.save.
model = torch.jit.load(model_filename)

In [83]:
# Two 8x8 planes with two pieces each, forming the central 2x2 opening pattern.
# Using [row, col] indexing: plane 0 holds [3, 4] and [4, 3], plane 1 holds
# [3, 3] and [4, 4]. Which player each plane represents is not visible here.
starting_position = torch.zeros((2, 8, 8))
starting_position[0, [3, 4], [4, 3]] = 1
starting_position[1, [3, 4], [3, 4]] = 1

In [94]:
# Run the model on the starting position as a batch of one. This is pure
# inference, so disable autograd: no graph is built and the printed tensor no
# longer carries a grad_fn.
with torch.no_grad():
    output = model(starting_position.reshape((-1, 2, 8, 8)))

# The first model output is used as the policy; policy[0] is the single batch
# entry. Softmax is taken over all 64 squares, then reshaped back to the board
# and rounded to the nearest integer for display.
policy = output[0]
print(torch.round(torch.softmax(policy[0].flatten(), dim=0).reshape((8, 8))))

tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]], grad_fn=<RoundBackward0>)


In [95]:
# IPython help query (interactive scratch): shows the docstring of torch.round.
# It is not part of the analysis and can be dropped from a finished notebook.
torch.round?