In [91]:
import bokeh
from bokeh.io import output_notebook
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Slider, Span
from bokeh.plotting import figure, curdoc

from collections import defaultdict
from ipywidgets import interact
import matplotlib.pyplot as plt
import numpy as np
import os
import subprocess
import torch
from torch import nn as nn
from natsort import natsorted
import sqlite3

torch.set_printoptions(linewidth=120, edgeitems=5)
output_notebook()

In [23]:
filename = 'games/537158602104981-152.ptd'
data = torch.jit.load(filename).state_dict()
list(data)

['input', 'opp_policy', 'ownership', 'policy', 'score_margin', 'value']

In [25]:
print(data['score_margin'][0])
print(data['ownership'][0])

tensor([-10], dtype=torch.int32)
tensor([[[0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0]],

        [[0, 0, 0, 0, 0, 0, 0, 0],
         [0, 1, 0, 0, 0, 1, 1, 1],
         [0, 0, 1, 0, 0, 0, 1, 1],
         [0, 1, 1, 0, 0, 1, 0, 1],
         [0, 1, 0, 0, 1, 0, 0, 1],
         [0, 0, 1, 1, 1, 1, 0, 1],
         [0, 0, 0, 0, 0, 0, 0, 1],
         [1, 1, 1, 1, 1, 1, 0, 1]],

        [[1, 1, 1, 1, 1, 1, 1, 1],
         [1, 0, 1, 1, 1, 0, 0, 0],
         [1, 1, 0, 1, 1, 1, 0, 0],
         [1, 0, 0, 1, 1, 0, 1, 0],
         [1, 0, 1, 1, 0, 1, 1, 0],
         [1, 1, 0, 0, 0, 0, 1, 0],
         [1, 1, 1, 1, 1, 1, 1, 0],
         [0, 0, 0, 0, 0, 0, 1, 0]]], dtype=torch.uint8)


In [22]:
set([float(sum(sum(data['ownership'][k]))) for k in range(len(data['ownership']))])

{44.0}

In [71]:
class Stats:
    def __init__(self):
        self.policy4_sum1 = torch.zeros((8, 8))
        self.policy4_count1 = 0
        self.policy4_sum2 = torch.zeros((8, 8))
        self.policy4_count2 = 0
        
        self.policy5_num = defaultdict(float)
        self.policy5_den = defaultdict(float)

    def dump(self):
        print('e4:')
        print(self.policy4_sum1 / self.policy4_count1)
        print('!e4:')
        print(self.policy4_sum2 / self.policy4_count2)
        
        for key in sorted(self.policy5_num):
            num = self.policy5_num[key]
            den = self.policy5_den[key]
            print('%.3f %s' % (num / den, key))
            
        
def load_dir(directory, stats):
    for filename in os.listdir(directory):
        if filename == 'done.txt':
            continue
        full_filename = os.path.join(directory, filename)

        data = torch.jit.load(full_filename).state_dict()
        data_input = data['input']
        data_value = data['value']
        data_policy = data['policy']
        
        num_pieces = data_input.sum(axis=(1, 2, 3))

        four_piece_indices = torch.where(num_pieces == 4)[0]
        for i in four_piece_indices:
            input_row = data_input[i][1]
            e4 = int(input_row[3, 4])
            if e4:
                stats.policy4_sum1 += data_policy[i]
                stats.policy4_count1 += 1
            else:
                stats.policy4_sum2 += data_policy[i]
                stats.policy4_count2 += 1
            
        five_piece_indices = torch.where(num_pieces == 5)[0]
        for i in five_piece_indices:
            input_row = data_input[i][1]
            key = input_row[2:6, 2:6]
            d3 = int(input_row[2, 3])
            e3 = int(input_row[2, 4])
            c4 = int(input_row[3, 2])
            f4 = int(input_row[3, 5])
            c5 = int(input_row[4, 2])
            f5 = int(input_row[4, 5])
            d6 = int(input_row[5, 3])
            e6 = int(input_row[5, 4])
            tup = (d3, e3, c4, f4, c5, f5, d6, e6)
            value_float = float(data_value[i][0])
            stats.policy5_num[tup] += value_float
            stats.policy5_den[tup] += 1

In [121]:
pdf_loss = nn.CrossEntropyLoss()
cdf_loss = nn.MSELoss()

def loss(predicted_logits: torch.Tensor, actual_one_hot: torch.Tensor):
    predicted_probs = predicted_logits.softmax(dim=1)
    predicted_cdf = torch.cumsum(predicted_probs, dim=1)
    actual_cdf = torch.cumsum(actual_one_hot, dim=1)

    pdf_loss_val = pdf_loss(predicted_logits, actual_one_hot)
    cdf_loss_val = cdf_loss(predicted_cdf, actual_cdf)

    return pdf_loss_val + cdf_loss_val

def convert_labels(labels: torch.Tensor) -> torch.Tensor:
    # converts label from (min, max) to a one-hot encoding
    assert len(labels.shape) == 2 and labels.shape[1]==1, labels.shape
    n = labels.shape[0]
    output = torch.zeros((n, 129))
    index = labels[:, 0] + 64
    output[torch.arange(n), index.type(torch.int64)] = 1
    return output

In [47]:
model_filename = '/media/dshin/alphazero/othello/aux2/models/gen-537.ptj'
model = torch.jit.load(model_filename)

In [66]:
directory = '/media/dshin/alphazero/othello/aux2/self-play-data/gen-537/'
filenames = [f for f in os.listdir(directory) if f.endswith('.ptd')]

In [133]:
full_filename = os.path.join(directory, filenames[6])
data = torch.jit.load(full_filename).state_dict()

data_input = data['input'].type(torch.float32)
data_policy = data['policy']
data_value = data['value']
data_opp_policy = data['opp_policy']
data_score_margin = data['score_margin']
data_ownership = data['ownership'].type(torch.int64)

min_margin = min(data_score_margin.reshape((-1, ))).item()
max_margin = max(data_score_margin.reshape((-1, ))).item()

data_score_margin_one_hot = convert_labels(data_score_margin)

output = model(data_input)
output_policy = output[0]
output_value = output[1]
output_opp_policy = output[2]
output_score_margin = output[3]
output_ownership = output[4]

predicted_logits = output_score_margin
actual_one_hot = data_score_margin_one_hot

predicted_probs = predicted_logits.softmax(dim=1)
predicted_cdf = torch.cumsum(predicted_probs, dim=1)
actual_cdf = torch.cumsum(actual_one_hot, dim=1)

pdf_loss_val = pdf_loss(predicted_logits, actual_one_hot)
cdf_loss_val = cdf_loss(predicted_cdf, actual_cdf)

In [140]:
output_ownership[-1]

tensor([[[-3.9588e+00, -4.5162e+00, -4.7851e+00, -3.8363e+00, -3.9779e+00, -2.1368e+00, -3.3572e+00, -2.9171e+00],
         [-6.5644e+00, -4.2740e+00, -5.7897e+00, -3.5096e+00, -4.9134e+00, -4.2335e+00, -4.3386e+00, -3.2413e+00],
         [-2.9251e+00, -3.0048e+00, -6.0106e+00, -3.9688e+00, -5.5532e+00, -4.3814e+00, -3.7183e+00, -4.0086e+00],
         [-3.4897e+00, -4.6064e+00, -4.1984e+00, -3.5947e+00, -3.2043e+00, -4.0445e+00, -3.3898e+00, -6.4749e+00],
         [-5.6296e+00, -3.5474e+00, -5.5141e+00, -3.1548e+00, -2.8856e+00, -2.8902e+00, -2.7971e+00, -4.1198e+00],
         [-4.8932e+00, -3.6259e+00, -4.4611e+00, -3.9606e+00, -4.1449e+00, -4.6644e+00, -4.7685e+00, -3.4159e+00],
         [-4.0449e+00, -3.1671e+00, -2.7900e+00, -5.3250e+00, -4.5999e+00, -3.2975e+00, -3.5145e+00, -3.0912e+00],
         [-2.0512e+00, -3.7542e+00, -2.8160e+00, -3.8116e+00, -3.6264e+00, -3.9871e+00, -3.4940e+00, -2.5994e+00]],

        [[ 1.0285e+00, -8.5520e-01,  1.1719e+00,  8.8141e-01, -7.3973e-01,  1.

In [136]:
output_ownership[-1:].softmax(dim=1)

tensor([[[[1.7807e-03, 2.3018e-03, 2.2746e-04, 1.0510e-03, 3.6563e-03, 4.5857e-03, 1.6023e-03, 3.3635e-03],
          [2.2379e-04, 8.8415e-04, 4.5249e-04, 1.3351e-03, 5.4002e-04, 2.3010e-03, 3.0692e-04, 1.5698e-03],
          [2.8229e-03, 2.4193e-03, 1.7254e-04, 1.9203e-03, 3.4561e-04, 4.1761e-04, 1.0975e-03, 2.0418e-03],
          [1.1789e-03, 5.5625e-04, 1.4733e-03, 5.4140e-04, 2.1450e-03, 1.4277e-03, 1.2136e-03, 1.7983e-04],
          [2.6848e-04, 2.0050e-03, 8.5851e-04, 1.9214e-03, 2.1360e-03, 3.9491e-03, 2.9460e-03, 1.0959e-03],
          [1.1378e-03, 1.7610e-03, 6.1231e-04, 6.6852e-04, 1.1689e-03, 4.9417e-04, 4.2514e-04, 5.3565e-03],
          [1.0369e-03, 5.4181e-04, 3.3960e-03, 3.4217e-04, 8.0979e-04, 1.3079e-03, 9.6783e-04, 2.1645e-03],
          [2.1650e-02, 9.9266e-04, 3.9982e-03, 7.5344e-04, 5.3652e-04, 2.6604e-04, 5.6742e-04, 3.8683e-03]],

         [[2.6095e-01, 8.9539e-02, 8.7903e-02, 1.1762e-01, 9.3192e-02, 1.6767e-01, 7.2943e-02, 1.5140e-01],
          [7.6835e-01, 6.5

In [145]:
combined_input = (data_input[:, 0] * 2 + data_input[:, 1]).type(torch.int64)
# combined_input = (data_input[-1][0] * 2 + data_input[-1][1]).type(torch.int64)

print(data_ownership[-1])
print(combined_input[-1])

tensor([[1, 2, 2, 2, 2, 2, 2, 2],
        [1, 2, 2, 2, 2, 2, 2, 2],
        [1, 2, 2, 2, 2, 1, 1, 2],
        [1, 2, 2, 2, 2, 1, 2, 1],
        [1, 2, 1, 2, 2, 2, 1, 1],
        [1, 2, 1, 1, 2, 1, 1, 1],
        [1, 1, 2, 2, 1, 2, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1]])
tensor([[0, 2, 2, 2, 2, 2, 2, 2],
        [0, 1, 1, 1, 1, 1, 1, 2],
        [1, 0, 1, 2, 2, 1, 1, 2],
        [1, 1, 1, 2, 2, 1, 2, 1],
        [1, 2, 1, 2, 2, 2, 1, 1],
        [1, 2, 1, 1, 2, 1, 1, 1],
        [1, 1, 2, 2, 1, 2, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1]])


In [146]:
print(data_ownership[-2])
print(combined_input[-2])

tensor([[2, 2, 2, 1, 1, 1, 1, 1],
        [2, 2, 1, 2, 1, 1, 1, 1],
        [2, 2, 1, 1, 2, 1, 2, 1],
        [2, 2, 2, 2, 2, 2, 1, 1],
        [2, 2, 2, 2, 2, 1, 2, 1],
        [2, 2, 2, 2, 1, 1, 2, 1],
        [2, 2, 2, 2, 2, 2, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1]])
tensor([[2, 2, 2, 1, 1, 1, 1, 1],
        [2, 1, 1, 2, 1, 1, 1, 1],
        [2, 1, 1, 1, 2, 1, 2, 1],
        [2, 1, 2, 2, 2, 2, 1, 1],
        [2, 1, 2, 2, 2, 1, 2, 1],
        [2, 1, 1, 1, 1, 1, 2, 1],
        [2, 1, 0, 1, 2, 2, 1, 1],
        [0, 0, 1, 1, 1, 1, 1, 1]])


In [153]:
print(data_ownership[-9])
print(combined_input[-9])

tensor([[2, 1, 1, 1, 1, 1, 1, 1],
        [2, 1, 1, 1, 1, 1, 1, 1],
        [2, 1, 1, 1, 1, 2, 2, 1],
        [2, 1, 1, 1, 1, 2, 1, 2],
        [2, 1, 2, 1, 1, 1, 2, 2],
        [2, 1, 2, 2, 1, 2, 2, 2],
        [2, 2, 1, 1, 2, 1, 2, 2],
        [2, 2, 2, 2, 2, 2, 2, 2]])
tensor([[0, 0, 2, 1, 1, 1, 1, 1],
        [0, 0, 2, 2, 2, 1, 0, 1],
        [2, 0, 2, 2, 1, 2, 2, 1],
        [2, 2, 2, 1, 2, 2, 1, 2],
        [2, 1, 2, 1, 1, 1, 2, 2],
        [2, 1, 2, 2, 1, 2, 2, 2],
        [2, 2, 1, 1, 2, 1, 2, 2],
        [2, 2, 2, 2, 2, 2, 2, 2]])
