In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/supervised-chess/buffer_df.feather


In [2]:
from pathlib import Path
from typing import List
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset
import matplotlib.pyplot as plt

SQUARES = [file+str(rank+1) for file in "abcdefgh" for rank in range(8)]
PROMOTION_MOVES_STRAIGHT = [file+"7"+file+"8" for file in "abcdefgh"]+[file+"2"+file+"1" for file in "abcdefgh"]
PROMOTION_MOVES_DIAG = ["a7b8", "b7a8", "b7c8","c7b8", "c7d8", "d7c8", "d7e8", "e7d8", "e7f8", "f7e8", "f7g8", "g7f8", "g7h8", "h7g8"] + ["a2b1", "b2a1", "b2c1","c2b1", "c2d1", "d2c1", "d2e1", "e2d1", "e2f1", "f2g1", "f2e1", "g2f1", "g2h1", "h2g1"]
PROMOTION_MOVES = PROMOTION_MOVES_DIAG +PROMOTION_MOVES_STRAIGHT
MOVES = [i+f for i in SQUARES for f in SQUARES if i!=f]+[move+promotion for move in PROMOTION_MOVES  for promotion in "nbrq"]
LEN_MOVES = len(MOVES)

def process_buffer_to_torch_state_64(buffer: pd.DataFrame):
    # ouch
    buffer["state_64"] = buffer.state.apply(lambda x: x[:-3])
    print(len(buffer.state_64.iloc[0]))
    x = torch.tensor(np.stack(buffer.state_64.values, axis=0), dtype=torch.float32).view(-1,64)
    y_values = torch.tensor(buffer.value.values,dtype=torch.float32).view(-1,1) # [:,1]
    y_policy = torch.tensor(np.stack(buffer.policy.values, axis=0), dtype=torch.float32)
    return x.cuda(), y_values.cuda(), y_policy.cuda()

class BufferDataset(Dataset):
    def __init__(self, x, y_value, y_policy):
        super(BufferDataset, self).__init__()
        assert x.shape[0] == y_value.shape[0] == y_policy.shape[0]
        self.x = x
        self.y_value = y_value
        self.y_policy = y_policy
    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, index):
        return self.x[index], self.y_value[index], self.y_policy[index]

In [26]:
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
class ConvBlock(nn.Module):
    def __init__(self):
        super(ConvBlock, self).__init__()
        self.conv1 = nn.Conv2d(1, 256, 3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(256)

    def forward(self, s):
        s = s.view(-1, 1, 8, 8)  # batch_size x channels x board_x x board_y
        s = F.relu(self.bn1(self.conv1(s)))
        return s

class ResBlock(nn.Module):
    def __init__(self, inplanes=256, planes=256, stride=1, downsample=None):
        super(ResBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = F.relu(self.bn1(out))
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        out = F.relu(out)
        return out
    
class OutBlock(nn.Module):
    def __init__(self):
        super(OutBlock, self).__init__()
        self.conv = nn.Conv2d(256, 1, kernel_size=1) # value head
        self.bn = nn.BatchNorm2d(1)
        self.fc1 = nn.Linear(8*8, 64)
        self.fc2 = nn.Linear(64, 1)
        
        self.conv1 = nn.Conv2d(256, 128, kernel_size=1) # policy head
        self.bn1 = nn.BatchNorm2d(128)
        self.fc = nn.Linear(8*8*128, 4208)
    
    def forward(self,s):
        v = F.relu(self.bn(self.conv(s))) # value head
        v = v.view(-1, 8*8)  # batch_size X channel X height X width
        v = F.relu(self.fc1(v))
        v = F.tanh(self.fc2(v))
        
        p = F.relu(self.bn1(self.conv1(s))) # policy head
        p = p.view(-1, 8*8*128)
        p = self.fc(p)
        p = F.log_softmax(p, dim=1)
        return v, p
    
class ChessNet(nn.Module):
    def __init__(self):
        super(ChessNet, self).__init__()
        self.conv = ConvBlock()
        for i in range(10):
            setattr(self, f"res_{i}",ResBlock())
        self.outblock = OutBlock()
    
    def forward(self,s):
        s = self.conv(s)
        for i in range(10):
            s = getattr(self, f"res_{i}")(s)
        s = self.outblock(s)
        return s

In [32]:
batch_size = 50
epochs = 20

In [30]:

buffer = pd.read_feather("/kaggle/input/supervised-chess/buffer_df.feather")

In [None]:
model = ChessNet().cuda()
model.load_state_dict(torch.load("/kaggle/working/nn_supervised_conv_kaggle.pth"))
model.eval()
x, y_value, y_policy = process_buffer_to_torch_state_64(buffer)
print("processed to torch")
dataset = BufferDataset(x=x,y_value=y_value, y_policy=y_policy)
train_dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=batch_size)

loss_v_f = torch.nn.MSELoss()
loss_policy_f = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=0)
model.train()

loss_list = []

for it in range(epochs):
    total_loss = 0
    for x, y_value, y_policy in train_dataloader:
        optimizer.zero_grad()
        y_value_pred, y_policy_pred = model(x)        
        loss_value = loss_v_f(y_value_pred, y_value)
        loss_policy = loss_policy_f(y_policy_pred, y_policy)
        loss = 40*loss_value+loss_policy
        #loss = loss_policy
        loss.backward()
        optimizer.step()
        total_loss+=loss.item()

    #loss_list.append(loss.cpu().mean().detach().numpy())
    loss_list.append(total_loss)
    #print(f"Epoch: {it}/{epochs}, loss: {loss.mean()}")
    print(f"Epoch: {it}/{epochs}, loss per epoch: {total_loss}")
    
    if it%50==0:
        torch.save(model.state_dict(), "/kaggle/working/nn_supervised_conv_kaggle.pth")
        print("saving")
torch.save(model.state_dict(), "/kaggle/working/nn_supervised_conv_kaggle.pth")
plt.plot(loss_list)
plt.show()

64
processed to torch
Epoch: 0/20, loss per epoch: 8197.501984357834
saving
Epoch: 1/20, loss per epoch: 7754.920849323273
Epoch: 2/20, loss per epoch: 7455.1208152771
Epoch: 3/20, loss per epoch: 7148.545227766037
Epoch: 4/20, loss per epoch: 6899.642054319382
Epoch: 5/20, loss per epoch: 6669.008895397186
Epoch: 6/20, loss per epoch: 6458.516385555267
