# ChessML Netz

This neural network based on PyTorch will try to classifiy chess pieces from photos taken from the top of a chessboard.

In [1]:
import os
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
import random
import time

from torchvision import transforms
from PIL import Image

## Defining all possible classes

bb = Black Bishop
bk = Black King
bn = Black Knight
bp = Black Pawn
bq = Black Queen
br = Black Rook
...

In [2]:
CLASSES = ["bb", "bk", "bn", "bp", "bq", "br", "wb", "wk", "wn", "wp", "wq", "wr", "empty"]

## Define a normalization function for the analyzed data

The values are recommended by a PyTorch tutorial (https://youtu.be/32lHVbT09h8).

In [3]:
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)

transform = transforms.Compose([
    transforms.Resize(128),
    transforms.ToTensor(),
    normalize
])

## Defining the Neural Network

In [4]:
class ChessNet(nn.Module):
    def __init__(self):
        super(ChessNet, self).__init__()
        # Defining the convolutional layers of the net
        self.conv1 = nn.Conv2d(3, 6, kernel_size=3)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=3)
        self.conv3 = nn.Conv2d(12, 24, kernel_size=3)
        self.conv4 = nn.Conv2d(24, 48, kernel_size=3)
        self.conv5 = nn.Conv2d(48, 96, kernel_size=3)

        # Defining dropout layer to prevent from memorizing
        self.dropout1 = nn.Dropout(p=0.3)
        self.dropout2 = nn.Dropout(p=0.3)

        # Defining the fully connected layers of the net
        self.fc1 = nn.Linear(384, 64)  # 96*2*2 = 384
        self.fc2 = nn.Linear(64, 13)

    def forward(self, x):
        x = self.conv1(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)  # Relu because it's famous

        x = self.conv2(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)

        x = self.conv3(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)

        x = self.conv4(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)

        x = self.conv5(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)

        x = self.dropout1(x)

        x = x.view(-1, 384)  # Convert 2d data to 1d

        x = self.fc1(x)
        x = self.dropout2(x)
        x = F.relu(x)

        x = self.fc2(x)

        return torch.sigmoid(x)


## Training

In [5]:
def train(model, epoch, train_data, optimizer, criterion):
    model.train()    
    
    running_loss = 0.0
    for i in range(len(train_data)):
        data, target = train_data[i][0], train_data[i][1]
        target = torch.Tensor(target)
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()

        # zero the parameter gradients
        optimizer.zero_grad()
        
        out = model(data)
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()
        
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0


## Validation

In [6]:
def validate(model, validation_data, criterion):
    model.eval()
    loss = 0
    correct = 0
    
    for i in range(len(validation_data)):
        data, target = validation_data[i][0], validation_data[i][1]
        target = torch.Tensor(target)
        
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
            
        out = model(data)

        loss += criterion(out, target).item()
        
        _, prediction = torch.max(out.data, 1)
        correct += (prediction == target.data).sum().item()

    loss = loss / len(validation_data)
    print("###################################")
    print("Average loss:", loss)
    print("Accuracy:", 100. * correct / len(validation_data))
    print("###################################")


## Testing

The test function is not really necessary. It is just for a better understanding and eventually testing the neural network manually.

In [7]:
def test(model, path):
    model.eval()
    files = os.listdir(path)
    file = random.choice(files)
    img = Image.open(os.path.join(path, file))
    img_eval_tensor = transform(img)
    img_eval_tensor.unsqueeze_(0)
    if torch.cuda.is_available():
        img_eval_tensor = img_eval_tensor.cuda()
    out = model(img_eval_tensor)
    print(torch.max(out))
    print(torch.max(out, 1)[0], torch.max(out, 1)[1])
    print(torch.max(out, 1, keepdim=True))
    print(CLASSES[out.data.max(1, keepdim=True)[1]])
    img.show()
    input("")


## Helper functions

At first all available files are added to a file list. In addition the label of the file will be generated and added to the list.

In [8]:
def read_files(path):
    file_list = []
    for d in os.listdir(path):
        if os.path.isdir(os.path.join(path, d)):
            for f in os.listdir(os.path.join(path, d)):
                file_list.append([os.path.join(path, d, f), hot_encode_label(d)])
    return file_list


Generating tensors from the file list, we just created. All files get mixed up randomly.

In [9]:
def generate_tensors(file_list, batch_size):
    data_list = []
    data = []
    label_list = []
    files_count = len(file_list)
    for i in range(files_count):
        file = random.choice(file_list)
        file_list.remove(file)
        
        img = Image.open(file[0])  # Index 0: filename, Index 1: label
        img_tensor = transform(img)
        data_list.append(img_tensor)
        label_list.append(file[1])
        
        if len(data_list) >= batch_size:
            data.append((torch.stack(data_list), label_list))
            data_list = []
            label_list = []
            
            #Statistics
            print("Loaded batch", len(data), "of", int(files_count / batch_size))
            print("Percentage Done:", len(data) / int(files_count / batch_size) * 100, "%")
            if len(data) >= 1:
                break
    return data


The label will be hot encoded, since there is only one possible class for each tile of the chessboard. 
This means that for example a black knight will be labeled by the vector: 

>\[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0\]

In [10]:
def hot_encode_label(label):
    index = CLASSES.index(label)
    vector = np.zeros(len(CLASSES), np.uint8)
    vector[index] = 1
    return vector


The save model function will save the state of a model after a specific epoch. 

In [11]:
def save_model(model, epoch):
    torch.save(model.state_dict(), "model/chess-net_{}.pt".format(epoch))
    print("Checkpoint saved")

## Main

Training and evaluating the ChessNet

In [12]:
def main():
    print("Loading Training Data")
    train_files = read_files("data/train_augmented")
    train_data = generate_tensors(train_files, 32)

    print("Loading Validation Data")
    validation_files = read_files("data/validation_augmented")
    validation_data = generate_tensors(validation_files, 32)

    model = ChessNet()
    
    # Activate cuda support if available
    if torch.cuda.is_available():
        model = model.cuda()

    # Check if model is already available
    # if os.path.isfile("model/chess-net.pt"):
    #    model = torch.load("model/chess-net.pt")

    # Defining the optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    # optimizer = optim.RMSprop(model.parameters(), lr=1e-3)
    
    criterion = F.binary_cross_entropy
    
    # Start training
    start = time.time()
    for epoch in range(1,5):
        train(model, epoch, train_data, optimizer, criterion)
        validate(model, validation_data, criterion)
        # test(model, "data/train_augmented/bn")
        save_model(model, epoch)
    end = time.time()
    print("Training of the neuroal network done.")
    print("Time spent:", end-start, "s")
    
    # Start validation
    # start = time.time()
    # validate(model, eval_data, criterion)
    # end = time.time()
    # print("Evaluation of the neuroal network done.")
    # print("Time:", end-start, "s")

In [13]:
if __name__ == "__main__":
    main()


Loading Training Data
Loaded batch 1 of 706
Percentage Done: 0.141643059490085 %
Loading Validation Data
Loaded batch 1 of 139
Percentage Done: 0.7194244604316548 %


RuntimeError: The size of tensor a (32) must match the size of tensor b (13) at non-singleton dimension 1