In [2]:
import pandas as pd
import chess
import numpy as np
import tensorflow as tf
import keras
from keras.layers import Dense, Flatten, Conv2D
from keras import Model
from sklearn.model_selection import train_test_split

In [3]:
chess_dataset = pd.read_csv("games.csv")

In [4]:
chess_dataset.head()

Unnamed: 0,id,rated,created_at,last_move_at,turns,victory_status,winner,increment_code,white_id,white_rating,black_id,black_rating,moves,opening_eco,opening_name,opening_ply
0,TZJHLljE,False,1504210000000.0,1504210000000.0,13,outoftime,white,15+2,bourgris,1500,a-00,1191,d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5...,D10,Slav Defense: Exchange Variation,5
1,l1NXvwaE,True,1504130000000.0,1504130000000.0,16,resign,black,5+10,a-00,1322,skinnerua,1261,d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6...,B00,Nimzowitsch Defense: Kennedy Variation,4
2,mIICvQHh,True,1504130000000.0,1504130000000.0,61,mate,white,5+10,ischia,1496,a-00,1500,e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc...,C20,King's Pawn Game: Leonardis Variation,3
3,kWKvrqYL,True,1504110000000.0,1504110000000.0,61,mate,white,20+0,daniamurashov,1439,adivanov2009,1454,d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O...,D02,Queen's Pawn Game: Zukertort Variation,3
4,9tXo1AUZ,True,1504030000000.0,1504030000000.0,95,mate,white,30+3,nik221107,1523,adivanov2009,1469,e4 e5 Nf3 d6 d4 Nc6 d5 Nb4 a3 Na6 Nc3 Be7 b4 N...,C41,Philidor Defense,5


In [5]:
chess_dataset = chess_dataset[["rated", "victory_status", "winner", "white_rating", "black_rating", "moves", "opening_name"]]

In [6]:
chess_dataset.head()

Unnamed: 0,rated,victory_status,winner,white_rating,black_rating,moves,opening_name
0,False,outoftime,white,1500,1191,d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5...,Slav Defense: Exchange Variation
1,True,resign,black,1322,1261,d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6...,Nimzowitsch Defense: Kennedy Variation
2,True,mate,white,1496,1500,e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc...,King's Pawn Game: Leonardis Variation
3,True,mate,white,1439,1454,d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O...,Queen's Pawn Game: Zukertort Variation
4,True,mate,white,1523,1469,e4 e5 Nf3 d6 d4 Nc6 d5 Nb4 a3 Na6 Nc3 Be7 b4 N...,Philidor Defense


In [7]:
print(chess_dataset["rated"].value_counts())

True     16155
False     3903
Name: rated, dtype: int64


In [8]:
chess_dataset = chess_dataset[chess_dataset["rated"] == True]
print(chess_dataset.shape[0])

16155


In [9]:
chess_dataset.describe()

Unnamed: 0,white_rating,black_rating
count,16155.0,16155.0
mean,1597.102012,1593.503188
std,284.178397,289.62716
min,784.0,789.0
25%,1396.0,1390.0
50%,1577.0,1573.0
75%,1794.0,1791.0
max,2622.0,2588.0


In [10]:
chess_dataset = chess_dataset[chess_dataset["white_rating"] > 1300]
chess_dataset.shape[0]

13732

In [11]:
chess_dataset = chess_dataset[chess_dataset["victory_status"] != "outoftime"]
chess_dataset.shape[0]

12574

In [12]:
moves_data = chess_dataset[["winner", "moves"]].copy()
win_category = pd.api.types.CategoricalDtype(categories=["black", "white"], ordered=True)
moves_data["winner"] = moves_data["winner"].astype(win_category).cat.codes
moves_data.head()

Unnamed: 0,winner,moves
1,0,d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6...
2,1,e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc...
3,1,d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O...
4,1,e4 e5 Nf3 d6 d4 Nc6 d5 Nb4 a3 Na6 Nc3 Be7 b4 N...
6,1,d4 d5 e4 dxe4 Nc3 Nf6 f3 exf3 Nxf3 Nc6 Bb5 a6 ...


White pieces:
pawn, knight, bishop, rook, queen, king - 1, 2, 3, 4, 5, 6
Black pieces:
same, plus 6
Empty tile - 0

In [13]:
# set up the starting board
starting_board = [[0 for _ in range(0)] for i in range(8)]
starting_board[0] = [4, 2, 3, 5, 6, 3, 2, 4]
starting_board[1] = [1 for _ in range(8)]
starting_board[6] = [7 for _ in range(8)]
starting_board[7] = [10, 8, 9, 11, 12, 9, 8, 10]
print(starting_board)

[[4, 2, 3, 5, 6, 3, 2, 4], [1, 1, 1, 1, 1, 1, 1, 1], [], [], [], [], [7, 7, 7, 7, 7, 7, 7, 7], [10, 8, 9, 11, 12, 9, 8, 10]]


In [14]:
# somehow do the moves
piece_values = {"P": 1, "N": 2, "B": 3, "R": 4, "Q": 5, "K": 6,
                "p": 7, "n": 8, "b": 9, "r": 10, "q": 11, "k": 12}

In [15]:
board = chess.Board()
board.fen()

'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1'

In [65]:
def rewrite_board(b):
    #board_rep = [[0 for c in range(8)] for _ in range(8)]
    board_rep = np.zeros((8, 8))
    fen = b.fen()
    pieces = fen.split(" ")
    rows = pieces[0].split("/")
    for index, row in enumerate(rows):
        i = 0
        for letter in row:
            if letter in "12345678":
                i += int(letter)
            else:
                board_rep[7-index, i] = piece_values[letter]
                i += 1
    return board_rep

In [49]:
positions = []
moves = []
player = []
wins = []

In [50]:
# position, move, player, do they win?
def add_to_data(positions, moves, player, wins, row):
    moves_in_game = row["moves"].split(" ")
    color = 1
    pos = chess.Board()
    for move in moves_in_game:
        # add board_position, add move, player, win
        player.append(color)
        color += 1
        color %= 2
        wins.append(row["winner"])
        b = rewrite_board(pos)
        positions.append(np.array(b))
        moves.append(move)
        pos.push_san(move)

In [51]:
for index, row in moves_data.iterrows():
    add_to_data(positions, moves, player, wins, row)

In [52]:
print(len(positions))

777981


In [53]:
print(positions[2])

[[ 4.  2.  3.  5.  6.  3.  2.  4.]
 [ 1.  1.  1.  0.  1.  1.  1.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  8.  0.  0.  0.  0.  0.]
 [ 7.  7.  7.  7.  7.  7.  7.  7.]
 [10.  0.  9. 11. 12.  9.  8. 10.]]


In [54]:
d = {"position": positions, "move": moves, "player": player, "winner": wins}
positions_data = pd.DataFrame(d)

In [55]:
positions_data.head()

Unnamed: 0,position,move,player,winner
0,"[[4.0, 2.0, 3.0, 5.0, 6.0, 3.0, 2.0, 4.0], [1....",d4,1,0
1,"[[4.0, 2.0, 3.0, 5.0, 6.0, 3.0, 2.0, 4.0], [1....",Nc6,0,0
2,"[[4.0, 2.0, 3.0, 5.0, 6.0, 3.0, 2.0, 4.0], [1....",e4,1,0
3,"[[4.0, 2.0, 3.0, 5.0, 6.0, 3.0, 2.0, 4.0], [1....",e5,0,0
4,"[[4.0, 2.0, 3.0, 5.0, 6.0, 3.0, 2.0, 4.0], [1....",f4,1,0


In [56]:
positions_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 777981 entries, 0 to 777980
Data columns (total 4 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   position  777981 non-null  object
 1   move      777981 non-null  object
 2   player    777981 non-null  int64 
 3   winner    777981 non-null  int64 
dtypes: int64(2), object(2)
memory usage: 23.7+ MB


In [57]:
# Define the model architecture
model = tf.keras.Sequential()

# Add a hidden layer with 64 neurons and ReLU activation
model.add(tf.keras.layers.Dense(64, activation='relu'))

# Add a hidden layer with 32 neurons and ReLU activation
model.add(tf.keras.layers.Dense(32, activation='relu'))

# Add an output layer with a sigmoid activation function, which will
# predict the probability that the current player will win the game
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Compile the model with an Adam optimizer and binary cross-entropy loss
model.compile(optimizer='adam', loss='binary_crossentropy')


In [58]:
pos_data = positions_data[["position", "player", "winner"]]

In [59]:
pos_data.head()

Unnamed: 0,position,player,winner
0,"[[4.0, 2.0, 3.0, 5.0, 6.0, 3.0, 2.0, 4.0], [1....",1,0
1,"[[4.0, 2.0, 3.0, 5.0, 6.0, 3.0, 2.0, 4.0], [1....",0,0
2,"[[4.0, 2.0, 3.0, 5.0, 6.0, 3.0, 2.0, 4.0], [1....",1,0
3,"[[4.0, 2.0, 3.0, 5.0, 6.0, 3.0, 2.0, 4.0], [1....",0,0
4,"[[4.0, 2.0, 3.0, 5.0, 6.0, 3.0, 2.0, 4.0], [1....",1,0


In [60]:
random_seed = 7
Xtrain, Xtest, ytrain, ytest = train_test_split(pos_data.drop(columns=["winner"]), pos_data["winner"], test_size = 0.3, random_state=random_seed)

In [61]:
def choose_best_move(board, color):
    # finish this ->
    best_val = 0
    best_move = ""
    for move in list(board.legal_moves):
        new_board = board
        new_board.push_san(move)
        # Transform the position into a suitable input for the model
        x = rewrite_board(new_board)

        # Make a prediction using the model
        y_pred = model.predict(x)
        if y_pred >= best_val:
            best_val = y_pred
            best_move = move

    # Return the move with the highest predicted probability
    # return np.argmax(y_pred)
    return best_move

In [64]:
Xtrain.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 544586 entries, 633331 to 585903
Data columns (total 2 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   position  544586 non-null  object
 1   player    544586 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 12.5+ MB


In [62]:
model.fit(Xtrain, ytrain, epochs=10)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).