In [23]:
#Import packages
import sqlite3
import pandas as pd
import gym
import gym_chess
import chess
from gym_chess.alphazero import BoardEncoding
import numpy as np
import matplotlib.pyplot as plt



In [24]:
#Importing created modules
import os
import sys 

# Make the project root (two levels above the working directory) importable
# so the local `utils` package can be found.
cwd = os.getcwd()
parent_directory = os.path.abspath(os.path.join(cwd, '..', '..'))
# Guard the append so re-running this cell does not pile up duplicate entries.
if parent_directory not in sys.path:
    sys.path.append(parent_directory)

from utils.move_encoding import encode_move, decode_move, find_move
from utils.board_encoding import encode_board, fen_to_board


In [25]:
def find_move(fen1, fen2):
    """Return the legal move that turns position ``fen1`` into position ``fen2``.

    Note: this definition shadows the ``find_move`` imported from
    ``utils.move_encoding`` earlier in the notebook.

    Args:
        fen1: FEN string of the position before the move.
        fen2: FEN string of the position after the move.

    Returns:
        The first legal move of ``fen1`` whose resulting board compares equal
        to the board built from ``fen2``, or ``np.nan`` when no legal move
        produces that board.
    """
    board = chess.Board(fen1)
    target = chess.Board(fen2)

    # Snapshot the legal moves first so the board can be mutated while looping.
    for candidate in list(board.legal_moves):
        # Play the candidate on the board itself and undo it afterwards; this
        # avoids copying the whole board once per legal move.
        board.push(candidate)
        reached_target = board == target
        board.pop()
        if reached_target:
            return candidate

    # np.nan is the "no move found" sentinel the rest of the notebook expects.
    return np.nan


In [26]:
# Load up to 1,000,000 rows of the `evaluations` table into memory.
database = sqlite3.connect('../../data/test_data.db')
try:
    df = pd.read_sql_query("SELECT * FROM evaluations LIMIT 1000000", database)
finally:
    # The rows are now held by the DataFrame, so release the SQLite handle
    # instead of leaking it for the lifetime of the kernel.
    database.close()


In [27]:
# Turn every FEN string into a board object, then encode each board.
df['board'] = df['fen'].apply(fen_to_board)
df['encoded_board'] = df['board'].apply(encode_board)

# Sanity check: shape of the first encoding, followed by the column names.
print(df['encoded_board'].iloc[0].shape, df.columns, sep='\n')

(8, 8, 14)
Index(['id', 'fen', 'binary', 'eval', 'board', 'encoded_board'], dtype='object')


In [28]:
# For each row, recover the move that leads from its position to the position
# stored in the following row, then encode that move.
fens = df['fen'].tolist()
moves = []
encoded_moves = []

for current_fen, next_fen in zip(fens, fens[1:]):
    move = find_move(current_fen, next_fen)
    try:
        encoded = encode_move(move)
    except Exception:
        # No encodable move for this pair (e.g. find_move returned its
        # "not found" sentinel); mark the row so it can be dropped later.
        # Catching Exception instead of a bare except keeps Ctrl-C working.
        encoded = None
    moves.append(move)
    encoded_moves.append(encoded)

# Rows without a successor (the last row) keep None in both columns.
padding = [None] * (len(df) - len(moves))

# Assign each column once rather than writing one cell at a time with df.loc.
df['move'] = pd.Series(moves + padding, index=df.index, dtype=object)
df['encoded_move'] = pd.Series(encoded_moves + padding, index=df.index, dtype=object)


In [None]:
# Keep only the rows that have an encoded move, then renumber the rows from zero.
df = df.dropna(subset=['encoded_move']).reset_index(drop=True)

In [None]:
# Persist the cleaned frame so later notebooks can load it directly.
output_path = '../../data/cleaned_data/cleaned_data_basic_encoding_1M.pkl'
df.to_pickle(output_path)


In [19]:
# Round-trip check: decode every encoded move and show it next to the
# original move so the two can be compared by eye.
decoded_moves = [decode_move(code) for code in df['encoded_move']]
df['decoded_move'] = pd.Series(decoded_moves, index=df.index, dtype=object)

df[['move', 'encoded_move', 'decoded_move']]

Unnamed: 0,move,encoded_move,decoded_move
0,d7d5,3299,d7d5
1,c2c4,666,c2c4
2,e7e6,3372,e7e6
3,c4d5,1699,c4d5
4,e6d5,2851,e6d5
...,...,...,...
94908,g8h7,4023,g8h7
94909,c8f5,3749,c8f5
94910,g7g6,3502,g7g6
94911,f5f7,2421,f5f7
