# load

In [None]:
import pickle

In [None]:
# Mount Google Drive at /content/drive; the data paths used in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Root folder of the project data on the mounted Drive.
BASE_PATH = '/content/drive/MyDrive/NLP/Data/'
# Common prefix of the input shard files; a shard number and the '.p' suffix are appended to it.
games_data_path = BASE_PATH + 'FEN/games_data'
# Number of input shard files to load.
NUMER_OF_DATA_DIRS = 13

In [None]:
# One path per input shard; shard numbers start at 1 (hence i+1).
paths = [f'{games_data_path}{i+1}.p' for i in range(NUMER_OF_DATA_DIRS)]

In [None]:
# Read every shard and concatenate the contents into one flat list.
# NOTE: pickle.load can execute code embedded in the file, so only load shards
# that come from a trusted source.
data = []
for shard_path in paths:
    with open(shard_path, 'rb') as shard_file:
        shard_items = pickle.load(shard_file)
    data.extend(shard_items)

In [None]:
len(data)

350027

In [None]:
len(list(set(data)))

349015

In [None]:
# Drop exact duplicate samples. dict.fromkeys keeps the first occurrence of each
# sample in its original position, so the resulting order is the same on every
# run. list(set(...)) ordered the samples by hash instead, which can change
# between kernel sessions and made the shards written later non-reproducible.
data = list(dict.fromkeys(data))

# board

In [None]:
# File letters in left-to-right order, and rank digits in the order the rows
# are read by board_to_text (rank 8 first).
col_names = 'abcdefgh'
row_names = '87654321'

# FEN piece letter -> readable name. Lower-case letters are black pieces and
# upper-case letters are white pieces.
_piece_kinds = {
    'r': 'rook',
    'n': 'knight',
    'b': 'bishop',
    'q': 'queen',
    'k': 'king',
    'p': 'pawn',
}
pieces = {
    **{letter: 'black ' + kind for letter, kind in _piece_kinds.items()},
    **{letter.upper(): 'white ' + kind for letter, kind in _piece_kinds.items()},
}

In [None]:
def board_to_text(board):
  """Spell out a FEN piece-placement string square by square.

  Args:
    board: the piece-placement field of a FEN string, i.e. eight
      '/'-separated rows.

  Returns:
    A string of 'square: piece' entries joined by ', ', rows taken in the
    order of row_names and columns in the order of col_names. Empty squares
    have nothing after the colon.
  """
  rank_strings = board.split("/")
  entries = []
  for rank_pos in range(len(row_names)):
    rank_label = row_names[rank_pos]
    file_pos = 0
    for symbol in rank_strings[rank_pos]:
      if symbol in '12345678':
        # A digit stands for that many consecutive empty squares.
        for _ in range(int(symbol)):
          entries.append(col_names[file_pos] + rank_label + ": ")
          file_pos += 1
        continue
      if symbol == '-':
        # Malformed input: report it and treat the square as a black pawn.
        print('STRANGE!')
      square_label = col_names[file_pos] + rank_label
      occupant = pieces['p'] if symbol == '-' else pieces[symbol]
      entries.append(square_label + ": " + occupant)
      file_pos += 1
  return ", ".join(entries)

# move

In [None]:
def map_move_piece(x):
    """Return the piece name for the leading character of a SAN move.

    'K', 'Q', 'R', 'B' and 'N' map to the piece names; any other value is
    taken to be a pawn's file letter and yields 'pawn ' followed by x.
    """
    named_pieces = {
        "K": "king",
        "Q": "queen",
        "R": "rook",
        "B": "bishop",
        "N": "knight",
    }
    if x in named_pieces:
        return named_pieces[x]
    return "pawn " + x

In [None]:
def parse_one_move(move):
  """Describe a single SAN move in plain English.

  Examples:
    'h6'     -> 'pawn h6'
    'Nbc6'   -> 'knight b c6'
    'Qxf6'   -> 'queen takes f6'
    'Nxg2+'  -> 'check: knight takes g2'
    'Rd2#'   -> 'checkmate: rook d2'
    'O-O'    -> 'castling'
    'O-O-O'  -> 'queenside castling'
    'e8=Q'   -> 'pawn e8 promotes to queen'

  Args:
    move: one move in standard algebraic notation, without spaces.

  Returns:
    The description, prefixed with 'check: ' or 'checkmate: ' when the move
    ends in '+' or '#'. A move that matches no known pattern (including the
    empty string) is printed and reported as '<strangeMove>'.
  """
  raw_move = move
  prefix = ""

  # Nxg2+  (endswith also copes with an empty string, unlike move[-1])
  if move.endswith("+"):
    prefix = "check: "
    move = move[:-1]

  # Rd2#
  elif move.endswith("#"):
    prefix = "checkmate: "
    move = move[:-1]

  # O-O, O-O-O
  if move == 'O-O':
    return prefix + 'castling'
  if move == 'O-O-O':
    return prefix + 'queenside castling'

  # e8=Q, exd8=N: split the promotion off so the rest parses as a pawn move
  promotion = ""
  if "=" in move:
    move, _, promoted = move.partition("=")
    if promoted not in ("Q", "R", "B", "N"):
      print("strangeMove: ", raw_move)
      return "<strangeMove>"
    promotion = " promotes to " + map_move_piece(promoted)

  len_move = len(move)
  # Kept separate from the prefix so that an unrecognised move is reported as
  # strange even when it carried a check/checkmate marker.
  description = ""

  if "x" not in move:
    # h6
    if len_move == 2:
      description = "pawn " + move

    # Ng4
    elif len_move == 3:
      description = map_move_piece(move[0]) + " " + move[1:]

    # Nbc6
    elif len_move == 4:
      description = map_move_piece(move[0]) + " " + move[1] + " " + move[2:]

    # Ra5a6
    elif len_move == 5:
      description = map_move_piece(move[0]) + " " + move[1:3] + " " + move[3:]

  else: # "x" in move
    # Qxf6, exd5
    if len_move == 4:
      description = map_move_piece(move[0]) + " takes " + move[2:]

    # Ngxe7, Nfxh7
    elif len_move == 5:
      description = map_move_piece(move[0]) + " " + move[1] + " takes " + move[3:]

    # Re6xe7
    elif len_move == 6:
      description = map_move_piece(move[0]) + " " + move[1:3] + " takes " + move[4:]

  if description == "":
    print("strangeMove: ", raw_move)
    return "<strangeMove>"

  return prefix + description + promotion

# python-chess

https://python-chess.readthedocs.io/en/latest/

In [None]:
!pip install python-chess



In [None]:
import chess

In [None]:
# Example position for the exploratory cells below. The trailing ' 0 0' fills in the
# last two FEN fields (halfmove clock and fullmove number).
board = chess.Board('rnb1kb1r/3ppppp/5n2/qNpP4/8/8/PP2PPPP/R1BQKBNR w KQkq - 0 0')

In [None]:
print(board)

r n b . k b . r
. . . p p p p p
. . . . . n . .
q N p P . . . .
. . . . . . . .
. . . . . . . .
P P . . P P P P
R . B Q K B N R


In [None]:
# Stalemate is one of the outcomes is_game_over() reports, so it has to be
# tested first; with the generic check first the stalemate branch never ran.
if board.is_stalemate():
  print("STALE situation")
elif board.is_game_over():
  print("GAME OVER")

In [None]:
board.turn

True

castling

In [None]:
# Name the colours that still have castling rights, joined with ' and '
# (empty string when neither side has any).
_sides_with_rights = [
    label
    for color, label in ((chess.WHITE, "white"), (chess.BLACK, "black"))
    if board.has_castling_rights(color)
]
castling_rights = " and ".join(_sides_with_rights)

In [None]:
castling_rights

'white and black'

legal moves

In [None]:
def get_legal_moves(board):
  """Return the set of UCI strings for every move in board.legal_moves."""
  return {board.uci(candidate) for candidate in board.legal_moves}

In [None]:
get_legal_moves(board)

{'b2b4', 'b5c3', 'c1d2', 'd1d2'}

In [None]:
# SAN text of every legal move in the current position.
legal_moves = {
    board.san(board.parse_uci(str(candidate)))
    for candidate in board.legal_moves
}

In [None]:
legal_moves

{'Bd2', 'Nc3', 'Qd2', 'b4'}

In [None]:
# Pseudo-legal moves might leave or put the king in check, but are otherwise valid.
# Collect their SAN text for comparison with the legal moves.
pseudo_legal_moves = {
    board.san(chess.Move.from_uci(str(candidate)))
    for candidate in board.pseudo_legal_moves
}

In [None]:
pseudo_legal_moves

{'Bd2',
 'Be3',
 'Bf4',
 'Bg5',
 'Bh6',
 'Kd2',
 'Na3',
 'Na7',
 'Nc3',
 'Nc7+',
 'Nd4',
 'Nd6+',
 'Nf3',
 'Nh3',
 'Qa4',
 'Qb3',
 'Qc2',
 'Qd2',
 'Qd3',
 'Qd4',
 'Rb1',
 'a3',
 'a4',
 'b3',
 'b4',
 'd6',
 'e3',
 'e4',
 'f3',
 'f4',
 'g3',
 'g4',
 'h3',
 'h4'}

attack

In [None]:
def square_index(file_rank_string): 
  """Convert a square name such as 'a5' into the value of chess.square().

  Ranks are the rows (referred to by numbers) and files are the columns
  (referred to by letters). The first character is the file letter 'a'-'h'
  and the second the rank digit; both are passed on zero-based.
  """
  file_letter, rank_digit = file_rank_string[0], file_rank_string[1]
  file_number = {letter: position for position, letter in enumerate("abcdefgh")}[file_letter]
  return chess.square(file_index=file_number, rank_index=int(rank_digit) - 1)

In [None]:
def getAttackers(board, square):
    """Collect attack information for one square of the board.

    Args:
        board: a chess.Board.
        square: square name such as 'a5'.

    Returns:
        (attackers_list, attacks_list): square names from
        board.attackers(...) for the side that is NOT to move, and square
        names from board.attacks(...) for the given square.
    """
    # board.turn is True on White's turn, so the side that made the last move
    # is the opposite colour.
    # NOTE(review): this queries the mover's own pieces bearing on the square,
    # not the opponent's - confirm that is the intended meaning of "attackers".
    last_mover = chess.BLACK if board.turn else chess.WHITE
    target = square_index(square)

    attackers_list = [chess.square_name(origin) for origin in board.attackers(last_mover, target)]
    attacks_list = [chess.square_name(hit) for hit in board.attacks(target)]
    return attackers_list, attacks_list

In [None]:
getAttackers(board, 'a5')

check

In [None]:
# board.checkers() 
# board.gives_check(legal_moves)

In [None]:
board.is_check()

True

In [None]:
board.is_checkmate()

False

# save new data

In [None]:
from tqdm import tqdm 

In [None]:
# Samples collected for the shard currently being built.
new_data = []
# Number of samples processed so far; used to decide when a shard is written.
counter = 0

In [None]:
# Re-initialise the accumulators here so that re-running this cell starts from
# a clean state instead of continuing from whatever an earlier run left behind.
SHARD_SIZE = 30000  # samples per output pickle
new_data = []
counter = 0

for trio in tqdm(data, position=0, leave=True):
  # trio[0] is presumably laid out as "<FEN>|...=<last move in UCI>" (that is
  # how the slices below read it); trio[1] is the comma-separated move string
  # and trio[2] the free-text comment.
  FEN = trio[0][:trio[0].find("|")]
  moves = trio[1]
  comment = trio[2].lower()

  # last move 
  last_move_UCI =  trio[0][trio[0].find("=")+1:]
  last_move_SAN = moves.split(',')[-1].replace(" ", "")
  last_move_desc = parse_one_move(last_move_SAN) if moves!='' else ''

  # board 
  if '3-4' in FEN:
    print("STRANGE!")  # one sample with bug..
    FEN = FEN.replace('3-4', '3p4')

  # board_desc = board_to_text(FEN[:FEN.find(' ')])

  board = chess.Board(FEN+' 0 0')

  # if board.turn:
  #   turn = "white"
  # else:
  #   turn = "black"

  # legal moves, in the order python-chess generates them; a set would order
  # them by hash and make the stored text differ from run to run
  legal_moves = ", ".join(dict.fromkeys(board.san(move) for move in board.legal_moves))

  # attack
  # A UCI promotion such as 'e7e8q' carries a trailing piece letter; drop it so
  # the last two characters are always the destination square.
  move_squares = last_move_UCI[:-1] if last_move_UCI[-1:].isalpha() else last_move_UCI
  attackers_list, attacks_list = getAttackers(board, move_squares[-2:])
  attackers_list, attacks_list = ", ".join(attackers_list), ", ".join(attacks_list)

  #sample = (FEN, board_desc, moves, last_move_desc, turn, legal_moves, attackers_list, attacks_list, comment)
  sample = (FEN, moves, last_move_desc, legal_moves, attackers_list, attacks_list, comment)

  new_data.append(sample)

  counter += 1
  if counter % SHARD_SIZE == 0:
    saved = str(counter // SHARD_SIZE)
    # The with-block closes (and therefore flushes) the shard file right away.
    with open(BASE_PATH+"NEW/games_data"+saved+".p", "wb") as shard_file:
      pickle.dump(new_data, shard_file)
    new_data = []

 16%|█▌        | 56640/349015 [01:40<08:52, 548.73it/s]

STRANGE!


100%|██████████| 349015/349015 [10:23<00:00, 559.49it/s]


In [None]:
len(new_data)

19015

In [None]:
# don't forget to save leftovers ;)
# The shard number follows on from the ones written in the loop (one file per
# 30000 samples). Nothing is written when no samples are left over.
if new_data:
  saved = str(counter // 30000 + 1)
  # The with-block closes (and therefore flushes) the shard file right away.
  with open(BASE_PATH+"NEW/games_data"+saved+".p", "wb") as shard_file:
    pickle.dump(new_data, shard_file)

Check that the new data looks OK:

In [None]:
# Common prefix of the converted shard files; a shard number and the '.p' suffix are appended to it.
new_games_data_path = BASE_PATH + 'NEW/games_data'
# Number of converted shard files to read back.
new_NUMER_OF_DATA_DIRS = 12

In [None]:
# One path per converted shard; shard numbers start at 1 (hence i+1).
new_paths = [f'{new_games_data_path}{i+1}.p' for i in range(new_NUMER_OF_DATA_DIRS)]

In [None]:
# Read the converted shards back, recording the size of each one and
# concatenating the contents into one flat list.
# NOTE: pickle.load can execute code embedded in the file, so only load shards
# that come from a trusted source.
length = []
upgraded_data = []
for shard_path in new_paths:
    with open(shard_path, 'rb') as shard_file:
        shard_items = pickle.load(shard_file)
    length.append(len(shard_items))
    upgraded_data.extend(shard_items)

In [None]:
length

[30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 19015]

In [None]:
len(upgraded_data)

349015

In [None]:
upgraded_data[39000]

('6k1/p1R3p1/bp1N4/3rB3/8/4R3/PPK2PP1/3r4 b - -',
 'Be5',
 'bishop e5',
 'Rh1, R5d3, Rxe5, Rg1, Rc5+, Bb7, Kh8, Re1, R1d4, Kh7, Rb1, Rxd6, R1d2+, Kf8, Bf1, Bb5, R5d4, Bc8, Ra5, g5, Ra1, Rb5, R1d3, Bc4, Rc1+, g6, b5, Rf1, Be2, R5d2+, Bd3+',
 'e3',
 'b2, h2, c3, g3, d4, f4, d6, f6, g7',
 '"forces a trade or so i felt". i agree, i would have played this. fritz prefers 34. rxa7 but it is getting academic white is a clear knight and pawn up and black would probably resign, if it wasn\'t for the fact that it is was a very close match.')

In [None]:
upgraded_data[111111]

('2k5/5rp1/2p5/4n3/Np2P1P1/3P2K1/P2R4/8 w - -',
 'Kg3, Nxe5',
 'knight takes e5',
 'Rd1, Rf2, Kh4, Nb6+, Re2, Rb2, Kg2, Nb2, Rc2, Kh3, Rh2, g5, Nc5, Kh2, a3, Rg2, d4, Nc3',
 '',
 'd3, f3, c4, g4, c6, g6, d7, f7',
 '1 point down??? uh, it was a long time ago, when i was in such a good! position...')