# load

In [1]:
import pickle

In [2]:

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
BASE_PATH = '/content/drive/MyDrive/NLP/Data/'
games_data_path = BASE_PATH + 'FEN/games_data'
NUMER_OF_DATA_DIRS = 13

In [4]:
paths = [f'{games_data_path}{i+1}.p' for i in range(NUMER_OF_DATA_DIRS)]

In [5]:
data = []
for path in paths:
    with open(path, 'rb') as file:
        raw_data = pickle.load(file)
    data.extend(raw_data)

In [6]:
len(data)

350027

In [7]:
len(list(set(data)))

349015

In [8]:
data = list(set(data))

# board

In [None]:
col_names = 'abcdefgh'
row_names = '87654321'
pieces = {'r': 'black rook',
          'n': 'black knight',
          'b': 'black bishop',
          'q': 'black queen', 
          'k': 'black king',
          'p': 'black pawn',
          'R': 'white rook',
          'N': 'white knight',
          'B': 'white bishop',
          'Q': 'white queen', 
          'K': 'white king',
          'P': 'white pawn'}

In [None]:
def board_to_text(board):
  text = ""
  rows = board.split("/")
  for inx, row in enumerate(row_names):
    row_values = rows[inx]
    col_num = 0
    for value in row_values:
      if value in '12345678':
        for i in range(int(value)):
          text += col_names[col_num] + row + ": , "
          col_num += 1
      elif value == '-':
        print('STRANGE!')
        text += col_names[col_num] + row + ": " + pieces['p'] + ", "
        col_num += 1
      else:
        text +=  col_names[col_num] + row + ": " + pieces[value] + ", "
        col_num += 1
  return text[:-2]

# move

In [9]:
def map_move_piece(x):
    if x=="K":
        return "king"
    elif x=="Q":
        return "queen"
    elif x=="R":
        return "rook"
    elif x=="B":
        return "bishop"
    elif x=="N":
        return "knight"
    else:
        return "pawn "+x

In [10]:
def parse_one_move(move):
  parsed = ""

  # Nxg2+
  if move[-1]=="+":
    parsed = "check: "
    move = move[:-1]

  # Rd2#
  elif move[-1]=="#":
    parse = "checkmate: "
    move = move[:-1]

  # O-O
  if move == 'O-O':
    parse+= 'castling'
    return parse

  len_move = len(move)
  
  if "x" not in move: 
    # h6 
    if len_move == 2:
      parsed+= "pawn "+ move
    
    # Ng4
    elif len_move == 3:
      parsed+=  map_move_piece(move[0]) + " " + move[1:]
    
    # Nbc6
    elif len_move == 4:
      parsed+=  map_move_piece(move[0]) + " " + move[1] + " " + move[2:]
    
    # Ra5a6
    elif len_move == 5:
      parsed+=  map_move_piece(move[0]) + " " + move[1:3] + " " + move[3:]

  else: # "x" in move
    # Qxf6
    if len_move == 4:
      parsed+=  map_move_piece(move[0]) + " takes " + move[2:]

    # Ngxe7, Nfxh7
    elif len_move == 5: 
      parsed+=  map_move_piece(move[0]) + " " + move[1] + " takes " + move[3:]

    # Re6xe7
    elif len_move == 6:
      parsed+=  map_move_piece(move[0]) + " " + move[1:3] + " takes " + move[4:]

  if parsed == "":
    print("strangeMove: ", move)
    return "<strangeMove>"

  return parsed

# python-chess

https://python-chess.readthedocs.io/en/latest/

In [None]:
!pip install python-chess

In [12]:
import chess

In [16]:
board = chess.Board('rnb1kb1r/3ppppp/5n2/qNpP4/8/8/PP2PPPP/R1BQKBNR w KQkq - 0 0')

In [None]:
print(board)

r n b . k b . r
. . . p p p p p
. . . . . n . .
q N p P . . . .
. . . . . . . .
. . . . . . . .
P P . . P P P P
R . B Q K B N R


In [None]:
if board.is_game_over():
  print("GAME OVER") 
elif board.is_stalemate():
  print("STALE situation")

In [None]:
board.turn

True

castling

In [None]:
castling_rights = ""
if board.has_castling_rights(chess.WHITE):
  castling_rights += "white"
if board.has_castling_rights(chess.BLACK):
  castling_rights += "black" if castling_rights == "" else " and black"

In [None]:
castling_rights

'white and black'

legal moves

In [13]:
def get_legal_moves(board):
  legal_moves = set()
  for move in board.legal_moves:
    uci_move = board.uci(move)
    legal_moves.add(uci_move)
    
  return legal_moves

In [17]:
get_legal_moves(board)

{'b2b4', 'b5c3', 'c1d2', 'd1d2'}

In [18]:
legal_moves = set()
for move in board.legal_moves: 
  legal_moves.add(board.san(board.parse_uci(str(move))))

In [19]:
legal_moves

{'Bd2', 'Nc3', 'Qd2', 'b4'}

In [20]:
# Pseudo-legal moves might leave or put the king in check, but are otherwise valid.
pseudo_legal_moves = set()
for move in board.pseudo_legal_moves: 
  pseudo_legal_moves.add(board.san(chess.Move.from_uci(str(move))))

In [21]:
pseudo_legal_moves

{'Bd2',
 'Be3',
 'Bf4',
 'Bg5',
 'Bh6',
 'Kd2',
 'Na3',
 'Na7',
 'Nc3',
 'Nc7+',
 'Nd4',
 'Nd6+',
 'Nf3',
 'Nh3',
 'Qa4',
 'Qb3',
 'Qc2',
 'Qd2',
 'Qd3',
 'Qd4',
 'Rb1',
 'a3',
 'a4',
 'b3',
 'b4',
 'd6',
 'e3',
 'e4',
 'f3',
 'f4',
 'g3',
 'g4',
 'h3',
 'h4'}

attack

In [22]:
# ranks = rows referred to by numbers. Files = columns.
def square_index(file_rank_string): 
  file_to_value = {"a":0, "b":1, "c":2, "d":3, "e":4 , "f":5, "g":6, "h":7}
  file, rank = file_rank_string[0], file_rank_string[1]
  file = file_to_value[file]
  index = chess.square(file_index=file, rank_index=int(rank)-1)
  return index

In [23]:
def getAttackers(board, square):
    if board.turn:
        player = chess.BLACK  # board.turn == True if White's turn, so Black did the last move.
    else:
        player = chess.WHITE
    
    attackers = board.attackers(player, square_index(square))
    attacks = board.attacks(square_index(square))
    
    attackers_list = [chess.square_name(attacker) for attacker in attackers]
    attacks_list = [chess.square_name(attack) for attack in attacks]

    return attackers_list, attacks_list

In [24]:
attackers_list, attacks_list = getAttackers(board, 'a5')

In [25]:
square_with_pieces = [chess.square_name(key) for key in board.piece_map().keys()]

In [26]:
attacks_with_pieces = list(set(attacks_list).intersection(set(square_with_pieces)))

In [27]:
attacks_with_pieces

['a8', 'e1', 'a2', 'b5']

check

In [None]:
# board.checkers() 
# board.gives_check(legal_moves)

In [None]:
board.is_check()

True

In [None]:
board.is_checkmate()

False

# save new data

In [28]:
from tqdm import tqdm 

In [29]:
new_data = []
counter = 0

In [None]:
for trio in tqdm(data, position=0, leave=True):
  FEN = trio[0][:trio[0].find("|")]
  moves = trio[1]
  comment = trio[2].lower()

  # last move 
  last_move_UCI =  trio[0][trio[0].find("=")+1:]
  last_move_SAN = moves.split(',')[-1].replace(" ", "")
  last_move_desc = parse_one_move(last_move_SAN) if moves!='' else ''

  # board 
  if '3-4' in FEN:
    print("STRANGE!")  # one sample with bug..
    FEN = FEN.replace('3-4', '3p4')

  # board_desc = board_to_text(FEN[:FEN.find(' ')])

  board = chess.Board(FEN+' 0 0')

  # if board.turn:
  #   turn = "white"
  # else:
  #   turn = "black"

  # legal moves 
  legal_moves = set()
  for move in board.legal_moves: 
    legal_moves.add(board.san(board.parse_uci(str(move))))
  legal_moves = list(legal_moves)
  legal_moves = ", ".join(legal_moves)
  
  # attack
  attackers_list, attacks_list = getAttackers(board, last_move_UCI[-2:])
  
  square_with_pieces = [chess.square_name(key) for key in board.piece_map().keys()]
  attacks_list = list(set(attacks_list).intersection(set(square_with_pieces)))

  attackers_list, attacks_list = ", ".join(attackers_list), ", ".join(attacks_list)

  #sample = (FEN, board_desc, moves, last_move_desc, turn, legal_moves, attackers_list, attacks_list, comment)
  sample = (FEN, moves, last_move_desc, legal_moves, attackers_list, attacks_list, comment)

  new_data.append(sample)

  counter += 1
  if counter%30000==0:
    saved = str(int(counter/30000))
    pickle.dump(new_data, open(BASE_PATH+"NEW_fix/games_data"+saved+".p","wb"))
    new_data = []

In [31]:
len(new_data)

19015

In [32]:
# don't forget save leftovers ;)
saved = '12'
pickle.dump(new_data, open(BASE_PATH+"NEW_fix/games_data"+saved+".p","wb"))

check if new data looks ok:

In [33]:
new_games_data_path = BASE_PATH + 'NEW_fix/games_data'
new_NUMER_OF_DATA_DIRS = 12

In [34]:
new_paths = [f'{new_games_data_path}{i+1}.p' for i in range(new_NUMER_OF_DATA_DIRS)]

In [35]:
length = []
upgraded_data = []
for path in new_paths:
    with open(path, 'rb') as file:
        upgraded_raw_data = pickle.load(file)
    length.append(len(upgraded_raw_data))
    upgraded_data.extend(upgraded_raw_data)

In [36]:
length

[30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 19015]

In [37]:
len(upgraded_data)

349015

In [38]:
upgraded_data[66]

('8/8/p2b1pp1/1p1kp2p/P2P1P1P/2PK2P1/1B6/8 w - -',
 'e5',
 'pawn e5',
 'Kd2, Kc2, g4, Ba1, c4+, f5, a5, dxe5, axb5, Bc1, fxe5, Ke2, Ke3, Ba3',
 'd5, d6, f6',
 'f4, d4',
 'black continues with his plan.')

In [39]:
upgraded_data[654]

('r6r/ppknp2p/2p1bpp1/8/4P3/P3NP2/1PP1B1PP/2KR3R b - -',
 'Be2',
 'bishop e2',
 'Rad8, Bf5, Bg8, Raf8, c5, Kb6, Kd8, Kc8, b6, Bf7, Ne5, Bh3, Rab8, h6, Nf8, Bg4, h5, f5, a5, Bb3, Rae8, Nc5, a6, Rac8, Nb6, Ba2, g5, Rhd8, Bc4, Nb8, Rag8, b5, Rhe8, Rhc8, Bd5, Kb8, Rhg8, Rhb8, Rhf8',
 '',
 'd1, f3',
 'the only move, in all variations... complete development... the temporary closure of the d-file gives us (both) precious time to re-organize, and use the half-open d-file to our benefits... black has to start a new plan...')

In [40]:
upgraded_data[39000]

('2rr2k1/1b3ppp/p1q1p3/1pBpP3/1Pn2P1Q/P2B4/2P3PP/R4RK1 b - -',
 'Rfd8, Qh4',
 'queen h4',
 'Qxc5+, Rd6, Qb6, Ne3, g6, f6, Nb2, Qe8, Nd6, Ra8, Rd7, Re8, Rb8, Na5, h6, Qd7, h5, f5, a5, Nb6, g5, d4, Rc7, Nd2, Nxe5, Qd6, Nxa3, Kh8, Qc7, Rf8, Ba8',
 '',
 'd8, h7, f4, h2',
 'threatening qxh7 checkmate,  of course.')

In [41]:
upgraded_data[66666]

('r1bqkb1r/pp3ppp/2nppn2/2p3N1/Q1B1P3/2PP4/PP3PPP/RNB2RK1 b kq -',
 'Qa4',
 'queen a4',
 'd5, Qb6, Nh5, Ng8, g6, Ng4, b6, Kd7, Rb8, h6, Qe7, Qd7, Ke7, Qa5, h5, a5, a6, Rg8, Nxe4, b5, Bd7, Nd5, e5, Nd7, Be7, Qc7',
 '',
 'c6, a7, c4, a2',
 'para ano? e di dito sa kabila.')

In [42]:
upgraded_data[111111]

('8/p3R3/1p3p1k/2p3rp/7r/5K1P/8/8 w - -',
 'fxg5+, Rxg5+, Kf3, Rxh4',
 'rook takes h4',
 'Re4, Rc7, Rg7, Ke3, Kf2, Re2, Re1, Rxa7, Re6, Rd7, Re5, Re8, Rh7+, Re3, Ke2, Rb7, Rf7',
 '',
 'h5, h3',
 '44............. rxh4 - now i am the equivalent of a queen up but still no white flag!')