# load

In [None]:
import pickle

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive; every data
# path used in this notebook lives underneath that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Root folder on the mounted Drive that holds the datasets for this notebook.
BASE_PATH = '/content/drive/MyDrive/NLP/Data/'
# Common prefix of the input pickle files; a chunk number and ".p" get appended to it.
games_data_path = BASE_PATH + 'FEN/games_data'
# How many input pickle chunks to read (spelling kept: another cell refers to this name).
NUMER_OF_DATA_DIRS = 13

In [None]:
# Input chunks are numbered from 1: <games_data_path>1.p ... <games_data_path>13.p
paths = [f'{games_data_path}{i+1}.p' for i in range(NUMER_OF_DATA_DIRS)]

In [None]:
# Concatenate the contents of every input pickle into one flat list.
# NOTE: unpickling can execute arbitrary code - only load files you produced yourself.
data = []
for pickle_path in paths:
    with open(pickle_path, 'rb') as handle:
        data += pickle.load(handle)

In [None]:
len(data)

350027

In [None]:
# Number of distinct samples (duplicates counted once).
len(set(data))

349015

In [None]:
# Drop exact duplicate samples while keeping the order in which they were first
# seen. Going through set() also removes duplicates, but the iteration order of
# a set of strings/tuples-of-strings changes from one interpreter run to the
# next (hash randomisation), which would make the chunk files written further
# down differ on every run.
data = list(dict.fromkeys(data))

# board

In [None]:
# Square naming in the order a FEN piece-placement field is read: files a-h
# inside a rank, ranks from 8 down to 1.
col_names = 'abcdefgh'
row_names = '87654321'

# FEN piece letter -> readable name. Lowercase letters are the black pieces,
# the same letters in uppercase are the white pieces.
_PIECE_KINDS = (('r', 'rook'), ('n', 'knight'), ('b', 'bishop'),
                ('q', 'queen'), ('k', 'king'), ('p', 'pawn'))
pieces = {letter: 'black ' + kind for letter, kind in _PIECE_KINDS}
pieces.update({letter.upper(): 'white ' + kind for letter, kind in _PIECE_KINDS})

In [None]:
def board_to_text(board):
  """Spell out a FEN piece-placement field square by square.

  `board` is the first FEN field: eight ranks separated by "/", listed from
  rank 8 down to rank 1. The result names every square in that order as
  "<square>: <piece name>" (nothing after the colon for an empty square),
  joined with ", " - e.g. "a8: black rook, b8: , c8: black bishop, ...".

  A "-" inside a rank is reported on stdout and described as a black pawn.
  """
  ranks = board.split("/")
  entries = []
  for rank_pos, rank_label in enumerate(row_names):
    file_pos = 0
    for symbol in ranks[rank_pos]:
      if symbol in '12345678':
        # A digit is a run of that many empty squares.
        run_end = file_pos + int(symbol)
        while file_pos < run_end:
          entries.append(col_names[file_pos] + rank_label + ": ")
          file_pos += 1
        continue
      if symbol == '-':
        print('STRANGE!')
        symbol = 'p'
      entries.append(col_names[file_pos] + rank_label + ": " + pieces[symbol])
      file_pos += 1
  return ", ".join(entries)

# move

In [None]:
def map_move_piece(x):
    """Name the piece denoted by the first character of a SAN move.

    "K", "Q", "R", "B" and "N" map to the piece name. Any other character is
    returned as "pawn " followed by that character (for a pawn move the
    character is the file the pawn starts on).
    """
    named_pieces = {
        "K": "king",
        "Q": "queen",
        "R": "rook",
        "B": "bishop",
        "N": "knight",
    }
    if x in named_pieces:
        return named_pieces[x]
    return "pawn " + x

In [None]:
def parse_one_move(move):
  """Describe a single SAN move in plain English.

  Examples:
    "h6"    -> "pawn h6"
    "Ng4"   -> "knight g4"
    "Nbc6"  -> "knight b c6"
    "Qxf6"  -> "queen takes f6"
    "Nxg2+" -> "check: knight takes g2"
    "Rd2#"  -> "checkmate: rook d2"
    "O-O"   -> "castling"
    "O-O-O" -> "queenside castling"

  Args:
    move: one move in standard algebraic notation, without surrounding spaces.

  Returns:
    The description, prefixed with "check: " or "checkmate: " when the move
    ends in "+" or "#". A move whose shape is not recognised (including the
    empty string, or a bare "+"/"#") is reported on stdout and yields
    "<strangeMove>".

  Moves are classified purely by their length and by the presence of "x", so
  promotions such as "e8=Q" are not recognised as promotions.
  """
  prefix = ""

  # Nxg2+  (endswith is used instead of move[-1] so an empty string cannot crash)
  if move.endswith("+"):
    prefix = "check: "
    move = move[:-1]

  # Rd2#
  elif move.endswith("#"):
    prefix = "checkmate: "
    move = move[:-1]

  # O-O
  if move == 'O-O':
    return prefix + 'castling'

  # O-O-O
  if move == 'O-O-O':
    return prefix + 'queenside castling'

  len_move = len(move)
  parsed = ""

  if "x" not in move:
    # h6
    if len_move == 2:
      parsed = "pawn " + move

    # Ng4
    elif len_move == 3:
      parsed = map_move_piece(move[0]) + " " + move[1:]

    # Nbc6
    elif len_move == 4:
      parsed = map_move_piece(move[0]) + " " + move[1] + " " + move[2:]

    # Ra5a6
    elif len_move == 5:
      parsed = map_move_piece(move[0]) + " " + move[1:3] + " " + move[3:]

  else: # "x" in move
    # Qxf6
    if len_move == 4:
      parsed = map_move_piece(move[0]) + " takes " + move[2:]

    # Ngxe7, Nfxh7
    elif len_move == 5:
      parsed = map_move_piece(move[0]) + " " + move[1] + " takes " + move[3:]

    # Re6xe7
    elif len_move == 6:
      parsed = map_move_piece(move[0]) + " " + move[1:3] + " takes " + move[4:]

  # Nothing matched: flag it instead of returning a bare prefix or an empty string.
  if parsed == "":
    print("strangeMove: ", move)
    return "<strangeMove>"

  return prefix + parsed

# python-chess

https://python-chess.readthedocs.io/en/latest/

In [None]:
!pip install python-chess



In [None]:
import chess

In [None]:
# Example position (full six-field FEN, White to move) used by the exploratory cells that follow.
board = chess.Board('rnb1kb1r/3ppppp/5n2/qNpP4/8/8/PP2PPPP/R1BQKBNR w KQkq - 0 0')

In [None]:
print(board)

r n b . k b . r
. . . p p p p p
. . . . . n . .
q N p P . . . .
. . . . . . . .
. . . . . . . .
P P . . P P P P
R . B Q K B N R


In [None]:
# Report the state of the game. Stalemate is itself one of the conditions that
# make is_game_over() true, so it has to be tested first - checked second, the
# stalemate branch could never run.
if board.is_stalemate():
  print("STALE situation")
elif board.is_game_over():
  print("GAME OVER") 

In [None]:
board.turn

True

castling

In [None]:
# Name the side(s) that still have castling rights:
# "", "white", "black" or "white and black".
sides_with_rights = [
    side_name
    for side_color, side_name in ((chess.WHITE, "white"), (chess.BLACK, "black"))
    if board.has_castling_rights(side_color)
]
castling_rights = " and ".join(sides_with_rights)

In [None]:
castling_rights

'white and black'

legal moves

In [None]:
def get_legal_moves(board):
  """Return the legal moves of `board` as a set of UCI strings (e.g. 'b2b4')."""
  return {board.uci(candidate) for candidate in board.legal_moves}

In [None]:
get_legal_moves(board)

{'b2b4', 'b5c3', 'c1d2', 'd1d2'}

In [None]:
# Every legal move of the current position, written in SAN (e.g. 'Nc3', 'b4').
legal_moves = {
    board.san(board.parse_uci(str(candidate)))
    for candidate in board.legal_moves
}

In [None]:
legal_moves

{'Bd2', 'Nc3', 'Qd2', 'b4'}

In [None]:
# Pseudo-legal moves might leave or put the king in check, but are otherwise valid.
# Collected here in SAN so they can be compared with the legal moves above.
pseudo_legal_moves = {
    board.san(chess.Move.from_uci(str(candidate)))
    for candidate in board.pseudo_legal_moves
}

In [None]:
pseudo_legal_moves

{'Bd2',
 'Be3',
 'Bf4',
 'Bg5',
 'Bh6',
 'Kd2',
 'Na3',
 'Na7',
 'Nc3',
 'Nc7+',
 'Nd4',
 'Nd6+',
 'Nf3',
 'Nh3',
 'Qa4',
 'Qb3',
 'Qc2',
 'Qd2',
 'Qd3',
 'Qd4',
 'Rb1',
 'a3',
 'a4',
 'b3',
 'b4',
 'd6',
 'e3',
 'e4',
 'f3',
 'f4',
 'g3',
 'g4',
 'h3',
 'h4'}

attack

In [None]:
def square_index(file_rank_string):
  """Convert a square name such as "a5" into python-chess' integer square index.

  The first character is the file (column, "a"-"h") and the second character
  is the rank (row, written as a number starting at 1). Characters after the
  second one are ignored.
  """
  file_letter = file_rank_string[0]
  rank_digit = file_rank_string[1]
  file_number = {letter: position for position, letter in enumerate("abcdefgh")}[file_letter]
  # chess.square() takes 0-based indices, while ranks are written starting at 1.
  return chess.square(file_index=file_number, rank_index=int(rank_digit) - 1)

In [None]:
def getAttackers(board, square):
    """Collect attack information around one square, given by name (e.g. 'a5').

    Returns a pair of lists of square names:
      * the squares holding pieces of the side that is NOT to move which
        attack `square`;
      * the squares attacked from `square`.
    """
    # board.turn is True when it is White's turn, which means Black made the last move.
    last_mover = chess.BLACK if board.turn else chess.WHITE
    target = square_index(square)

    attackers_list = [chess.square_name(origin) for origin in board.attackers(last_mover, target)]
    attacks_list = [chess.square_name(reached) for reached in board.attacks(target)]
    return attackers_list, attacks_list

In [None]:
getAttackers(board, 'a5')

(['a8'],
 ['e1',
  'a2',
  'd2',
  'a3',
  'c3',
  'a4',
  'b4',
  'b5',
  'a6',
  'b6',
  'a7',
  'c7',
  'a8',
  'd8'])

check

In [None]:
# board.checkers() 
# board.gives_check(legal_moves)

In [None]:
board.is_check()

True

In [None]:
board.is_checkmate()

False

# save new data

In [None]:
from tqdm import tqdm 

In [None]:
# Accumulators for the enrichment loop: new_data is the chunk currently being
# filled, counter is the number of samples processed so far.
new_data = []
counter = 0

In [None]:
# Build the enriched dataset. For every (position, moves, comment) sample this
# adds a textual board, a description of the last move, the side to move, the
# legal moves and attack information around the last move's target square.
# Results are written to Drive in chunks of 30000 samples; samples left over
# after the last full chunk remain in new_data when the loop ends.
for trio in tqdm(data, position=0, leave=True):
  # trio[0]: everything before the first "|" is the FEN.
  FEN = trio[0][:trio[0].find("|")]
  moves = trio[1]
  comment = trio[2].lower()

  # last move: the UCI form is whatever follows the first "=" in trio[0],
  # the SAN form is the last entry of the comma-separated move list.
  last_move_UCI =  trio[0][trio[0].find("=")+1:]
  last_move_SAN = moves.split(',')[-1].replace(" ", "")
  last_move_desc = parse_one_move(last_move_SAN) if moves!='' else ''

  # board 
  if '3-4' in FEN:
    print("STRANGE!")  # one sample with bug..
    FEN = FEN.replace('3-4', '3p4')

  board_desc = board_to_text(FEN[:FEN.find(' ')])

  # ' 0 0' supplies the two trailing move-counter fields before parsing.
  board = chess.Board(FEN+' 0 0')

  if board.turn:
    turn = "white"
  else:
    turn = "black"

  # legal moves 
  legal_moves = set()
  for move in board.legal_moves: 
    legal_moves.add(board.san(board.parse_uci(str(move))))
  legal_moves = list(legal_moves)

  # attack
  # NOTE(review): the last two characters are taken as the destination square;
  # a UCI promotion such as "e7e8q" would yield "8q" - confirm none occur in the data.
  attackers_list, attacks_list = getAttackers(board, last_move_UCI[-2:])

  sample = (FEN, board_desc, moves, last_move_desc, turn, legal_moves, attackers_list, attacks_list, comment)

  new_data.append(sample)

  counter += 1
  if counter%30000==0:
    saved = str(counter // 30000)
    # Write through a context manager so the chunk is flushed and the handle closed right away.
    with open(BASE_PATH+"upgraded/games_data"+saved+".p","wb") as chunk_file:
      pickle.dump(new_data, chunk_file)
    new_data = []

 24%|██▍       | 83764/349015 [02:34<08:13, 537.38it/s]

STRANGE!


100%|██████████| 349015/349015 [10:50<00:00, 536.90it/s]


In [None]:
len(new_data)

19015

In [None]:
# don't forget to save the leftovers ;)
# The loop only writes complete chunks of 30000 samples, so whatever is still
# in new_data goes into the next chunk number (12 for 349015 samples).
saved = str(counter // 30000 + 1)
# Context manager: the file is flushed and closed as soon as the dump is done.
with open(BASE_PATH+"upgraded/games_data"+saved+".p","wb") as chunk_file:
  pickle.dump(new_data, chunk_file)

check if new data looks ok:

In [None]:
# Prefix of the enriched chunk files, and how many of them to read back.
new_games_data_path = BASE_PATH + 'upgraded/games_data'
new_NUMER_OF_DATA_DIRS = 12

In [None]:
# Enriched chunks are numbered from 1, like the input chunks.
new_paths = [f'{new_games_data_path}{i+1}.p' for i in range(new_NUMER_OF_DATA_DIRS)]

In [None]:
# Read every enriched chunk back, keeping each chunk's size for a sanity check.
# NOTE: unpickling can execute arbitrary code - only load files you produced yourself.
length = []
upgraded_data = []
for chunk_path in new_paths:
    with open(chunk_path, 'rb') as handle:
        chunk = pickle.load(handle)
    length += [len(chunk)]
    upgraded_data += chunk

In [None]:
length

[30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 19015]

In [None]:
len(upgraded_data)

349015

In [None]:
upgraded_data[0]

('3r4/pp4pp/8/2nBP3/P1P5/2k1K3/6PP/5R2 w - -',
 'a8: , b8: , c8: , d8: black rook, e8: , f8: , g8: , h8: , a7: black pawn, b7: black pawn, c7: , d7: , e7: , f7: , g7: black pawn, h7: black pawn, a6: , b6: , c6: , d6: , e6: , f6: , g6: , h6: , a5: , b5: , c5: black knight, d5: white bishop, e5: white pawn, f5: , g5: , h5: , a4: white pawn, b4: , c4: white pawn, d4: , e4: , f4: , g4: , h4: , a3: , b3: , c3: black king, d3: , e3: white king, f3: , g3: , h3: , a2: , b2: , c2: , d2: , e2: , f2: , g2: white pawn, h2: white pawn, a1: , b1: , c1: , d1: , e1: , f1: white rook, g1: , h1: ',
 'Kc3',
 'king c3',
 'white',
 ['Rb1',
  'Kf3',
  'Ke2',
  'Bxb7',
  'Rh1',
  'Rg1',
  'Bc6',
  'Rc1+',
  'g4',
  'Bf7',
  'Rf4',
  'Rf6',
  'Bg8',
  'g3',
  'Rf7',
  'Bf3',
  'Rf3',
  'Be6',
  'Be4',
  'a5',
  'h3',
  'Rf8',
  'Ra1',
  'Rf5',
  'Kf2',
  'Re1',
  'h4',
  'Rd1',
  'Rf2',
  'Kf4',
  'e6'],
 [],
 ['b2', 'c2', 'd2', 'b3', 'd3', 'b4', 'c4', 'd4'],
 'another beautiful variation: if 37. ... re8, att

In [None]:
upgraded_data[9999]

('r1bqk1nr/pp1p1ppp/2n1p3/2b5/3NP3/2N1B3/PPP2PPP/R2QKB1R b KQkq -',
 'a8: black rook, b8: , c8: black bishop, d8: black queen, e8: black king, f8: , g8: black knight, h8: black rook, a7: black pawn, b7: black pawn, c7: , d7: black pawn, e7: , f7: black pawn, g7: black pawn, h7: black pawn, a6: , b6: , c6: black knight, d6: , e6: black pawn, f6: , g6: , h6: , a5: , b5: , c5: black bishop, d5: , e5: , f5: , g5: , h5: , a4: , b4: , c4: , d4: white knight, e4: white pawn, f4: , g4: , h4: , a3: , b3: , c3: white knight, d3: , e3: white bishop, f3: , g3: , h3: , a2: white pawn, b2: white pawn, c2: white pawn, d2: , e2: , f2: white pawn, g2: white pawn, h2: white pawn, a1: white rook, b1: , c1: , d1: white queen, e1: white king, f1: white bishop, g1: , h1: white rook',
 'Be3',
 'bishop e3',
 'black',
 ['Nb8',
  'Nh6',
  'h6',
  'Nf6',
  'Qf6',
  'Ne5',
  'Ba3',
  'Nce7',
  'h5',
  'Qc7',
  'Be7',
  'Qg5',
  'Nb4',
  'Qb6',
  'Qh4',
  'b6',
  'Na5',
  'd5',
  'Ke7',
  'a5',
  'Rb8',
  'Nxd4',


In [None]:
upgraded_data[-1]

('r1b1r1k1/1pqn1pbp/p2p1np1/2pP4/2N1P3/2N2P2/PP2B1PP/R1BQ1RK1 w - -',
 'a8: black rook, b8: , c8: black bishop, d8: , e8: black rook, f8: , g8: black king, h8: , a7: , b7: black pawn, c7: black queen, d7: black knight, e7: , f7: black pawn, g7: black bishop, h7: black pawn, a6: black pawn, b6: , c6: , d6: black pawn, e6: , f6: black knight, g6: black pawn, h6: , a5: , b5: , c5: black pawn, d5: white pawn, e5: , f5: , g5: , h5: , a4: , b4: , c4: white knight, d4: , e4: white pawn, f4: , g4: , h4: , a3: , b3: , c3: white knight, d3: , e3: , f3: white pawn, g3: , h3: , a2: white pawn, b2: white pawn, c2: , d2: , e2: white bishop, f2: , g2: white pawn, h2: white pawn, a1: white rook, b1: , c1: white bishop, d1: white queen, e1: , f1: white rook, g1: white king, h1: ',
 'Qc7',
 'queen c7',
 'white',
 ['Bd2',
  'Nb1',
  'Rb1',
  'Qd3',
  'Qe1',
  'Ne3',
  'Ne5',
  'g4',
  'Qc2',
  'Qa4',
  'g3',
  'Qd2',
  'Kh1',
  'b3',
  'a4',
  'Nb6',
  'Na5',
  'Be3',
  'f4',
  'Na3',
  'h3',
  'Na4',
  

In [None]:
upgraded_data[66666]

('r1bqnrk1/pppnp1bp/3p1ppB/8/2PPP3/2NB1N2/PP1Q1PPP/R3K2R b KQ -',
 'a8: black rook, b8: , c8: black bishop, d8: black queen, e8: black knight, f8: black rook, g8: black king, h8: , a7: black pawn, b7: black pawn, c7: black pawn, d7: black knight, e7: black pawn, f7: , g7: black bishop, h7: black pawn, a6: , b6: , c6: , d6: black pawn, e6: , f6: black pawn, g6: black pawn, h6: white bishop, a5: , b5: , c5: , d5: , e5: , f5: , g5: , h5: , a4: , b4: , c4: white pawn, d4: white pawn, e4: white pawn, f4: , g4: , h4: , a3: , b3: , c3: white knight, d3: white bishop, e3: , f3: white knight, g3: , h3: , a2: white pawn, b2: white pawn, c2: , d2: white queen, e2: , f2: white pawn, g2: white pawn, h2: white pawn, a1: white rook, b1: , c1: , d1: , e1: white king, f1: , g1: , h1: white rook',
 'Bh6, Nd7, Bd3',
 'bishop d3',
 'black',
 ['Bh8',
  'c6',
  'Nb8',
  'Kf7',
  'Ne5',
  'Rf7',
  'Nb6',
  'b6',
  'd5',
  'Kh8',
  'a5',
  'Rb8',
  'c5',
  'Bxh6',
  'Nc5',
  'e5',
  'b5',
  'g5',
  'f5',
  'a