# load

In [1]:
import pickle

In [2]:

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
BASE_PATH = '/content/drive/MyDrive/NLP/Data/'
games_data_path = BASE_PATH + 'FEN/games_data'
NUMER_OF_DATA_DIRS = 13

In [4]:
paths = [f'{games_data_path}{i+1}.p' for i in range(NUMER_OF_DATA_DIRS)]

In [6]:
data = []
for path in paths:
    with open(path, 'rb') as file:
        raw_data = pickle.load(file)
    data.extend(raw_data)

In [7]:
len(data)

350027

In [8]:
len(list(set(data)))

349015

In [9]:
data = list(set(data))

# board

In [None]:
col_names = 'abcdefgh'
row_names = '87654321'
pieces = {'r': 'black rook',
          'n': 'black knight',
          'b': 'black bishop',
          'q': 'black queen', 
          'k': 'black king',
          'p': 'black pawn',
          'R': 'white rook',
          'N': 'white knight',
          'B': 'white bishop',
          'Q': 'white queen', 
          'K': 'white king',
          'P': 'white pawn'}

In [None]:
def board_to_text(board):
  text = ""
  rows = board.split("/")
  for inx, row in enumerate(row_names):
    row_values = rows[inx]
    col_num = 0
    for value in row_values:
      if value in '12345678':
        for i in range(int(value)):
          text += col_names[col_num] + row + ": , "
          col_num += 1
      elif value == '-':
        print('STRANGE!')
        text += col_names[col_num] + row + ": " + pieces['p'] + ", "
        col_num += 1
      else:
        text +=  col_names[col_num] + row + ": " + pieces[value] + ", "
        col_num += 1
  return text[:-2]

# move

In [10]:
def map_move_piece(x):
    if x=="K":
        return "king"
    elif x=="Q":
        return "queen"
    elif x=="R":
        return "rook"
    elif x=="B":
        return "bishop"
    elif x=="N":
        return "knight"
    else:
        return "pawn "+x

In [11]:
def parse_one_move(move):
  parsed = ""

  # Nxg2+
  if move[-1]=="+":
    parsed = "check: "
    move = move[:-1]

  # Rd2#
  elif move[-1]=="#":
    parse = "checkmate: "
    move = move[:-1]

  # O-O
  if move == 'O-O':
    parse+= 'castling'
    return parse

  len_move = len(move)
  
  if "x" not in move: 
    # h6 
    if len_move == 2:
      parsed+= "pawn "+ move
    
    # Ng4
    elif len_move == 3:
      parsed+=  map_move_piece(move[0]) + " " + move[1:]
    
    # Nbc6
    elif len_move == 4:
      parsed+=  map_move_piece(move[0]) + " " + move[1] + " " + move[2:]
    
    # Ra5a6
    elif len_move == 5:
      parsed+=  map_move_piece(move[0]) + " " + move[1:3] + " " + move[3:]

  else: # "x" in move
    # Qxf6
    if len_move == 4:
      parsed+=  map_move_piece(move[0]) + " takes " + move[2:]

    # Ngxe7, Nfxh7
    elif len_move == 5: 
      parsed+=  map_move_piece(move[0]) + " " + move[1] + " takes " + move[3:]

    # Re6xe7
    elif len_move == 6:
      parsed+=  map_move_piece(move[0]) + " " + move[1:3] + " takes " + move[4:]

  if parsed == "":
    print("strangeMove: ", move)
    return "<strangeMove>"

  return parsed

# python-chess

https://python-chess.readthedocs.io/en/latest/

In [None]:
!pip install python-chess

In [13]:
import chess

In [14]:
board = chess.Board('rnb1kb1r/3ppppp/5n2/qNpP4/8/8/PP2PPPP/R1BQKBNR w KQkq - 0 0')

In [15]:
print(board)

r n b . k b . r
. . . p p p p p
. . . . . n . .
q N p P . . . .
. . . . . . . .
. . . . . . . .
P P . . P P P P
R . B Q K B N R


In [16]:
if board.is_game_over():
  print("GAME OVER") 
elif board.is_stalemate():
  print("STALE situation")

In [17]:
board.turn

True

castling

In [18]:
castling_rights = ""
if board.has_castling_rights(chess.WHITE):
  castling_rights += "white"
if board.has_castling_rights(chess.BLACK):
  castling_rights += "black" if castling_rights == "" else " and black"

In [19]:
castling_rights

'white and black'

legal moves

In [20]:
def get_legal_moves(board):
  legal_moves = set()
  for move in board.legal_moves:
    uci_move = board.uci(move)
    legal_moves.add(uci_move)
    
  return legal_moves

In [21]:
get_legal_moves(board)

{'b2b4', 'b5c3', 'c1d2', 'd1d2'}

In [22]:
legal_moves = set()
for move in board.legal_moves: 
  legal_moves.add(board.san(board.parse_uci(str(move))))

In [23]:
legal_moves

{'Bd2', 'Nc3', 'Qd2', 'b4'}

In [24]:
# Pseudo-legal moves might leave or put the king in check, but are otherwise valid.
pseudo_legal_moves = set()
for move in board.pseudo_legal_moves: 
  pseudo_legal_moves.add(board.san(chess.Move.from_uci(str(move))))

In [25]:
pseudo_legal_moves

{'Bd2',
 'Be3',
 'Bf4',
 'Bg5',
 'Bh6',
 'Kd2',
 'Na3',
 'Na7',
 'Nc3',
 'Nc7+',
 'Nd4',
 'Nd6+',
 'Nf3',
 'Nh3',
 'Qa4',
 'Qb3',
 'Qc2',
 'Qd2',
 'Qd3',
 'Qd4',
 'Rb1',
 'a3',
 'a4',
 'b3',
 'b4',
 'd6',
 'e3',
 'e4',
 'f3',
 'f4',
 'g3',
 'g4',
 'h3',
 'h4'}

attack

In [26]:
# ranks = rows referred to by numbers. Files = columns.
def square_index(file_rank_string): 
  file_to_value = {"a":0, "b":1, "c":2, "d":3, "e":4 , "f":5, "g":6, "h":7}
  file, rank = file_rank_string[0], file_rank_string[1]
  file = file_to_value[file]
  index = chess.square(file_index=file, rank_index=int(rank)-1)
  return index

In [27]:
def getAttackers(board, square):
    turn = board.turn

    attackers = board.attackers(turn, square_index(square))
    attacks = board.attacks(square_index(square))
    
    attackers_list = [chess.square_name(attacker) for attacker in attackers]
    attacks_list = [chess.square_name(attack) for attack in attacks]

    piece_map = board.piece_map()
    square_with_opponent_pieces = [chess.square_name(key) for key in piece_map.keys() if piece_map[key].color == turn]
    attacks_with_pieces = list(set(attacks_list).intersection(set(square_with_opponent_pieces)))

    return attackers_list, attacks_with_pieces

In [28]:
board = chess.Board('rnb1kb1r/3ppppp/5n2/qNpP4/8/8/PP2PPPP/R1BQKBNR w KQkq - 0 0')

In [32]:
attackers_list, attacks_list = getAttackers(board, 'a5')

In [33]:
attackers_list

[]

In [34]:
attacks_list

['e1', 'b5', 'a2']

check

In [None]:
# board.checkers() 
# board.gives_check(legal_moves)

In [35]:
board.is_check()

True

In [36]:
board.is_checkmate()

False

# save new data

In [37]:
from tqdm import tqdm 

In [38]:
new_data = []
counter = 0

In [39]:
for trio in tqdm(data, position=0, leave=True):
  FEN = trio[0][:trio[0].find("|")]
  moves = trio[1]
  comment = trio[2].lower()

  # last move 
  last_move_UCI =  trio[0][trio[0].find("=")+1:]
  last_move_SAN = moves.split(',')[-1].replace(" ", "")
  last_move_desc = parse_one_move(last_move_SAN) if moves!='' else ''

  # board 
  if '3-4' in FEN:
    print("STRANGE!")  # one sample with bug..
    FEN = FEN.replace('3-4', '3p4')

  # board_desc = board_to_text(FEN[:FEN.find(' ')])

  board = chess.Board(FEN+' 0 0')

  # if board.turn:
  #   turn = "white"
  # else:
  #   turn = "black"

  # legal moves 
  legal_moves = set()
  for move in board.legal_moves: 
    legal_moves.add(board.san(board.parse_uci(str(move))))
  legal_moves = list(legal_moves)
  legal_moves = ", ".join(legal_moves)
  
  # attack
  attackers_list, attacks_list = getAttackers(board, last_move_UCI[-2:])
  attackers_list, attacks_list = ", ".join(attackers_list), ", ".join(attacks_list)

  #sample = (FEN, board_desc, moves, last_move_desc, turn, legal_moves, attackers_list, attacks_list, comment)
  sample = (FEN, moves, last_move_desc, legal_moves, attackers_list, attacks_list, comment)

  new_data.append(sample)

  counter += 1
  if counter%30000==0:
    saved = str(int(counter/30000))
    pickle.dump(new_data, open(BASE_PATH+"NEW_attack/games_data"+saved+".p","wb"))
    new_data = []

 24%|██▍       | 84716/349015 [02:49<08:32, 515.27it/s]

STRANGE!


100%|██████████| 349015/349015 [11:34<00:00, 502.43it/s]


In [40]:
len(new_data)

19015

In [41]:
# don't forget save leftovers ;)
saved = '12'
pickle.dump(new_data, open(BASE_PATH+"NEW_attack/games_data"+saved+".p","wb"))

check if new data looks ok:

In [42]:
new_games_data_path = BASE_PATH + 'NEW_attack/games_data'
new_NUMER_OF_DATA_DIRS = 12

In [43]:
new_paths = [f'{new_games_data_path}{i+1}.p' for i in range(new_NUMER_OF_DATA_DIRS)]

In [44]:
length = []
upgraded_data = []
for path in new_paths:
    with open(path, 'rb') as file:
        upgraded_raw_data = pickle.load(file)
    length.append(len(upgraded_raw_data))
    upgraded_data.extend(upgraded_raw_data)

In [45]:
length

[30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 30000,
 19015]

In [46]:
len(upgraded_data)

349015

In [55]:
upgraded_data[0]

('Q7/7p/8/7k/6q1/8/3K4/6Q1 b - -',
 'Qxg1',
 'queen takes g1',
 'Qh3, Kg6, Qb4+, Qd7+, Qf4+, Qd1+, Qe2+, Qg3, Qf5, Kh6, Qh4, Qe6, Qg2+, Qd4+, Qe4, Qa4, Qg6, Qg8, Kh4, Kg5, Qc4, h6, Qf3, Qg5+, Qg7, Qc8, Qxg1',
 'g4',
 'g4',
 'although black emerges a pawn ahead, the position is a safe draw after this.  but white might still have tried exploiting the awkward placing of the black queens by playing 91.qe8ch.  for example, after 91.qe8ch kg5?? 92.qxg1 qxg1 93.qg8ch skewers the black king and queen and wins.  so: 91.qe8ch kh4  92.qh6ch kg3  93.qe5ch ... at this point black might escape the checks by ...kf2 or ...kg2, but 93...kf3?? 94.qe2ch kg3  95.qxg4ch kxg4 96.qg7ch and another skewer.  again: there was little to lose by inviting black to go wrong.')

In [56]:
upgraded_data[1]

('rn1k3r/pp3ppp/1bpp2q1/4p3/Q1B1P1n1/B1P2NPb/P2P1P1P/RN2R1K1 w - -',
 'g3, Ng4',
 'knight g4',
 'Bc5, Qd1, Bf1, Ba6, Qb3, Qa6, Be2, d3, Kh1, Ng5, Qxc6, Rc1, Qa5, Qb4, Nd4, Bb5, Bd5, Bxf7, Nh4, Bb2, Bxd6, Qc2, Nxe5, Bc1, Re2, Bd3, Qb5, Rf1, Rd1, d4, Re3, Bb4, Qxa7, Be6, Bb3',
 '',
 'h2, f2',
 'double threat on f2!')

In [60]:
upgraded_data[4]

('rn1qkbnr/ppp2ppp/3p4/4P3/4P3/5Q2/PPP2PPP/RNB1KB1R b KQkq -',
 'Qxf3',
 'queen takes f3',
 'Nd7, f6, Ne7, Qg5, Qd7, a5, Be7, c6, Qe7, Nf6, Qh4, f5, Qf6, c5, g5, h5, dxe5, b6, Nh6, h6, Ke7, b5, Kd7, Nc6, d5, Na6, g6, Qc8, a6',
 '',
 'f7',
 'so i take with the queen, developing')

In [61]:
upgraded_data[5]

('r6r/ppp1n2p/4P1p1/3k4/4p1B1/6P1/PPPP3P/R1B1K2R b KQ -',
 'Nxe7, e6',
 'pawn e6',
 'Rac8, Rae8, Rag8, Rab8, a5, Kc5, Rad8, Kd4, Rhc8, c6, c5, g5, Rhf8, Rhe8, Ng8, Kc4, h5, Kc6, Ke5, b6, Raf8, h6, Nf5, Rhb8, b5, Kd6, Rhg8, Nc6, Rhd8, Nc8, e3, a6',
 'd5',
 '',
 "also allowed me to advance pawn further where he was protected by bishop, this pawn could be a real thorn in blacks side. although his passed pawn had to be watched. i was still feeling this game could be vey drawish as if i exchanged all the pieces from here on in i would still prob only end up a bishop better off which wouldn't be enough to win. i found this quite depressing as i had won back the queen with a nice fork.")