In [1]:
import chess
import chess.pgn as pgn
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.ticker as ticker
import copy
from matplotlib.pyplot import figure
import matplotlib
import seaborn as sns

# Data ETL
**Caution: If the directory "./position-analysis" already exists and contains data, you can skip this part.**


This part extracts, transforms, and loads the raw chess game data to build a new dataset of per-square piece occupancy counts.

In [None]:
# Players whose game collections are analysed; each name is also the name of
# a sub-directory under ../raw_game/.
players = [
  'Caruana',
  'Polgar',
  'Fischer',
  'Morphy',
  'Botvinnik',
  'Tal',
  'Kasparov',
  'Anand',
  'Carlsen',
  'Nakamura',
  'Alekhine',
  'Capablanca',
]

In [None]:
# Count how often each WHITE piece type stands on each of the 64 squares,
# sampled after every move of the first game stored in each PGN file.
chess_pieces_type = list(range(1, 7))  # python-chess piece types: 1=pawn ... 6=king
mapp = {piece_type: [0] * 64 for piece_type in chess_pieces_type}

for player in players:
  player_dir = os.path.join('../raw_game', player)
  gameList = os.listdir(player_dir)
  for game in gameList:
    gameLink = os.path.join(player_dir, game)

    # Close the file deterministically, and use a name that does not clobber
    # the `pgn` alias of the chess.pgn module imported at the top.
    with open(gameLink) as pgn_file:
      first_game = chess.pgn.read_game(pgn_file)

    # read_game returns None when the file holds no game; skip such files
    # instead of failing on `None.board()`.
    if first_game is None:
      continue

    board = first_game.board()
    for move in first_game.mainline_moves():
      board.push(move)
      for piece in chess_pieces_type:
        # board.pieces yields the square indices (0-63) occupied by this
        # piece type for the requested colour.
        for square in board.pieces(piece_type=piece, color=chess.WHITE):
          mapp[piece][square] += 1

# One row per square index, one column per white piece (upper-case letters).
white_data = pd.DataFrame(mapp).rename(
  columns={1: 'P', 2: 'N', 3: 'B', 4: 'R', 5: 'Q', 6: 'K'}
)

In [None]:
# Count how often each BLACK piece type stands on each of the 64 squares,
# sampled after every move of the first game stored in each PGN file.
chess_pieces_type = list(range(1, 7))  # python-chess piece types: 1=pawn ... 6=king
mapp = {piece_type: [0] * 64 for piece_type in chess_pieces_type}

for player in players:
  player_dir = os.path.join('../raw_game', player)
  gameList = os.listdir(player_dir)
  for game in gameList:
    gameLink = os.path.join(player_dir, game)

    # Close the file deterministically, and use a name that does not clobber
    # the `pgn` alias of the chess.pgn module imported at the top.
    with open(gameLink) as pgn_file:
      first_game = chess.pgn.read_game(pgn_file)

    # read_game returns None when the file holds no game; skip such files
    # instead of failing on `None.board()`.
    if first_game is None:
      continue

    board = first_game.board()
    for move in first_game.mainline_moves():
      board.push(move)
      for piece in chess_pieces_type:
        # board.pieces yields the square indices (0-63) occupied by this
        # piece type for the requested colour.
        for square in board.pieces(piece_type=piece, color=chess.BLACK):
          mapp[piece][square] += 1

# One row per square index, one column per black piece (lower-case letters).
black_data = pd.DataFrame(mapp).rename(
  columns={1: 'p', 2: 'n', 3: 'b', 4: 'r', 5: 'q', 6: 'k'}
)

In [None]:
# Put the white and black counts side by side (both frames share the same
# 0-63 row index), prepend an explicit square-index column, and persist the
# combined table as CSV without the pandas index.
piece_data = white_data.join(black_data)
piece_data.insert(0, 'Square', range(len(piece_data)))
piece_data.to_csv('position-analysis.csv', index=False)

# Data EDA

In [2]:
# Load the per-square piece counts from the CSV written at the end of the
# ETL section, so the analysis can run without repeating the extraction.
piece_data = pd.read_csv("position-analysis.csv")

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
piece_data.describe()

Unnamed: 0,Square,P,N,B,R,Q,K,p,n,b,r,q,k
count,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0
mean,31.5,161811.1,30510.25,34035.0625,43862.90625,19705.859375,27924.984375,161315.5,30758.578125,34050.046875,43821.6875,19606.28125,27924.984375
std,18.618987,266291.5,72898.152553,71857.005108,121351.318001,64468.863275,98718.521858,260497.3,72060.841456,80732.009111,128639.054064,71050.921635,102295.573135
min,0.0,0.0,192.0,328.0,2721.0,770.0,174.0,0.0,149.0,476.0,2989.0,664.0,74.0
25%,15.75,1876.5,2697.0,3828.75,6084.75,3523.25,903.5,1217.25,2040.25,2754.75,5450.75,3073.5,791.75
50%,31.5,35064.5,6986.0,8973.0,9747.0,5518.5,3630.5,24428.0,7500.0,7814.5,9010.0,5789.0,3427.0
75%,47.25,188297.8,20119.75,27065.0,14823.5,13606.25,11164.75,235682.2,21341.75,21877.25,13205.5,12477.25,11676.25
max,63.0,1013422.0,418422.0,441167.0,766010.0,507827.0,689945.0,1121826.0,435996.0,499455.0,831051.0,564510.0,695153.0


In [None]:
# Column names of piece_data: upper-case = white pieces, lower-case = black.
chess_pieces = ['P', 'R', 'N', 'B', 'Q', 'K',
                'p', 'r', 'n', 'b', 'q', 'k']

# Board coordinates: file letters and rank numbers.
columns = list('abcdefgh')
rows = list(range(1, 9))

# Column totals, used as the normalising constant for each piece.
sumOfChessPostion = piece_data.sum(axis=0)

# Turn raw counts into probabilities: every piece column is divided by its
# own total, so each column sums to 1.
for piece in chess_pieces:
  piece_total = sumOfChessPostion[piece]
  piece_data[piece] = piece_data[piece] / piece_total

# Translate the square index into board coordinates for plotting:
# index // 8 selects the rank, index % 8 selects the file.
piece_data['Row'] = piece_data['Square'].apply(
  lambda square: rows[int(square / 8)]
)

piece_data['Column'] = piece_data['Square'].apply(
  lambda square: columns[square % 8]
)


In [None]:
# Show the normalised table, including the derived Row and Column labels.
piece_data

In [None]:
# One logarithmic colour scale shared by all heatmaps so they are comparable.
norm = colors.LogNorm(vmin=1e-4, vmax=1e-1)
# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# Matplotlib 3.7 and removed in 3.9. The copy keeps set_bad from touching
# any shared colormap instance.
cmap = copy.copy(plt.get_cmap('coolwarm'))
# Values the log scale cannot map (e.g. a probability of exactly 0) are drawn
# with the lowest colour of the map instead of being left blank.
cmap.set_bad(cmap(0))
titleOfPiece = {'p': 'Pawn', 'n': 'Knight', 'b': 'Bishop', 'r': 'Rook', 'q': 'Queen', 'k': 'King'}

fig, ax = plt.subplots(4, 3)
fig.set_figheight(15)
fig.set_figwidth(15)

# Tick positions: cell centres carry the labels (minor ticks), cell edges
# carry the grid lines (major ticks, unlabeled).
cell_centres = [0, 1, 2, 3, 4, 5, 6, 7]
cell_edges = [-0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5]
file_labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
rank_labels = ['1', '2', '3', '4', '5', '6', '7', '8']

# ax.flat walks the 4x3 grid row by row, which avoids a manual counter
# (the original counter was named `iter` and shadowed the builtin).
for this_ax, key in zip(ax.flat, chess_pieces):
  color = 'White' if (key.isupper()) else 'Black'
  title = color + ' ' + titleOfPiece[key.lower()]

  # Rows of piece_data are ordered by square index (0 = a1, 63 = h8), so a
  # plain reshape gives data[rank, file]. Reversing the whole column before
  # reshaping (as was done previously) mirrors the files as well as the ranks
  # and rotates the board 180 degrees relative to the tick labels.
  data = piece_data[key].to_numpy().reshape((8, 8))

  # origin='lower' puts rank 1 at the bottom, matching the '1'..'8' labels.
  im = this_ax.imshow(data, norm = norm, cmap = cmap, origin = 'lower')

  this_ax.xaxis.set_major_formatter(ticker.NullFormatter())
  this_ax.xaxis.set_major_locator(ticker.FixedLocator(cell_edges))
  this_ax.xaxis.set_minor_locator(ticker.FixedLocator(cell_centres))
  this_ax.xaxis.set_minor_formatter(ticker.FixedFormatter(file_labels))

  this_ax.yaxis.set_major_formatter(ticker.NullFormatter())
  this_ax.yaxis.set_major_locator(ticker.FixedLocator(cell_edges))
  this_ax.yaxis.set_minor_locator(ticker.FixedLocator(cell_centres))
  this_ax.yaxis.set_minor_formatter(ticker.FixedFormatter(rank_labels))

  this_ax.set_title(title)
  this_ax.grid(color = 'black')


# All panels share `norm` and `cmap`, so the last image is representative
# for the common colour bar placed to the right of the grid.
cbar_ax = fig.add_axes([0.9, 0.15, 0.05, 0.7])
cbar = fig.colorbar(im, extend = 'both', cax=cbar_ax)

fig.suptitle('Probability of Chess Piece Positions', fontsize = 24)

plt.subplots_adjust(left=0.1,
                    bottom=0.1,
                    right=0.9,
                    top=0.9,
                    hspace=0.3)

# Opaque white background so the saved PNG is readable on dark viewers.
fig.patch.set_facecolor('white')
plt.savefig('../analysis/probanility-chess-position.png', transparent=False)
plt.show()
