In [1]:
import collections
import os
import re

import cartopy.crs as ccrs
import chess.pgn
import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pycountry_convert
import seaborn as sns
from geopy.geocoders import Nominatim
from tqdm import tqdm

In [2]:
# DB containing ~4 million chess games; the handle is kept open because
# the parsing cell below reads games from it one at a time.
pgn = open("caissabase.pgn", mode="r")

In [None]:
# VISUALIZATION 1----------------------------------------------------HEATMAP OF LETHAL ZONES
N_GAMES = 250      # number of games to read from the PGN stream
kill_moves = []    # SAN strings of capturing moves (those containing 'x')
death_squares = [] # squares on which captures occurred (filled in further down)
pbar = tqdm(total=N_GAMES) # progress bar sized to the number of games actually parsed

# parse up to N_GAMES games
for i in range(N_GAMES):
    game = chess.pgn.read_game(pgn) # read the next game from the open PGN file
    if game is None: # read_game returns None once the file is exhausted
        break
    board = game.board()

    # replay the mainline so each move is rendered against the position it was played in
    for move in game.mainline_moves():
        san = board.san(move) # compute the notation once per move
        if 'x' in san: # capturing moves carry an 'x' in their SAN
            kill_moves.append(san)
        board.push(move) # advance to the next position
    pbar.update(1) # one more game processed

pbar.close() # close progress bar


#----------------------------------------------------------------------------------------
# Find the square on which each capture happened.
# The two-character square always follows directly after the 'x',
# so it is the slice [index('x') + 1 : index('x') + 3] of the move string.
death_squares = [] # rebuilt from scratch so re-running this step never double-counts
for san in kill_moves:
    x_pos = san.index('x') # position of the capture marker
    death_squares.append(san[x_pos + 1:x_pos + 3]) # square name, e.g. 'e5'

# count captures per square
capture_counts = collections.Counter(death_squares)
# Build an entry for every one of the 64 squares, ordered a1,a2,...,h7,h8.
# Squares without any capture get an explicit 0 (Counter returns 0 for missing keys),
# so the mapping always has exactly 64 values and can be reshaped to 8x8.
dic = collections.OrderedDict(
    (file_ + rank, capture_counts[file_ + rank])
    for file_ in "abcdefgh"
    for rank in "12345678"
)


#-------------------------------------------------------------------------------------
# dic is sorted a1..a8, b1..b8, ..., so reshaping to 8x8 gives one row per file
# (this assumes all 64 squares are present in dic).
# Transposing puts ranks on the rows and files on the columns; flipping
# vertically then moves the last row to the top to match the chessboard layout.
chessboard = np.flipud(np.array(list(dic.values())).reshape(8, 8).transpose())
 
    
#-------------------------------------------------------------------------------------
# wrap the 8x8 array in a labelled dataframe (rows 8..1, columns a..h) for seaborn
rank_labels = list("87654321")
file_labels = list("abcdefgh")
df = pd.DataFrame(chessboard, index=rank_labels, columns=file_labels)
        
        
#-------------------------------------------------------------------------------------
# configure and plot heatmap
# Apply the seaborn theme BEFORE creating the figure: it changes global rcParams,
# which only affect figures/axes created afterwards. Setting it later made the
# first run of this cell look different from every re-run.
sns.set(font_scale=3)
fig, ax = plt.subplots(figsize=(20,16))
ax.set_title('Lethal Zone Heatmap')
ax = sns.heatmap(df, linewidths=0, ax=ax) # draw on this axes explicitly, not the implicit current one
os.makedirs('fig', exist_ok=True) # savefig does not create missing directories
fig.savefig('fig/heatmap.png')
plt.show()