[REFERENCE](https://rstudio-pubs-static.s3.amazonaws.com/335466_20f0c84f1fe040c3ba64bcd32499cfb3.html)

In [1]:
import os
import re
import pandas as pd
import numpy as np

In [2]:
from itertools import product

In [3]:
# Directory that is scanned for the `.pgn` game files.
FOLDER = './tidy_data/'

In [4]:
# Names of the `.pgn` files found in FOLDER.
# `os.listdir` returns entries in arbitrary order, so sort them to make the
# processing order (and the row order of any per-game table) reproducible.
fnames = sorted(fname for fname in os.listdir(FOLDER) if fname.endswith('.pgn'))

In [5]:
# Map each game id (the digits between the last '_' and '.pgn' in the file
# name) to the list of space-separated tokens read from that file.
games = {}
# Raw string: `\d` and `\.` must reach the regex engine untouched instead of
# being parsed as (invalid) string escape sequences.
pat = r'(?:.*_)(\d+)(?:\.pgn)'

for fname in fnames:
    match = re.search(pat, fname)
    if match is None:
        # Fail with a readable message rather than an AttributeError on None.
        raise ValueError(f'Cannot extract a game id from file name {fname!r}')
    gid = match.group(1)

    with open(os.path.join(FOLDER, fname), 'r') as file:
        game = file.read()

    games[gid] = game.split(' ')


***

In [6]:
# State labels; they index the rows and columns of the transition table.
states = ['Initial', 'oCapture', 'oCheck', 'Both', 'Else']

In [7]:
def isOnlyCapture(ply):
    '''
    Tell whether the ply is a capture that does not
    give check: it contains 'x' but no '+'.
    '''
    if '+' in ply:
        return False
    return 'x' in ply

def isOnlyCheck(ply):
    '''
    Tell whether the ply gives check without
    capturing: it contains '+' but no 'x'.
    '''
    if 'x' in ply:
        return False
    return '+' in ply

def isBoth(ply):
    '''
    Tell whether the ply is a capture that also
    gives check: it contains both '+' and 'x'.
    '''
    return all(mark in ply for mark in ('+', 'x'))

def isElse(ply):
    '''
    Tell whether the ply is a quiet move: it contains
    neither the check mark '+' nor the capture mark 'x'.
    '''
    return not ('+' in ply or 'x' in ply)

In [8]:
# For every game, one boolean list per state telling which plies belong to it.
mapped_plies = {}

for gid, plies in games.items():
    # Only the very first ply carries the 'Initial' flag.
    mapped = {'Initial': [True] + [False] * (len(plies) - 1)}
    mapped['oCapture'] = [isOnlyCapture(ply) for ply in plies]
    mapped['oCheck'] = [isOnlyCheck(ply) for ply in plies]
    mapped['Both'] = [isBoth(ply) for ply in plies]
    mapped['Else'] = [isElse(ply) for ply in plies]

    mapped_plies[gid] = mapped

In [9]:
# Per game: fraction of plies flagged with each state.
emp_data = {
    gid: {state: sum(flags) / len(flags) for state, flags in mapped.items()}
    for gid, mapped in mapped_plies.items()
}
# Per game: number of plies flagged with each state, leaving out the last ply
# (it has no successor, so it never acts as the source of a transition).
count_vec = {
    gid: {state: sum(flags[:-1]) for state, flags in mapped.items()}
    for gid, mapped in mapped_plies.items()
}

In [10]:
# One row per game id, one column per state.
emp_data = pd.DataFrame.from_dict(emp_data, orient='index')
# Same layout for the counts, then collapsed to one total per state.
per_game_counts = pd.DataFrame.from_dict(count_vec, orient='index')
count_vec = per_game_counts.sum()

In [11]:
# Occurrences of each state summed over all games (each game's last ply excluded).
count_vec

Initial      11
oCapture    175
oCheck       17
Both         12
Else        734
dtype: int64

In [12]:
# transitions.loc[src, tgt] accumulates how often a `src` ply is immediately
# followed by a `tgt` ply, over all games.
transitions = pd.DataFrame(0, index=states, columns=states)

for gid, plies in mapped_plies.items():
    # Turn every state's boolean flags into a 0/1 vector once per game.
    flags = {state: np.array(marks, dtype=int) for state, marks in plies.items()}

    for src, tgt in product(flags, repeat=2):
        # Pair ply k (source, last ply dropped) with ply k + 1 (target, first
        # ply dropped); the dot product counts the co-occurrences.
        count = flags[src][:-1].dot(flags[tgt][1:])
        transitions.loc[src, tgt] += count

# Divide every row by the number of times its source state was observed.
transition_matrix = transitions.divide(count_vec, axis=0)

In [13]:
# Row `src`, column `tgt`: transition count divided by the count of `src`.
transition_matrix

Unnamed: 0,Initial,oCapture,oCheck,Both,Else
Initial,0.0,0.0,0.0,0.0,1.0
oCapture,0.0,0.457143,0.022857,0.034286,0.485714
oCheck,0.0,0.058824,0.0,0.0,0.941176
Both,0.0,0.666667,0.0,0.083333,0.25
Else,0.0,0.117166,0.017711,0.008174,0.856948


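**Simulation**: starting with all probability mass on `Initial`, repeatedly multiply the state distribution by the transition matrix until it stops changing.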

In [14]:
# Power iteration: start with all probability mass on 'Initial' and push the
# distribution through the transition matrix until two consecutive iterates
# agree within the tolerance.
s0 = pd.Series([int(s == 'Initial') for s in states], index=states)

max_steps = 1000  # upper bound on the number of iterations
tol = 1e-8        # used as both relative and absolute tolerance

sn = s0.copy()
for i in range(max_steps):
    sn_1 = sn.copy()
    sn = sn.dot(transition_matrix)

    if np.allclose(sn_1, sn, rtol=tol, atol=tol):
        print(f'Step {i}: CONVERGED.')
        stationary_state = sn.copy()
        break
else:
    # Without this branch a non-converging run would leave `stationary_state`
    # undefined on a fresh kernel, or stale from an earlier execution.
    raise RuntimeError(f'No convergence within {max_steps} steps.')

Step 18: CONVERGED.


In [15]:
# Distribution at which the iteration stopped changing.
stationary_state

Initial     0.000000
oCapture    0.187551
oCheck      0.018108
Both        0.013974
Else        0.780367
dtype: float64

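Using `numpy`: the stationary distribution is the left eigenvector of the transition matrix for eigenvalue 1 (an eigenvector of its transpose), rescaled so that its entries sum to 1.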

In [16]:
# Eigen-decomposition of the transposed matrix, i.e. left eigenvectors of
# the transition matrix.
eigvals, eigvecs = np.linalg.eig(transition_matrix.transpose())
# Keep the eigenvector column(s) whose eigenvalue is numerically 1 ...
unit_mask = np.isclose(eigvals, 1)
non_normed_stat = eigvecs[:, unit_mask]
# ... and rescale so that the entries sum to 1.
stat = non_normed_stat / non_normed_stat.sum()

In [17]:
# Flatten the eigenvector column and label its entries with the state names.
pd.Series(data=stat.ravel(), index=states)

Initial     0.000000
oCapture    0.187551
oCheck      0.018108
Both        0.013974
Else        0.780367
dtype: float64