# Sample for katawrap with Python

In [1]:
import pandas as pd
import numpy as np

Edit the next line if you want to try your own data. See README.md for how to prepare a `*.jsonl` file.

In [2]:
# Path to the katawrap results file that the rest of the notebook analyzes.
input_file = 'sample_result.jsonl'

Load results of katawrap.

In [3]:
# lines=True: the file is JSON Lines, i.e. one JSON object per line,
# and each object becomes one row of the DataFrame.
df = pd.read_json(input_file, lines=True)

Show some basic fields.

In [4]:
# Columns to display: which game, which turn, and the evaluation at that turn.
keys = [
    'sgfFile',
    'turnNumber',
    'winrate',
    'scoreLead',
]

df.loc[:, keys]

Unnamed: 0,sgfFile,turnNumber,winrate,scoreLead
0,./sgf/sample001.sgf,0,0.324822,-0.235385
1,./sgf/sample001.sgf,1,0.341502,-0.298111
2,./sgf/sample001.sgf,2,0.563822,0.422854
3,./sgf/sample001.sgf,3,0.183020,-1.007094
4,./sgf/sample001.sgf,4,0.562894,0.496323
...,...,...,...,...
74,./sgf/sample009.sgf,22,0.890335,4.595317
75,./sgf/sample009.sgf,23,0.888172,4.433803
76,./sgf/sample009.sgf,24,0.995477,14.349731
77,./sgf/sample009.sgf,25,0.013710,-19.501979


...and more fields

In [5]:
# Candidate columns to display; any that are absent from the loaded data
# are silently left out so that the selection below cannot fail.
keys = [
    name
    for name in (
        'sgfFile', 'PB', 'PW', 'RE', 'rules', 'komi',
        'turnNumber', 'currentPlayer',
        'winrate', 'scoreLead', 'unsettledness',
        'nextMove', 'nextMoveColor', 'nextMoveSign',
        'nextMoveRank', 'nextWinrateGain', 'nextScoreGain',
    )
    if name in df.columns
]

df.loc[:, keys]

Unnamed: 0,sgfFile,PB,PW,RE,rules,komi,turnNumber,currentPlayer,winrate,scoreLead,unsettledness,nextMove,nextMoveColor,nextMoveSign,nextMoveRank,nextWinrateGain,nextScoreGain
0,./sgf/sample001.sgf,kuy,sor,B+R,japanese,6.5,0,B,0.324822,-0.235385,0.000000,E5,B,1.0,0.0,0.016679,-0.062726
1,./sgf/sample001.sgf,kuy,sor,B+R,japanese,6.5,1,W,0.341502,-0.298111,0.064090,D6,W,-1.0,17.0,-0.222321,-0.720965
2,./sgf/sample001.sgf,kuy,sor,B+R,japanese,6.5,2,B,0.563822,0.422854,0.509938,F3,B,1.0,11.0,-0.380802,-1.429947
3,./sgf/sample001.sgf,kuy,sor,B+R,japanese,6.5,3,W,0.183020,-1.007094,0.328673,C3,W,-1.0,16.0,-0.379874,-1.503417
4,./sgf/sample001.sgf,kuy,sor,B+R,japanese,6.5,4,B,0.562894,0.496323,0.676532,D5,B,1.0,1.0,-0.177610,-0.734718
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,./sgf/sample009.sgf,kuy,sor,W+R,japanese,6.5,22,B,0.890335,4.595317,7.794062,F3,B,1.0,0.0,-0.002164,-0.161513
75,./sgf/sample009.sgf,kuy,sor,W+R,japanese,6.5,23,W,0.888172,4.433803,8.346299,G3,W,-1.0,,-0.107306,-9.915927
76,./sgf/sample009.sgf,kuy,sor,W+R,japanese,6.5,24,B,0.995477,14.349731,6.343064,C4,B,1.0,11.0,-0.981767,-33.851709
77,./sgf/sample009.sgf,kuy,sor,W+R,japanese,6.5,25,W,0.013710,-19.501979,9.216337,F2,W,-1.0,1.0,-0.001270,-10.619469


List all field names.

In [6]:
# The column index holds the name of every available field.
df.columns

Index(['id', 'isDuringSearch', 'moveInfos', 'ownership', 'rootInfo',
       'turnNumber', 'includeUnsettledness', 'sgfFile', 'visits', 'last',
       'moves', 'boardXSize', 'boardYSize', 'komi', 'maxVisits',
       'analyzeLastTurn', 'analyzeTurns', 'includeOwnership', 'rules',
       'sgfProp', 'sgf', 'SZ', 'KM', 'PB', 'PW', 'RE', 'currentPlayer',
       'rawStScoreError', 'rawStWrError', 'rawVarTimeLeft', 'scoreLead',
       'scoreSelfplay', 'scoreStdev', 'symHash', 'thisHash', 'utility',
       'weight', 'winrate', 'nextMove', 'nextMoveColor', 'nextMoveSign',
       'nextMoveRank', 'query', 'board', 'unsettledness', 'nextRootInfo',
       'nextWinrateGain', 'nextScoreGain'],
      dtype='object')

Find the worst 5 blunders excluding those that have nothing to do with winning or losing.

In [7]:
keys = [
    'sgfFile',
    'PB',
    'PW',
    'turnNumber',
    'nextMoveColor',
    'scoreLead',
    'nextScoreGain',
    'nextWinrateGain',
]

# Keep only moves that lost more than 0.1 of winrate, then show the five
# with the most negative score gain.
(
    df[df['nextWinrateGain'] < -0.1]
    .sort_values(by='nextScoreGain')
    .iloc[:5]
    .loc[:, keys]
)

Unnamed: 0,sgfFile,PB,PW,turnNumber,nextMoveColor,scoreLead,nextScoreGain,nextWinrateGain
34,./sgf/sample001.sgf,kuy,sor,34,B,8.981466,-34.360468,-0.788142
76,./sgf/sample009.sgf,kuy,sor,24,B,14.349731,-33.851709,-0.981767
31,./sgf/sample001.sgf,kuy,sor,31,W,-22.662463,-30.830034,-0.803197
67,./sgf/sample009.sgf,kuy,sor,15,W,-5.514918,-11.595791,-0.961586
75,./sgf/sample009.sgf,kuy,sor,23,W,4.433803,-9.915927,-0.107306


Sort games by their peak unsettledness, ignoring positions with extreme winrates.

In [8]:
keys = ['turnNumber', 'unsettledness']

# For every game, pick the single position at which unsettledness peaks
# (among positions whose winrate is strictly between 0.2 and 0.8) and list
# the games from the highest peak to the lowest.
#
# Note: `.groupby('sgfFile').max('unsettledness')` is not used here because
# the positional string would be taken as the `numeric_only` flag; every
# column would then be maximized independently, and `turnNumber` would be the
# largest turn in the group rather than the turn of the peak.
(
    df.query('0.2 < winrate and winrate < 0.8')
    # Highest unsettledness first (NaN goes last); a stable sort keeps the
    # outcome deterministic when values tie.
    .sort_values('unsettledness', ascending=False, kind='stable')
    # After sorting, the first row left for each game is its peak position.
    .drop_duplicates('sgfFile')
    .set_index('sgfFile')[keys]
)

Unnamed: 0_level_0,turnNumber,unsettledness
sgfFile,Unnamed: 1_level_1,Unnamed: 2_level_1
./sgf/sample001.sgf,33,7.953114
./sgf/sample009.sgf,19,6.334619


Calculate the match rates with KataGo's top 3 suggestions in the first 50 moves.

In [9]:
# Grouping columns: one result per game (with its metadata) and player color.
keys = [
    'sgfFile',
    'PB',
    'PW',
    'RE',
    'nextMoveColor',
]

def match_rate(ary, k):
    """Return the fraction of entries in ``ary`` that are strictly below ``k``.

    Entries for which ``ary < k`` is false, NaN included, still count toward
    the denominator, which is the total number of entries (``ary.size``).
    """
    hits = np.count_nonzero(ary < k)
    total = ary.size
    return hits / total

# Restrict to turns 0-49, then, for each game and player color, compute the
# share of played moves whose rank is below 3.
(
    df[(0 <= df['turnNumber']) & (df['turnNumber'] < 50)]
    .groupby(keys)['nextMoveRank']
    .agg(lambda ranks: match_rate(ranks, 3))
)

sgfFile              PB   PW   RE   nextMoveColor
./sgf/sample001.sgf  kuy  sor  B+R  B                0.840000
                                    W                0.720000
./sgf/sample009.sgf  kuy  sor  W+R  B                0.538462
                                    W                0.384615
Name: nextMoveRank, dtype: float64