In [None]:
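# Tracking pieces (ideas for working out which piece made a move)
# Knight N: distance from origin square to target square == sqrt(5)
# Bishop: light-squared vs dark-squared (white/black squares)
# King: easy
# Queen: easy (if there is only one; otherwise unclear)

# Pawns: ??
# Rook R: ??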

In [None]:
import pandas as pd
import plotly.express as px

In [None]:
class Piece:
    """A chess piece described by its colour and one-letter type.

    Attributes:
        is_white: truthy when the piece belongs to White.
        piece: one-letter piece code ('R', 'Q', 'N', 'B', 'P' or 'K').
        value: material value looked up from the piece code (the king counts 0).
    """

    # Material value per piece code.
    _VALUES = {
        'R': 5, 'Q': 9, 'N': 3, 'B': 3, 'P': 1, 'K': 0
    }

    def __init__(self, is_white, piece):
        self.is_white = is_white
        self.piece = piece
        # An unknown piece code raises KeyError.
        self.value = self._VALUES[piece]

    def __repr__(self):
        # White pieces are shown in lower case, black pieces in upper case.
        code = self.piece
        return code.lower() if self.is_white else code.upper()

class Move:
    """A single half-move parsed from a SAN token such as 'Qxd8+??'.

    Attributes:
        is_white: the colour flag passed in by the caller (stored unchanged).
        piece: code of the piece that ends up on the target square
            ('R', 'N', 'B', 'Q', 'K', 'P', or '?' when it cannot be determined).
            For promotions this is the promoted piece; for castling it is 'R'.
        to_square: the last two characters of the cleaned move (e.g. 'd8');
            for castling, the square the rook lands on.
        is_capture: True when the move text contains 'x'.
        is_promotion: True when the move text contains '=R', '=N', '=B' or '=Q'.
    """

    def __init__(self, string, is_white):
        is_white, piece, to_square, is_capture, is_promotion = self.parse(string, is_white)
        self.is_white = is_white
        self.piece = piece
        self.to_square = to_square
        self.is_capture = is_capture
        self.is_promotion = is_promotion

    @staticmethod
    def parse(string, is_white):
        """Parse one SAN token.

        Args:
            string: the move text; annotation/check marks ('?', '!', '#', '+')
                are ignored. ``None`` is treated as an empty move.
            is_white: colour flag, returned unchanged and used to pick the
                rook's castling square.

        Returns:
            Tuple ``(is_white, piece, to_square, is_capture, is_promotion)``.
            An empty or unrecognised move yields ``piece == '?'``.
        """
        move = '' if string is None else str(string)
        for mark in '?!#+':
            move = move.replace(mark, '')

        # Test the normalised text, not the raw argument, so that non-string
        # input behaves consistently with the str() coercion above.
        is_capture = 'x' in move

        # move[:1] is '' for an empty token; guard explicitly because
        # '' in 'RNBQK' is True.
        first = move[:1]
        if first and first in 'RNBQK':
            piece = first
        elif first and first in 'abcdefgh':
            piece = 'P'
        else:
            piece = '?'

        is_promotion = False
        for promoted in 'RNBQ':
            promotion_to = f'={promoted}'
            if promotion_to in move:
                # The piece standing on the target square is the promoted one.
                piece = promoted
                is_promotion = True
                move = move.replace(promotion_to, '')

        to_square = move[-2:]

        # Castling is recorded as a rook move to the rook's destination square.
        if move == 'O-O':
            piece = 'R'
            to_square = 'f1' if is_white else 'f8'
        elif move == 'O-O-O':
            piece = 'R'
            to_square = 'd1' if is_white else 'd8'

        return (is_white, piece, to_square, is_capture, is_promotion)

    def __repr__(self):
        return str(self.__dict__)

class Board:
    """Material tracker driven only by the destination square of each move.

    The board is a dict mapping file letter ('a'..'h') to a 9-slot list indexed
    by rank (slot 0 is unused, ranks are 1..8). Origin squares are never
    cleared, because moves carry no origin information; a square therefore
    holds the last piece that arrived there (or its initial piece).

    Attributes:
        board: the file -> rank list mapping described above.
        material_white / material_black: current material totals (39 at start).
        history_white / history_black: material totals appended after every
            successfully applied move.
    """

    def __init__(self):
        board = {
            file: [None] * 9
            for file in 'abcdefgh'
        }
        for file, piece in zip('abcdefgh', 'RNBQKBNR'):
            board[file][1] = Piece(is_white=True, piece=piece)
            board[file][2] = Piece(is_white=True, piece='P')
            board[file][7] = Piece(is_white=False, piece='P')
            board[file][8] = Piece(is_white=False, piece=piece)

        self.board = board
        self.material_white = 8 + 9 + 2*(3+3+5)
        self.material_black = 8 + 9 + 2*(3+3+5)
        self.history_white = list()
        self.history_black = list()
        # Previously applied move; needed to recognise en passant captures.
        self._last_move = None

    def _is_en_passant(self, move, to_file, to_rank):
        """Return True when `move` is a pawn capturing en passant.

        Assuming a legal game: a pawn capture landing on rank 6 (White) or
        rank 3 (Black), played immediately after the opponent made a
        non-capturing pawn move to the square directly "behind" the landing
        square, can only be en passant, because the landing square itself
        must be empty at that point.
        """
        prev = self._last_move
        if prev is None or move.piece != 'P':
            return False
        if prev.piece != 'P' or prev.is_capture:
            return False
        if bool(prev.is_white) == bool(move.is_white):
            return False
        passed_rank, landing_rank = (5, 6) if move.is_white else (4, 3)
        return to_rank == landing_rank and prev.to_square == f'{to_file}{passed_rank}'

    def move(self, move):
        """Apply a move and append the resulting material totals to the histories.

        Args:
            move: object exposing `to_square`, `is_white`, `piece`,
                `is_capture` and `is_promotion`.

        Raises:
            ValueError: a capture targets a square that holds no piece (and is
                not en passant), or `to_square` is not two characters long.
            KeyError: unknown file letter or piece code.
        """
        to_file, to_rank = list(move.to_square)
        to_rank = int(to_rank)

        # Build the arriving piece first so that an invalid piece code fails
        # before any material total has been modified.
        placed = Piece(is_white=move.is_white, piece=move.piece)

        if move.is_capture:
            if self._is_en_passant(move, to_file, to_rank):
                # The captured pawn stands next to the landing square, not on it.
                captured_rank = to_rank - 1 if move.is_white else to_rank + 1
                self.board[to_file][captured_rank] = None
                captured_value = 1
            else:
                captured = self.board[to_file][to_rank]
                if captured is None:
                    raise ValueError(
                        f'capture on empty square {move.to_square!r}'
                    )
                captured_value = captured.value

            if move.is_white:
                self.material_black -= captured_value
            else:
                self.material_white -= captured_value

        if move.is_promotion:
            gain = placed.value - 1  # -1 because the promoted piece replaces a pawn
            if move.is_white:
                self.material_white += gain
            else:
                self.material_black += gain

        self.board[to_file][to_rank] = placed
        self.history_white.append(self.material_white)
        self.history_black.append(self.material_black)
        self._last_move = move

In [None]:
game = """1. d4 { [%eval 0.13] [%clk 0:05:00] } 1... e5? { [%eval 1.74] [%clk 0:05:00] } 2. Bf4?? { [%eval -5.59] [%clk 0:04:59] } 2... d6?? { [%eval 0.83] [%clk 0:04:59] } 3. dxe5?? { [%eval 1.19] [%clk 0:04:57] } 3... dxe5?? { [%eval 1.59] [%clk 0:04:58] } 4. Qxd8+?? { [%eval 2.53] [%clk 0:04:55] } 4... Kxd8?? { [%eval 2.47] [%clk 0:04:56] } 5. Bxe5?? { [%eval 1.59] [%clk 0:04:54] } 5... Nc6?? { [%eval 1.65] [%clk 0:04:54] } 6. Nc3?? { [%eval -3.87] [%clk 0:04:49] } 6... Nxe5?? { [%eval -3.84] [%clk 0:04:51] } 7. e3?? { [%eval -4.48] [%clk 0:04:43] } 7... Bb4?? { [%eval -3.7] [%clk 0:04:48] } 8. Bb5?! { [%eval -4.69] [%clk 0:04:27] } 8... Bxc3+?! { [%eval -4.6] [%clk 0:04:46] } 9. bxc3?! { [%eval -4.57] [%clk 0:04:27] } 9... c6?! { [%eval -4.67] [%clk 0:04:44] } 10. O-O-O+?! { [%eval -4.8] [%clk 0:03:57] } 10... Kc7?! { [%eval -4.74] [%clk 0:04:42] } 11. Ba4?! { [%eval -5.16] [%clk 0:03:51] } 11... Be6?! { [%eval -4.83] [%clk 0:04:31] } 12. Ne2?! { [%eval -4.78] [%clk 0:03:42] } 12... Nf6?! { [%eval -4.81] [%clk 0:04:30] } 13. Nd4?! { [%eval -5.2] [%clk 0:03:41] } 13... Bxa2?! { [%eval -5.34] [%clk 0:04:24] } 14. f4?! { [%eval -5.85] [%clk 0:03:26] } 14... Ng6?! { [%eval -4.87] [%clk 0:04:16] } 15. h3?! { [%eval -5.2] [%clk 0:03:18] } 15... Ne4?! { [%eval -5.38] [%clk 0:04:13] } 16. Rhf1?! { [%eval -6.19] [%clk 0:03:13] } 16... Nxc3?! { [%eval -6.25] [%clk 0:04:12] } 17. Rd3?! { [%eval -7.15] [%clk 0:03:03] } 17... Nxa4?! { [%eval -7.24] [%clk 0:04:09] } 18. Ra3?! { [%eval -7.1] [%clk 0:02:59] } 18... c5?! { [%eval -5.75] [%clk 0:03:52] } 19. Nf3?! { [%eval -6.94] [%clk 0:02:47] } 19... Bc4?! { [%eval -6.97] [%clk 0:03:46] } 20. Rd1?! { [%eval -6.93] [%clk 0:02:39] } 20... Nb6?! { [%eval -6.98] [%clk 0:03:37] } 21. Rc3?! { [%eval -7.06] [%clk 0:02:33] } 21... Kc6?! { [%eval -6.13] [%clk 0:03:28] } 22. Kd2?! { [%eval -7.15] [%clk 0:02:26] } 22... Rhd8+?! { [%eval -7.88] [%clk 0:03:23] } 23. Ke1?! { [%eval -7.25] [%clk 0:02:24] } 23... Rxd1+?! 
{ [%eval -7.24] [%clk 0:03:22] } 24. Kxd1?! { [%eval -7.61] [%clk 0:02:22] } 24... Rd8+?! { [%eval -7.01] [%clk 0:03:20] } 25. Ke1?! { [%eval -7.21] [%clk 0:02:19] } 25... Bd5?! { [%eval -6.89] [%clk 0:03:12] } 26. Ne5+?! { [%eval -9.29] [%clk 0:02:16] } 26... Nxe5?! { [%eval -9.25] [%clk 0:03:10] } 27. fxe5?! { [%eval -9.25] [%clk 0:02:14] } 27... Bxg2?! { [%eval -9.34] [%clk 0:03:08] } 28. Kf2?! { [%eval -9.89] [%clk 0:02:08] } 28... Bxh3?! { [%eval -10.16] [%clk 0:03:06] } 29. Kg3?! { [%eval -10.56] [%clk 0:02:03] } 29... Bf5?! { [%eval -10.07] [%clk 0:03:05] } 30. Kf4?! { [%eval -42.67] [%clk 0:02:01] } 30... g6?! { [%eval -10.16] [%clk 0:03:03] } 31. e4?! { [%eval -11.39] [%clk 0:01:59] } 31... Be6?! { [%eval -12.03] [%clk 0:02:56] } 32. Ra3?! { [%eval -11.92] [%clk 0:01:50] } 32... Rd4?! { [%eval -11.25] [%clk 0:02:54] } 33. Rxa7?! { [%eval -10.79] [%clk 0:01:47] } 33... Nd5+?! { [%eval -11.24] [%clk 0:02:53] } 34. Kg5?! { [%eval -11.21] [%clk 0:01:42] } 34... Rxe4?! { [%eval -11.29] [%clk 0:02:46] } 35. Ra8?! { [%eval -14.49] [%clk 0:01:39] } 35... Rxe5+?! { [%eval -13.49] [%clk 0:02:43] } 36. Kh6?! { [%eval -14.22] [%clk 0:01:36] } 36... Nf6?! { [%eval -12.76] [%clk 0:02:39] } 37. c4?! { [%eval -16.28] [%clk 0:01:30] } 37... Rh5+?! { [%eval -14.26] [%clk 0:02:37] } 38. Kg7?! { [%eval -16.55] [%clk 0:01:28] } 38... Nd7?! { [%eval -16.78] [%clk 0:02:16] } 39. Rc8+?! { [%eval -18.28] [%clk 0:01:24] } 39... Kb6?! { [%eval -17.14] [%clk 0:02:09] } 40. Rh8?! { [%eval -15.55] [%clk 0:01:06] } 40... Ne5?! { [%eval -15.44] [%clk 0:02:06] } 41. Rxh7?! { [%eval #-11] [%clk 0:01:03] } 41... Rxh7+?! { [%eval #-16] [%clk 0:02:04] } 42. Kxh7?! { [%eval #-12] [%clk 0:01:03] } 42... Nxc4?! { [%eval #-13] [%clk 0:02:00] } 43. Kg7?! { [%eval #-9] [%clk 0:01:01] } 43... g5?! { [%eval #-8] [%clk 0:01:58] } 44. Kf6?! { [%eval #-8] [%clk 0:01:01] } 44... g4?! { [%eval #-7] [%clk 0:01:57] } 45. Ke7?! { [%eval #-7] [%clk 0:00:53] } 45... g3?! { [%eval #-6] [%clk 0:01:56] } 46. 
Kd8?! { [%eval #-6] [%clk 0:00:50] } 46... g2?! { [%eval #-5] [%clk 0:01:54] } 47. Ke8?! { [%eval #-5] [%clk 0:00:48] } 47... g1=Q?! { [%eval #-4] [%clk 0:01:53] } 48. Kd8?! { [%eval #-4] [%clk 0:00:48] } 48... Qg5+?! { [%eval #-3] [%clk 0:01:51] } 49. Ke8?! { [%eval #-3] [%clk 0:00:48] } 49... Qg8+?! { [%eval #-3] [%clk 0:01:48] } 50. Ke7?! { [%eval #-3] [%clk 0:00:46] } 50... Qg6?! { [%eval #-3] [%clk 0:01:37] } 51. Kf8?! { [%eval #-3] [%clk 0:00:44] } 51... f5?! { [%eval #-2] [%clk 0:01:32] } 52. Ke7?! { [%eval #-2] [%clk 0:00:42] } 52... Qf7+?! { [%eval #-1] [%clk 0:01:29] } 53. Kd8?! { [%eval #-1] [%clk 0:00:40] } 53... Qd7#?! { [%clk 0:01:28] } 0-1\n"""

In [None]:
# Peek at the first 50 characters of the raw movetext.
game[:50]

In [None]:
# Every chunk after a ". " separator begins with the move text; plies are
# numbered from 1, so odd plies (ply % 2 == 1) are White's.
r = [
    Move(chunk.split()[0], ply % 2)
    for ply, chunk in enumerate(game.split('. ')[1:], start=1)
]

In [None]:
# One row per parsed move, one column per Move attribute.
df = pd.DataFrame([vars(parsed) for parsed in r])

In [None]:
# Show only the moves that were parsed as promotions.
df[df['is_promotion']]

In [None]:
# Replay the parsed sample game on a fresh board; each call appends the
# current material totals to board.history_white / board.history_black.
board = Board()
for move in r:
    board.move(move)

In [None]:
# Plot both material histories (one point per applied move) as two lines.
px.line(
    y=[board.history_white, board.history_black],
    template='plotly_white'
)

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px

from os.path import getsize

In [None]:
def get_next_analysed_game(stream=None):
    """Return the next game whose movetext contains engine evaluations.

    Lines are read one at a time; blank lines are skipped and every other line
    is collected until a line starting with "1." (the movetext) is reached.
    Games whose movetext has no "%eval" marker are discarded and reading
    continues with the following game.

    Args:
        stream: text stream to read from. Defaults to the notebook-level
            PGN handle ``f``.

    Returns:
        list[str]: the game's lines (trailing newlines kept); the last element
        is the movetext line.

    Raises:
        EOFError: the stream ended before another analysed game was found.
    """
    if stream is None:
        stream = f

    # Until we find a game with evals
    while True:
        # Record game info
        buffer = []
        while True:
            line = stream.readline()

            # readline() returns "" only at end of file; without this check
            # the loop would spin forever and grow the buffer without bound.
            if line == "":
                raise EOFError("end of PGN stream reached before an analysed game was found")

            if line == "\n":
                continue

            buffer.append(line)

            # Moves always start with 1.
            if line.startswith("1."):
                break

        # We need games with move evaluations
        if "%eval" in line:
            return buffer

In [None]:
def params_to_dict(str_list):
    """Turn PGN tag lines such as '[Site "https://..."]' into a dict.

    Each line has its trailing newline and surrounding square brackets
    removed, is split once on the first space into key and value, and the
    value has its surrounding double quotes stripped. A line without a space
    raises ValueError.
    """
    params = {}
    for raw_line in str_list:
        key, value = raw_line.strip("\n").strip("[]").split(" ", 1)
        params[key] = value.strip('"')
    return params

In [None]:
def moves_to_df(moves):
    """Split an annotated movetext line into a per-move DataFrame.

    After the square brackets are removed, the text is split on single spaces
    and the final token is dropped. The remaining tokens are read in groups of
    eight: index 1 is the move, index 4 the eval and index 6 the clock.

    Returns a DataFrame with columns Move, Eval, Clock and MoveNumber
    (1-based row number), truncated to the first 200 rows.
    """
    tokens = moves.translate(str.maketrans("", "", "[]")).split(" ")[:-1]

    # Small fix for when last move is mate: that move has no eval, so pad the
    # token stream with a placeholder eval to restore the 8-token stride.
    if len(tokens) % 8 != 0:
        for filler in ("%eval", "#0"):
            tokens.insert(-3, filler)

    columns = {
        "Move": tokens[1::8],
        "Eval": tokens[4::8],
        "Clock": tokens[6::8],
    }
    # orient="index" + transpose tolerates columns of unequal length.
    table = pd.DataFrame.from_dict(columns, orient="index").transpose()
    table["MoveNumber"] = table.index + 1

    # Only first 200 moves have analysis
    return table.head(200)

In [None]:
def sigmoid(x, a1, a2, a3, a4):
    """Scaled and shifted logistic curve: a1 / (1 + exp(-a3 * (x - a2))) + a4."""
    s = 1 + np.exp( -a3*(x-a2) )
    y = a1 / s + a4
    return y

def inverse(x, a, b, c):
    """Shifted reciprocal curve: a / (x - b) + c."""
    return a/(x-b) + c

# Curve parameters: popt1 for numeric evals (sigmoid), popt2 / popt3 for
# positive / negative mate scores (inverse).
# NOTE(review): presumably fitted offline; the fit is not part of this notebook.
popt1 = [1.741759, 5.016247, 0.003036, -0.866097]
popt2 = [0.54774, -2.797004, 0.790583]
popt3 = [0.548174, 2.829586, -0.785972]

def eval_to_winodds(x):
    """Map an eval token to a signed score using the curves above.

    Args:
        x: a numeric eval such as "0.13" (multiplied by 100 before being fed
            to the sigmoid), or a mate token such as "#-11" whose first
            character is dropped and the rest read as a signed number.

    Returns:
        The curve value, or 0 when `x` cannot be interpreted (including a
        mate count of exactly zero, e.g. "#0").
    """
    try:
        return sigmoid(float(x)*100, *popt1)
    except (TypeError, ValueError):
        # Not a plain number: fall through and try the mate notation.
        pass

    try:
        mate_in = float(x[1:])
    except (TypeError, ValueError, LookupError):
        # Only conversion failures are treated as "no information"; anything
        # else (e.g. KeyboardInterrupt) must propagate.
        return 0

    if mate_in > 0:
        return inverse(mate_in, *popt2)
    if mate_in < 0:
        return inverse(mate_in, *popt3)
    return 0

In [None]:
PGN_FILE = "pgn/lichess_db_standard_rated_2024-01.pgn"
print(f"PGN file size (bytes): {getsize(PGN_FILE):,}")

# Shared read handle: the game-reading helpers consume it line by line, so it
# is deliberately left open and each call continues where the last stopped.
# The encoding is fixed to UTF-8 so decoding does not depend on the
# platform's default locale.
f = open(PGN_FILE, mode="r", encoding="utf-8")

# Optional: start somewhere in the middle of the file by seeking to a byte
# offset and skipping forward to the end of the game that offset lands in.
# OFFSET = 1_000_000_000
# f.seek(OFFSET)
# while True:
#     line = f.readline()
#     if line.startswith("1."):
#         break

In [None]:
def get_next_game_analysis():
    """Read the next analysed game and return its per-move table.

    The table from moves_to_df is extended with the game's 'Site' tag (Link),
    the material totals after every move (MaterialWhite, MaterialBlack,
    MaterialDiff = white - black) and the eval mapped through
    eval_to_winodds (Eval2).
    """
    game_lines = get_next_analysed_game()
    header_lines, movetext = game_lines[:-1], game_lines[-1]

    tags = params_to_dict(header_lines)
    df = moves_to_df(movetext)
    df['Link'] = tags['Site']

    # Replay the game; rows are numbered from 1, so odd rows are White's moves.
    tracker = Board()
    for ply, san in enumerate(df['Move'].values, start=1):
        tracker.move(Move(san, ply % 2))

    df['MaterialWhite'] = tracker.history_white
    df['MaterialBlack'] = tracker.history_black
    df['MaterialDiff'] = df['MaterialWhite'] - df['MaterialBlack']
    df['Eval2'] = df['Eval'].map(eval_to_winodds)
    return df

In [None]:
# Collect per-move tables for up to N_GAMES analysed games.
N_GAMES = 5000

frames = []
skipped = 0
for i in range(1, N_GAMES + 1):
    if i % 100 == 0:
        print(i, end='\r')

    try:
        frames.append(get_next_game_analysis())
    except EOFError:
        # The reader signalled the end of the PGN file: nothing more to read.
        break
    except Exception:
        # Best effort: a game that cannot be parsed or replayed is skipped,
        # but counted so the loss is visible. Interrupts still propagate.
        skipped += 1

# Concatenate once at the end instead of re-copying the frame every iteration.
df = pd.concat(frames) if frames else pd.DataFrame()
print(f"\nparsed {len(frames):,} games, skipped {skipped:,}")

In [None]:
# Scatter of mapped eval against material difference on a random subset of
# moves. The sample size is capped at the number of available rows (sampling
# more rows than exist raises ValueError) and seeded so the figure is
# reproducible.
fig = px.scatter(
    df.sample(min(5000, len(df)), random_state=0), y='Eval2', x='MaterialDiff',
    template='plotly_white'
)
fig.update_layout(height=800, width=800)
fig.show()

In [None]:
df['Eval2Abs'] = df['Eval2'].abs()

# For every game, find the first move where one side has Eval2 at or beyond
# +/-0.80 in its favour without being ahead on material, while both sides
# still have at least 10 points of material; then pull the full rows for
# those (Link, MoveNumber) pairs.
strongest = df.merge(
    df.loc[lambda d: (
        (d['MaterialWhite'] >= 10) & (d['MaterialBlack'] >= 10)
        & (
            ((d['Eval2'] >= 0.80) & (d['MaterialDiff'] <= 0))
            | ((d['Eval2'] <= -0.80) & (d['MaterialDiff'] >= 0))
        )
    )]
    .groupby('Link', as_index=False)
    .agg({'MoveNumber': 'first'}),
    on=['Link', 'MoveNumber'],
)

In [None]:
# Build "<game link>#<MoveNumber>" for each selected row.
# NOTE(review): presumably so the link opens the game at that half-move — confirm.
strongest['Link2'] = strongest['Link'] + '#' + strongest['MoveNumber'].astype(str)

In [None]:
# Show 10 randomly chosen rows for manual inspection.
strongest.sample(10)

In [None]:
# Interesting:
# https://lichess.org/fiTBWiil#52
# https://lichess.org/62HD2mRx#62
# https://lichess.org/xyElBcIj#46
# https://lichess.org/pDy8IoT7#36
# https://lichess.org/xu3FEZi6#50

**Elo stability check**

In [None]:
def get_next_game(stream=None):
    """Return the lines of the next game, analysed or not.

    Blank lines are skipped; every other line is collected up to and including
    the first line starting with "1." (the movetext).

    Args:
        stream: text stream to read from. Defaults to the notebook-level
            PGN handle ``f``.

    Returns:
        list[str]: the game's lines (trailing newlines kept); the last element
        is the movetext line.

    Raises:
        EOFError: the stream ended before a movetext line was found.
    """
    if stream is None:
        stream = f

    # Record game info
    buffer = []
    while True:
        line = stream.readline()

        # readline() returns "" only at end of file; without this check the
        # loop would spin forever and grow the buffer without bound.
        if line == "":
            raise EOFError("end of PGN stream reached before a complete game was read")

        if line == "\n":
            continue

        buffer.append(line)

        # Moves always start with 1.
        if line.startswith("1."):
            return buffer

In [None]:
# Collect, per player name, the list of ratings seen across the next
# 100,000 games (in file order).
players = {}
for i in range(1, 100_000 + 1):
    if not i % 100:
        print(i, end='\r')
    a = get_next_game()
    p = params_to_dict(a)

    white = p['White']
    black = p['Black']
    white_elo = p['WhiteElo']
    black_elo = p['BlackElo']

    # Start an empty rating history the first time a player is seen.
    players.setdefault(white, [])
    players.setdefault(black, [])

    players[white].append(int(white_elo))
    players[black].append(int(black_elo))

In [None]:
# Keep only players with at least 10 recorded games.
freq_players = dict(
    (name, elos)
    for name, elos in players.items()
    if len(elos) >= 10
)

In [None]:
# One row per frequent player, one column per game index.
# NOTE(review): .loc[:, 0:10] is label-based and inclusive, so it keeps 11
# columns (games 0..10); players with exactly 10 games have NaN in the last.
elo_histories = pd.DataFrame(list(freq_players.values())).loc[:, 0:10]

# Sample at most 100 players: asking for more rows than exist raises
# ValueError. Seeded so the figure is reproducible.
fig = px.line(
    elo_histories.sample(min(100, len(elo_histories)), random_state=0).T
)
fig.update_layout(height=2000)
fig.show()

In [None]:
# Read one more game from the shared PGN handle and display its raw lines.
get_next_game()