In [None]:
import numpy as np
import plotly.express as px

from compression.zstd import ZstdFile
from chessf.clean import *

# Compressed PGN dump to read. The file name suggests Lichess rated standard
# games for 2024-04 -- TODO confirm provenance.
test_file_path = 'pgn/lichess_db_standard_rated_2024-04.pgn.zst'
# Shared stream handle. Later cells read from it sequentially, so every read
# advances the position seen by the cells that follow. It is never closed in
# this notebook.
file = ZstdFile(test_file_path)

In [None]:
# Peek at the next 100 lines of the stream to eyeball the raw layout.
# These lines are consumed from `file`; later reads continue after them.
for _ in range(100):
    raw = file.readline()
    line = raw.decode().strip()
    print(line)

In [None]:
# Build the regression dataset: one feature row (x) and one target (y) per
# accepted game, stopping once 100,000 games have been accepted.
x, y = [], []

j = 0  # number of games accepted so far
while j < 100_000:
    meta, moves = parse_game(get_next_game_with_eval(file))
    evals = moves['eval_values']

    # Reject games with fewer than 80 eval values.
    if len(evals) < 40*2:
        continue

    # Keep only time controls that start with '600+' or '900+'.
    if not meta['TimeControl'].startswith(('600+', '900+')):
        continue

    # Reject games where either rating is exactly 1500
    # (presumably the provisional starting rating -- TODO confirm).
    if meta['WhiteElo'] == 1500 or meta['BlackElo'] == 1500:
        continue

    # Absolute change in evaluation between consecutive entries.
    swings = np.abs(np.diff(evals))
    # Alternating entries; presumably one stream per side -- TODO confirm
    # which parity belongs to which colour.
    even_swings, odd_swings = swings[0::2], swings[1::2]

    whole_game = [
        1,  # constant column, acts as the regression intercept
        np.mean(swings),
        np.median(swings),
        np.quantile(swings, 0.90),
        np.quantile(swings, 0.10),
        np.std(swings),
        np.abs(evals[50]),      # size of the evaluation at index 50
        np.sum(swings[:50]),    # total swing over the first 50 entries
    ]

    # Signed even-minus-odd gaps; only their magnitudes are used as features.
    parity_gaps = [
        np.mean(even_swings) - np.mean(odd_swings),
        np.median(even_swings) - np.median(odd_swings),
        np.quantile(even_swings, 0.90) - np.quantile(odd_swings, 0.90),
        np.quantile(even_swings, 0.10) - np.quantile(odd_swings, 0.10),
        np.sum(swings[0:50:2]) - np.sum(swings[1:50:2]),
    ]

    x.append(whole_game + [np.abs(gap) for gap in parity_gaps])
    y.append(meta['MeanElo'])

    j += 1
    # Progress report every 5,000 accepted games.
    if j % 5_000 == 0:
        print(j)

In [None]:
# Turn the accumulated Python lists into numpy arrays so later cells can
# slice columns and apply boolean masks.
x, y = np.array(x), np.array(y)
# Optional target clipping, currently disabled:
# y = np.clip(y, -100, 100)

In [None]:
# Display the target array as the cell output (quick visual sanity check).
y

In [None]:
# Mean target (y) per decile of the last feature column of x.
#
# Only the nine interior decile boundaries are used as bin edges, so
# np.digitize labels every sample 0..9: label 0 is everything below the 10th
# percentile and label 9 is everything at or above the 90th percentile, the
# maximum included. Using the 0% and 100% quantiles as edges as well would
# shift the labels to 1..10 and push the maximum into an eleventh bin, which
# makes it easy to miss the top decile when looping.
last_feature = x[:, -1]
decile_edges = np.quantile(
    last_feature,
    [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90],
)
# The bin labels do not depend on the loop variable, so compute them once.
bin_index = np.digitize(last_feature, decile_edges)

g = []
for i in range(len(decile_edges) + 1):
    selected = y[bin_index == i]
    # Heavily tied feature values can leave a bin empty; record NaN rather
    # than triggering numpy's mean-of-empty-slice warning.
    mean = selected.mean() if selected.size else np.nan
    g.append(mean)

In [None]:
# Line plot of the per-bin mean targets collected in g, in bin order.
px.line(g, template='plotly_white')

In [None]:
import statsmodels.api as sm

In [None]:
# Ordinary least squares with y as the response and x as the design matrix.
# Each feature row starts with a constant 1, so x already carries the
# intercept column and no separate constant is added here.
model = sm.OLS(y, x)

In [None]:
# Fit the model; later cells read the summary, predictions and p-values
# from this results object.
results = model.fit()

In [None]:
# Print the text summary of the fitted regression.
print(results.summary())

In [None]:
# In-sample predictions: predict() is called without new data, so it uses
# the design matrix the model was fitted on (statsmodels default when exog
# is omitted).
y_hat = results.predict()

In [None]:
# Mean absolute error between the targets and the model's predictions.
abs_err = np.abs(y - y_hat)
mae = abs_err.mean()
print(mae)

In [None]:
# Line plot of the p-values reported by the fitted results, one point per
# column of the design matrix.
px.line(results.pvalues, template='plotly_white')