In [2]:
import polars as pl
import re
from lib.chess import fen_labels


def replace_fen(x: str):
    """Turn a FEN-like string into a list of ``fen_labels`` codes.

    Only the first four whitespace-separated fields of ``x`` are used:

    1. the first field is split into single characters after every digit ``d``
       has been replaced by ``d`` copies of ``"z"``;
    2. the second field becomes the token ``"black"`` when it equals ``"b"``
       and ``"white"`` otherwise;
    3. the third field is split into characters and right-padded with ``"-"``
       up to four tokens (longer values are kept as they are);
    4. the fourth field is kept as one single token.

    A ``" "`` token separates the groups.  The token list is handed to
    ``fen_labels.transform`` and the result is returned as a plain ``list``.
    For a standard FEN (8 ranks of 8 squares, at most four letters in the
    third field) this produces 80 tokens.

    NOTE(review): ``fen_labels`` is imported from ``lib.chess``; presumably a
    fitted label encoder whose vocabulary covers every token built here --
    confirm.

    Raises:
        IndexError: if ``x`` has fewer than four whitespace-separated fields.
    """
    fields = x.split()
    placement = fields[0]
    side = fields[1]
    castling = fields[2]
    en_passant = fields[3]

    def _expand_digit(match):
        # A digit n stands for n consecutive placeholder characters.
        return "z" * int(match.group(0))

    tokens = list(re.sub(r"\d", _expand_digit, placement))
    tokens.append(" ")
    tokens.append("white" if side != "b" else "black")
    tokens.append(" ")
    tokens.extend(castling)
    # Pad the third field to four tokens; a negative count adds nothing.
    tokens.extend("-" for _ in range(4 - len(castling)))
    tokens.append(" ")
    tokens.append(en_passant)

    return list(fen_labels.transform(tokens))


def replace_eval(x: str) -> int:
    """Map a raw evaluation string onto one of 20 integer classes.

    * Strings containing ``"#"``: 10 if they also contain ``"+"``, else 20.
    * Any other string is parsed with ``int`` and bucketed in steps of 50:

      - ``1 .. 450``   -> classes 1 .. 9, ``>= 451``  -> 10
      - ``0``          -> 11
      - ``-1 .. -450`` -> classes 11 .. 19, ``<= -451`` -> 20

    Note that 0 shares class 11 with the ``-1 .. -50`` bucket.

    Raises:
        ValueError: if ``x`` contains no ``"#"`` and is not a valid integer.
    """
    if "#" in x:
        return 10 if "+" in x else 20

    score = int(x)
    if score == 0:
        return 11

    # 50-wide buckets counted from 1, saturating at the tenth (magnitude >= 451).
    bucket = min((abs(score) - 1) // 50 + 1, 10)

    # Positive scores use classes 1..10, negative scores 11..20.
    return bucket if score > 0 else bucket + 10


# Read the three evaluation CSVs, stack them into one frame, then replace both
# columns with their encoded form:
#   * FEN        -> list of integer codes (replace_fen); null values are passed
#                   to the function rather than skipped (skip_nulls=False)
#   * Evaluation -> integer class (replace_eval)
# Only the FEN and Evaluation columns are read from tactic_evals.csv.
df = pl.concat(
    [
        pl.read_csv(csv_path, **read_opts)
        for csv_path, read_opts in (
            ("resources/eval/chessData.csv", {}),
            ("resources/eval/random_evals.csv", {}),
            ("resources/eval/tactic_evals.csv", {"columns": ["FEN", "Evaluation"]}),
        )
    ]
).with_columns(
    pl.col("FEN").map_elements(
        replace_fen, return_dtype=pl.List(pl.Int64), skip_nulls=False
    ),
    pl.col("Evaluation").map_elements(replace_eval, return_dtype=pl.Int64),
)

df


FEN,Evaluation
list[i64],i64
"[31, 28, … 1]",11
"[31, 28, … 1]",2
"[31, 28, … 1]",11
"[31, 28, … 1]",2
"[31, 28, … 1]",11
…,…
"[33, 33, … 1]",20
"[31, 33, … 1]",5
"[7, 33, … 1]",10
"[33, 33, … 1]",20


In [None]:
# Sanity checks on the encoded frame: which evaluation classes occur, and
# which FEN token-list lengths occur.
print(df["Evaluation"].unique())
fen_lengths = df["FEN"].list.len().unique()

# NOTE(review): despite its name, `lengths` holds the FEN codes converted to a
# fixed-width Array of 80 entries (not the list lengths). `df` is written to
# parquet afterwards, so confirm what readers of that file expect before
# renaming the column.
df = df.with_columns(lengths=pl.col("FEN").list.to_array(80))

# A bare expression is only rendered when it is the last statement of a cell;
# previously the length check sat mid-cell and its result was discarded.
fen_lengths

shape: (20,)
Series: 'Evaluation' [i64]
[
	1
	2
	3
	4
	5
	…
	16
	17
	18
	19
	20
]


FEN
u32
80


In [6]:
# Persist the processed frame (all columns currently in `df`) as a Parquet file.
df.write_parquet("resources/eval/eval.parquet")
