In [3]:
import pandas as pd
import chess.pgn
import io

In [4]:
# Path of the CSV file loaded below.
# NOTE(review): this is an absolute, machine-specific path; the file name suggests
# cleaned classical Lichess games from 2023-01 -- confirm the provenance and adjust
# the path when running on another machine.
CSV_FILE_PATH = "/home/joao/workspace/lichess/datasets_csv/clean_classical_above_800_2023-01.csv"

# The first CSV column becomes the index and the first line supplies the column names.
df = pd.read_csv(CSV_FILE_PATH, index_col=0, header=0)

In [5]:
# Sanity check of the loaded frame: dimensions on the first line, column labels on the next.
print(df.shape, df.columns, sep="\n")

(519228, 4)
Index(['WhiteElo', 'BlackElo', 'Result', 'Moves'], dtype='object')


In [6]:
# Keep only the games in which neither player is rated above 1000.
white_low = df['WhiteElo'].le(1000)
black_low = df['BlackElo'].le(1000)
df = df.loc[white_low & black_low]
df.shape

(4364, 4)

In [7]:
# Movetext of the first game in df, kept as a sample to experiment with.
tmp = df['Moves'].iloc[0]

In [8]:
# Parse the sample movetext and set up the board the game starts from.
pgn = chess.pgn.read_game(io.StringIO(tmp))
board = pgn.board()

# Replay the main line, printing the FEN reached after each move followed by that move.
for move in pgn.mainline_moves():
    board.push(move)
    print(f"{board.fen()} {move}")

rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1 e2e4
rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 2 e7e5
rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQKB1R b KQkq - 1 2 g1f3
rnbqkbnr/pppp2pp/5p2/4p3/4P3/5N2/PPPP1PPP/RNBQKB1R w KQkq - 0 3 f7f6
rnbqkbnr/pppp2pp/5p2/4p3/4P2N/8/PPPP1PPP/RNBQKB1R b KQkq - 1 3 f3h4
rnbqkbnr/pppp3p/5p2/4p1p1/4P2N/8/PPPP1PPP/RNBQKB1R w KQkq - 0 4 g7g5
rnbqkbnr/pppp3p/5p2/4pNp1/4P3/8/PPPP1PPP/RNBQKB1R b KQkq - 1 4 h4f5
rnbqkbnr/ppp4p/3p1p2/4pNp1/4P3/8/PPPP1PPP/RNBQKB1R w KQkq - 0 5 d7d6
rnbqkbnr/ppp4p/3p1p2/1B2pNp1/4P3/8/PPPP1PPP/RNBQK2R b KQkq - 1 5 f1b5
rn1qkbnr/pppb3p/3p1p2/1B2pNp1/4P3/8/PPPP1PPP/RNBQK2R w KQkq - 2 6 c8d7
rn1qkbnr/pppb3p/3N1p2/1B2p1p1/4P3/8/PPPP1PPP/RNBQK2R b KQkq - 0 6 f5d6
rn1qk1nr/pppb3p/3b1p2/1B2p1p1/4P3/8/PPPP1PPP/RNBQK2R w KQkq - 0 7 f8d6
rn1qk1nr/pppB3p/3b1p2/4p1p1/4P3/8/PPPP1PPP/RNBQK2R b KQkq - 0 7 b5d7
rn2k1nr/pppq3p/3b1p2/4p1p1/4P3/8/PPPP1PPP/RNBQK2R w KQkq - 0 8 d8d7
rn2k1nr/pppq3p/3b1p2/4p1p1/4P1P1/8/PPPP

In [12]:
# define custom function to transform each row into one row per move available from the PGN
def process_row(row: pd.Series) -> pd.DataFrame:
    """Expand one game row into one output row per main-line move.

    Args:
        row: A row of the games frame. ``row.name`` is used as the game id, and
            the entries ``'WhiteElo'``, ``'BlackElo'``, ``'Result'`` and
            ``'Moves'`` (the PGN text of the game) are read.

    Returns:
        A DataFrame with columns ``Id``, ``WhiteElo``, ``BlackElo``, ``Result``
        and ``Board``. Each row repeats the game's id, ratings and result, and
        ``Board`` holds the FEN of the position *before* the corresponding move
        is played, so the position after the last move is not included.
        The frame is empty (same columns) when ``'Moves'`` is missing, blank,
        or contains no game or no moves.
    """
    fens = []
    moves = row['Moves']

    # A missing cell arrives as NaN (not a str) and would make io.StringIO raise;
    # blank text makes read_game return None. Both are treated as "no moves".
    if isinstance(moves, str) and moves.strip():
        game = chess.pgn.read_game(io.StringIO(moves))
        if game is not None:
            board = game.board()
            for move in game.mainline_moves():
                # Record the position first, then advance the board.
                fens.append(board.fen())
                board.push(move)

    # The per-game values are constant, so repeat them once per recorded position.
    n_positions = len(fens)
    return pd.DataFrame({
        'Id': [row.name] * n_positions,
        'WhiteElo': [row['WhiteElo']] * n_positions,
        'BlackElo': [row['BlackElo']] * n_positions,
        'Result': [row['Result']] * n_positions,
        'Board': fens,
    })
    
# Expand every game into its per-move frame, stack the frames into one table,
# and index the stacked table by game id.
per_game_frames = df.apply(process_row, axis=1).tolist()
stacked = pd.concat(per_game_frames, ignore_index=True)
result = stacked.set_index('Id')


In [13]:
result

Unnamed: 0_level_0,WhiteElo,BlackElo,Result,Board
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
https://lichess.org/9xFY5fim,956,880,0-1,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...
https://lichess.org/9xFY5fim,956,880,0-1,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...
https://lichess.org/9xFY5fim,956,880,0-1,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...
https://lichess.org/9xFY5fim,956,880,0-1,rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQK...
https://lichess.org/9xFY5fim,956,880,0-1,rnbqkbnr/pppp2pp/5p2/4p3/4P3/5N2/PPPP1PPP/RNBQ...
...,...,...,...,...
https://lichess.org/42ssxwWO,991,897,1-0,2r1k2r/p2nb1pp/b1Q2p2/q7/3P4/5N2/PPPB1PPP/RN2K...
https://lichess.org/42ssxwWO,991,897,1-0,2r1k2r/p2nb1pp/b4p2/q7/3PQ3/5N2/PPPB1PPP/RN2K2...
https://lichess.org/42ssxwWO,991,897,1-0,2r1k2r/p2nb1pp/b7/q4p2/3PQ3/5N2/PPPB1PPP/RN2K2...
https://lichess.org/42ssxwWO,991,897,1-0,2r1k2r/p2nb1pp/b7/q4p2/3P4/4QN2/PPPB1PPP/RN2K2...
