In [1]:
import chess.pgn

# define the two PGN files to merge
pgn_files = ["C:/Users/sebas/Downloads/chess_com_games_2023-02-202.pgn",
             "C:/Users/sebas/Downloads/chess_com_games_2023-02-20.pgn"]

# create a new PGN file to store the merged games; the `with` block makes sure
# the file is flushed and closed before any later cell reads "merged.pgn"
with open("merged.pgn", "w") as merged_pgn_file:
    # loop through each PGN file and add the games to the merged file
    for pgn_file in pgn_files:
        with open(pgn_file) as f:
            while True:
                game = chess.pgn.read_game(f)
                # the loop treats a None result as "no more games in this file"
                if game is None:
                    break
                # write the game to the merged PGN file, followed by a blank
                # line so consecutive games stay separated
                merged_pgn_file.write(str(game) + "\n\n")

In [2]:
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from chess.pgn import read_game
from datetime import datetime
import csv

# Define the headers we want to extract
headers = ["Event", "Site", "Date", "Round", "White", "Black", "Result", "WhiteElo", "BlackElo", "Moves"]

# Define a function to extract the headers from a game
def extract_headers(game, fields=tuple(headers)):
    """Collect the selected header values of a game into a plain dict.

    Parameters
    ----------
    game : object exposing a ``headers`` mapping with a ``get`` method.
    fields : iterable of str, optional
        Header names to extract. Defaults to a snapshot of the module-level
        ``headers`` list taken when the function is defined, so rebinding
        ``headers`` later does not change which fields are read.

    Returns
    -------
    dict
        One entry per requested field, in order; headers that are absent map
        to ``""``. If ``"Date"`` was requested, its value is parsed with the
        ``%Y.%m.%d`` format into a ``datetime``, or set to ``None`` when the
        text does not match that format (for example an empty string or a
        ``????.??.??`` placeholder).
    """
    header_data = {field: game.headers.get(field, "") for field in fields}
    if "Date" in header_data:
        try:
            header_data["Date"] = datetime.strptime(header_data["Date"], "%Y.%m.%d")
        except ValueError:
            # Missing or partially unknown date: keep the game, drop the date.
            header_data["Date"] = None
    return header_data

# Define a function to extract the moves from a game
def extract_moves(game):
    """Return the game's main line as a space-separated string of SAN moves.

    Parameters
    ----------
    game : a python-chess game (or game node) providing ``board()`` and
        ``mainline_moves()``.

    Returns
    -------
    str
        The main-line moves in standard algebraic notation joined by single
        spaces; an empty string when the game has no moves.
    """
    # Keep one running board and push each move onto it, instead of asking
    # every node for a fresh board (python-chess documents GameNode.board()
    # as O(n), which would make the whole traversal quadratic).
    board = game.board()
    san_moves = []
    for move in game.mainline_moves():
        # SAN must be computed against the position *before* the move is played.
        san_moves.append(board.san(move))
        board.push(move)
    return " ".join(san_moves)

# Open the merged PGN file and build one record (dict) per game
with open("merged.pgn") as f:
    games = []
    while True:
        game = read_game(f)
        # the loop treats a None result as "no more games in the file"
        if game is None:
            break
        # Use a distinct local name: rebinding the module-level `headers`
        # here would clobber the field list that extract_headers reads.
        record = extract_headers(game)
        record["Moves"] = extract_moves(game)
        games.append(record)

# Convert the list of games to a Pandas DataFrame
df = pd.DataFrame(games)

# Export the DataFrame to CSV; "|" is the separator and every non-numeric
# value is quoted
df.to_csv("merged.csv", index=False, sep="|", quoting=csv.QUOTE_NONNUMERIC)

# Convert the Pandas DataFrame to a PyArrow Table
table = pa.Table.from_pandas(df)

# Export the PyArrow Table to Parquet
pq.write_table(table, "merged.parquet")

In [3]:
# Final file: reload the dataset from "merged.parquet" (written by the export
# step above) and rebind `df` to the result.
df = pd.read_parquet("merged.parquet")

In [4]:
# Preview the first rows of the reloaded DataFrame as a quick sanity check.
df.head()

Unnamed: 0,Event,Site,Date,Round,White,Black,Result,WhiteElo,BlackElo,Moves
0,Live Chess,Chess.com,2022-11-03,-,suliborski30,CELTACOL6,0-1,658,880,e4 e6 Nf3 f6 d4 c6 c3 d6 Be2 b6 O-O Nd7 Re1 g6...
1,5|0 Blitz,Chess.com,2022-11-04,2,suliborski30,RyanRodeb,0-1,631,1074,e4 e5 Nf3 Nf6 Nc3 Bb4 d3 Qe7 Bd2 d6 a3 Ba5 g3 ...
2,5|0 Blitz,Chess.com,2022-11-04,3,Sonohr,suliborski30,0-1,1016,877,e4 c5 f4 Nc6 c3 e6 Bb5 d5 Nf3 a6 Bxc6+ bxc6 ex...
3,5|0 Blitz,Chess.com,2022-11-04,4,suliborski30,Elheji,1-0,1016,1001,e4 c6 Nf3 d5 e5 c5 d4 c4 b3 b5 Bb2 Bf5 Nc3 a6 ...
4,5|0 Blitz,Chess.com,2022-11-04,5,vetleFRAtau,suliborski30,0-1,893,1070,e4 c5 Nf3 Nc6 c3 e6 d4 cxd4 cxd4 d5 e5 f6 Bb5 ...
