In [1]:
from collections import defaultdict

import pandas as pd
import igraph as ig
from chess.pgn import read_headers

from chessnet.utils import DATA_DIR, ARTIFACTS_DIR

In [2]:
filename = DATA_DIR / "sample.pgn"
pgn = read_headers(open(filename, "r"))
pgn

Headers(Event='Online Olym Top DivD 2021', Site='chess.com INT', Date='2021.09.08', Round='1.1', White='Ozen, Deniz', Black='Gallego Alcaraz, Andres Felipe', Result='1-0', BlackElo='2512', BlackFideId='4402669', BlackTeam='Colombia', BlackTitle='GM', Board='1', ECO='A45', EventDate='2021.09.08', EventType='team', Opening="Queen's pawn game", WhiteElo='2397', WhiteFideId='6349196', WhiteTeam='Turkey', WhiteTitle='IM')

In [18]:
filename = DATA_DIR / "onlineolymtopd21.pgn"
file = open(filename, "r")

data = defaultdict(list)
fields = ["White", "Black", "WhiteElo", "BlackElo", "Result"]
while pgn := read_headers(file):
    for field in fields:
        data[field].append(pgn[field])

In [20]:
type(pgn)

NoneType

In [4]:
df = pd.DataFrame(data)
print(df.shape)
df

(270, 5)


Unnamed: 0,White,Black,WhiteElo,BlackElo,Result
0,"Ozen, Deniz","Gallego Alcaraz, Andres Felipe",2397,2512,1-0
1,"Martinez Romero, Martin","Soysal, Serkan",2409,2389,1/2-1/2
2,"Caglar, Sila","Franco Valencia, Angela",2250,2089,1-0
3,"Chirivi C, Jenny Astrid","Kocyigit, Buse Naz",2124,2034,1-0
4,"Celik, Hasan Huseyin","Avila Pavas, Santiago",2370,2395,0-1
...,...,...,...,...,...
265,"Soysal, Serkan","McShane, Luke J",2389,2674,1/2-1/2
266,"Head, Louise","Onur, Cigdem",2090,2015,0-1
267,"Sahin, Hayrun Nisa","Varney, Zoe",2019,1917,1/2-1/2
268,"Derakhshani, Borna","Gurel, Ediz",2353,2353,1-0


In [21]:
filename = DATA_DIR / "om_datasets" / "OM_OTB_201609.pgn"
file = open(filename, "r")

data = defaultdict(list)
fields = ["White", "Black", "WhiteElo", "BlackElo", "Result"]
fields += ["ECO", "PlyCount", "Date", "SourceDate"]
i = 0

pgn = read_headers(file)
while pgn is not None:
    if i % 100000 == 0:
        print(i)
    i += 1
    for field in fields:
        value = pgn[field] if field in pgn else None
        data[field].append(value)
    try:
        pgn = read_headers(file)
    except UnicodeDecodeError:
        print("Unidecode error:", i)
        print(pgn)
        pass

0
100000
200000
300000
400000
500000
600000
700000
800000
900000
1000000
1100000
1200000
1300000
1400000
Unidecode error: 1494164
Headers(Event='1st Clanska Liga-Vzhod', Site='?', Date='2011.10.22', Round='?', White='Cernjak, Ambroz', Black='Spacapan, Simon', Result='1/2-1/2', BlackElo='1502', ECO='B80', EventDate='2011.??.??', PlyCount='31', Source='Opening Master', SourceDate='2016.09.17', SourceTitle='OM OTB 201609', WhiteElo='1530')
1500000
Unidecode error: 1515253
Headers(Event='Belgrade-chT', Site='Belgrade', Date='2011.10.01', Round='1.6', White='Stojanovic, Sasa M', Black='Sarenac, David', Result='0-1', BlackElo='2273', ECO='B90', EventCountry='SRB', EventDate='2011.10.01', EventRounds='11', EventType='team-tourn', PlyCount='84', Source='Opening Master', SourceDate='2016.09.17', SourceTitle='OM OTB 201609', WhiteElo='2070')
1600000
1700000
1800000
1900000
2000000
2100000
2200000
2300000
2400000
2500000
2600000
2700000
2800000
2900000
3000000
3100000
3200000
3300000
3400000
3500

In [17]:
read_headers(file)

Headers(Event='Ch (team) (1 liga)', Site='Cetinje (Montenegro)', Date='2011.10.22', Round='8', White='Saranovic, Mitar', Black='Abramovic, Dragoljub', Result='1/2-1/2', BlackElo='2105', ECO='B00', EventCountry='YUG', EventDate='2011.??.??', EventRounds='3', PlyCount='26', Source='Opening Master', SourceDate='2016.09.17', SourceTitle='OM OTB 201609', WhiteElo='2146')

In [14]:
pgn

Headers()

In [22]:
df = pd.DataFrame(data)
df.to_csv(ARTIFACTS_DIR / "om_otb.csv", index=False)
print(df.shape)
df.head()

(7852388, 9)


Unnamed: 0,White,Black,WhiteElo,BlackElo,Result,ECO,PlyCount,Date,SourceDate
0,"Kravchenko, Vladimir V","Smirnova, Ekaterina",2377,2130,0-1,A05,98,2016.09.08,2016.09.17
1,"Evdokimov, Alexey","Karpov, Alexander",2249,2242,1-0,A89,95,2016.09.08,2016.09.17
2,"Egorov, Pavel","Golubev, Roman",2249,2334,1/2-1/2,E90,90,2016.09.08,2016.09.17
3,"Nikologorskiy, Konstantin","Kravchenko, Vladimir V",2410,2377,1-0,B06,107,2016.09.08,2016.09.17
4,"Egorov, Pavel","Evdokimov, Alexey",2249,2249,1/2-1/2,E90,61,2016.09.08,2016.09.17


In [23]:
df.isna().sum()

White              46
Black              45
WhiteElo      2311711
BlackElo      2332379
Result             45
ECO                45
PlyCount           45
Date               46
SourceDate         45
dtype: int64

In [29]:
df = df.dropna(subset=["White", "Black", "BlackElo", "WhiteElo"])

In [30]:
edges = df[["White", "Black"]].drop_duplicates()
g = ig.Graph.DataFrame(edges)

In [32]:
print(g.as_undirected().summary())

IGRAPH UN-- 235456 3908953 -- 
+ attr: name (v)


In [11]:
df[["White", "Black"]].drop_duplicates()

Unnamed: 0,White,Black
0,"Kravchenko, Vladimir V","Smirnova, Ekaterina"
1,"Evdokimov, Alexey","Karpov, Alexander"
2,"Egorov, Pavel","Golubev, Roman"
3,"Nikologorskiy, Konstantin","Kravchenko, Vladimir V"
4,"Egorov, Pavel","Evdokimov, Alexey"
...,...,...
1494159,"Mackie, Norman","Munsey, J."
1494160,"Moreno Simo, Javier","Bendayan Claros, Aaron Arturo"
1494161,"Martinez Vicario, Jose Maria","Jianu, Vlad Cristian"
1494162,"Montilla, Jorcerys","Rovira, Tairu"


In [27]:
df.tail()

Unnamed: 0,White,Black,WhiteElo,BlackElo,Result,ECO,PlyCount,Date,SourceDate
7852383,"Herejk, Petr","Sladek, Jaroslav",2094.0,1869.0,1/2-1/2,D13,17,????.??.??,2016.09.17
7852384,"Hermans, Guy","Verstraeten, Roland",,,1-0,D52,89,????.??.??,2016.09.17
7852385,"Kvist, Markus","Lindmark, Crister",,,1/2-1/2,B13,32,????.??.??,2016.09.17
7852386,"Ladron De Guevara, Miguel","Sergio, Sandra",,,1-0,C80,75,????.??.??,2016.09.17
7852387,"Laleman, Manu","Peeters, Jozef",1640.0,1640.0,1/2-1/2,B14,91,????.??.??,2016.09.17


In [28]:
from pathlib import Path
Path("foo/bar/this.py").stem

'this'

In [33]:
df

Unnamed: 0,White,Black,WhiteElo,BlackElo,Result,ECO,PlyCount,Date,SourceDate
0,"Kravchenko, Vladimir V","Smirnova, Ekaterina",2377,2130,0-1,A05,98,2016.09.08,2016.09.17
1,"Evdokimov, Alexey","Karpov, Alexander",2249,2242,1-0,A89,95,2016.09.08,2016.09.17
2,"Egorov, Pavel","Golubev, Roman",2249,2334,1/2-1/2,E90,90,2016.09.08,2016.09.17
3,"Nikologorskiy, Konstantin","Kravchenko, Vladimir V",2410,2377,1-0,B06,107,2016.09.08,2016.09.17
4,"Egorov, Pavel","Evdokimov, Alexey",2249,2249,1/2-1/2,E90,61,2016.09.08,2016.09.17
...,...,...,...,...,...,...,...,...,...
7852376,"Lagunes, Matthieu","Fageot, Julien",1868,1985,1-0,C14,21,????.??.??,2016.09.17
7852378,Juan,"Cortes, Maria Angelica",1712,1770,1/2-1/2,C50,27,????.??.??,2016.09.17
7852381,"Phan Dinh Nhat, Khanh","Nguyen, Van Hai",1967,2312,1/2-1/2,A17,41,????.??.??,2016.09.17
7852383,"Herejk, Petr","Sladek, Jaroslav",2094,1869,1/2-1/2,D13,17,????.??.??,2016.09.17
