# Creating dataframes from chess.com

In [88]:
from chessdotcom import get_titled_players, get_player_profile, get_player_stats, get_player_games_by_month
import pandas as pd
from tqdm import tqdm
import datetime
import chess.pgn
from io import StringIO
import random

## Loading information about Grandmasters

In [2]:
# Names of all chess.com accounts belonging to chess grandmasters
gm_list = get_titled_players('GM')
print(gm_list.players[:10])

['123lt', '124chess', '1977ivan', '1stsecond', '4thd-alpeacefulmoon', '64aramis64', '64arthos64', '64atilla64', '64dartagnan64', '64genghis64']


In [4]:
with open('gm_list.txt', 'w+') as f:
    for gm in gm_list.players:
        f.write(f"{gm}\n")

In [5]:
file = open('gm_list.txt', 'r')
gm_list = file.read()
gm_list = gm_list.split("\n")
file.close()

### Player profiles

In [6]:
# Pulling GMs profiles from chess.com
gm_profiles = []

for player in gm_list.players:
    gm_profiles.append(get_player_profile(player).json['player'])

In [7]:
# Change into pandas dataframe
profiles_df = pd.DataFrame(gm_profiles)

In [10]:
# Save the dataframe into csv
profiles_df.to_csv("GM_profiles.csv")

### Player stats

In [5]:
# Pulling GMs stats from chess.com
gm_stats = []

for player in tqdm(gm_list.players):
    temp_stat = get_player_stats(player, tts=2).json['stats']
    temp_stat['player'] = player
    gm_stats.append(temp_stat)

stats_df = pd.DataFrame(pd.json_normalize(gm_stats))
stats_df.to_csv("GM_stats.csv")

100%|██████████| 1466/1466 [1:09:10<00:00,  2.83s/it]


In [20]:
caruana = get_player_stats('fabianocaruana').json['stats']

In [21]:
pd.json_normalize(caruana, sep="_")

Unnamed: 0,fide,chess_rapid.last.rating,chess_rapid.last.date,chess_rapid.last.rd,chess_rapid.best.rating,chess_rapid.best.date,chess_rapid.best.game,chess_rapid.record.win,chess_rapid.record.loss,chess_rapid.record.draw,...,chess_blitz.best.game,chess_blitz.record.win,chess_blitz.record.loss,chess_blitz.record.draw,tactics.highest.rating,tactics.highest.date,tactics.lowest.rating,tactics.lowest.date,puzzle_rush.best.total_attempts,puzzle_rush.best.score
0,2835,2702,1664820808,56,3045,1567282891,https://www.chess.com/game/live/39727720047,80,28,73,...,https://www.chess.com/game/live/39128394713,784,259,150,3459,1532884703,1354,1363907400,67,64


### Latest player games

In [9]:
current_time = datetime.datetime.now()

In [111]:
# Pulling GMs latest games from chess.com
gm_games = []

for player in tqdm(gm_list):
    games = get_player_games_by_month(player, year=current_time.year, month=current_time.month-1, tts=random.randint(0,2))
    for game in games.json['games']:
        try:
            pgn = StringIO(game['pgn'])
            game = chess.pgn.read_game(pgn)
            game_dict = {'White': game.headers['White'], 'Black': game.headers['Black'], 'Result': game.headers['Result'], \
            'White_opening': [str(move)[2:] for move in game.mainline_moves()][0], 'Black_opening': [str(move)[2:] for move in game.mainline_moves()][1]}
            gm_games.append(game_dict)
        except:
            pass
        
games_df = pd.DataFrame(gm_games)
games_df.to_csv("GM_games.csv")

100%|█████████▉| 1466/1467 [45:43<00:01,  1.87s/it] 


ChessDotComError: <class 'chessdotcom.types.ChessDotComError'>(status_code=404, text={"code":0,"message":"Data provider not found for key \"/pub/player//games/2022/11\"."})

In [113]:
games_df = pd.DataFrame(gm_games)
games_df.to_csv("GM_games.csv")