# Chess.com Exporter

### Download Data from User Stats and Games Played Results and Export to CSV

----

API Docs:

* https://www.chess.com/club/chess-com-developer-community
* https://www.chess.com/news/view/published-data-api

-----

## Configure User Info

In [1]:
# Chess.com username whose profile, stats and games are fetched and exported below
my_user = "markwk"

------

## Libraries and Dependencies

In [2]:
import requests
import time
import json
from datetime import datetime

import numpy as np
import pandas as pd

-----

# Get User Info and Current Stats

In [3]:
def getChessUser(username):
    """Fetch a player's public profile from the Chess.com API.

    Sends a GET request to the ``/pub/player/<username>/`` endpoint and
    returns the response body decoded from JSON.
    """
    profile_url = "https://api.chess.com/pub/player/" + username + "/"
    response = requests.get(profile_url)
    return response.json()

In [4]:
# fetch the profile of the configured user
user_info = getChessUser(my_user)
# uncomment to inspect the raw response:
# print(json.dumps(user_info, indent=4, sort_keys=True))

In [5]:
# 'joined' is read as a Unix timestamp in seconds; the result is a naive datetime expressed in UTC
joined_date = datetime.utcfromtimestamp(user_info['joined'])

print(joined_date)

2018-08-30 09:43:58


In [6]:
def getChessRatings(username):
    """Fetch a player's statistics from the Chess.com API.

    Sends a GET request to the ``/pub/player/<username>/stats`` endpoint and
    returns the response body decoded from JSON.
    """
    stats_url = "https://api.chess.com/pub/player/" + username + "/stats"
    response = requests.get(stats_url)
    return response.json()

In [7]:
# fetch the stats of the configured user
user_stats = getChessRatings(my_user)
# uncomment to inspect the raw response:
# print(json.dumps(user_stats, indent=4, sort_keys=True))

In [8]:
# latest blitz rating
print(user_stats['chess_blitz']['last'])

{'rating': 980, 'date': 1537067051, 'rd': 45}


-----

# Get Archives List

In [9]:
def getChessUserArchives(username):
    """Return the list of game-archive URLs available for a player.

    Sends a GET request to the ``/pub/player/<username>/games/archives``
    endpoint and returns the value stored under the ``'archives'`` key of
    the JSON response.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        KeyError: if a successful response has no ``'archives'`` key.
    """
    r = requests.get("https://api.chess.com/pub/player/"+username+"/games/archives")
    # Fail here with the HTTP status instead of an opaque KeyError on
    # 'archives' when the API returned an error body.
    r.raise_for_status()

    return r.json()['archives']

In [10]:
# archive URLs for the configured user (one entry per archive returned by the API)
archives = getChessUserArchives(my_user)

In [11]:
# total archives / months with games
len(archives)

1

In [12]:
# Build the "YYYY/MM" keys used to request each archive.
# The key is taken from the last two path segments of every archive URL.
# Stripping a prefix built from my_user would silently leave the whole URL
# in place whenever that prefix does not match exactly (for example when the
# username is capitalised differently in the URL).
monthlist = [
    '/'.join(archive_url.rstrip('/').split('/')[-2:])
    for archive_url in archives
]

In [13]:
# helper function
def months(start_month, start_year, end_month, end_year):
    """Build "YYYY/MM" keys for every month in a range, newest first.

    Walks backwards from (end_year, end_month) down to and including
    (start_year, start_month). The month is zero-padded to two digits.

    Returns:
        A list of strings such as ``['2018/02', '2018/01', '2017/12']``;
        empty when the end lies before the start.
    """
    monthlist = []
    month, year = end_month, end_year

    # Tuple comparison orders by year first, then by month.
    while (year, month) >= (start_year, start_month):
        monthlist.append('{}/{:02d}'.format(year, month))

        # Step back one month, rolling over into December of the previous year.
        month -= 1
        if month < 1:
            month = 12
            year -= 1

    return monthlist

In [14]:
# Alternative Option 2: Get all Months Since Joined
start_year = joined_date.year
start_month = joined_date.month

# Read the clock once so the year and month always come from the same
# instant (two separate calls can straddle a month or year rollover).
current_date = datetime.now()
end_year = current_date.year
end_month = current_date.month

In [15]:
# Alternative Option 3: manually set dates

# start_year = 2018
# start_month = 1

# end_year = 2018
# end_month = 6

In [16]:
# uncomment to run option 2 or 3
# monthlist = months(start_month, start_year, end_month, end_year)

----

# Download Data on Chess.com Games Played

In [17]:
def getChessGamesPlayed(username, monthlist, delay=10):
    """Download every game a player has in the given monthly archives.

    For each entry of ``monthlist`` (strings of the form "YYYY/MM") a GET
    request is sent to ``/pub/player/<username>/games/<YYYY/MM>/`` and the
    list stored under the ``'games'`` key of the JSON response is appended
    to the result.

    Args:
        username: player whose games are downloaded.
        monthlist: list of "YYYY/MM" archive keys, requested in order.
        delay: seconds to pause between two consecutive requests
            (defaults to the previous fixed value of 10).

    Returns:
        A flat list with the game records of all requested months; empty
        when ``monthlist`` is empty.

    Raises:
        KeyError: if a response has no ``'games'`` key.
    """
    games_list = []
    total = len(monthlist)

    for position, month in enumerate(monthlist, start=1):
        print("Getting Game Data for " + month)

        r = requests.get("https://api.chess.com/pub/player/" +
                         username + "/games/" + month + "/")

        games_obj = r.json()
        games_list.extend(games_obj['games'])

        # Pause between requests only; nothing follows the final one,
        # so waiting after it would just delay the caller.
        if position < total:
            time.sleep(delay)

    return games_list

In [18]:
# get all games stats
games_list = getChessGamesPlayed(my_user, monthlist)

Getting Game Data for 2018/09


In [19]:
# function to unpack nested data
def unpack(df, column, fillna=None):
    """Expand a column of dicts into one column per key.

    A new frame is built from the dicts stored in ``df[column]``, appended
    to the right of ``df``, and the original nested column is removed.
    ``df`` itself is not modified.

    Args:
        df: source DataFrame.
        column: name of the column holding one dict per row.
        fillna: when not None, value used to fill gaps in the expanded
            columns (keys missing from some rows).

    Returns:
        A new DataFrame with the expanded columns placed after the
        remaining original columns.
    """
    # Series.iteritems() no longer exists in pandas 2.x; tolist() yields the
    # same per-row values. Reusing df.index keeps every expanded row attached
    # to its source row even when df does not have a default 0..n-1 index.
    expanded = pd.DataFrame(df[column].tolist(), index=df.index)
    if fillna is not None:
        expanded = expanded.fillna(fillna)

    ret = pd.concat([df, expanded], axis=1)
    del ret[column]
    return ret

## Initial Games Played Data Cleanup

In [23]:
# unpack the nested data
games_played = pd.DataFrame(games_list)

# Expand each player's nested record into "<side>_<field>" columns
# (black_id, black_rating, ..., white_username). unpack() places the expanded
# fields after the existing columns, so only that tail is renamed; the
# top-level columns keep the names they arrived with, whatever their number
# or order. Assigning one fixed list of names to every column would fail or
# mislabel data as soon as the payload carried a different set of fields.
for side in ('black', 'white'):
    kept = games_played.shape[1] - 1  # all current columns except the nested one
    games_played = unpack(games_played, side, 0)
    renamed = list(games_played.columns)
    # a leading '@' (as in '@id') is dropped so the result reads '<side>_id'
    renamed[kept:] = [side + '_' + str(field).lstrip('@') for field in renamed[kept:]]
    games_played.columns = renamed

# if you don't play daily games then not all games will have a start time
if 'start_time' in games_played.columns:
    games_played['start_time'] = pd.to_datetime(games_played['start_time'], unit='s')
games_played['end_time'] = pd.to_datetime(games_played['end_time'], unit='s')

# set which side I played
# NOTE(review): the comparison ignores case on the assumption that Chess.com
# usernames are case-insensitive — confirm against the API documentation.
played_white = games_played['white_username'].astype(str).str.lower() == my_user.lower()
games_played['my_side'] = np.where(played_white, 'white', 'black')

# get and assign my result: white's result where I was white, black's otherwise
games_played['my_result'] = games_played['white_result'].where(
    played_white, games_played['black_result'])

In [24]:
# export to csv: writes data/<my_user>_games_played.csv
# NOTE(review): the relative "data/" directory is assumed to exist already — confirm before running
games_played.to_csv("data/" + my_user + "_games_played" + ".csv")

In [25]:
# total games played
total_games_played = len(games_played)

In [26]:
total_games_played

74

-------

## Advanced PGN Parsing: Extract Additional Game Info from PGN Game Files

**NOTE:** To use this section, you need to install the python-chess library, which you can do from the command line with the following command:

> pip install python-chess

In [27]:
import chess.pgn
import io
import re

In [29]:
# Advanced Parsing of PGN
# For every game, read the PGN headers and the clock annotations, then add
# the extracted values to games_played as new columns.

terminations = []
total_moves = []
start_times = []
end_times = []
moves_times_white = []
moves_times_black = []

# TODO: Additional Data to Extract
# Result
# ECO
# ECOUrl
# WhiteElo
# BlackElo

# Clock annotations have the form {[%clk <time>]}. The pattern is a raw
# string (no invalid escape sequences) and is compiled once, outside the loop.
clock_pattern = re.compile(r'\{\[%clk (.*?)\]\}')

for pgn_text in games_played['pgn']:
    game = chess.pgn.read_game(io.StringIO(pgn_text))
    headers = game.headers

    terminations.append(headers['Termination'])

    start_times.append(headers['Date'] + " " + headers['StartTime'])

    # A game can end on a later day than it started: prefer the PGN's EndDate
    # header when it is present and fall back to Date otherwise.
    end_times.append(headers.get('EndDate', headers['Date']) + " " + headers['EndTime'])

    move_times = clock_pattern.findall(pgn_text)

    # one clock annotation is counted per entry, i.e. per half-move
    total_moves.append(len(move_times))

    # annotations alternate between the players, starting with white
    moves_times_white.append(', '.join(move_times[::2]))
    moves_times_black.append(', '.join(move_times[1::2]))

games_played['termination'] = terminations
games_played['total_moves'] = total_moves

# replaces any existing start_time column with the value read from the PGN
games_played['start_time'] = start_times
games_played['start_time'] = pd.to_datetime(games_played['start_time'])

games_played['pgn_end_time'] = end_times
games_played['pgn_end_time'] = pd.to_datetime(games_played['pgn_end_time'])

games_played['moves_times_white'] = moves_times_white
games_played['moves_times_black'] = moves_times_black

In [30]:
# TODO: 
# * Get the total play time of each player from last timestamp minus total seconds of your side game
# * Calculate the average move time 

In [31]:
# games_played.tail()

In [32]:
# export detailed stats to csv: writes data/<my_user>_games_played_detailed.csv
# NOTE(review): the relative "data/" directory is assumed to exist already — confirm before running
games_played.to_csv("data/" + my_user + "_games_played_detailed" + ".csv")