In [1]:
# pip install chess.com
# https://www.youtube.com/watch?v=KYNbHGs-qG4


In [2]:
from chessdotcom import get_player_profile, get_player_stats, get_player_game_archives
import requests
import pprint
import pandas as pd

# make json data easier to read
pp = pprint.PrettyPrinter(width=41, compact=True)

def get_user_archives(username, months):
    """
    get archive monthly files of specific chess.com player
    input:
    username - username of the chess.com player
    months - target months that we want to get the archives
    
    output:
    target_month - files of archives according to months parameter
    """
    archives = get_player_game_archives(username).json['archives']
    target_month = []
    for archive in archives:
        if archive[-7:] in months:
            target_month.append(archive)
    return target_month
    
#files = get_user_archives("tianminlyu",["2023/01"])


In [3]:
def get_archive_games(filename):
    """
    return games in one archive file
    
    input:
    filename - filename that contains game urls
    
    output: 
    """
    games = requests.get(filename).json()['games']
    return games

#games = get_archive_games(files[-1])

In [4]:
def game_df(username,files):
    """
    import data from archive files and turn relevant data parameters into data frames
    
    input:
    username - username of the player
    files - file archived of the player
    
    output:
    a dataframe contains wanted columns
                        'username',
                         'urls', 
                         'time_control',
                        'end_time',
                        'uuid',
                        'initial_setup',
                        'time_class',
                        'rules',
                        'white_rating',
                        'white_username',
                        'black_rating',
                        'black_username',
                        'pgn'
                        
    """
    print("Player " + username + " is processing...")
    usernames = []
    urls = []
    time_control = [] 
    end_time = [] 
    uuid = []
    initial_setup = [] 
    time_class = [] 
    rules = [] 
    white_rating = [] 
    white_username = [] 
    black_rating = [] 
    black_username = []
    pgn = []
    
    for file in files:
        print(file + " " + " is processing...")
        games = get_archive_games(file)
        for game in games:
            usernames.append(username)
            urls.append(game['url'])
            time_control.append(game['time_control'])
            end_time.append(game['end_time'])
            uuid.append(game['uuid'])
            initial_setup.append(game['initial_setup'])
            time_class.append(game['time_class'])
            rules.append(game['rules'])
            white_rating.append(game['white']['rating'])
            white_username.append(game['white']['username'])
            black_rating.append(game['black']['rating'])
            black_username.append(game['black']['username'])
            pgn.append(game['pgn'])
    print("data fetch work is done.")
    
    df = pd.DataFrame(list(zip(usernames,
                               urls, 
                           time_control,
                          end_time,
                           uuid,
                           initial_setup,
                           time_class,
                           rules,
                           white_rating,
                           white_username,
                           black_rating,
                           black_username,
                           pgn
                          )),
               columns =['username',
                         'urls', 
                         'time_control',
                        'end_time',
                        'uuid',
                        'initial_setup',
                        'time_class',
                        'rules',
                        'white_rating',
                        'white_username',
                        'black_rating',
                        'black_username',
                         'pgn'
                        ])
    print("dataframe importing is done.")
    return df
    
#game_df('tianminlyu',files)

In [5]:
players = ['AGcuber19',
'TLPAWN',
'xiaoanwu',
'EmmaXLi',
'akfunchess66',
'Marsboom', 
'Claraqiu',
'Ravenclawfairy', 
'Zora_zhu',
'BurleyWalrus',
'taionemm',
'augustinewz',
'oscarzhang818',
'yaohengli',
'Wallacewang1214',
'SophiaZ2022',
'AliceCLi',
'yumitang',
'james2945',
'Oinkoinkw',
'Cathye1',
'lunathekitsune',
'ArthurRocket',
'vivianwwww20',
'ChloeWang16',
'Tyzalex',
'ZhichengW',
'Haochen1123',
'jaydenlan0118',
'ImRacoonie']

In [6]:
df_players = []
error_players = []

In [7]:
# pgn is missing https://www.chess.com/game/live/69027164491
data = get_archive_games('https://api.chess.com/pub/player/agcuber19/games/2023/02')

In [8]:
for i in data:
    print(i)
    print("----")

{'url': 'https://www.chess.com/game/live/69024314043', 'pgn': '[Event "Live Chess"]\n[Site "Chess.com"]\n[Date "2023.02.01"]\n[Round "-"]\n[White "Captainrobert07"]\n[Black "AGcuber19"]\n[Result "0-1"]\n[CurrentPosition "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq -"]\n[Timezone "UTC"]\n[UTCDate "2023.02.01"]\n[UTCTime "21:02:15"]\n[WhiteElo "709"]\n[BlackElo "746"]\n[TimeControl "600"]\n[Termination "AGcuber19 won by resignation"]\n[StartTime "21:02:15"]\n[EndDate "2023.02.01"]\n[EndTime "21:02:15"]\n[Link "https://www.chess.com/game/live/69024314043"]\n\n0-1\n', 'time_control': '600', 'end_time': 1675285335, 'rated': True, 'tcn': '', 'uuid': 'b2de386f-a273-11ed-8d9d-78ac4409ff3c', 'initial_setup': 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1', 'fen': 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq -', 'time_class': 'rapid', 'rules': 'chess', 'white': {'rating': 709, 'result': 'resigned', '@id': 'https://api.chess.com/pub/player/captainrobert07', 'username': 'C

In [None]:
for player in players:
    try:
        files = get_user_archives(player,["2023/02",
                                          "2023/01",
                                          "2022/12"])
        df = game_df(player, files)
        df_players.append(df)
    except:
        print("This player account " + player + " has issues.")
        error_players.append(player)

Player AGcuber19 is processing...
https://api.chess.com/pub/player/agcuber19/games/2022/12  is processing...
https://api.chess.com/pub/player/agcuber19/games/2023/01  is processing...
https://api.chess.com/pub/player/agcuber19/games/2023/02  is processing...
This player account AGcuber19 has issues.
Player TLPAWN is processing...
https://api.chess.com/pub/player/tlpawn/games/2022/12  is processing...
https://api.chess.com/pub/player/tlpawn/games/2023/01  is processing...
https://api.chess.com/pub/player/tlpawn/games/2023/02  is processing...
data fetch work is done.
dataframe importing is done.


In [None]:
error_players

In [None]:
print("% of players whose username are not valid")
print(len(error_players) * 100/ len(players))

In [None]:
players_df = pd.concat(df_players)

In [None]:
players_df.head()

In [None]:
players_df.loc[players_df['username'] == 'AGcuber19']

In [None]:
players_df.shape

In [None]:
def player_rating(row):  
    if row['username'].lower() == row['white_username'].lower():
        return row['white_rating']
    else:
        return row['black_rating']
    
players_df['player_rating'] = players_df.apply(lambda row: player_rating(row), axis=1)

In [None]:
players_df['timestamp'] = pd.to_datetime(players_df['end_time'],unit='s')


In [None]:
players_df.date = pd.to_datetime(players_df.timestamp)
players_df.set_index('timestamp',inplace=True)
players_df.head()

In [None]:
players_df['year']=players_df.index.year
players_df['month']=players_df.index.month
players_df['day']=players_df.index.day
players_df.head()


In [None]:
import plotly.express as px

fig = px.line(players_df.reset_index(), x="timestamp", y="player_rating", color = 'username')
fig.show()

In [None]:
players_df['username'].nunique()