In [1]:
import sys
# Add the sibling '../koalas/' directory to the module search path so that the
# `from koalas import DataFrame` further down resolves. The path is relative, so it
# depends on the directory the kernel was started from.
# NOTE(review): presumably a local checkout of the koalas project rather than an
# installed package — confirm.
sys.path.append('../koalas/')

In [2]:
def chess(player:str, year:int, month:int):
    """
    Fetches results of chess games played on https://www.chess.com by the player with the given
    user name in the given month and year.

    This is a generator: a single HTTP GET against the chess.com public API is issued when
    iteration starts, and every game in the response is then yielded as one flat record seen
    from the point of view of `player`.

    Parameters
    ----------
        player: str
            chess.com user name. It is compared case-insensitively with the user names in each
            game, because the API reports names in their display casing (e.g. 'MagnusCarlsen').
        year: int
        month: int
            Left-padded with zeros to two digits in the request URL.

    Yields
    ------
        dict
            With the keys 'Moves', 'TimeControl', 'Color', 'Outcome', 'Rating', 'Opponent' and
            'OpponentRating'.

    Raises
    ------
        requests.HTTPError
            When iteration starts, if the API answers with an error status.
        KeyError
            If the response has no 'games' entry, or a game carries a result code that is not
            in the outcome table below.

    Example
    -------
        >>> results = chess('magnuscarlsen', 2023, 10)
        >>> next(results)
            {
                'Moves': ['1. e4', '1... c5', '2. Nf3', '2... d6', ..., '36. Rc8+', '36... Qd8', '37. Rxd8#'],
                'TimeControl': '180+1',
                'Color': 'Black',
                'Outcome': 'Lose',
                'Rating': 2524,
                'Opponent': '<opponent user name>',
                'OpponentRating': 3278
            }
    """
    import re
    import requests

    # Per-side result codes reported by chess.com, folded into Win / Lose / Draw.
    outcomes = {
        'win': 'Win',
        'checkmated': 'Lose', 'resigned': 'Lose', 'timeout': 'Lose', 'abandoned': 'Lose',
        'lose': 'Lose', 'kingofthehill': 'Lose', 'threecheck': 'Lose', 'bughousepartnerlose': 'Lose',
        'agreed': 'Draw', 'repetition': 'Draw', 'stalemate': 'Draw', 'insufficient': 'Draw',
        '50move': 'Draw', 'timevsinsufficient': 'Draw',
    }

    response = requests.get(
        f'https://api.chess.com/pub/player/{player}/games/{year}/{str(month).zfill(2)}',
        headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'},
        # Without a timeout a stalled connection would hang the notebook cell indefinitely.
        timeout=30,
    )
    # Surface HTTP errors (unknown player, rate limiting, ...) instead of a confusing KeyError on 'games'.
    response.raise_for_status()
    games = response.json()['games']

    # Matches one half-move including its number, e.g. '1. e4', '1... c5', '12. O-O', '37. Rxd8#'.
    moves_pattern = re.compile(r'[0-9]+\.+ [0-9A-Za-z#+=-]+')
    wanted = player.casefold()
    for game in games:
        if game['white']['username'].casefold() == wanted:
            color, opponent_color = 'white', 'black'
        else:
            color, opponent_color = 'black', 'white'
        # The move text is the last line of the PGN; the lines before it are header tags.
        # NOTE(review): a missing/empty 'pgn' is tolerated so one odd game cannot abort the
        # whole month — confirm whether the API can actually omit it.
        pgn_lines = game.get('pgn', '').splitlines()
        movetext = pgn_lines[-1] if pgn_lines else ''
        yield dict(
            Moves=moves_pattern.findall(movetext),
            TimeControl=game['time_control'],
            Color=color.title(),
            Outcome=outcomes[game[color]['result']],
            Rating=game[color]['rating'],
            Opponent=game[opponent_color]['username'],
            OpponentRating=game[opponent_color]['rating']
        )

In [3]:
from koalas import DataFrame

In [4]:
# Generate the DataFrame given the chess results, which are already in the form of records.
# `chess` is a generator, so the download only happens once its records are consumed
# (presumably inside `from_records` — confirm against the koalas implementation).
df = DataFrame.from_records(chess('TineBerger', 2023, 9))
# Preview only a small slice instead of dumping every game into the cell output.
df[:3]

Color Moves                                         Opponent      OpponentRating Outcome Rating TimeControl
----- -----                                         --------      -------------- ------- ------ -----------
Black ['1. e4', '1... Nf6', '2. Nc3', '2... d5',... alconcha      1576           Draw    1582   180        
White ['1. Nc3', '1... g6', '2. e4', '2... Bg7',... Matiafigueroa 1637           Win     1591   180        
White ['1. Nc3', '1... d5', '2. e4', '2... d4', ... MadMauriceXIX 1556           Draw    1590   180        

In [5]:
# Tally the games per outcome: group on 'Outcome', take the length of each group's
# 'Color' values as the 'Count', and keep just those two columns for display.
df.group('Outcome').apply('Count', len, 'Color').select('Outcome', 'Count')

Outcome Count
------- -----
Draw    177  
Lose    1307 
Win     1361 

In [6]:
# Rank opponents by the number of games lost against them and show the first five
# after sorting on 'Losses' and reversing the order.
(df.filter('Outcome', 'Lose')
   .group('Opponent')
   .apply('Losses', len, 'Outcome')
   .select('Opponent', 'Losses')
   .sort('Losses')
   .reverse()[:5])

Opponent          Losses
--------          ------
chess_enjoyer03   4     
architecturalpain 4     
searb             3     
arcanatack        3     
alwaysplexed      3     

In [7]:
# Identify which openings are most and least successful when playing as black.
def summary(df, color='Black', min_games=20):
    """
    Summarises the win percentage per opening for the games played with the given color.

    The 'Opening' of a game is the first entry of its 'Moves' value (an empty string when
    there are no moves). Games are grouped by that opening, the share of 'Win' outcomes in
    each group is turned into a percentage rounded to two decimals, and openings without
    enough games are dropped.

    Parameters
    ----------
        df: DataFrame
            Game records with at least the columns 'Color', 'Moves' and 'Outcome'.
        color: str
            Value of the 'Color' column to keep. Defaults to 'Black'.
        min_games: int
            Only openings with strictly more than this many games are kept. Defaults to 20.

    Returns
    -------
        DataFrame
            The columns 'Opening' and 'Win Percentage', sorted on 'Win Percentage' and then
            reversed.
    """
    return (
        df
            .filter('Color', color)
            # moves[:1] rather than moves[0] so that a game without moves does not raise.
            .apply('Opening', lambda moves: ' '.join(moves[:1]), 'Moves')
            .group('Opening')
            .apply('Count', len, 'Outcome')
            .apply('Win Rate', lambda outcomes: outcomes.count('Win') / len(outcomes), 'Outcome')
            .apply('Win Percentage', lambda rate: round(100. * rate, 2), 'Win Rate')
            .sort('Win Percentage')
            .reverse()
            # Flag, then drop, openings whose sample is too small to be meaningful.
            .apply('Minimum', lambda count: count > min_games, 'Count')
            .filter('Minimum', True)
            .select('Opening', 'Win Percentage')
    )
# Display the per-opening summary for the games loaded into `df`.
summary(df)

Opening Win Percentage
------- --------------
1. e3   50.0          
1. e4   48.58         
1. d4   45.6          
1. Nf3  39.13         
1. c4   36.17         

In [8]:
# Put two players' per-opening win percentages side by side: `left` is the player already
# loaded in `df`, `right` is fetched here. 'Difference' is right minus left, and the joined
# table is also written to 'performance.csv'.
left = summary(df).rename('Win Percentage', 'Win Percentage (Left)')
right = (
    summary(DataFrame.from_records(chess('architecturalpain', 2023, 9)))
        .rename('Win Percentage', 'Win Percentage (Right)')
)
comparison = (
    left
        .join(right, 'Opening')
        .apply(
            'Difference',
            lambda lhs, rhs: round(rhs - lhs, 2),
            'Win Percentage (Left)',
            'Win Percentage (Right)',
        )
        .export('csv', 'performance.csv')
)
comparison

Opening Win Percentage (Left) Win Percentage (Right) Difference
------- --------------------- ---------------------- ----------
1. d4   45.6                  51.4                   5.8       
1. e4   48.58                 50.2                   1.62      