In [2]:
from koalas.koalas import DataFrame
from example import chess

ModuleNotFoundError: No module named 'koalas'

In [None]:
# Build the working DataFrame for player 'TineBerger'. The output of chess() is
# already record-shaped, so it is handed straight to DataFrame.from_records.
# NOTE(review): the trailing 2023, 9 arguments are presumably year and month — confirm against `chess`.
df = DataFrame.from_records(
    chess('TineBerger', 2023, 9),
)
df[:3]  # preview: display only the leading slice rather than the whole frame

In [None]:
# Get the breakdown of how many games have been won, lost and drawn.
(
    df
        .group('Outcome')
        .apply('Count', len, 'Color')
        .select('Outcome', 'Count')
)

In [None]:
# Find the opponents responsible for the most losses.
# Only lost games are kept, then counted per opponent. sort + reverse presumably
# yields descending 'Losses', so the final slice keeps the five toughest opponents.
(
    df.filter('Outcome', 'Lose')
    .group('Opponent')
    .apply('Losses', len, 'Outcome')
    .select('Opponent', 'Losses')
    .sort('Losses')
    .reverse()
)[:5]

In [None]:
# Identify which openings are most and least successful (by default when playing as black).
def summary(df, color='Black', opening_moves=1, min_games=20):
    """Build a per-opening win-percentage table for games played as `color`.

    The function only chains calls on `df`; it relies on the DataFrame API used
    throughout this notebook (filter / apply / group / sort / reverse / select).

    Steps, in order:
      1. keep rows whose 'Color' equals `color`;
      2. derive 'Opening' by space-joining the first `opening_moves` entries of 'Moves';
      3. group by 'Opening' and derive 'Count' (len of 'Outcome') and
         'Win Rate' (share of 'Win' entries in 'Outcome');
      4. derive 'Win Percentage' as the rate scaled to 0-100 and rounded to 2 places;
      5. sort by 'Win Percentage' and reverse the order;
      6. keep only openings with strictly more than `min_games` games;
      7. select 'Opening' and 'Win Percentage'.

    Args:
        df: DataFrame-like object exposing the chained API above, with
            'Color', 'Moves' and 'Outcome' columns.
        color: value matched against the 'Color' column. Defaults to 'Black'.
        opening_moves: how many leading moves identify an opening. Defaults to 1.
        min_games: openings need strictly more than this many games to be
            reported (the comparison is `count > min_games`). Defaults to 20.

    Returns:
        Whatever the final `.select(...)` call returns — presumably a DataFrame
        with 'Opening' and 'Win Percentage' columns.
    """
    return (
        df
            .filter('Color', color)
            # assumes each 'Moves' value is a sequence of strings — TODO confirm
            .apply('Opening', lambda moves: ' '.join(moves[:opening_moves]), 'Moves')
            .group('Opening')
            .apply('Count', len, 'Outcome')
            .apply('Win Rate', lambda outcomes: outcomes.count('Win') / len(outcomes), 'Outcome')
            .apply('Win Percentage', lambda rate: round(100. * rate, 2), 'Win Rate')
            .sort('Win Percentage')
            .reverse()
            # Flag openings with enough games, then keep only the flagged rows.
            .apply('Minimum', lambda count: count > min_games, 'Count')
            .filter('Minimum', True)
            .select('Opening', 'Win Percentage')
    )
# Display the opening summary for the player loaded into `df`.
summary(df)

In [None]:
# Compare opening performance against another player.
# Each side's 'Win Percentage' column is renamed first so the two stay distinguishable.
left = summary(df).rename('Win Percentage', 'Win Percentage (Left)')
right = (
    summary(DataFrame.from_records(chess('architecturalpain', 2023, 9)))
    .rename('Win Percentage', 'Win Percentage (Right)')
)

# Join on 'Opening', add Difference = right - left (rounded to 2 places), and export to CSV.
# NOTE(review): `comparison` is bound to whatever .export() returns — confirm it
# returns the frame, otherwise the final display below shows nothing useful.
comparison = (
    left
    .join(right, 'Opening')
    .apply(
        'Difference',
        lambda left_pct, right_pct: round(right_pct - left_pct, 2),
        'Win Percentage (Left)',
        'Win Percentage (Right)',
    )
    .export('csv', 'performance.csv')
)
comparison