In [1]:
from koalas import DataFrame
from example import chess

In [2]:
# The chess helper already hands back the results as records, so they can be
# loaded into a DataFrame as-is.
records = chess('TineBerger', 2023, 9)
df = DataFrame.from_records(records)

# Peek at the record stored at index 2.
df[2]

{'Color': 'White',
 'Moves': ['1. Nc3',
  '1... d5',
  '2. e4',
  '2... d4',
  '3. Nb5',
  '3... a6',
  '4. Na3',
  '4... e5',
  '5. Bc4',
  '5... Nf6',
  '6. Nf3',
  '6... Bd6',
  '7. h3',
  '7... O-O',
  '8. O-O',
  '8... Nxe4',
  '9. d3',
  '9... Nf6',
  '10. Bg5',
  '10... Be7',
  '11. Nxe5',
  '11... Nbd7',
  '12. Nf3',
  '12... b5',
  '13. Bb3',
  '13... Bb7',
  '14. Nxd4',
  '14... h6',
  '15. Bxf6',
  '15... Bxf6',
  '16. Nf3',
  '16... Nc5',
  '17. Rb1',
  '17... Nxb3',
  '18. axb3',
  '18... Re8',
  '19. c3',
  '19... Qd6',
  '20. d4',
  '20... Bd5',
  '21. b4',
  '21... Re6',
  '22. Re1',
  '22... Rae8',
  '23. Rxe6',
  '23... Rxe6',
  '24. Ne5',
  '24... Bxe5',
  '25. dxe5',
  '25... Qxe5',
  '26. Qd4',
  '26... Qg5',
  '27. Qg4',
  '27... Qd2',
  '28. c4',
  '28... bxc4',
  '29. Nxc4',
  '29... Bxc4',
  '30. Qxc4',
  '30... Re1+',
  '31. Rxe1',
  '31... Qxe1+',
  '32. Kh2',
  '32... Qe5+',
  '33. f4',
  '33... Qxb2',
  '34. Qxc7',
  '34... Qxb4',
  '35. Qd8+',
  '35... Kh7

In [3]:
# Tally how many games ended in a win, a loss or a draw.
by_outcome = df.group('Outcome')
# Counting the 'Color' entries of each group yields the games per outcome.
outcome_counts = by_outcome.apply('Count', len, 'Color')
outcome_counts.select('Outcome', 'Count')

Outcome Count
------- -----
Draw    177  
Lose    1307 
Win     1361 

In [4]:
# Find the opponents responsible for the most losses.
lost_games = df.filter('Outcome', 'Lose')
losses_by_opponent = (
    lost_games
        .group('Opponent')
        .apply('Losses', len, 'Outcome')
        .select('Opponent', 'Losses')
)
# Largest loss counts first; keep only the first five rows.
losses_by_opponent.sort('Losses', ascending=False)[:5]

Opponent          Losses
--------          ------
architecturalpain 4     
chess_enjoyer03   4     
DaiLongDa888      3     
Lightyagami322311 3     
Martia-Loup       3     

In [5]:
# Identify which openings are most and least successful when playing as black.
def summary(df, *, color='Black', depth=1, min_count=20):
    """Rank openings by win percentage for the games played as ``color``.

    The opening of a game is the first ``depth`` entries of its 'Moves' list
    joined with single spaces. Games are grouped by that opening, and
    openings backed by ``min_count`` games or fewer are dropped.

    Args:
        df: Frame of game records exposing 'Color', 'Moves' and 'Outcome'
            columns and the filter/apply/group/sort/select methods used below.
        color: Value of the 'Color' column to keep. Defaults to 'Black'.
        depth: Number of leading 'Moves' entries that form the opening. Each
            entry is a single ply ('1. e4', '1... e5', ...), so the default
            of 1 keys on White's first move.
        min_count: An opening is kept only when it has strictly more than
            this many games. Defaults to 20.

    Returns:
        The result of the final ``select``: the 'Opening' and
        'Win Percentage' columns, sorted by win percentage in descending
        order.
    """
    return (
        df
            .filter('Color', color)
            .apply('Opening', lambda moves: ' '.join(moves[:depth]), 'Moves')
            .group('Opening')
            # After grouping, each function is handed the group's 'Outcome'
            # values (presumably a list, given the .count() call below).
            .apply('Count', len, 'Outcome')
            .apply('Win Rate', lambda outcomes: outcomes.count('Win') / len(outcomes), 'Outcome')
            .apply('Win Percentage', lambda rate: round(100. * rate, 2), 'Win Rate')
            .sort('Win Percentage', ascending=False)
            # Flag the openings with enough games, then keep only those.
            .apply('Minimum', lambda count: count > min_count, 'Count')
            .filter('Minimum', True)
            .select('Opening', 'Win Percentage')
    )
# Display the opening breakdown for the games loaded into df.
summary(df)

Opening Win Percentage
------- --------------
1. e3   50.0          
1. e4   48.58         
1. d4   45.6          
1. Nf3  39.13         
1. c4   36.17         

In [6]:
# Put this player's opening results side by side with another player's.
left = summary(df).rename('Win Percentage', 'Win Percentage (Left)')

rival_games = DataFrame.from_records(chess('architecturalpain', 2023, 9))
right = summary(rival_games).rename('Win Percentage', 'Win Percentage (Right)')

# Match the two summaries on the opening, then record how far the right-hand
# win percentage sits above (positive) or below (negative) the left-hand one.
joined = left.join(right, 'Opening')
comparison = joined.apply(
    'Difference',
    lambda ours, theirs: round(theirs - ours, 2),
    'Win Percentage (Left)',
    'Win Percentage (Right)',
)
comparison

NameError: name 'opening_performance' is not defined