In [2]:
from koalas import DataFrame
from example import chess

In [3]:
# Load the chess results; each item produced by chess() is already a record,
# so the list can be handed directly to DataFrame.from_records.
# NOTE(review): 2023 and 10 look like a year and a month -- confirm against example.chess.
games = list(chess('TineBerger', 2023, 10))
df = DataFrame.from_records(games)
# Display one record (index 2) to see which fields every game carries.
df[2]

{'Color': 'Black',
 'Moves': ['1. d4',
  '1... Nf6',
  '2. Nc3',
  '2... d5',
  '3. Bg5',
  '3... Bf5',
  '4. Bxf6',
  '4... exf6',
  '5. e3',
  '5... Nc6',
  '6. Bd3',
  '6... Bxd3',
  '7. Qxd3',
  '7... Be7',
  '8. Nge2',
  '8... O-O',
  '9. Ng3',
  '9... a5',
  '10. Nf5',
  '10... Nb4',
  '11. Qe2',
  '11... Qd7',
  '12. Qg4',
  '12... Qe6',
  '13. Qxg7#'],
 'Opponent': 'Bradshort21',
 'OpponentRating': 1559,
 'Outcome': 'Lose',
 'Rating': 1606,
 'TimeControl': '180'}

In [4]:
# Tally the games by result: one row per outcome with the number of games in it.
games_by_outcome = df.group('Outcome')
# Taking len of the grouped 'Color' values gives the number of games in each group;
# the column is then renamed to say what it now holds.
outcome_counts = games_by_outcome.apply('Color', len).rename('Color', 'Count')
outcome_counts.select('Outcome', 'Count')

Outcome Count
------- -----
Win     985  
Lose    943  
Draw    151  

In [5]:
# Rank opponents by the number of games lost against them and show five of them.
lost_games = df.filter('Outcome', 'Lose')
losses_per_opponent = (
    lost_games
        .group('Opponent')
        # Each group's 'Outcome' values are all losses, so their count is the loss total.
        .apply('Outcome', len)
        .rename('Outcome', 'Losses')
        .select('Opponent', 'Losses')
)
# NOTE(review): the False flag presumably requests descending order -- confirm in koalas.
losses_per_opponent.sort('Losses', False)[:5]

Opponent    Losses
--------    ------
Fabrizio265 3     
scoott33    3     
drmaj       3     
Kaiidlu     3     
timari      3     

In [6]:
# Compare the openings played as white: win rate per opening, restricted to
# openings that occur in more than 25 games.
def opening_of(moves):
    """Label a game by its first four move entries, joined with spaces."""
    return ' '.join(moves[:4])


def win_fraction(outcomes):
    """Return wins divided by the combined number of wins, draws and losses."""
    wins = outcomes.count('Win')
    draws = outcomes.count('Draw')
    losses = outcomes.count('Lose')
    return wins / (wins + draws + losses)


white_games = df.filter('Color', 'White').apply('Moves', opening_of)
# The game count goes into its own 'Count' column; 'Outcome' is still needed
# afterwards to compute the win rate.
openings = white_games.group('Moves').apply('Outcome', len, 'Count')
win_rates = (
    openings
        .apply('Outcome', win_fraction)
        .rename('Outcome', 'Win Rate')
        # Express the rate as a percentage with two decimals.
        .apply('Win Rate', lambda rate: round(100. * rate, 2))
        .sort('Win Rate', False)
)
# Flag the openings with more than 25 games and keep only those rows.
frequent_openings = (
    win_rates
        .apply('Count', lambda count: count > 25, 'Minimum')
        .filter('Minimum', True)
)
frequent_openings.select('Moves', 'Win Rate', 'Count')

Moves                          Win Rate Count
-----                          -------- -----
1. Nc3 1... Nf6 2. e4 2... d6  67.44    43   
1. Nc3 1... d5 2. e4 2... d4   58.96    173  
1. Nc3 1... Nf6 2. e4 2... e5  55.74    61   
1. Nc3 1... Nf6 2. e4 2... g6  54.84    31   
1. Nc3 1... e5 2. e4 2... Nf6  51.79    56   
1. Nc3 1... g6 2. e4 2... Bg7  50.0     56   
1. Nc3 1... c6 2. e4 2... d5   48.15    54   
1. Nc3 1... d5 2. e4 2... dxe4 47.54    61   
1. Nc3 1... c5 2. e4 2... Nc6  43.18    44   
1. Nc3 1... e6 2. e4 2... d5   39.47    38   
1. Nc3 1... e5 2. e4 2... Nc6  34.55    55   