In [1]:
from chessdotcom import get_player_game_archives, get_leaderboards
from queue import Queue
import requests
import json

## Retrieve Games within a Certain ELO Range

In [13]:
def get_data(player, elo_lb, elo_ub, num_players_cap, num_games_cap):
    '''
    performs a bfs to get game data from a list of users of a desired elo.
    player: the player origin of the bfs.
    elo_lb: lower bound.
    elo_ub: upper bound.
    num_players_cap: number of players we want to cap our requests to.
    num_games_cap: number of games per player we want to cap our exploration at.
    '''

    games_lst = [] # populate a list of games.


    # Initialize a Queue
    user_list = Queue()
    user_list.put(player)

    # Mark the start node as visited
    visited = {player}

    # Loop until the queue is empty
    while len(visited) < num_players_cap and len(visited) != 0: # stop when we have requested num_players_cap players.

        # Dequeue a vertex from the queue
        curr_user = user_list.get()

        # Request player games from server.
        requested_player = get_player_game_archives(curr_user)
        
        # print(requested_player) # stuck on requests
        print("requesting")
        this_playergames = requests.get(url=requested_player.json["archives"][-1]).json()["games"]
        print("fulfilled")

        # Add all adjacent vertices to the queue
        for i in range(min(len(this_playergames), num_games_cap)): # loop through the first num_games_cap games
            print("Searching")
            # Make sure it's a 5 min blitz
            if this_playergames[i]['time_control'] == '180':
                print("Found a game!")

                # Detect what color and elo the opponent is
                black_player = this_playergames[i]['black']['username']
                
                if curr_user == black_player:
                    opponent_elo = this_playergames[i]['black']['rating']
                    opponent_user = black_player
                else:
                    opponent_elo = this_playergames[i]['white']['rating']
                    opponent_user = this_playergames[i]['white']['username']

                if opponent_elo >= elo_lb and opponent_elo <= elo_ub and opponent_user not in visited:

                    games_lst.append(this_playergames[i])

                    visited.add(opponent_user)
                    user_list.put(opponent_user)

    return (games_lst, visited)

In [3]:
def store_games(games, file_name):
    '''
    Stores game dictionaries in a text file, separated by new line characters.

    games: a list of dictionaries of games to be stored.
    file_name: the path to a valid .txt file to store.
    '''
    with open(file_name, 'w') as f:
        for game in games:
            # json.dumps converts games to a string for storage
            f.write(json.dumps(game))
            f.write("\n")

def read_games(file_name):
    '''
    Retrieves games stored in a text file.

    file_name: the path to a valid .txt file with stored games.
    '''
    games = []

    with open(file_name, 'r') as f:
        for line in f:
            games.append(json.loads(line))

    return games

In [7]:
requested_player = get_player_game_archives("colinsong1")
requests.get(url=requested_player.json["archives"][-1]).json()["games"][-1]

{'url': 'https://www.chess.com/game/live/75158463523',
 'pgn': '[Event "Live Chess"]\n[Site "Chess.com"]\n[Date "2023.04.13"]\n[Round "-"]\n[White "colinsong1"]\n[Black "OnlyKMac"]\n[Result "1-0"]\n[CurrentPosition "r2qk2R/1b1pnr2/p1n1p1Q1/2p5/1p2P1P1/1BNP1P2/PPP1N3/2KR4 b - -"]\n[Timezone "UTC"]\n[ECO "B06"]\n[ECOUrl "https://www.chess.com/openings/Modern-Defense-with-1-e4-2.Nc3-Bg7"]\n[UTCDate "2023.04.13"]\n[UTCTime "20:55:02"]\n[WhiteElo "1557"]\n[BlackElo "1550"]\n[TimeControl "60"]\n[Termination "colinsong1 won by resignation"]\n[StartTime "20:55:02"]\n[EndDate "2023.04.13"]\n[EndTime "20:55:52"]\n[Link "https://www.chess.com/game/live/75158463523"]\n\n1. e4 {[%clk 0:00:59.9]} 1... g6 {[%clk 0:00:59.7]} 2. Nc3 {[%clk 0:00:59.1]} 2... Bg7 {[%clk 0:00:59.6]} 3. Bc4 {[%clk 0:00:58]} 3... e6 {[%clk 0:00:59.3]} 4. d3 {[%clk 0:00:57.2]} 4... Ne7 {[%clk 0:00:58.6]} 5. Bg5 {[%clk 0:00:56.3]} 5... b6 {[%clk 0:00:57.8]} 6. Qd2 {[%clk 0:00:55.4]} 6... Bb7 {[%clk 0:00:57.4]} 7. Bh6 {[%clk 0:

In [14]:
sample_high_elo_players = get_data(player="colinsong1", elo_lb=1400, elo_ub=1600, num_players_cap=20, num_games_cap=50)

requesting
fulfilled
Searching
Found a game!
Searching
Found a game!
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Found a game!
Searching
Found a game!
Searching
Found a game!
Searching
Found a game!
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Found a game!
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching
Searching


KeyboardInterrupt: 

In [30]:
store_games(sample_high_elo_players[0], "games.txt")

games = read_games("games.txt")
games[0]

{'url': 'https://www.chess.com/game/live/474899076',
 'pgn': '[Event "Live Chess"]\n[Site "Chess.com"]\n[Date "2013.03.17"]\n[Round "-"]\n[White "hastily"]\n[Black "FabianoCaruana"]\n[Result "0-1"]\n[CurrentPosition "r1b1r1k1/pp3ppp/2n5/2Pp4/7P/2b5/P1P1BPP1/3KR3 w - -"]\n[Timezone "UTC"]\n[ECO "C00"]\n[ECOUrl "https://www.chess.com/openings/French-Defense-Knight-Variation-2...d5-3.exd5-exd5-4.d4"]\n[UTCDate "2013.03.17"]\n[UTCTime "15:19:38"]\n[WhiteElo "1189"]\n[BlackElo "1451"]\n[TimeControl "180"]\n[Termination "FabianoCaruana won by resignation"]\n[StartTime "15:19:38"]\n[EndDate "2013.03.17"]\n[EndTime "15:20:51"]\n[Link "https://www.chess.com/game/live/474899076"]\n\n1. e4 {[%clk 0:03:00]} 1... e6 {[%clk 0:03:00]} 2. Nf3 {[%clk 0:02:59.7]} 2... d5 {[%clk 0:02:59]} 3. exd5 {[%clk 0:02:57.8]} 3... exd5 {[%clk 0:02:57.9]} 4. d4 {[%clk 0:02:57.5]} 4... Nf6 {[%clk 0:02:57.3]} 5. Qe2+ {[%clk 0:02:56.2]} 5... Be7 {[%clk 0:02:56.2]} 6. Bg5 {[%clk 0:02:54.7]} 6... O-O {[%clk 0:02:55.1]} 7

In [7]:
components = games.json()["games"][0]["pgn"].split("\n")
components

['[Event "Live Chess"]',
 '[Site "Chess.com"]',
 '[Date "2013.03.17"]',
 '[Round "-"]',
 '[White "FabianoCaruana"]',
 '[Black "hastily"]',
 '[Result "1-0"]',
 '[CurrentPosition "3r4/p6p/Q2b2p1/3Rk2N/P1p1P3/5P2/1P2K1PP/7R b - -"]',
 '[Timezone "UTC"]',
 '[ECO "D10"]',
 '[ECOUrl "https://www.chess.com/openings/Slav-Defense-3.Nc3-dxc4"]',
 '[UTCDate "2013.03.17"]',
 '[UTCTime "15:16:14"]',
 '[WhiteElo "1363"]',
 '[BlackElo "1193"]',
 '[TimeControl "180"]',
 '[Termination "FabianoCaruana won by resignation"]',
 '[StartTime "15:16:14"]',
 '[EndDate "2013.03.17"]',
 '[EndTime "15:19:21"]',
 '[Link "https://www.chess.com/game/live/474897192"]',
 '',
 '1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:00]} 2. c4 {[%clk 0:02:57.8]} 2... dxc4 {[%clk 0:02:57.2]} 3. Nc3 {[%clk 0:02:56.9]} 3... c6 {[%clk 0:02:56.5]} 4. a4 {[%clk 0:02:54.8]} 4... Nf6 {[%clk 0:02:52.7]} 5. e4 {[%clk 0:02:53.5]} 5... c5 {[%clk 0:02:48.1]} 6. d5 {[%clk 0:02:51.5]} 6... Bg4 {[%clk 0:02:44]} 7. f3 {[%clk 0:02:49.7]} 7... Bh5 {[

In [7]:
ldrboard = get_leaderboards().json # dictionary containing leaderboard (top 50 players).
daily_players = ldrboard["leaderboards"]["daily"]
daily_players

[{'player_id': 2305524,
  '@id': 'https://api.chess.com/pub/player/zgorl',
  'url': 'https://www.chess.com/member/Zgorl',
  'username': 'Zgorl',
  'score': 2624,
  'rank': 1,
  'country': 'https://api.chess.com/pub/country/NL',
  'title': 'FM',
  'status': 'premium',
  'avatar': 'https://images.chesscomfiles.com/uploads/v1/user/2305524.5341b605.200x200o.67a89f6c51b4.jpeg',
  'trend_score': {'direction': 1, 'delta': 9},
  'trend_rank': {'direction': 0, 'delta': 0},
  'flair_code': 'diamond_traditional',
  'win_count': 321,
  'loss_count': 204,
  'draw_count': 45},
 {'player_id': 1448848,
  '@id': 'https://api.chess.com/pub/player/the_evil_ducklings',
  'url': 'https://www.chess.com/member/The_Evil_Ducklings',
  'username': 'The_Evil_Ducklings',
  'score': 2530,
  'rank': 2,
  'country': 'https://api.chess.com/pub/country/US',
  'title': 'FM',
  'name': 'Roger LaFlair',
  'status': 'premium',
  'avatar': 'https://images.chesscomfiles.com/uploads/v1/user/1448848.309d598e.200x200o.91897c97

In [8]:
(lb_elo, ub_elo) = (2000, 2500)
usernames = []
for player in daily_players:
    if player['score'] <= ub_elo and  player['score'] >= lb_elo:
        usernames.append(player["username"])
        
usernames

NameError: name 'daily_players' is not defined

## Get the List of Desired Games

In [None]:
games_lst = []

for p in player_1500s:
    response = get_player_game_archives(p)
    gamezero = requests.get(url=response.json["archives"][0]) # the first game by the player
    games_lst.append(gamezero)

Way to get games:
1) Find any 1500ish player
2) Get a random game
3) Go into their opponents account and get a random game
4) Repeat
5) Compile all of these games to make a set

Things to consider:
- Ratings are wrong when accounts are new or have low game numbers
- Pick games from unique players
- Python chess has opening and endgame databases
https://python-chess.readthedocs.io/en/latest/

Action Items:
- Parse PGN
- Find an efficient way to gather data
    - Make the dataset of PGNs
- Compute time differences between moves
- Make a "stage of game" variable to classify opening/mid/endgame
- EDA of times. Summary statistics of time vs stage of game, etc

Observations:
- Shows opening URL. Could help in game stage classification
- Current Board is a thing
