In [3]:
from chessdotcom import get_player_game_archives
from queue import Queue
import requests
import json

## Retrieve Games within a Certain ELO Range

In [62]:
def reformat(game):
    '''
    Signifcantly condenses the game game
    game: the dictionary of game
    '''
    components = game["pgn"].split("\n")[4:]
    #indices=[2,4,7,8,11,13,14,15,16,17,19]  # Manually found
 
    #for i in sorted(indices, reverse=True):
       # del components[i]
    
    return components


def store_games(games, file_name):
    '''
    Stores game dictionaries in a text file, separated by new line characters.

    games: a list of dictionaries of games to be stored.
    file_name: the path to a valid .txt file to store.
    '''
    with open(file_name, 'w') as f:
        for game in games:
            # json.dumps converts games to a string for storage
            f.write(json.dumps(game))
            f.write("\n")


def read_games(file_name):
    '''
    Retrieves games stored in a text file.

    file_name: the path to a valid .txt file with stored games.
    '''
    games = []

    with open(file_name, 'r') as f:
        for line in f:
            games.append(json.loads(line))

    return games


def get_data(player, elo_lb, elo_ub, num_players_cap, num_games_cap):
    '''
    performs a bfs to get game data from a list of users of a desired elo.
    player: the player origin of the bfs.
    elo_lb: lower bound.
    elo_ub: upper bound.
    num_players_cap: number of players we want to cap our requests to.
    num_games_cap: number of games per player we want to cap our exploration at.
    '''
    num_games = 0
    games_lst = [] # populate a list of games.


    # Initialize a Queue
    user_list = Queue()
    user_list.put(player)

    # Mark the start node as visited
    visited = {player}

    # Loop until the queue is empty
    while len(visited) < num_players_cap: # stop when we have requested num_players_cap players.

        # Dequeue a vertex from the queue
        curr_user = user_list.get()

        # Request player games from server.
        requested_player = get_player_game_archives(curr_user)
        
        this_playergames = requests.get(url=requested_player.json["archives"][-1]).json()["games"]

        # Keep track of how many games we've seen from this player
        games_from_curr = 0
        cap_exceeded = False

        # Add all adjacent vertices to the queue
        for game in this_playergames: # loop through the first num_games_cap games
            
            # Make sure it's a 5 min blitz
            if game['time_control'] == '180':
                # Detect what color and elo the opponent is
                black_player = game['black']['username']
                
                if curr_user == black_player:
                    opponent_elo = game['white']['rating']
                    opponent_user = black_player
                else:
                    opponent_elo = game['black']['rating']
                    opponent_user = game['black']['username']

                if opponent_elo >= elo_lb and opponent_elo <= elo_ub and opponent_user not in visited and opponent_user != curr_user and "pgn" in game:
                    
                    if "game abandoned" not in game["pgn"]:
                                             
                        condensed = reformat(game)
                        games_lst.append(condensed)
                        user_list.put(opponent_user)

                        # Exit loop once we achieve the number of games cap
                        games_from_curr += 1
                        if games_from_curr >= num_games_cap:
                            break
        
        # Add user to visited to prevent duplicates
        visited.add(curr_user)
    
    return (games_lst, visited)

In [63]:
data = get_data(player="colinsong1", elo_lb=1390, elo_ub=1610, num_players_cap=2000, num_games_cap=2)
print(len(data[0]))
store_games(data[0], "games_condensed.txt")

4628


In [22]:
games = read_games("games_raw.txt")

print(len(games))
for game in games:
    if "game abandoned" in game["pgn"]:
        games.remove(game)
print(len(games))

for i in range(len(games)):
    games[i] = reformat(games[i])

store_games(games, "games_condensed.txt")

1043
1015


In [16]:
print(games[51]["pgn"])

[Event "Live Chess"]
[Site "Chess.com"]
[Date "2023.04.01"]
[Round "-"]
[White "ianfairplay"]
[Black "Jokerchessss"]
[Result "0-1"]
[CurrentPosition "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq -"]
[Timezone "UTC"]
[UTCDate "2023.04.01"]
[UTCTime "07:07:32"]
[WhiteElo "1548"]
[BlackElo "1536"]
[TimeControl "180"]
[Termination "Jokerchessss won - game abandoned"]
[StartTime "07:07:32"]
[EndDate "2023.04.01"]
[EndTime "07:07:32"]
[Link "https://www.chess.com/game/live/74071864685"]

0-1



Way to get games:
1) Find any 1500ish player
2) Get a random game
3) Go into their opponents account and get a random game
4) Repeat
5) Compile all of these games to make a set

Things to consider:
- Ratings are wrong when accounts are new or have low game numbers
- Pick games from unique players
- Python chess has opening and endgame databases
https://python-chess.readthedocs.io/en/latest/

Action Items:
- Parse PGN
- Find an efficient way to gather data
    - Make the dataset of PGNs
- Compute time differences between moves
- Make a "stage of game" variable to classify opening/mid/endgame
- EDA of times. Summary statistics of time vs stage of game, etc

Observations:
- Shows opening URL. Could help in game stage classification
- Current Board is a thing
