In [None]:
# goals
# is winrate dependent on time spent in last game?
# is winrate dependent on accuracy?
# is winrate dependent on accuracy of game before, if this game is a reque?
# is winrate dependent on # of reques?

# how to find time spent in a game?
# how do you determine if a game is a reque?

import archives_manager
import pandas as pd

# Notebook-wide pandas display settings: no cap on the number of rows shown
# and a very wide console so rows are not wrapped.
for option_name, option_value in (
    ('display.max_rows', None),
    ('display.width', 10000),
):
    pd.set_option(option_name, option_value)

In [None]:
# Player under study and the time window (Unix seconds) to pull games from.
player_name = "BIG_TONKA_T"
start_unix = 1696132800  # 2023-10-01 04:00:00 UTC
end_unix = start_unix + 30 * 24 * 60 * 60  # 30 days later: 1698724800 (2023-10-31 04:00:00 UTC)

# Fetch the player's games inside the window, asking for the 'rapid' time class
# and passing the archive filter built with rated=True.
games_archive = archives_manager.get_games_between_timestamps(
    player_name=player_name,
    start_unix=start_unix,
    end_unix=end_unix,
    time_class='rapid',
    filter_func=archives_manager.build_archive_filter(rated=True),
)

# Preview the first fetched game in simplified form (this is the cell's output).
archives_manager.simplified_archived_game(games_archive[0])

{'url': 'https://www.chess.com/game/live/89923795267',
 'end_time': 1696185098,
 'date': '2023.10.01',
 'rated': True,
 'time_class': 'rapid',
 'white': {'username': 'Quarini', 'rating': 1064, 'result': 'resigned'},
 'black': {'username': 'BIG_TONKA_T', 'rating': 1089, 'result': 'win'}}

In [None]:
# Preview the final entry of games_archive in simplified form, as a check on
# where the fetched data ends (presumably the most recent game -- the archive
# looks chronologically ordered, TODO confirm).
archives_manager.simplified_archived_game(games_archive[-1])

{'url': 'https://www.chess.com/game/live/92035707081',
 'end_time': 1698296732,
 'date': '2023.10.26',
 'rated': True,
 'time_class': 'rapid',
 'white': {'username': 'Primodiurno', 'rating': 1456, 'result': 'resigned'},
 'black': {'username': 'BIG_TONKA_T', 'rating': 1494, 'result': 'win'}}

In [None]:
# For every game of the player, fetch the opponent's own recent game history.
# Result: games_data, a list of dicts with keys 'game', 'opp_name' and
# 'opp_games_archive'. Games whose opponent archive cannot be retrieved are skipped.

# Length of the look-back window used for each opponent's history, in seconds.
# It is 30 days; the 'opp_30d' predictor computed from these archives counts the
# games found in this window.
OPP_HISTORY_WINDOW_SECONDS = 30 * 24 * 60 * 60

games_data = []

unix_range = end_unix - start_unix

for game in games_archive:
    opp_name = archives_manager.get_opponent_name(game, player_name)

    # log progress: position of this game inside the [start_unix, end_unix] window (0..1)
    print(opp_name, round((game['end_time'] - start_unix) / unix_range, 2))

    try:
        opp_games_archive = archives_manager.get_games_between_timestamps(
            player_name=opp_name,
            start_unix=game['end_time'] - OPP_HISTORY_WINDOW_SECONDS,
            end_unix=game['end_time'],
            time_class='rapid',
            filter_func=archives_manager.build_archive_filter(rated=True)
        )
    except archives_manager.ArchiveRetrievalError as err:
        # Best effort: report why the archive is unavailable and move on to the
        # next game instead of aborting the whole (slow) collection run.
        print(f'{opp_name} archive failed: {err}')
        continue

    # Kept from the original sentinel check: a missing archive is never stored.
    if opp_games_archive is None:
        continue

    games_data.append({
        'game': game,
        'opp_name': opp_name,
        'opp_games_archive': opp_games_archive
    })

In [None]:
# Turn the collected games into flat records:
#   tyler_rows -- one dict per game of the player (predictors + target)
#   opp_data   -- one list per game, holding one dict per game in that opponent's history
tyler_rows = []
opp_data = []

for entry in games_data:
    game, opp_name, opp_games_archive = (
        entry['game'],
        entry['opp_name'],
        entry['opp_games_archive'],
    )

    # --- opponent history (for opp_data) ---
    # get_elo is asked for player_name=opp_name, so 'Player' is the opponent's
    # historical rating and 'Opponent' is the rating of whoever they faced.
    # The one-element inner loop only binds hist_elo so get_elo runs once per game.
    history_rows = [
        {
            'unix': opp_game['end_time'],
            'elo': hist_elo['Player'],
            # opponent's rating minus the rating of the opponent's opponent
            'elo_diff': hist_elo['Player'] - hist_elo['Opponent'],
            # planned, not yet implemented:
            #   time of day as 'cos_24h' and 'sin_24h'
            #   player pieces left
            #   opp pieces left
            #   move count
            #   accuracy
            #   acc_diff
            #   average eval change per move, per game state
            #   opening classification
            #   average time spent per move, per game state
            'color': archives_manager.get_color(opp_game, opp_name),
            'won': archives_manager.get_won(opp_game, opp_name),
        }
        for opp_game in opp_games_archive
        for hist_elo in (archives_manager.get_elo(opp_game, player_name=opp_name),)
    ]
    opp_data.append(history_rows)

    # --- this game, from the player's side (for tyler_rows) ---
    elo = archives_manager.get_elo(game, player_name)
    acc = archives_manager.get_accuracy(game, player_name)
    color = archives_manager.get_color(game, player_name)

    # Accuracy may be unavailable (None); then all three accuracy fields stay None.
    if acc is None:
        player_acc = opp_acc = acc_diff = None
    else:
        player_acc = acc['Player']
        opp_acc = acc['Opponent']
        acc_diff = acc['Player'] - acc['Opponent']

    tyler_rows.append({
        # basic info
        'url': game['url'],
        'unix': game['end_time'],
        'player_elo': elo['Player'],
        'opp_elo': elo['Opponent'],
        'opp_name': opp_name,

        # predictors
        'opp_30d': len(opp_games_archive),  # number of games in the opponent's fetched history
        'elo_diff': elo['Player'] - elo['Opponent'],
        'color': color,
        # planned: time of day as 'cos_24h', 'sin_24h' https://chat.openai.com/share/fb073052-f000-4160-bace-fdee65678589

        # game review
        'player_acc': player_acc,
        'opp_acc': opp_acc,
        'acc_diff': acc_diff,
        # planned, not yet implemented:
        #   player pieces left
        #   opp pieces left
        #   move count
        #   average eval change per move, per game state
        #   average time spent per move, per game state
        #   opening classification https://www.nature.com/articles/s41598-023-31658-w.pdf

        # targets
        'won': archives_manager.get_won(game, player_name),
    })

In [None]:
# Problem: the last row of each opponent history is the very game stored in the
# matching tyler_rows entry (same 'unix'), yet its 'elo' can differ from that
# entry's 'opp_elo'. The value in tyler_rows is treated as the true one, so the
# history row has to be corrected.

# Print both views of the first game so the mismatch is visible before it is fixed.
print(tyler_rows[0]) # the player's least recent game (beginning of player data)
print(opp_data[0][-1]) # the same game as the last row of the opponent's history, but with a guessed elo

{'url': 'https://www.chess.com/game/live/89923795267', 'unix': 1696185098, 'player_elo': 1089, 'opp_elo': 1064, 'opp_name': 'Quarini', 'opp_30d': 249, 'elo_diff': 25, 'color': False, 'player_acc': 69.43, 'opp_acc': 62.25, 'acc_diff': 7.180000000000007, 'won': 1}
{'unix': 1696185098, 'elo': 1063, 'elo_diff': -27, 'color': True, 'won': 0}


In [None]:
# Replace the guessed rating in the last row of every opponent history with the
# rating recorded for that same game in tyler_rows. tyler_rows and opp_data are
# built in lockstep (one entry each per game), so they can be walked together.
for player_row, opp_history in zip(tyler_rows, opp_data):
    # An opponent archive may contain no games; there is nothing to correct then.
    if not opp_history:
        continue

    shared_game_row = opp_history[-1]

    # Only patch the row if it really is the game from tyler_rows; otherwise the
    # recorded rating would be written onto an unrelated game.
    if shared_game_row['unix'] != player_row['unix']:
        continue

    shared_game_row['elo'] = player_row['opp_elo']
    # 'elo_diff' is derived from 'elo' (opponent's rating minus the rating of the
    # opponent's opponent, i.e. the player here), so recompute it from the
    # recorded ratings to keep the row consistent.
    shared_game_row['elo_diff'] = player_row['opp_elo'] - player_row['player_elo']

# Re-print both views of the first game to confirm the correction.
print(tyler_rows[0]) # tyler's least recent game (beginning of player data)
print(opp_data[0][-1]) # the same game in the opponent's history, now carrying the recorded elo

{'url': 'https://www.chess.com/game/live/89923795267', 'unix': 1696185098, 'player_elo': 1089, 'opp_elo': 1064, 'opp_name': 'Quarini', 'opp_30d': 249, 'elo_diff': 25, 'color': False, 'player_acc': 69.43, 'opp_acc': 62.25, 'acc_diff': 7.180000000000007, 'won': 1}
{'unix': 1696185098, 'elo': 1064, 'elo_diff': -27, 'color': True, 'won': 0}


In [None]:
# turn tyler_rows into tyler_df and make even more predictors

# ma5, ma20, x-ma5, x-ma20, d_x-ma5, d_x-ma20
# is_reque
# num_reque
# prev_avg_eval_change _open _mid _end
# prev_time_spent _open _mid _end
# prev_acc
# prev_acc_diff
# prev_ player opp _pieces_left


# finding reques
# https://chat.openai.com/share/dc34a56e-11a7-4e53-844a-b98258651090
# is_reque = [num_reque != 0]



In [None]:
# for each opp_rows in opp_data, turn opp_rows into opp_df and make similar predictors

# ma5, ma20, x-ma5, x-ma20, d_x-ma5, d_x-ma20
# is_reque
# num_reque
# prev_avg_eval_change _open _mid _end
# prev_time_spent _open _mid _end
# prev_acc
# prev_acc_diff
# prev_ player opp _pieces_left

In [None]:
# ideas for classifying/clustering players/games/opponents

# # # cluster board states
# 1. extract features of the board
# white/black _pieces, white/black _passed_pawns, open/semi-open-white/semi-open-black/closed _files, developed_pieces, material_advantage, engine_eval
# some features for pinned material
# features for important spots for material, 'knights in the center' 'rooks on the 7th'
# features for pieces defending other pieces
# features for board control of certain areas (mid, left flank, right flank, top, bottom) that belong to each color, by placement and by areas to move
# 2. do PCA on the feature space of boards across many games for a given player
# look at the most interesting/variable features by the scores in the first few principal components
#

# do PCA on the features of board state