In [1]:
import archives_manager

In [2]:
import pandas as pd

pd.set_option('display.max_rows', None)
pd.set_option('display.width', 10000)

In [3]:
player_name = 'BIG_TONKA_T'

games_archive = archives_manager.get_games_between_timestamps(
    player_name,
    1696176000, # october 1st
    1701363600, # same time nov 30th
    time_class='rapid', # filter for rapid
    filter_func=archives_manager.build_archive_filter(rated=True, max_elo_diff=150), # filter for Rated, maximum rating difference of 150
    verbose=True, # print stuff
    max_games=None # all the games in the time period
)

# print number of games
print(f'\nlength of games for {player_name}: {len(games_archive)}\n')

Scanning games from BIG_TONKA_T from 1696176000 to 1701363600
1701238334 macaiyla v.s. BIG_TONKA_T
1701237297 BIG_TONKA_T v.s. macaiyla
1701237035 macaiyla v.s. BIG_TONKA_T
1701236127 BIG_TONKA_T v.s. macaiyla
1700974382 BIG_TONKA_T v.s. macaiyla
1700892464 cgbs1013 v.s. BIG_TONKA_T
1700891486 BIG_TONKA_T v.s. knightmook
1700890757 BIG_TONKA_T v.s. JayKeiEmeych
1700890297 Fredy2210 v.s. BIG_TONKA_T
1700889828 BIG_TONKA_T v.s. GustavoM79
1700888902 themetaphor v.s. BIG_TONKA_T
1700888517 BIG_TONKA_T v.s. peter703
1700888221 whizup v.s. BIG_TONKA_T
1700887871 JoyBlackOfficial v.s. BIG_TONKA_T
1700707784 BIG_TONKA_T v.s. Inpassingking777
1700656628 BIG_TONKA_T v.s. PrinceJWoo
1700654476 BIG_TONKA_T v.s. lanea83
1700653734 Kralin1 v.s. BIG_TONKA_T
1700653240 BIG_TONKA_T v.s. joshnar
1700653199 Deldel92i v.s. BIG_TONKA_T
1700653101 BIG_TONKA_T v.s. jeanboiffard3
1700652795 Survivor251 v.s. BIG_TONKA_T
1700652070 BIG_TONKA_T v.s. LeyteTacloban
1700650970 ABDULLAH_TAYYAB v.s. BIG_TONKA_T
1700

In [4]:
archives_manager.simplified_archived_game(games_archive[0]) # print out a simplified version of the first (oldest) game in the archive

{'url': 'https://www.chess.com/game/live/89923795267',
 'end_time': 1696185098,
 'date': '2023.10.01',
 'rated': True,
 'time_class': 'rapid',
 'white': {'username': 'Quarini', 'rating': 1063, 'result': 'resigned'},
 'black': {'username': 'BIG_TONKA_T', 'rating': 1090, 'result': 'win'}}

In [5]:
archives_manager.simplified_archived_game(games_archive[-1]) # print out a simplified version of the most recent game in the archive

{'url': 'https://www.chess.com/game/live/94630695929',
 'end_time': 1700892464,
 'date': '2023.11.25',
 'rated': True,
 'time_class': 'rapid',
 'white': {'username': 'cgbs1013', 'rating': 1434, 'result': 'resigned'},
 'black': {'username': 'BIG_TONKA_T', 'rating': 1408, 'result': 'win'}}

In [6]:
# filter function for opponent histories, which specifies that we want rated games
opp_filter_func = archives_manager.build_archive_filter(
    rated=True
)

for game in games_archive:
    # get opponents name
    opp_name = archives_manager.get_opponent_name(game, player_name)
    print(f"Getting recent archive of opponent of {player_name}: {opp_name}")

    # attempt to get opponent archive
    try:
        opp_recent_games = archives_manager.get_games_between_timestamps( # time-based retrieval
            opp_name,
            start_unix=game['end_time'] - 30 * 24 * 60 * 60, # from 30 days ago
            end_unix=game['end_time'],                       # upto and including this game
            time_class='rapid', # filter for rapid
            filter_func=opp_filter_func, # opponent filter (see top of code cell)
            verbose=False,
            max_games=25 # only a handful of games necessary
        )

        game['opp_recent_archive'] = opp_recent_games # save the recent opponent history to this game
    except archives_manager.ArchiveRetrievalError as e: # handle errors (deleted accounts, mainly)
        print("archive retrieval error", player_name, opp_name, e)

Getting recent archive of opponent of BIG_TONKA_T: Quarini
Getting recent archive of opponent of BIG_TONKA_T: hgarciarod
Getting recent archive of opponent of BIG_TONKA_T: HERFJERF
Getting recent archive of opponent of BIG_TONKA_T: olas109
Getting recent archive of opponent of BIG_TONKA_T: InkLv
Getting recent archive of opponent of BIG_TONKA_T: alamrini
Getting recent archive of opponent of BIG_TONKA_T: djgreen1
Getting recent archive of opponent of BIG_TONKA_T: Pytoo91
Getting recent archive of opponent of BIG_TONKA_T: mumergaddafi
Getting recent archive of opponent of BIG_TONKA_T: Copertus
Getting recent archive of opponent of BIG_TONKA_T: CyborgCerberus94
Getting recent archive of opponent of BIG_TONKA_T: Nadirmitch
Getting recent archive of opponent of BIG_TONKA_T: Rensa0
Getting recent archive of opponent of BIG_TONKA_T: wimperneel
Getting recent archive of opponent of BIG_TONKA_T: nightjumper39
Getting recent archive of opponent of BIG_TONKA_T: Spiderbreeze
Getting recent archiv

In [7]:
games_archive[-1]['opp_recent_archive'][-1] # print out the data for the most recent game's opponent's most recent game (should be the same as most recent game from code cell above)

{'url': 'https://www.chess.com/game/live/94630695929',
 'pgn': '[Event "Live Chess"]\n[Site "Chess.com"]\n[Date "2023.11.25"]\n[Round "-"]\n[White "cgbs1013"]\n[Black "BIG_TONKA_T"]\n[Result "0-1"]\n[CurrentPosition "8/p7/1p2p3/2p3kP/K2b2P1/1P6/8/6q1 w - -"]\n[Timezone "UTC"]\n[ECO "C00"]\n[ECOUrl "https://www.chess.com/openings/Pirc-Defense-Small-Center-Defense"]\n[UTCDate "2023.11.25"]\n[UTCTime "05:53:05"]\n[WhiteElo "1425"]\n[BlackElo "1417"]\n[TimeControl "600"]\n[Termination "BIG_TONKA_T won by resignation"]\n[StartTime "05:53:05"]\n[EndDate "2023.11.25"]\n[EndTime "06:07:44"]\n[Link "https://www.chess.com/game/live/94630695929"]\n\n1. e4 {[%clk 0:09:59.9]} 1... e6 {[%clk 0:09:59.4]} 2. d4 {[%clk 0:09:52.5]} 2... d6 {[%clk 0:09:57.4]} 3. Bf4 {[%clk 0:09:44.8]} 3... Nd7 {[%clk 0:09:56.2]} 4. Bc4 {[%clk 0:09:42.8]} 4... Nb6 {[%clk 0:09:54.7]} 5. Bb3 {[%clk 0:09:33.6]} 5... Ne7 {[%clk 0:09:51.2]} 6. Nf3 {[%clk 0:09:31.7]} 6... Ng6 {[%clk 0:09:50.5]} 7. Bg3 {[%clk 0:09:27.7]} 7... Be

In [8]:
data_rows = []

player_rolling_elo = []

for game in games_archive: # iterate the archive (that is now appended with opponent histories)
    if 'opp_recent_archive' not in game:
        continue # skip this game if we couldn't get the opponent's history

    elo = archives_manager.get_elo(game, player_name) # get elo ratings
    player_rolling_elo.append(elo['Player']) # append player elo

    # moving average 5 calculation
    ma_5 = None
    if len(player_rolling_elo) >= 5:
        elo_sum = sum([historical_elo for historical_elo in player_rolling_elo[-5:]])
        ma_5 = elo_sum / 5

    # moving average 20 calculation
    ma_20 = None
    if len(player_rolling_elo) >= 20:
        elo_sum = sum([historical_elo for historical_elo in player_rolling_elo[-20:]])
        ma_20 = elo_sum / 20

    # opponent name and their recent archive, that we queried in the previous code cells
    opp_name = archives_manager.get_opponent_name(game, player_name)
    opp_recent_archive = game['opp_recent_archive']

    # calculate opponent ma5
    opp_ma_5 = None
    if len(opp_recent_archive) >= 5:
        opp_elo_sum = sum([archives_manager.get_elo(opp_game, opp_name)['Player'] for opp_game in opp_recent_archive[-5:]])
        opp_ma_5 = opp_elo_sum / 5

    # calculate opponent ma20
    opp_ma_20 = None
    if len(opp_recent_archive) >= 20:
        opp_elo_sum = sum([archives_manager.get_elo(opp_game, opp_name)['Player'] for opp_game in opp_recent_archive[-20:]])
        opp_ma_20 = opp_elo_sum / 20

    # get accuracies
    acc = archives_manager.get_accuracy(game, player_name)
    player_acc = None if acc == None else acc['Player']
    opp_acc = None if acc == None else acc['Opponent']
    acc_diff = None if acc == None else player_acc - opp_acc

    # create dataframe entry
    data_rows.append({
        'unix': game['end_time'],
        'player_name': player_name,
        'opp_name': opp_name,
        'player_elo': elo['Player'],
        'opp_elo': elo['Opponent'],
        'elo_diff': elo['Player'] - elo['Opponent'],
        'player_acc': player_acc,
        'opp_acc': opp_acc,
        'acc_diff': acc_diff,
        'color': archives_manager.get_color(game, player_name),
        'x-ma5': None if ma_5 is None else elo['Player'] - ma_5,
        'x-ma20': None if ma_20 is None else elo['Opponent'] - ma_20,
        'opp_x-ma5': None if opp_ma_5 is None else elo['Opponent'] - opp_ma_5,
        'opp_x-ma20': None if opp_ma_20 is None else elo['Opponent'] - opp_ma_20,
        'won': archives_manager.get_won(game, player_name)
    })

df = pd.DataFrame(data_rows)

df.sample(15)

Unnamed: 0,unix,player_name,opp_name,player_elo,opp_elo,elo_diff,player_acc,opp_acc,acc_diff,color,x-ma5,x-ma20,opp_x-ma5,opp_x-ma20,won
1120,1699306876,BIG_TONKA_T,GGoder,1454,1415,39,75.76,72.34,3.42,True,-7.4,-40.5,-12.0,-7.4,1.0
652,1697448679,BIG_TONKA_T,gdetelinov,1264,1256,8,62.03,73.34,-11.31,False,18.8,19.6,-2.2,-10.75,0.0
1478,1699848289,BIG_TONKA_T,rivascarlos,1364,1450,-86,74.35,86.32,-11.97,False,15.6,104.7,2.0,36.7,0.0
390,1697067500,BIG_TONKA_T,Usernamethatisavailable,1417,1340,77,70.01,66.71,3.3,True,-7.8,-66.55,-6.4,-23.25,1.0
1391,1699740254,BIG_TONKA_T,11mik,1442,1426,16,72.94,80.66,-7.72,False,-5.6,-8.7,-4.6,13.7,0.0
531,1697249260,BIG_TONKA_T,selangkanganphiton,1187,1161,26,63.34,59.3,4.04,True,0.8,-29.05,7.4,22.7,1.0
319,1696958290,BIG_TONKA_T,NotgrandbutSmallmaster,1293,1316,-23,82.33,76.33,6.0,False,2.4,25.2,-9.2,8.2,1.0
201,1696624122,BIG_TONKA_T,BennyMakomo,1316,1326,-10,85.07,86.75,-1.68,True,17.2,35.7,-15.4,-36.2,1.0
667,1697495179,BIG_TONKA_T,vivyehn,1257,1205,52,75.6,68.2,7.4,False,-4.4,-49.85,1.0,10.55,1.0
120,1696467345,BIG_TONKA_T,raphaelpaiva,1241,1229,12,75.99,89.29,-13.3,False,-12.8,9.45,-0.2,10.55,0.0


In [9]:
# add reques to dataframe

num_reque = []

prev_unix = None
consecutive_requests = 0

for index, row in df.iterrows():
    current_unix = row['unix']
    
    if prev_unix is None or (current_unix - prev_unix) > 1250:
        consecutive_requests = 0
    else:
        consecutive_requests += 1
    
    num_reque.append(consecutive_requests)
    
    prev_unix = current_unix

df['num_reque'] = num_reque

In [10]:
# time since last game

time_since_prev = []
prev_unix = None

for index, row in df.iterrows():
    current_unix = row['unix']

    if prev_unix is None:
        time_since_prev.append(0)
    else:
        time_since_prev.append(current_unix - prev_unix)

    prev_unix = current_unix

df['time_since_prev'] = time_since_prev

In [11]:
df['prev_acc'] = df['player_acc'].shift(1)
df['prev_x-ma5'] = df['x-ma5'].shift(1)
df['prev_x-ma20'] = df['x-ma20'].shift(1)

In [12]:
print(len(df))
df.sample(15)

1753


Unnamed: 0,unix,player_name,opp_name,player_elo,opp_elo,elo_diff,player_acc,opp_acc,acc_diff,color,x-ma5,x-ma20,opp_x-ma5,opp_x-ma20,won,num_reque,time_since_prev,prev_acc,prev_x-ma5,prev_x-ma20
611,1697414284,BIG_TONKA_T,Cavallosullaregina2023,1216,1235,-19,77.96,79.07,-1.11,False,1.4,49.9,15.0,,1.0,6,417,77.67,-3.6,-54.6
240,1696874923,BIG_TONKA_T,jtrinchera,1417,1412,5,65.6,55.91,9.69,True,16.6,58.2,2.8,13.2,1.0,1,532,71.3,16.6,56.9
148,1696541001,BIG_TONKA_T,alidailami,1280,1238,42,67.81,63.52,4.29,False,-6.8,-30.35,8.8,17.9,0.0,7,1115,73.7,0.6,-20.65
1611,1700520526,BIG_TONKA_T,Rjmanville,1497,1452,45,77.66,80.29,-2.63,True,-4.6,-57.65,-0.2,-9.25,0.0,11,374,86.18,-16.6,-48.8
813,1698093128,BIG_TONKA_T,sashafras,1396,1435,-39,88.32,80.87,7.45,False,6.4,48.1,-9.6,-64.1,1.0,0,252311,80.7,-0.4,21.1
1008,1699057596,BIG_TONKA_T,mikethemonkeyman89,1503,1459,44,79.75,84.86,-5.11,False,-1.0,-59.65,0.8,-9.25,0.0,19,672,66.45,6.4,-31.85
1607,1700517535,BIG_TONKA_T,thatsabee,1515,1536,-21,69.7,76.28,-6.58,True,0.4,25.75,-3.6,10.0,0.0,7,486,74.59,5.6,57.2
1199,1699408312,BIG_TONKA_T,claucab59,1508,1527,-19,60.8,51.36,9.44,True,-1.2,10.2,-0.8,19.4,1.0,15,370,80.01,-3.8,-46.2
171,1696568812,BIG_TONKA_T,Dragonzee1952,1282,1308,-26,61.89,69.04,-7.15,True,-4.8,19.1,-5.8,0.4,0.0,10,952,75.56,-15.0,-19.25
112,1696458919,BIG_TONKA_T,vmval1,1225,1218,7,76.1,62.91,13.19,True,8.8,21.15,12.0,37.75,1.0,17,1026,75.27,3.8,15.3


In [15]:
df.to_csv('dataset_tyler_combined_match_1753.csv', index=False)