In [1]:
import os
import pickle
import requests
import pandas as pd
from bs4 import BeautifulSoup

Scrape Titled Tuesday Tournament Links For 2024

In [8]:
# Output directory for the per-tournament results CSV files.
# NOTE(review): this name shadows the built-in `dir()`; it is kept because the
# scraping cells further down build their paths from it.
dir = '../output/Chess.com/Titled Tuesday Results/'

In [2]:
# Load the cached list of 2024 Titled Tuesday tournament links; if the cache is
# missing or unreadable, scrape the first five pages of the tournament listing.
try:
    tourn_links = pd.read_pickle('../output/Chess.com/tourn_links.pkl')
except (FileNotFoundError, EOFError, pickle.UnpicklingError):
    # Only a missing/unreadable cache triggers a re-scrape. A bare `except:` would
    # also hide unrelated failures (including KeyboardInterrupt).
    listing_url = 'https://www.chess.com/tournament/live/titled-tuesdays'
    listing_table_class = 'table-component table-hover table-clickable tournaments-live-table'
    tourn_links = []
    for page_index in range(5):
        # A timeout keeps the cell from hanging forever on a stalled connection.
        response = requests.get(listing_url + '?&page=' + str(page_index + 1), timeout=30)
        page_soup = BeautifulSoup(response.content, 'html.parser')

        tourn_list = page_soup.find('table', class_=listing_table_class)
        if tourn_list is None:
            continue  # no listing table on this page

        for row in tourn_list.find_all('tr')[1:]:  # Skip header row
            anchor = row.find('a', class_='tournaments-live-name')
            tourn_link = anchor.get('href') if anchor is not None else None
            if tourn_link is None:
                continue  # row without a tournament link; indexing None would raise
            if '2024' in tourn_link:  # Filter for links containing "2024"
                tourn_links.append(tourn_link)

In [3]:
# Sanity check: how many 2024 tournament links were loaded or scraped.
len(tourn_links)

106

In [4]:
# Inspect the first link in the list.
tourn_links[0]

'https://www.chess.com/tournament/live/late-titled-tuesday-blitz-december-31-2024-5327791'

In [5]:
# Inspect the last link in the list.
tourn_links[-1]

'https://www.chess.com/tournament/live/early-titled-tuesday-blitz-january-02-2024-4490237'

In [24]:
# Cache the tournament links on disk so they can be reloaded instead of re-scraped.
links_cache_path = '../output/Chess.com/tourn_links.pkl'
# open(..., 'wb') fails if the folder is missing, so create it first.
os.makedirs(os.path.dirname(links_cache_path), exist_ok=True)
with open(links_cache_path, 'wb') as f:
    pickle.dump(tourn_links, f)

### Results Data

Fetch the final results table for each tournament and save it to one CSV file per tournament.

In [None]:
# Scrape the final results table of every tournament in `tourn_links` and write
# one CSV per tournament into `dir`. Tournaments whose CSV already exists are skipped.
RESULTS_TABLE_CLASS = 'table-component tournaments-live-view-results-table tournaments-live-view-extra-borders'
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get can block indefinitely

os.makedirs(dir, exist_ok=True)  # to_csv does not create missing folders

for url in tourn_links:
    tournament_id = url.split('/')[-1]
    output_filename = dir + f"{tournament_id}.csv"

    if os.path.exists(output_filename):
        print("File already exists: " + output_filename)
        continue

    # Read the number of result pages from the pagination element.
    r = requests.get(url+'?&players=100', timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(r.content, 'html.parser')
    i_p = soup.find('div', class_='index-pagination')
    data_total_pages = 0

    if i_p:
        data_total_pages = int(i_p.find('div')['data-total-pages'])
    print("total pages: " + str(data_total_pages))

    ranks = []
    rank = 0  # running position across all pages, in the order rows are listed
    for i in range(data_total_pages):
        print('page: ' + str(i+1))
        r = requests.get(url+'?&players='+str(i+1), timeout=REQUEST_TIMEOUT)
        soup = BeautifulSoup(r.content, 'html.parser')
        table = soup.find('table', class_=RESULTS_TABLE_CLASS)
        if table is None:
            # Calling find_all on None would raise AttributeError and abort the whole run.
            print("no results table found on page: " + str(i+1))
            continue
        table_rows = table.find_all('tr')[1:]  # Skip the header row

        for x in table_rows:
            rank += 1
            if rank % 100 == 0:
                print("completed: " + str(rank))

            username = x.select_one('.user-tagline-username').get_text(strip=True)
            country = x.select_one('.country-flags-component')['v-tooltip']
            # Rating is rendered in parentheses; it stays the string 'Unrated' when not numeric.
            rating = x.select_one('.user-rating').get_text(strip=True).replace('(', '').replace(')', '')
            if rating != 'Unrated':
                rating = int(rating)
            title_element = x.select_one('.post-view-meta-title')
            title = title_element.get_text(strip=True) if title_element is not None else None
            score = float(x.select_one('.tournaments-live-view-total-score').get_text(strip=True))
            tie_break = float(x.select_one('.tournaments-live-view-tie-break').get_text(strip=True))
            # The score tooltip is split on commas; the leading integer of each of the
            # first three parts is stored as wins, draws and byes respectively.
            wdb = x.find('div', class_='tournaments-live-view-total-score')['v-tooltip'].split(',')
            wins = int(wdb[0].strip().split()[0])
            draws = int(wdb[1].strip().split()[0])
            byes = int(wdb[2].strip().split()[0])

            player = {"rank": rank, "username": username, "country": country, "rating": rating,
                "title": title, "score": score, "tie_break": tie_break, "wins": wins, "draws": draws, "byes": byes
            }
            ranks.append(player)

    if not ranks:
        # Writing an empty CSV would make the exists-check above skip this
        # tournament on every later run, so leave no file behind instead.
        print("no results scraped, not writing: " + output_filename)
        continue

    df = pd.DataFrame(ranks)

    df.to_csv(output_filename, index=False)
    print("written: " + output_filename)

### Pairings Data

Fetch the round-by-round pairings for each tournament and save them to one CSV file per tournament.

In [20]:
# Scrape the pairings tables of every tournament in `tourn_links` and write one
# CSV per tournament. Tournaments whose pairings CSV already exists are skipped.
PAIRINGS_DIR = "../output/Chess.com/Titled Tuesday Pairings/"
PAIRINGS_TABLE_CLASS = 'table-component table-hover tournaments-live-view-pairings-table'
PAIRINGS_TIMEOUT = 30  # seconds; without a timeout requests.get can block indefinitely
# NOTE(review): the original comment said "11 rounds" while the loop requested 12;
# the count of 12 is kept unchanged (pages without a table are skipped) — confirm.
ROUNDS_TO_REQUEST = 12
PAGES_PER_ROUND = 10  # guess


def _parse_pairing_side(row, cell_selector):
    """Return (title, username, rating_text) for one player cell of a pairings row.

    `cell_selector` is the CSS selector of the player's `td`. The title is None
    when the cell has no title element. The rating has its surrounding
    whitespace and parentheses removed.
    """
    title_element = row.select_one(f'{cell_selector} .post-view-meta-title')
    title = title_element.text.strip() if title_element is not None else None
    username = row.select_one(f'{cell_selector} .user-tagline-username').text.strip()
    # Strip whitespace first: str.strip("()") alone leaves the parentheses in
    # place when the text is padded with spaces or newlines.
    elo = row.select_one(f'{cell_selector} .user-rating').get_text(strip=True).strip("()")
    return title, username, elo


os.makedirs(PAIRINGS_DIR, exist_ok=True)  # to_csv does not create missing folders

for url in tourn_links:
    tournament_id = url.split('/')[-1]
    output_filename = f"{PAIRINGS_DIR}{tournament_id}_pairings.csv"

    # output_filename is already a complete path. Prefixing it with the results
    # directory (as before) made this check always false, so nothing was skipped.
    if os.path.exists(output_filename):
        print("File already exists: " + output_filename)
        continue

    matches = []
    match_count = 0  # number of pairing rows parsed so far, for progress output only

    for round_index in range(ROUNDS_TO_REQUEST):
        for page in range(1, PAGES_PER_ROUND + 1):  # Loop through all pages of each round
            r = requests.get(f"{url}?&round={round_index+1}&pairings={page}", timeout=PAIRINGS_TIMEOUT)
            soup = BeautifulSoup(r.content, 'html.parser')

            table = soup.find('table', class_=PAIRINGS_TABLE_CLASS)  # Pairings table
            if table is None:
                continue  # Skip if the table is not found

            for x in table.find_all('tr')[1:]:  # Skip header row
                match_count += 1
                if match_count % 100 == 0:
                    print("Completed: " + str(match_count))

                player_1_status, player_1_username, player_1_elo = _parse_pairing_side(x, 'td:first-of-type')
                result = x.select_one('td:nth-of-type(2)').text.strip()
                player_2_status, player_2_username, player_2_elo = _parse_pairing_side(x, 'td:last-of-type')

                # The "*_rank" columns hold the text of the title element; the key
                # names are kept because they define the CSV header.
                match = {"white_rank": player_1_status, "white_username": player_1_username, "white_elo": player_1_elo,
                            "result": result, "black_rank": player_2_status, "black_username": player_2_username, "black_elo": player_2_elo}
                matches.append(match)

    if not matches:
        # An empty CSV would make the exists-check above skip this tournament
        # on every later run, so leave no file behind instead.
        print("No pairings scraped, not writing: " + output_filename)
        continue

    # Create a DataFrame from the collected match data
    df = pd.DataFrame(matches)
    df = df.drop_duplicates()  # Remove duplicates
    df.to_csv(output_filename, index=False)
    print("Written: " + output_filename)

Completed: 100
Completed: 200
Completed: 300
Completed: 400
Completed: 500
Completed: 600
Completed: 700
Completed: 800
Completed: 900
Completed: 1000
Completed: 1100
Completed: 1200
Completed: 1300
Completed: 1400
Completed: 1500
Completed: 1600
Written: ../output/Chess.com/Titled Tuesday Pairings/late-titled-tuesday-blitz-december-31-2024-5327791_pairings.csv
Completed: 100
Completed: 200
Completed: 300
Completed: 400
Completed: 500
Completed: 600
Completed: 700
Completed: 800
Completed: 900
Completed: 1000
Completed: 1100
Completed: 1200
Completed: 1300
Completed: 1400
Completed: 1500
Completed: 1600
Completed: 1700
Completed: 1800
Completed: 1900
Completed: 2000
Completed: 2100
Completed: 2200
Completed: 2300
Completed: 2400
Completed: 2500
Completed: 2600
Completed: 2700
Written: ../output/Chess.com/Titled Tuesday Pairings/early-titled-tuesday-blitz-december-31-2024-5327789_pairings.csv
Completed: 100
Completed: 200
Completed: 300
Completed: 400
Completed: 500
Completed: 600
Compl