In [3]:
from bs4 import BeautifulSoup
import requests as rq
from datetime import datetime, timedelta
import pandas as pd

In [13]:
def get_first_day_of_week(year, week_number):
    """Return the Monday that starts the given ISO-8601 week.

    Parameters
    ----------
    year : int or str
        ISO week-numbering year (the first element of ``date.isocalendar()``).
        Around New Year this can differ from the calendar year.
    week_number : int or str
        ISO week number, 1-53 (the second element of ``date.isocalendar()``).

    Returns
    -------
    datetime.datetime
        Midnight on the Monday of that week.

    Raises
    ------
    ValueError
        If the year/week combination cannot be parsed.
    """
    # %G / %V / %u are the ISO-8601 year / week / weekday directives
    # (Monday == 1, week 1 is the week containing the first Thursday).
    # The previous "%Y-W%W-%w" format used %W, which counts weeks from the
    # first Monday of the calendar year and disagrees with ISO numbering in
    # years that do not start on Friday-Monday (e.g. 2020).
    first_day = datetime.strptime(f'{year}-W{week_number}-1', "%G-W%V-%u")
    return first_day

tourney_dates = ['2023-06-26','2023-05-22','2024-01-07','2023-08-22']

# Map "Monday of the tournament's week" (YYYY-MM-DD) -> original tournament date.
# The Monday is derived directly from the date rather than from a
# (calendar year, ISO week number) pair: those two do not always belong
# together (e.g. 2024-12-30 is ISO week 1 of 2025), which would send such a
# date to the wrong week.
# NOTE(review): two tournament dates in the same week share a key, so the later
# entry in `tourney_dates` overwrites the earlier one.
tourney_date_dict = {}
for curr_date in tourney_dates:
    date = datetime.strptime(curr_date, '%Y-%m-%d').date()

    # date.weekday() is 0 on Monday, so this steps back to the week's Monday.
    fdow = (date - timedelta(days=date.weekday())).strftime('%Y-%m-%d')
    tourney_date_dict[fdow] = curr_date

print(tourney_date_dict)

{'2023-06-26': '2023-06-26', '2023-05-22': '2023-05-22', '2024-01-01': '2024-01-07', '2023-08-21': '2023-08-22'}


In [32]:
# Inspect the week-start dates (the dict keys) that are used as the
# `DateWeek` query value when fetching rankings.
tourney_date_dict.keys()

dict_keys(['2023-06-26', '2023-05-22', '2024-01-01', '2023-08-21'])

In [17]:
# Scrape the ATP singles rankings table for every week-start date in
# `tourney_date_dict` and stack the results into one DataFrame with columns
# ['player', 'rank', 'week'], where 'week' is the original tournament date.
atp_ranks = []
for week in list(tourney_date_dict.keys()):
    url = "https://www.atptour.com/en/rankings/singles?RankRange=0-5000&DateWeek="+week
    # A timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() surfaces HTTP errors here instead of as a confusing
    # AttributeError further down when the expected table is missing.
    page = rq.get(url, timeout=30)
    page.raise_for_status()
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    soup = BeautifulSoup(page.content, "html.parser")
    rows = (
        soup.body
        .find('div', class_ = 'atp_rankings-all')
        .find('table', class_ = 'mega-table desktop-table')
        .find('tbody')
        .find_all('tr')
    )

    # Collected as a list of (player, rank) pairs rather than a dict keyed by
    # name, so two players sharing a name do not overwrite each other.
    player_ranks = []
    for row in rows:
        try:
            player = row.find('td', class_ = 'player bold heavy').find('li', class_ = 'name center').find('span').text
            rank = row.find('td', class_ = 'rank bold heavy').text
        except AttributeError:
            # A .find() returned None: this row lacks the expected cells, skip it.
            continue
        player_ranks.append((player, rank))

    week_ranks = pd.DataFrame(player_ranks, columns = ['player','rank'])
    week_ranks['week'] = tourney_date_dict[week]
    print(week,"ATP rankings extracted")

    atp_ranks.append(week_ranks)

# Each per-week frame keeps its own 0..n-1 index, so index labels repeat here.
atp_ranks = pd.concat(atp_ranks)

2023-06-26 ATP rankings extracted
2023-05-22 ATP rankings extracted
2024-01-01 ATP rankings extracted
2023-08-21 ATP rankings extracted


In [29]:
# Show rows whose scraped rank text is not purely numeric. In the output
# these are tied positions carrying a trailing "T" (e.g. "904T").
atp_ranks[~atp_ranks['rank'].str.isnumeric()]

Unnamed: 0,player,rank,week
903,Pablo Cuevas,904T,2023-06-26
904,Adam Neff,904T,2023-06-26
923,Menelaos Efstathiou,924T,2023-06-26
924,John McNally,924T,2023-06-26
1001,Lorenzo Gagliardo,1002T,2023-06-26
...,...,...,...
2059,Youssef Labbene,2057T,2023-08-21
2060,Ignacio Benzal Alia,2061T,2023-08-21
2061,Akram El Sallaly,2061T,2023-08-21
2062,Abhinav Sanjeev Shanmugam,2061T,2023-08-21


In [31]:
# Write the combined rankings to CSV; index=False leaves the DataFrame index
# out of the file.
# NOTE(review): no visible cell converts 'rank', so tied ranks are presumably
# saved with their "T" suffix — confirm downstream consumers expect text.
atp_ranks.to_csv("data/atp_ranks.csv", index=False)