In [38]:
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import requests as rq
from bs4 import BeautifulSoup

In [12]:
def get_first_day_of_week(year, week_number):
    """Return the Monday that starts the given ISO 8601 week.

    ISO weeks start on Monday, and week 1 is the week containing the first
    Thursday of the year, so the Monday of week 1 can fall in the previous
    calendar year (e.g. 2025-W01 starts on 2024-12-30).

    Args:
        year: ISO week-numbering year, i.e. ``date.isocalendar()[0]``. This is
            not always the calendar year of the date it came from.
        week_number: ISO week number, i.e. ``date.isocalendar()[1]``.

    Returns:
        A ``datetime`` at midnight on the Monday of that week.

    Raises:
        ValueError: if ``strptime`` cannot parse the values (for example a
            week number above 53).
    """
    # %G / %V / %u are the ISO year, ISO week and ISO weekday (1 = Monday).
    # The %Y / %W / %w directives used before count weeks from the first
    # Monday of the calendar year, which only matches ISO numbering in years
    # whose ISO week 1 starts on that same Monday (true for 2023 and 2024).
    first_day = datetime.strptime(f'{year}-W{week_number}-1', "%G-W%V-%u")
    return first_day

# Get the current date
current_date = datetime.now()
current_week = current_date.isocalendar()[1]  # Current ISO week number (kept for later cells)

# Walk Monday by Monday from the first Monday of 2023.
start_date = datetime(2023, 1, 2)

weeks = []
# Stop at the end of 2024 or at today, whichever comes first. Comparing dates
# directly avoids matching week numbers across a year boundary, which could
# end the loop a year early or let it run into future weeks.
while start_date.year < 2025 and start_date <= current_date:
    # Use the ISO year together with the ISO week: for 2024-12-30 the pair is
    # (2025, 1), whereas (start_date.year, 1) would point back to 2024-01-01.
    iso_year, week_number = start_date.isocalendar()[:2]

    fdow = get_first_day_of_week(iso_year, week_number).strftime('%Y-%m-%d')
    weeks.append(fdow)

    # Move to the next week
    start_date += timedelta(days=7)

print(weeks)

['2023-01-02', '2023-01-09', '2023-01-16', '2023-01-23', '2023-01-30', '2023-02-06', '2023-02-13', '2023-02-20', '2023-02-27', '2023-03-06', '2023-03-13', '2023-03-20', '2023-03-27', '2023-04-03', '2023-04-10', '2023-04-17', '2023-04-24', '2023-05-01', '2023-05-08', '2023-05-15', '2023-05-22', '2023-05-29', '2023-06-05', '2023-06-12', '2023-06-19', '2023-06-26', '2023-07-03', '2023-07-10', '2023-07-17', '2023-07-24', '2023-07-31', '2023-08-07', '2023-08-14', '2023-08-21', '2023-08-28', '2023-09-04', '2023-09-11', '2023-09-18', '2023-09-25', '2023-10-02', '2023-10-09', '2023-10-16', '2023-10-23', '2023-10-30', '2023-11-06', '2023-11-13', '2023-11-20', '2023-11-27', '2023-12-04', '2023-12-11', '2023-12-18', '2023-12-25', '2024-01-01', '2024-01-08', '2024-01-15', '2024-01-22', '2024-01-29']


In [59]:
# Scrape the ATP singles rankings page once per week in `weeks` and stack the
# per-week tables into a single long DataFrame (player, rank, week).
weekly_frames = []
for week in weeks:
    # NOTE(review): the date is appended as a bare query string with no
    # parameter name. Confirm the site really serves the ranking for `week`
    # rather than the current ranking on every request.
    url = "https://www.atptour.com/en/rankings/singles?"+week
    # A timeout keeps one stalled request from hanging the whole run.
    page = rq.get(url, timeout=30)
    # Fail here with the HTTP status instead of with an opaque AttributeError
    # further down when an error page has no rankings table.
    page.raise_for_status()
    content = page.content
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(content, "html.parser")
    rows = (
        soup.body
        .find('div', class_ = 'atp_rankings-all')
        .find('table', class_ = 'mega-table desktop-table')
        .find('tbody')
        .find_all('tr')
    )
    player_ranks = {}
    for row in rows:
        try:
            player = row.find('td', class_ = 'player bold heavy').find('li', class_ = 'name center').find('span').text
            rank = row.find('td', class_ = 'rank bold heavy').text
        except AttributeError:
            # find() returned None: this row lacks the expected cells, so skip it.
            continue
        player_ranks[player] = rank

    week_ranks = pd.DataFrame(list(player_ranks.items()), columns = ['player','rank'])
    week_ranks['week'] = week
    print(week,"ATP rankings extracted")

    weekly_frames.append(week_ranks)

atp_ranks = pd.concat(weekly_frames)

2023-01-02 ATP rankings extracted
2023-01-09 ATP rankings extracted
2023-01-16 ATP rankings extracted
2023-01-23 ATP rankings extracted
2023-01-30 ATP rankings extracted
2023-02-06 ATP rankings extracted
2023-02-13 ATP rankings extracted
2023-02-20 ATP rankings extracted
2023-02-27 ATP rankings extracted
2023-03-06 ATP rankings extracted
2023-03-13 ATP rankings extracted
2023-03-20 ATP rankings extracted
2023-03-27 ATP rankings extracted
2023-04-03 ATP rankings extracted
2023-04-10 ATP rankings extracted
2023-04-17 ATP rankings extracted
2023-04-24 ATP rankings extracted
2023-05-01 ATP rankings extracted
2023-05-08 ATP rankings extracted
2023-05-15 ATP rankings extracted
2023-05-22 ATP rankings extracted
2023-05-29 ATP rankings extracted
2023-06-05 ATP rankings extracted
2023-06-12 ATP rankings extracted
2023-06-19 ATP rankings extracted
2023-06-26 ATP rankings extracted
2023-07-03 ATP rankings extracted
2023-07-10 ATP rankings extracted
2023-07-17 ATP rankings extracted
2023-07-24 ATP

In [61]:
# Persist the scraped rankings. Create the target directory first so that a
# missing `data/` folder does not fail the export after the scrape has run.
output_path = Path("data/atp_ranks.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
atp_ranks.to_csv(output_path, index=False)