In [None]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import time
import random
from IPython.display import display
import re

# 1) Create a Session with a random or fixed User-Agent
s = requests.Session()
s.headers.update({
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/109.0.0.0 Safari/537.36"
    )
})

def get_soup(session, url):
    """Fetch a URL, return BeautifulSoup or None on failure."""
    try:
        resp = session.get(url, timeout=10)
        # Check status code
        if resp.status_code == 200:
            return BeautifulSoup(resp.text, 'html.parser')
        elif resp.status_code == 429:
            print("Received 429. Too many requests. Backing off.")
            # Wait longer or do an exponential backoff
            time.sleep(60)
            return None
        else:
            print(f"Error: got status {resp.status_code} for {url}")
            return None
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return None

def get_team_links(soup):
    """Extract team links from the league table soup."""
    tables = soup.select('table.stats_table')
    if not tables:
        print("No stats_table found!")
        return []
    league_table = tables[0]
    links = league_table.find_all('a')
    hrefs = [l.get('href') for l in links if l.get('href')]
    hrefs = [h for h in hrefs if 'squads' in h]
    team_urls = [f"https://fbref.com{h}" for h in hrefs]
    return team_urls

def get_player_links(session, team_url):
    soup = get_soup(session, team_url)
    if not soup:
        return []
    tables = soup.select('table')
    if not tables:
        return []
    player_table = tables[0]
    links = player_table.find_all('a')
    hrefs = [l.get('href') for l in links if l.get('href')]
    hrefs = [h for h in hrefs if 'players' in h and 'matchlogs' not in h]
    return [f"https://fbref.com{h}" for h in hrefs]

def get_player_data(session, player_url):
    # Fetch and parse the page
    soup = get_soup(session, player_url)
    if not soup:
        print(f"Failed to retrieve page for {player_url}")
        return None

    # Extract player name
    player_name = None
    h1_tag = soup.find("h1", {"itemprop": "name"}) or soup.find("h1")
    player_name = h1_tag.get_text(strip=True) if h1_tag else "Unknown"

    try:
        # Extract player stats table
        tables = pd.read_html(str(soup), match=player_name)
        if not tables:
            print(f"No matching table found for {player_name}.")
            return None

        df = tables[0]  # Assume first table is relevant

        # Extract minutes played
        minutes = None
        footer_div = soup.find("div", id=re.compile("^tfooter_scout_summary_"))
        if footer_div:
            strong_tag = footer_div.find("strong")
            if strong_tag:
                minutes_text = strong_tag.get_text(strip=True)
                minutes = int(minutes_text.split()[0])  # Extract numeric value safely

        # Add "Minutes Played" as a new row if minutes were found
        if minutes is not None:
            print(f"Found {minutes} minutes for {player_name}")
            new_row = pd.DataFrame({"Stat": ["Minutes Played"], "Value": [minutes]})
            df = pd.concat([df, new_row], ignore_index=True)

        return df

    except ValueError as e:
        print(f"Error processing {player_name}: {e}")
        return None
    
def convert_csv(df):
    return df.to_csv(index=False)

team_names = [] #To use later on for scraping associated ELO scores
years = ['2017-2018', '2018-2019', '2019-2020', '2020-2021', '2021-2022', '2022-2023', '2023-2024', '2024-2025']
def main():
    team_data = {}
    league_table_url = "https://fbref.com/en/comps/9/Premier-League-Stats"
    
    soup_league = get_soup(s, league_table_url)
    if not soup_league:
        return
    
    team_urls = get_team_links(soup_league)
    
    # Example: just scrape first few teams to limit requests
    # Create one list per team to store player data
    
    for team_url in team_urls[:2]:
        team_name = team_url.split("/")[-1].replace("-", " ")
        clean_team_name = team_name.replace(" Stats", "")

        print(f"Scraping {team_name}")
        print(f"clean team name {clean_team_name}")
        team_names.append(clean_team_name)

        team_data[team_name] = {'players': [], 'keepers': []}   
        player_urls = get_player_links(s, team_url)
        for purl in player_urls:
            print(f"Scraping {purl}")
            df = get_player_data(s, purl)
            if df is not None:
                if df.iloc[0, 0] == "PSxG-GA":
                    team_data[team_name]['keepers'].append(df)
                else:
                    team_data[team_name]['players'].append(df)
            else:
                print(f"No data for {purl}")
            # random sleep
            time.sleep(random.uniform(2, 5))
    
    print(f"Scraped data for {len(team_data)} teams.")
    print("Data keys:", team_data.keys())
    print("Data for first team:", team_data['Liverpool Stats'])



    # Convert each team's data to a single CSV for players and another for keepers
    for team, data in team_data.items():
        for key, dfs in data.items():
            if dfs:  # Ensure there is data to concatenate
                combined_df = pd.concat(dfs, ignore_index=True)  # Combine all DataFrames
                print(f"\n===== {team} - {key.upper()} =====")
                display(combined_df)
                csv = convert_csv(combined_df)  # Convert to CSV format
                filename = f"{team}_{key}.csv"
                
                with open(filename, "w") as f:
                    f.write(csv)
                    print(f"Saved {filename}")


if __name__ == "__main__":
    main()


Scraping Liverpool Stats
clean team name Liverpool
Scraping https://fbref.com/en/players/e06683ca/Virgil-van-Dijk


  tables = pd.read_html(str(soup), match=player_name)


Found 4296 minutes for Virgil van Dijk
Scraping https://fbref.com/en/players/e342ad68/Mohamed-Salah


  tables = pd.read_html(str(soup), match=player_name)


Found 3865 minutes for Mohamed Salah
Scraping https://fbref.com/en/players/b8e740fb/Ryan-Gravenberch


  tables = pd.read_html(str(soup), match=player_name)


Found 3086 minutes for Ryan Gravenberch
Scraping https://fbref.com/en/players/2e4f5f03/Andrew-Robertson


  tables = pd.read_html(str(soup), match=player_name)


Found 3257 minutes for Andrew Robertson
Scraping https://fbref.com/en/players/cd1acf9d/Trent-Alexander-Arnold


  tables = pd.read_html(str(soup), match=player_name)


Found 2907 minutes for Trent Alexander-Arnold
Scraping https://fbref.com/en/players/83d074ff/Alexis-Mac-Allister


  tables = pd.read_html(str(soup), match=player_name)


Found 3598 minutes for Alexis Mac Allister
Scraping https://fbref.com/en/players/934e1968/Dominik-Szoboszlai


  tables = pd.read_html(str(soup), match=player_name)


Found 2857 minutes for Dominik Szoboszlai
Scraping https://fbref.com/en/players/5ed9b537/Ibrahima-Konate


  tables = pd.read_html(str(soup), match=player_name)


Found 2632 minutes for Ibrahima Konaté
Scraping https://fbref.com/en/players/4a1a9578/Luis-Diaz


  tables = pd.read_html(str(soup), match=player_name)


Found 3202 minutes for Luis Díaz
Scraping https://fbref.com/en/players/7a2e46a8/Alisson


  tables = pd.read_html(str(soup), match=player_name)


Found 2688 minutes for Alisson
Scraping https://fbref.com/en/players/1971591f/Cody-Gakpo


  tables = pd.read_html(str(soup), match=player_name)


Found 2664 minutes for Cody Gakpo
Scraping https://fbref.com/en/players/4fb9c88f/Curtis-Jones


  tables = pd.read_html(str(soup), match=player_name)


Found 1688 minutes for Curtis Jones
Scraping https://fbref.com/en/players/178ae8f8/Diogo-Jota


  tables = pd.read_html(str(soup), match=player_name)


Found 995 minutes for Diogo Jota
Scraping https://fbref.com/en/players/62d7ef38/Caoimhin-Kelleher


  tables = pd.read_html(str(soup), match=player_name)


Found 1800 minutes for Caoimhín Kelleher
Scraping https://fbref.com/en/players/4d77b365/Darwin-Nunez


  tables = pd.read_html(str(soup), match=player_name)


Found 2054 minutes for Darwin Núñez
Scraping https://fbref.com/en/players/7a11550b/Joe-Gomez


  tables = pd.read_html(str(soup), match=player_name)


Found 1354 minutes for Joe Gomez
Scraping https://fbref.com/en/players/f315ca93/Kostas-Tsimikas


  tables = pd.read_html(str(soup), match=player_name)


Found 912 minutes for Kostas Tsimikas
Scraping https://fbref.com/en/players/bbd67769/Conor-Bradley


  tables = pd.read_html(str(soup), match=player_name)


Found 1239 minutes for Conor Bradley
Scraping https://fbref.com/en/players/4125cb98/Jarell-Quansah


  tables = pd.read_html(str(soup), match=player_name)


Found 1360 minutes for Jarell Quansah
Scraping https://fbref.com/en/players/c149016b/Wataru-Endo


  tables = pd.read_html(str(soup), match=player_name)


Found 1186 minutes for Wataru Endo
Scraping https://fbref.com/en/players/b9e1436c/Harvey-Elliott


  tables = pd.read_html(str(soup), match=player_name)


Found 1115 minutes for Harvey Elliott
Scraping https://fbref.com/en/players/b0f7e36c/Federico-Chiesa


  tables = pd.read_html(str(soup), match=player_name)


Found 1041 minutes for Federico Chiesa
Scraping https://fbref.com/en/players/12bb4d6a/Vitezslav-Jaros


  tables = pd.read_html(str(soup), match=player_name)


Error processing Vitezslav Jaros: No tables found matching pattern 'Vitezslav Jaros'
No data for https://fbref.com/en/players/12bb4d6a/Vitezslav-Jaros
Scraping https://fbref.com/en/players/003cf4d1/Jayden-Danns
Error processing Jayden Danns: No tables found matching pattern 'Jayden Danns'
No data for https://fbref.com/en/players/003cf4d1/Jayden-Danns
Scraping https://fbref.com/en/players/fd08a24b/Harvey-Davies
Error processing Harvey Davies: No tables found matching pattern 'Harvey Davies'
No data for https://fbref.com/en/players/fd08a24b/Harvey-Davies
Scraping https://fbref.com/en/players/bf973eeb/James-McConnell
Error processing James McConnell: No tables found matching pattern 'James McConnell'
No data for https://fbref.com/en/players/bf973eeb/James-McConnell
Scraping https://fbref.com/en/players/2bc28bb9/Tyler-Morton
Found 902 minutes for Tyler Morton
Scraping https://fbref.com/en/players/398a24f6/Amara-Nallo


  tables = pd.read_html(str(soup), match=player_name)


Error processing Amara Nallo: No tables found matching pattern 'Amara Nallo'
No data for https://fbref.com/en/players/398a24f6/Amara-Nallo
Scraping https://fbref.com/en/players/1d3b3d77/Trey-Nyoni
Error processing Trey Nyoni: No tables found matching pattern 'Trey Nyoni'
No data for https://fbref.com/en/players/1d3b3d77/Trey-Nyoni
Scraping Arsenal Stats
clean team name Arsenal
Scraping https://fbref.com/en/players/98ea5115/David-Raya
Found 4260 minutes for David Raya
Scraping https://fbref.com/en/players/972aeb2a/William-Saliba


  tables = pd.read_html(str(soup), match=player_name)


Found 4019 minutes for William Saliba
Scraping https://fbref.com/en/players/67ac5bb8/Gabriel-Magalhaes


  tables = pd.read_html(str(soup), match=player_name)


Found 4049 minutes for Gabriel Magalhães
Scraping https://fbref.com/en/players/529f49ab/Thomas-Partey


  tables = pd.read_html(str(soup), match=player_name)


Found 3027 minutes for Thomas Partey
Scraping https://fbref.com/en/players/41034650/Jurrien-Timber


  tables = pd.read_html(str(soup), match=player_name)


Found 2285 minutes for Jurriën Timber
Scraping https://fbref.com/en/players/fed7cb61/Kai-Havertz


  tables = pd.read_html(str(soup), match=player_name)


Found 3805 minutes for Kai Havertz
Scraping https://fbref.com/en/players/1c7012b8/Declan-Rice


  tables = pd.read_html(str(soup), match=player_name)


Found 3719 minutes for Declan Rice
Scraping https://fbref.com/en/players/38ceb24a/Leandro-Trossard


  tables = pd.read_html(str(soup), match=player_name)


Found 2848 minutes for Leandro Trossard
Scraping https://fbref.com/en/players/48a5a5d6/Gabriel-Martinelli


  tables = pd.read_html(str(soup), match=player_name)


Found 2571 minutes for Gabriel Martinelli
Scraping https://fbref.com/en/players/79300479/Martin-Odegaard


  tables = pd.read_html(str(soup), match=player_name)


Found 3146 minutes for Martin Ødegaard
Scraping https://fbref.com/en/players/bc7dc64d/Bukayo-Saka


  tables = pd.read_html(str(soup), match=player_name)


Found 2866 minutes for Bukayo Saka
Scraping https://fbref.com/en/players/d080ed5e/Mikel-Merino


  tables = pd.read_html(str(soup), match=player_name)


Found 2124 minutes for Mikel Merino
Scraping https://fbref.com/en/players/aded8e6f/Riccardo-Calafiori


  tables = pd.read_html(str(soup), match=player_name)


Found 1786 minutes for Riccardo Calafiori
Scraping https://fbref.com/en/players/5dff6c28/Myles-Lewis-Skelly


  tables = pd.read_html(str(soup), match=player_name)


Found 730 minutes for Myles Lewis-Skelly
Scraping https://fbref.com/en/players/35e413f1/Ben-White


  tables = pd.read_html(str(soup), match=player_name)


Found 2318 minutes for Ben White
Scraping https://fbref.com/en/players/b66315ae/Gabriel-Jesus


  tables = pd.read_html(str(soup), match=player_name)


Found 1336 minutes for Gabriel Jesus
Scraping https://fbref.com/en/players/45db685d/Jorginho


  tables = pd.read_html(str(soup), match=player_name)


Found 1337 minutes for Jorginho
Scraping https://fbref.com/en/players/7f94982c/Ethan-Nwaneri


  tables = pd.read_html(str(soup), match=player_name)


Found 534 minutes for Ethan Nwaneri
Scraping https://fbref.com/en/players/b400bde0/Raheem-Sterling


  tables = pd.read_html(str(soup), match=player_name)


Found 721 minutes for Raheem Sterling
Scraping https://fbref.com/en/players/dc3e663e/Jakub-Kiwior


  tables = pd.read_html(str(soup), match=player_name)


Found 1214 minutes for Jakub Kiwior
Scraping https://fbref.com/en/players/51cf8561/Oleksandr-Zinchenko


  tables = pd.read_html(str(soup), match=player_name)


Found 693 minutes for Oleksandr Zinchenko
Scraping https://fbref.com/en/players/fce2302c/Kieran-Tierney


  tables = pd.read_html(str(soup), match=player_name)


Found 488 minutes for Kieran Tierney
Scraping https://fbref.com/en/players/b3af9be1/Takehiro-Tomiyasu


  tables = pd.read_html(str(soup), match=player_name)


Found 607 minutes for Takehiro Tomiyasu
Scraping https://fbref.com/en/players/c5bdb6e3/Reiss-Nelson


  tables = pd.read_html(str(soup), match=player_name)


Found 591 minutes for Reiss Nelson
Scraping https://fbref.com/en/players/3a686640/Nathan-Butler-Oyedeji


  tables = pd.read_html(str(soup), match=player_name)


Error processing Nathan Butler-Oyedeji: No tables found matching pattern 'Nathan Butler-Oyedeji'
No data for https://fbref.com/en/players/3a686640/Nathan-Butler-Oyedeji
Scraping https://fbref.com/en/players/64e17fab/Ayden-Heaven
Error processing Ayden Heaven: No tables found matching pattern 'Ayden Heaven'
No data for https://fbref.com/en/players/64e17fab/Ayden-Heaven
Scraping https://fbref.com/en/players/6b15cf32/Ismeal-Kabia
Error processing Ismeal Kabia: No tables found matching pattern 'Ismeal Kabia'
No data for https://fbref.com/en/players/6b15cf32/Ismeal-Kabia
Scraping https://fbref.com/en/players/7be4311f/Maldini-Kacurri
Error processing Maldini Kacurri: No tables found matching pattern 'Maldini Kacurri'
No data for https://fbref.com/en/players/7be4311f/Maldini-Kacurri
Scraping https://fbref.com/en/players/a9dc785c/Neto
Found 1170 minutes for Neto
Scraping https://fbref.com/en/players/676cf55d/Josh-Nichols


  tables = pd.read_html(str(soup), match=player_name)


Error processing Josh Nichols: No tables found matching pattern 'Josh Nichols'
No data for https://fbref.com/en/players/676cf55d/Josh-Nichols
Scraping https://fbref.com/en/players/a53649b7/Eddie-Nketiah
Found 698 minutes for Eddie Nketiah
Scraping https://fbref.com/en/players/b85c3273/Salah-Oulad-MHand


  tables = pd.read_html(str(soup), match=player_name)


Error processing Salah Oulad M'Hand: No tables found matching pattern "Salah Oulad M'Hand"
No data for https://fbref.com/en/players/b85c3273/Salah-Oulad-MHand
Scraping https://fbref.com/en/players/0b4ecd65/Jack-Porter
Error processing Jack Porter: No tables found matching pattern 'Jack Porter'
No data for https://fbref.com/en/players/0b4ecd65/Jack-Porter
Scraping https://fbref.com/en/players/466fb2c5/Aaron-Ramsdale
Found 1620 minutes for Aaron Ramsdale
Scraping https://fbref.com/en/players/d1a2e006/Tommy-Setford


  tables = pd.read_html(str(soup), match=player_name)


Error processing Tommy Setford: No tables found matching pattern 'Tommy Setford'
No data for https://fbref.com/en/players/d1a2e006/Tommy-Setford
Scraped data for 2 teams.
Data keys: dict_keys(['Liverpool Stats', 'Arsenal Stats'])
Data for first team: {'players': [                   Statistic Per 90  Percentile            Stat   Value
0          Non-Penalty Goals   0.04        51.0             NaN     NaN
1       npxG: Non-Penalty xG   0.06        76.0             NaN     NaN
2                Shots Total   0.90        93.0             NaN     NaN
3                    Assists   0.02        55.0             NaN     NaN
4   xAG: Exp. Assisted Goals   0.01        33.0             NaN     NaN
5                 npxG + xAG   0.08        59.0             NaN     NaN
6      Shot-Creating Actions   1.26        86.0             NaN     NaN
7                        NaN    NaN         NaN             NaN     NaN
8           Passes Attempted  81.39        93.0             NaN     NaN
9          Pass 

Unnamed: 0,Statistic,Per 90,Percentile,Stat,Value
0,Non-Penalty Goals,0.04,51.0,,
1,npxG: Non-Penalty xG,0.06,76.0,,
2,Shots Total,0.90,93.0,,
3,Assists,0.02,55.0,,
4,xAG: Exp. Assisted Goals,0.01,33.0,,
...,...,...,...,...,...
457,Interceptions,0.70,19.0,,
458,Blocks,1.70,92.0,,
459,Clearances,1.90,84.0,,
460,Aerials Won,0.50,22.0,,


Saved Liverpool Stats_players.csv

===== Liverpool Stats - KEEPERS =====


Unnamed: 0,Statistic,Per 90,Percentile,Stat,Value
0,PSxG-GA,-0.06,34.0,,
1,Goals Against,1.00,91.0,,
2,Save Percentage,69.7%,45.0,,
3,PSxG/SoT,0.29,47.0,,
4,Clean Sheet Percentage,34.5%,78.0,,
5,,,,,
6,Touches,36.96,46.0,,
7,Launch %,18.8%,12.0,,
8,Goal Kicks,2.61,2.0,,
9,Avg. Length of Goal Kicks,30.3,5.0,,


Saved Liverpool Stats_keepers.csv

===== Arsenal Stats - PLAYERS =====


Unnamed: 0,Statistic,Per 90,Percentile,Stat,Value
0,Non-Penalty Goals,0.04,56.0,,
1,npxG: Non-Penalty xG,0.06,65.0,,
2,Shots Total,0.20,9.0,,
3,Assists,0.00,27.0,,
4,xAG: Exp. Assisted Goals,0.01,37.0,,
...,...,...,...,...,...
523,Interceptions,0.26,19.0,,
524,Blocks,0.77,38.0,,
525,Clearances,0.90,82.0,,
526,Aerials Won,1.93,97.0,,


Saved Arsenal Stats_players.csv

===== Arsenal Stats - KEEPERS =====


Unnamed: 0,Statistic,Per 90,Percentile,Stat,Value
0,PSxG-GA,+0.04,57.0,,
1,Goals Against,0.70,99.0,,
2,Save Percentage,78.0%,94.0,,
3,PSxG/SoT,0.24,5.0,,
4,Save% (Penalty Kicks),16.7%,57.0,,
5,Clean Sheet Percentage,48.9%,99.0,,
6,,,,,
7,Touches,35.83,34.0,,
8,Launch %,34.2%,53.0,,
9,Goal Kicks,3.79,10.0,,


Saved Arsenal Stats_keepers.csv


In [1]:
# Fetch Team aggregated season data through API calls from FBRef

import requests

response = requests.post('https://fbrapi.com/generate_api_key')
api_key = response.json()['api_key']
print("API Key:", api_key)

def fetch_team_data(params):
    url = "https://fbrapi.com/team-season-stats"
    headers = {"X-API-Key": api_key}

    response = requests.get(url, params=params, headers=headers)

    return response.json()


def get_league_ids():
    url = "https://fbrapi.com/leagues"
    headers = {"X-API-Key": api_key}

    response = requests.get(url, headers=headers)
    
    # Print the raw response text
    print("Raw API Response:", response.text)

    try:
        leagues = response.json()  # Attempt to parse JSON
    except ValueError:
        print("Error: Response is not valid JSON.")
        return {}

    # Debugging: Check the actual structure of the API response
    print("Parsed JSON:", leagues)

    if isinstance(leagues, dict):
        # Check if data is inside a key like 'data' or 'leagues'
        if "data" in leagues:
            leagues = leagues["data"]
        elif "leagues" in leagues:
            leagues = leagues["leagues"]
    
    # Check the expected structure
    if not isinstance(leagues, list):
        print("Unexpected API response structure!")
        return {}

    top5_leagues = ["Premier League", "La Liga", "Bundesliga", "Serie A", "Ligue 1"]

    # Extract league IDs correctly
    league_ids = {league["name"]: league["league_id"] for league in leagues if "name" in league and league["name"] in top5_leagues}

    return league_ids

def get_teams_per_league(league_id, season):
    """Fetch all team IDs for a given league and season."""
    url = "https://fbrapi.com/teams"
    params = {"league_id": league_id, "season_id": season}
    headers = {"X-API-Key": api_key}

    response = requests.get(url, params=params, headers=headers)
    teams = response.json()

    # Return a dictionary of {team_name: team_id}
    return {team["name"]: team["team_id"] for team in teams}

top5_league_ids = get_league_ids()
print(top5_league_ids)

seasons = ["2017-2018", "2018-2019", "2019-2020", "2020-2021", "2021-2022", "2022-2023", "2023-2024"]
teams_per_league = {}

for league_name, league_id in top5_league_ids.items():
    for season in seasons:
        teams = get_teams_per_league(league_id, season)
        # Create season-specific entries for each team
        for team_name, team_id in teams.items():
            key = f"{team_name}_{season}"
            teams_per_league[key] = team_id

print(teams_per_league)






API Key: mcGxZifpo_yd6yUaUvnw3y7RtChH3K1JQfMvBOoM4GM
Raw API Response: {"message": "Internal Server Error"}

Parsed JSON: {'message': 'Internal Server Error'}
Unexpected API response structure!
{}
{}


In [9]:
import pandas as pd
import requests
import os

def fetch_clubelo_data(team_name):
    """
    Fetch ELO data for a given team from clubelo.com
    Returns a pandas DataFrame or None if the request fails
    """
    # Format team name for URL (replace spaces with hyphens, remove special characters)
    formatted_name = team_name.replace(" ", "").replace("-", "")
    url = f"http://api.clubelo.com/{formatted_name}"
    
    try:
        # Make the request
        response = requests.get(url)
        
        if response.status_code == 200:
            # Save the content to a temporary file
            temp_file = f"temp_{formatted_name}.csv"
            with open(temp_file, 'wb') as f:
                f.write(response.content)
            
            # Read the CSV into a pandas DataFrame
            df = pd.read_csv(temp_file)
            
            # Clean up the temporary file
            os.remove(temp_file)
            
            return df
        else:
            print(f"Failed to fetch data for {team_name}: Status code {response.status_code}")
            return None
            
    except Exception as e:
        print(f"Error fetching data for {team_name}: {str(e)}")
        return None

In [10]:
for team in team_names:
    df = fetch_clubelo_data(team)
    print(df)


      Rank       Club Country  Level          Elo        From          To
0      NaN  Liverpool     ENG      1  1551.140259  1946-07-07  1946-08-31
1      NaN  Liverpool     ENG      1  1561.357666  1946-09-01  1946-09-04
2      NaN  Liverpool     ENG      1  1551.169067  1946-09-05  1946-09-07
3      NaN  Liverpool     ENG      1  1559.834717  1946-09-08  1946-09-11
4      NaN  Liverpool     ENG      1  1549.396851  1946-09-12  1946-09-14
...    ...        ...     ...    ...          ...         ...         ...
5832   2.0  Liverpool     ENG      1  1990.022705  2025-02-20  2025-02-23
5833   2.0  Liverpool     ENG      1  1990.022705  2025-02-24  2025-02-26
5834   2.0  Liverpool     ENG      1  1990.022705  2025-02-27  2025-03-05
5835   2.0  Liverpool     ENG      1  1990.022705  2025-03-06  2025-03-06
5836   2.0  Liverpool     ENG      1  1990.022705  2025-03-07  2025-12-31

[5837 rows x 7 columns]
      Rank     Club Country  Level          Elo        From          To
0      NaN  Ars

In [2]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import time
import random
from IPython.display import display
import re

# 1) Create a Session with a random or fixed User-Agent
s = requests.Session()
s.headers.update({
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/109.0.0.0 Safari/537.36"
    )
})

def get_soup(session, url):
    """Fetch a URL, return BeautifulSoup or None on failure."""
    try:
        resp = session.get(url, timeout=10)
        # Check status code
        if resp.status_code == 200:
            return BeautifulSoup(resp.text, 'html.parser')
        elif resp.status_code == 429:
            print("Received 429. Too many requests. Backing off.")
            # Wait longer or do an exponential backoff
            time.sleep(60)
            return None
        else:
            print(f"Error: got status {resp.status_code} for {url}")
            return None
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return None
    
def get_competition_links(allowed_links, soup):
    table = soup.find('table', class_='liste')
    if not table:
        print("Could not find table with class 'liste'")
        return []
    
    links = table.find_all('a')
    print(f"Found {len(links)} links in table")

    hrefs = [l.get('href') for l in links if l.get('href')]
    print(f"Found {len(hrefs)} hrefs")
    
    #Select only for Top 5 competitions
    competition_urls = []
    for link in links:
        if link['href'] in allowed_links:
            competition_urls.append(f"http://clubelo.com/{link['href']}")
         
    return competition_urls

def main():
    elo_url = "http://clubelo.com/"
    print(f"Fetching {elo_url}")
    
    soup_elo = get_soup(s, elo_url)
    if not soup_elo:
        print("Failed to get soup from main page")
        return
    
    allowed_links = {"/ENG", "/ESP", "/ITA", "/GER", "/FRA"}
    competition_links = get_competition_links(allowed_links, soup_elo)
    print(f"\nTotal competition links found: {len(competition_links)}")

    for link in competition_links:
        print(link)
    print(team_names)

if __name__ == "__main__":
    main()

Fetching http://clubelo.com/
Found 62 links in table
Found 62 hrefs

Total competition links found: 5
http://clubelo.com//ENG
http://clubelo.com//ESP
http://clubelo.com//ITA
http://clubelo.com//FRA
http://clubelo.com//GER


NameError: name 'team_names' is not defined