In [70]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

"""Scrape EPL team stats from FootyStats for a given season"""
url = "https://footystats.org/england/premier-league#"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}

response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, "html.parser")


# Find the main league table
table = soup.find("table", class_="full-league-table")
if not table:
    print(f"No table found for season {season}")
    

links = table.find("tbody").find_all("a")
links = [l.get("href") for l in links]
links = [l for l in links if "/clubs/" in l]
team_urls = [f"https://footystats.org{l}" for l in links]
team_url = team_urls[0]
data = requests.get(team_url, headers = headers)
data.text
stats = pd.read_html(data.text)
stats = stats[0]
stats

Unnamed: 0,Stats,Overall,At Home,At Away
0,Wins,65%,75%,55%
1,Draws,22%,17%,27%
2,Losses,13%,8%,18%
3,xG For / Match,1.72,1.83,1.61
4,xG Against / Match,0.89,0.72,1.08
5,Scored / Match,1.83,2.33,1.27
6,Conceded / Match,0.74,0.67,0.82
7,AVG (Match Goals Average),2.57,3,2.09
8,Clean Sheets %,48%,50%,45%
9,Failed to Score %,13%,8%,18%


In [71]:
stats.columns = ['Stat_Name', 'overall', 'Home', 'Away']
stats

Unnamed: 0,Stat_Name,overall,Home,Away
0,Wins,65%,75%,55%
1,Draws,22%,17%,27%
2,Losses,13%,8%,18%
3,xG For / Match,1.72,1.83,1.61
4,xG Against / Match,0.89,0.72,1.08
5,Scored / Match,1.83,2.33,1.27
6,Conceded / Match,0.74,0.67,0.82
7,AVG (Match Goals Average),2.57,3,2.09
8,Clean Sheets %,48%,50%,45%
9,Failed to Score %,13%,8%,18%


In [72]:
stats = stats.melt(
    id_vars=['Stat_Name'], 
    var_name='Location', 
    value_name='Value'
)
stats

Unnamed: 0,Stat_Name,Location,Value
0,Wins,overall,65%
1,Draws,overall,22%
2,Losses,overall,13%
3,xG For / Match,overall,1.72
4,xG Against / Match,overall,0.89
5,Scored / Match,overall,1.83
6,Conceded / Match,overall,0.74
7,AVG (Match Goals Average),overall,2.57
8,Clean Sheets %,overall,48%
9,Failed to Score %,overall,13%


In [73]:
stats_long['Combined_Name'] = stats_long['Location'] + ' ' + stats_long['Stat_Name']
stats_long

Unnamed: 0,Stat_Name,Location,Value,Combined_Name
0,Wins,overall,65%,overall Wins
1,Draws,overall,22%,overall Draws
2,Losses,overall,13%,overall Losses
3,xG For / Match,overall,1.72,overall xG For / Match
4,xG Against / Match,overall,0.89,overall xG Against / Match
5,Scored / Match,overall,1.83,overall Scored / Match
6,Conceded / Match,overall,0.74,overall Conceded / Match
7,AVG (Match Goals Average),overall,2.57,overall AVG (Match Goals Average)
8,Clean Sheets %,overall,48%,overall Clean Sheets %
9,Failed to Score %,overall,13%,overall Failed to Score %


In [74]:
stats = stats_long[['Combined_Name', 'Value']].set_index('Combined_Name')
stats

Unnamed: 0_level_0,Value
Combined_Name,Unnamed: 1_level_1
overall Wins,65%
overall Draws,22%
overall Losses,13%
overall xG For / Match,1.72
overall xG Against / Match,0.89
overall Scored / Match,1.83
overall Conceded / Match,0.74
overall AVG (Match Goals Average),2.57
overall Clean Sheets %,48%
overall Failed to Score %,13%


In [75]:
import pandas as pd

# 1. Reset index so 'Combined_Name' becomes a regular column
stats = stats.reset_index()

# 2. Transpose the data
# This makes 'Combined_Name' values become the header row
stats = stats.set_index('Combined_Name').T

# 3. Clean up (Optional)
# If you want to remove the index name 'Combined_Name' for a cleaner look:
stats.columns.name = None

stats

Unnamed: 0,overall Wins,overall Draws,overall Losses,overall xG For / Match,overall xG Against / Match,overall Scored / Match,overall Conceded / Match,overall AVG (Match Goals Average),overall Clean Sheets %,overall Failed to Score %,...,Away Possession AVG,Away Shots Taken / Match,Away Shots Conversion Rate,Away Fouls Committed / Match,Away Fouled Against / Match,Away Penalties Won,Away Penalties Conceded,Away Goal Kicks / Match,Away Throw-ins / Match,Away Free-Kicks / Match
Value,65%,22%,13%,1.72,0.89,1.83,0.74,2.57,48%,13%,...,57%,13.45,9%,10.55,9.64,1 in 11,0 in 11,5.64,16.18,10.18


In [187]:
def scrapeSeason(use_selenium=False):
    import requests
    from bs4 import BeautifulSoup
    import pandas as pd
    import re
    
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    }
    
    url = "https://footystats.org/england/premier-league"
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, "html.parser")
    
    # League table (ALWAYS WORKS)
    league_table = soup.find("table", class_="full-league-table")
    rows = league_table.find("tbody").find_all("tr")
    data = []
    
    for row in rows:
        cols = row.find_all("td")
        if len(cols) < 12: continue
            
        team_cell = cols[1].text.strip()
        stats = [col.text.strip() for col in cols]
        
        data.append({
            'Rank': stats[0],
            'Team': team_cell,
            'Played': stats[2],
            'W': stats[3],
            'D': stats[4],
            'L': stats[5],
            'GF': stats[6],
            'GA': stats[7],
            'GD': stats[8],
            'Pts': stats[9],
            'Form': stats[10],
            'PPG': stats[11],
            'ClShts': stats[12] if len(stats) > 12 else '',
            'BTTS': stats[13] if len(stats) > 13 else '',
            'xGF': stats[14] if len(stats) > 14 else ''
        })
    
    df_final = pd.DataFrame(data)
    
    #Save as CSV file
    print(f"âœ… Complete EPL table: {len(df_final)} teams")
    filename = f'epl_footystats_{pd.Timestamp.now().strftime("%Y%m%d_%H%M")}.csv'
    df_final.to_csv(filename, index=False)
    print(f"ðŸ’¾ Saved: {filename}")
    
    return df_final

# Use this - works immediately!
df = scrapeSeason()
df


âœ… Complete EPL table: 20 teams
ðŸ’¾ Saved: epl_footystats_20260131_2317.csv


Unnamed: 0,Rank,Team,Played,W,D,L,GF,GA,GD,Pts,Form,PPG,ClShts,BTTS,xGF
0,1,,Arsenal FC,24,16,5,3,46,17,29,53,WDDLW,2.21,50%,46%
1,2,,Manchester City FC,23,14,4,5,47,21,26,46,DDDLW,2.0,43%,43%
2,3,,Aston Villa FC,23,14,4,5,35,25,10,46,LWDLW,2.0,30%,52%
3,4,,Chelsea FC,24,11,7,6,42,27,15,40,DLWWW,1.67,38%,63%
4,5,,Manchester United FC,23,10,8,5,41,34,7,38,DDDWW,1.65,13%,74%
5,6,,Liverpool FC,23,10,6,7,35,32,3,36,DDDDL,1.57,30%,61%
6,7,,Fulham FC,23,10,4,9,32,32,0,34,DDWLW,1.48,22%,61%
7,8,,Everton FC,24,9,7,8,26,27,-1,34,LDWDD,1.42,38%,42%
8,9,,Newcastle United FC,23,9,6,8,32,29,3,33,WWWDL,1.43,30%,57%
9,10,,Brentford FC,23,10,3,10,35,32,3,33,DWWLL,1.43,22%,52%
