In [4]:
import time
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Browser-style User-Agent string; the scraper functions in this cell pass
# `headers` to every requests.get call.
_BROWSER_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/122.0.0.0 Safari/537.36"
)

headers = {"User-Agent": _BROWSER_USER_AGENT}

def get_all_stat_slugs(timeout=30):
    """Scrape all stat slugs from the NFL team stats page.

    Fetches the team-stats index page, selects every anchor whose ``href``
    starts with ``/nfl/stat/`` and keeps the part of the href that follows
    ``/stat/``.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        list[str]: Unique, non-empty slugs in sorted order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    base_url = "https://www.teamrankings.com/nfl/team-stats"
    resp = requests.get(base_url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page into
    # an empty slug list.
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, "html.parser")
    slugs = {a["href"].split("/stat/")[-1] for a in soup.select("a[href^='/nfl/stat/']")}
    # An href of exactly "/nfl/stat/" produces an empty slug, which is not a stat page.
    slugs.discard("")
    return sorted(slugs)  # dedupe, sorted for stability


def scrape_one_season(stat_slugs, season_label, delay=1.0, timeout=30):
    """Scrape all current stats for one season into a combined DataFrame.

    For each slug the stat page is downloaded, its first HTML table is taken,
    the ``Rank`` column is dropped, every column except ``Team`` is prefixed
    with ``"<slug>_"``, and the result is left-merged onto the accumulated
    frame on ``["Season", "Team"]``.

    Args:
        stat_slugs: Iterable of stat slugs (as returned by get_all_stat_slugs).
        season_label: Value written to the ``Season`` column of every row. It
            is only a label; it is not sent to the site.
        delay: Seconds to pause after every request, successful or not.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        pandas.DataFrame with one ``Season`` column, one ``Team`` column and
        the prefixed stat columns, or ``None`` if no stat page yielded a
        usable table.

    Notes:
        Scraping stays best-effort: a slug whose page cannot be fetched or
        parsed, or whose table has no ``Team`` column, is skipped. Skipped
        slugs are reported in a single summary line instead of being dropped
        silently.
    """
    all_df = None
    skipped = []  # (slug, reason) for every stat that could not be used

    for slug in tqdm(stat_slugs, desc=f"Scraping {season_label}", leave=False):
        url = f"https://www.teamrankings.com/nfl/stat/{slug}"
        df = None
        problem = None
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
            resp.raise_for_status()
            # Passing literal HTML to read_html is deprecated (pandas >= 2.1);
            # a file-like wrapper is accepted by old and new versions alike.
            tables = pd.read_html(StringIO(resp.text))
            if tables:
                df = tables[0]
            else:
                problem = "no tables found"
        except Exception as exc:
            problem = f"{type(exc).__name__}: {exc}"

        # respect site limits -- pause after *every* request, including failed
        # ones, so a run of errors does not turn into back-to-back requests.
        time.sleep(delay)

        if df is not None and "Team" not in df.columns:
            df = None
            problem = 'no "Team" column'
        if df is None:
            skipped.append((slug, problem))
            continue

        df = df.drop(columns=["Rank"], errors="ignore")

        # prefix all stat columns except "Team"
        df = df.rename(columns={col: f"{slug}_{col}" for col in df.columns if col != "Team"})

        # add a season label column
        df.insert(0, "Season", season_label)

        # merge -- the left join keeps the team rows of the first usable table
        if all_df is None:
            all_df = df
        else:
            all_df = all_df.merge(df, on=["Season", "Team"], how="left")

    if skipped:
        preview = "; ".join(f"{name} ({why})" for name, why in skipped[:5])
        more = " ..." if len(skipped) > 5 else ""
        print(f"[WARN] Skipped {len(skipped)} stat(s) for {season_label}: {preview}{more}")

    return all_df


# === RUN FOR CURRENT SEASON ===
# Label written to the "Season" column and used in the output file name.
SEASON = "2025"

# Discover the available stats first, then scrape each one.
stat_slugs = get_all_stat_slugs()
print(f"Found {len(stat_slugs)} stat slugs")

df = scrape_one_season(stat_slugs, SEASON)

# scrape_one_season returns None when no stat page produced a usable table.
if df is None:
    print("No data scraped.")
else:
    out_path = f"nfl_stats_{SEASON}.csv"
    df.to_csv(out_path, index=False)
    print(f"Saved {out_path} with shape {df.shape}")

Found 226 stat slugs


                                                                

Saved nfl_stats_2025.csv with shape (32, 1358)




In [5]:
# Display the combined frame produced by scrape_one_season in the previous cell
# (a bare last expression is rendered as the cell's output).
df

Unnamed: 0,Season,Team,1st-half-points-per-game_2025,1st-half-points-per-game_Last 3,1st-half-points-per-game_Last 1,1st-half-points-per-game_Home,1st-half-points-per-game_Away,1st-half-points-per-game_2024,1st-half-time-of-possession-share-pct_2025,1st-half-time-of-possession-share-pct_Last 3,...,yards-per-point-margin_Last 1,yards-per-point-margin_Home,yards-per-point-margin_Away,yards-per-point-margin_2024,yards-per-rush-attempt_2025,yards-per-rush-attempt_Last 3,yards-per-rush-attempt_Last 1,yards-per-rush-attempt_Home,yards-per-rush-attempt_Away,yards-per-rush-attempt_2024
0,2025,Indianapolis,17.3,14.7,14.0,18.5,15.0,11.0,51.64%,46.37%,...,3.4,6.1,4.4,-0.7,4.6,4.5,5.6,4.6,4.5,4.7
1,2025,Dallas,16.7,18.7,17.0,13.0,18.5,10.4,47.72%,45.30%,...,2.9,1.5,-0.6,-3.0,4.9,4.4,1.6,4.7,5.0,4.0
2,2025,Detroit,14.8,14.7,10.0,24.0,10.3,18.0,57.58%,53.89%,...,-5.6,9.8,-1.3,3.7,4.4,3.8,4.3,4.8,4.2,4.8
3,2025,Seattle,14.8,11.3,13.0,18.3,11.3,11.1,41.10%,41.57%,...,5.5,5.1,1.0,0.3,3.7,4.2,2.3,3.7,3.7,4.2
4,2025,Philadelphia,14.8,17.0,17.0,12.7,17.0,13.6,50.98%,53.11%,...,-9.2,3.2,2.5,3.3,3.5,3.3,3.7,3.8,3.3,5.0
5,2025,New England,14.7,18.7,22.0,15.0,14.3,8.2,52.33%,40.15%,...,2.3,2.3,3.0,-3.2,3.5,3.0,2.4,3.7,3.2,4.4
6,2025,Chicago,14.0,15.3,13.0,17.0,12.0,7.5,51.73%,49.72%,...,-1.5,3.1,-2.0,0.7,4.1,3.7,5.4,3.7,4.4,4.0
7,2025,Tampa Bay,13.8,13.0,20.0,15.3,12.3,14.6,52.50%,53.44%,...,7.4,-2.0,1.8,1.5,4.0,3.5,4.0,3.8,4.2,5.2
8,2025,Pittsburgh,13.6,12.3,9.0,11.5,15.0,9.4,45.38%,47.56%,...,13.0,1.0,7.0,1.6,3.4,3.6,3.6,3.5,3.3,4.0
9,2025,Jacksonville,12.3,10.0,6.0,10.0,17.0,8.6,53.75%,58.09%,...,-5.5,5.2,0.5,-1.0,4.7,4.1,3.1,4.5,4.9,4.2


In [None]:
# Replace the current season's rows in the master CSV with the fresh scrape.
# NOTE(review): absolute, machine-specific path -- consider moving it to a
# configurable data directory.
master_path = r"C:\Users\Sam\Desktop\NFL Game Prediction project\teamrankings_data.csv"

# YEAR is not assigned in any visible cell, so on a fresh kernel this cell
# would stop with a NameError. Derive it from SEASON (set in the scrape cell).
# An int is used because read_csv parses an all-numeric column as integers.
YEAR = int(SEASON)

# rename Season -> Year for compatibility
df = df.rename(columns={"Season": "Year"})

# Rows to append: the season label is the string "2025"; store the year as an
# int so the Year column does not end up mixing strings and numbers.
new_rows = df.assign(Year=YEAR)

# load existing
# low_memory=False makes pandas infer each column's dtype from the whole file.
# The saved output shows a warning raised on this call -- presumably a
# DtypeWarning about mixed-type columns; TODO confirm.
existing = pd.read_csv(master_path, low_memory=False)

# drop old YEAR rows -- compare numerically so rows stored as 2025 or "2025"
# are both removed and re-running the cell cannot duplicate the season.
existing_years = pd.to_numeric(existing["Year"], errors="coerce")
existing = existing[existing_years != YEAR]

# append new scrape
final = pd.concat([existing, new_rows], ignore_index=True)

# save back
final.to_csv(master_path, index=False)
print(f"[DONE] Updated {master_path} with {YEAR} stats (shape {final.shape})")

  existing = pd.read_csv(master_path)


[DONE] Updated C:\Users\Sam\Desktop\NFL Game Prediction project\teamrankings_data.csv with 2025 stats (shape (736, 1584))
