In [1]:
import nflreadpy as nfl
import pandas as pd

# Load the roster table from nflreadpy and convert it to a pandas DataFrame.
rosters = nfl.load_rosters().to_pandas()

# Keep only the quarterback rows.
qbs = rosters[rosters["position"] == "QB"]

# Pro-Football-Reference ids of those quarterbacks. Rows without an id are
# dropped, and repeated ids are collapsed (first occurrence kept) so that each
# player is requested and stored only once by the scraping loop further down.
active_qb_ids = qbs["pfr_id"].dropna().drop_duplicates().tolist()

In [2]:
active_qb_ids[0]

'RivePh00'

In [3]:
import requests
from bs4 import BeautifulSoup
import time

In [None]:
from io import StringIO

BASE_URL = "https://www.pro-football-reference.com/players"

def scrape_qb(pfr_slug, timeout=30):
    """Download one player's page and return the cleaned ``passing`` table.

    Parameters
    ----------
    pfr_slug : str
        Player id as it appears in the page URL, e.g. ``"RivePh00"``. Its
        first character selects the directory under ``BASE_URL``.
    timeout : float, optional
        Seconds to wait for the HTTP response before ``requests`` raises.

    Returns
    -------
    pandas.DataFrame or None
        The rows of ``<table id="passing">`` whose ``Season`` cell is exactly
        four digits, restricted to the columns listed in ``wanted_cols``
        (columns the page lacks are added empty). Every column except
        ``Team``, ``Pos`` and ``Awards`` is converted with ``pd.to_numeric``;
        cells that cannot be parsed become NaN.
        ``None`` when the response is not successful or the page contains no
        table with id ``passing``.
    """
    import warnings  # local import: only used on the failure path below

    first_letter = pfr_slug[0]
    url = f"{BASE_URL}/{first_letter}/{pfr_slug}.htm"

    # Without a timeout a stalled connection would block the notebook forever.
    res = requests.get(url, timeout=timeout)
    if not res.ok:
        # An error page (e.g. HTTP 429 when rate limited) would otherwise be
        # indistinguishable from "this player has no passing table".
        warnings.warn(
            f"GET {url} returned HTTP {res.status_code}; skipping {pfr_slug}"
        )
        return None

    soup = BeautifulSoup(res.text, "html.parser")
    table = soup.find("table", {"id": "passing"})
    if table is None:
        return None

    df = pd.read_html(StringIO(str(table)))[0]

    # Keep only rows whose Season cell is a bare four-digit year.
    is_season_row = df["Season"].astype(str).str.match(r"^\d{4}$", na=False)
    cleaned = df[is_season_row].copy()

    wanted_cols = [
        "Season", "Age", "Team", "Pos", "G", "GS", "Cmp", "Att", "Cmp%",
        "Yds", "TD", "Int", "Y/A", "Rate", "QBR", "AV", "Awards"
    ]

    # Guarantee a stable schema even when the page lacks some columns.
    for col in wanted_cols:
        if col not in cleaned.columns:
            cleaned[col] = None

    final = cleaned[wanted_cols].copy()

    # Assign column by column rather than through ``final.loc[:, cols] = ...``:
    # the ``.loc`` form writes into the existing columns in place, which can
    # leave them with ``object`` dtype instead of a numeric one.
    numeric_cols = final.columns.difference(["Team", "Pos", "Awards"])
    for col in numeric_cols:
        final[col] = pd.to_numeric(final[col], errors="coerce")

    return final

In [16]:
# `rodgers` is not assigned anywhere else in this notebook, so this cell raised
# NameError on a fresh kernel. Build it here from the id listed for Aaron
# Rodgers in the roster data (RodgAa00).
# NOTE(review): presumably this mirrors the original, now-missing cell — confirm.
rodgers = scrape_qb("RodgAa00")
rodgers

In [17]:
# Distinct (pfr_id, full_name, position) combinations among the quarterbacks;
# this is the data written to the `players` table below.
qb_meta = qbs.loc[:, ["pfr_id", "full_name", "position"]].drop_duplicates()
qb_meta

Unnamed: 0,pfr_id,full_name,position
1,RivePh00,Philip Rivers,QB
2,RodgAa00,Aaron Rodgers,QB
7,FlacJo00,Joe Flacco,QB
9,JohnJo05,Josh Johnson,QB
10,StafMa00,Matthew Stafford,QB
...,...,...,...
3033,MilrJa00,Jalen Milroe,QB
3035,WardCa00,Cam Ward,QB
3045,DartJa00,Jaxson Dart,QB
3052,GabrDi00,Dillon Gabriel,QB


In [18]:
import sqlite3

In [19]:
# Connection and cursor on the SQLite file used by the table-creation and
# insert cells that follow. `get_random_qb_career` opens its own connection to
# the same file rather than reusing this one.
conn = sqlite3.connect("football_wordle.db")
cur = conn.cursor()

In [10]:
# One row per player. `id` is the surrogate key that passing_seasons.player_id
# refers to. `pfr_id` is UNIQUE, which is what lets the INSERT OR IGNORE in the
# next cell skip players that are already stored. IF NOT EXISTS makes the cell
# safe to re-run.
cur.execute("""
CREATE TABLE IF NOT EXISTS players (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    pfr_id TEXT UNIQUE,
    name TEXT,
    position TEXT
)
""")

<sqlite3.Cursor at 0x16aeb6340>

In [11]:
# Players without a PFR id are left out: they cannot be scraped (the id list
# used for scraping drops them too), and because SQLite's UNIQUE constraint
# treats NULLs as distinct, INSERT OR IGNORE would add them again on every run.
player_rows = (
    qb_meta.dropna(subset=["pfr_id"])
    .loc[:, ["pfr_id", "full_name", "position"]]
    .itertuples(index=False, name=None)
)

# pfr_id is UNIQUE in `players`, so OR IGNORE turns a re-run of this cell into
# a no-op for players that are already stored.
cur.executemany(
    """
    INSERT OR IGNORE INTO players (pfr_id, name, position)
    VALUES (?, ?, ?)
    """,
    player_rows,
)

conn.commit()

In [12]:
# One row per season row scraped for a player; the insert loop further down
# shows which scraped column feeds which field. `player_id` references
# players(id). The schema has no uniqueness constraint on the season data, so
# protection against storing the same season twice has to come from the code
# that inserts.
cur.execute("""
CREATE TABLE IF NOT EXISTS passing_seasons (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    player_id INTEGER,
    season INTEGER,
    age INTEGER,
    team TEXT,
    games INTEGER,
    games_started INTEGER,
    completions INTEGER,
    attempts INTEGER,
    completion_pct REAL,
    yards INTEGER,
    touchdowns INTEGER,
    interceptions INTEGER,
    yards_per_attempt REAL,
    passer_rating REAL,
    qbr REAL,
    av REAL,
    awards TEXT,
    FOREIGN KEY(player_id) REFERENCES players(id)
)
""")

<sqlite3.Cursor at 0x16aeb6340>

In [None]:
SCRAPE_DELAY_SECONDS = 5  # pause after every request to avoid rate limiting

# Scraped columns in the order of the INSERT column list below (after player_id).
SEASON_SOURCE_COLUMNS = [
    "Season", "Age", "Team", "G", "GS",
    "Cmp", "Att", "Cmp%", "Yds",
    "TD", "Int", "Y/A",
    "Rate", "QBR", "AV", "Awards",
]


def _sql_value(value):
    """Convert a pandas/NumPy scalar into a value sqlite3 can bind (missing -> None)."""
    if pd.isna(value):
        return None
    # NumPy scalars expose .item(), which returns the plain Python equivalent.
    return value.item() if hasattr(value, "item") else value


# dict.fromkeys drops repeated ids while keeping their original order.
for pfr_id in dict.fromkeys(active_qb_ids):
    cur.execute(
        "SELECT id FROM players WHERE pfr_id = ?",
        (pfr_id,)
    )
    player_row = cur.fetchone()
    if player_row is None:
        # Previously this surfaced as a TypeError from indexing None.
        print(f"No players row for {pfr_id}; skipping")
        continue
    player_id = player_row[0]

    # Makes the cell resumable and re-runnable: a player whose seasons are
    # already stored is not requested or inserted again. Delete that player's
    # passing_seasons rows first if a refresh is wanted.
    cur.execute(
        "SELECT 1 FROM passing_seasons WHERE player_id = ? LIMIT 1",
        (player_id,)
    )
    if cur.fetchone() is not None:
        continue

    seasons = scrape_qb(pfr_id)
    # Pause after every request, including the ones that yield nothing, so a
    # run of failures does not turn into back-to-back requests.
    time.sleep(SCRAPE_DELAY_SECONDS)
    if seasons is None or seasons.empty:
        continue

    rows = [
        (player_id, *(_sql_value(value) for value in record))
        for record in seasons[SEASON_SOURCE_COLUMNS].itertuples(index=False, name=None)
    ]

    cur.executemany(
        """
        INSERT INTO passing_seasons (
            player_id, season, age, team, games, games_started,
            completions, attempts, completion_pct, yards,
            touchdowns, interceptions, yards_per_attempt,
            passer_rating, qbr, av, awards
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        rows
    )

    # Commit per player so an interrupted run keeps everything stored so far.
    conn.commit()
    print(f"Inserted {len(rows)} seasons for {pfr_id}")


Inserted 22 seasons for RivePh00
Inserted 21 seasons for RodgAa00
Inserted 20 seasons for FlacJo00
Inserted 14 seasons for JohnJo05
Inserted 17 seasons for StafMa00
Inserted 15 seasons for DaltAn00
Inserted 15 seasons for TaylTy00
Inserted 11 seasons for KeenCa00
Inserted 14 seasons for WilsRu00
Inserted 14 seasons for CousKi00
Inserted 12 seasons for SmitGe00
Inserted 11 seasons for BridTe00
Inserted 11 seasons for CarrDe02
Inserted 12 seasons for GaroJi00
Inserted 11 seasons for WinsJa00
Inserted 7 seasons for HeinTa00
Inserted 7 seasons for SiemTr00
Inserted 11 seasons for MariMa01
Inserted 5 seasons for AlleBr00
Inserted 7 seasons for DrisJe00
Inserted 10 seasons for WentCa00
Inserted 10 seasons for PresDa01
Inserted 10 seasons for GoffJa00
Inserted 10 seasons for BrisJa00
Inserted 8 seasons for MullNi00
Inserted 8 seasons for HillTa00
Inserted 7 seasons for WatsDe00
Inserted 8 seasons for RushCo00
Inserted 9 seasons for TrubMi00
Inserted 9 seasons for MahoPa00
Inserted 6 seasons f

In [20]:
def get_random_qb_career(db_path="football_wordle.db"):
    """Pick a random player that has stored passing seasons and return the career.

    Parameters
    ----------
    db_path : str, optional
        SQLite file to read from. Defaults to the database file this notebook
        writes to.

    Returns
    -------
    tuple
        ``(name, seasons)``: the player's ``name`` from the ``players`` table
        and a DataFrame with all of that player's ``passing_seasons`` rows,
        ordered by ``season``.

    Raises
    ------
    IndexError
        If no player in the database has any passing season.
    """
    conn = sqlite3.connect(db_path)
    try:
        # The join restricts the draw to players with at least one season row.
        candidates = pd.read_sql("""
            SELECT DISTINCT p.id, p.name
            FROM players p
            JOIN passing_seasons ps
            ON p.id = ps.player_id
            ORDER BY RANDOM()
            LIMIT 1;
        """, conn)

        if candidates.empty:
            # Same exception type the bare ``.iloc[0]`` used to raise, but with
            # a message that says what is actually wrong.
            raise IndexError(
                f"no player with passing seasons found in {db_path!r}"
            )
        player = candidates.iloc[0]

        seasons = pd.read_sql("""
            SELECT *
            FROM passing_seasons
            WHERE player_id = ?
            ORDER BY season
        """, conn, params=(int(player["id"]),))
    finally:
        # Close the connection even when one of the queries raises.
        conn.close()

    return player["name"], seasons


In [21]:
name, df = get_random_qb_career()
name

'Will Levis'

In [22]:
df

Unnamed: 0,id,player_id,season,age,team,games,games_started,completions,attempts,completion_pct,yards,touchdowns,interceptions,yards_per_attempt,passer_rating,qbr,av,awards
0,576,89,2023,24,TEN,9,9,149,255,58.4,1808,8,4,7.1,84.2,34.0,6.0,
1,577,89,2024,25,TEN,12,12,190,301,63.1,2091,13,12,6.9,81.4,25.2,5.0,


In [23]:
def qb_career_to_payload():
    """Return a random quarterback's career as a plain dictionary.

    Returns
    -------
    dict
        ``{"player_name": <name>, "seasons": [<one dict per season row>, ...]}``
        built from ``get_random_qb_career()``. Missing values are always
        emitted as ``None``: a column that mixes numbers and NULLs comes back
        from SQL as floats with NaN, and NaN is not valid JSON.
    """
    name, df = get_random_qb_career()
    records = df.to_dict(orient="records")
    return {
        "player_name": name,
        "seasons": [
            {
                column: (None if pd.isna(value) else value)
                for column, value in record.items()
            }
            for record in records
        ],
    }

In [24]:
qb_career_to_payload()

{'player_name': 'Sean Clifford',
 'seasons': [{'id': 557,
   'player_id': 77,
   'season': 2023,
   'age': 25,
   'team': 'GNB',
   'games': 2,
   'games_started': 0,
   'completions': 1,
   'attempts': 1,
   'completion_pct': 100.0,
   'yards': 37,
   'touchdowns': 0,
   'interceptions': 0,
   'yards_per_attempt': 37.0,
   'passer_rating': 118.7,
   'qbr': 100.0,
   'av': None,
   'awards': None}]}