In [2]:
# League identifier that appears in the fantasy.nfl.com history URLs built by the scraper functions.
leagueID = '3390992'
# A single season, stored as a string in the same form as the entries of the `seasons` lists used in later cells.
season = '2024'

In [79]:
import nest_asyncio
nest_asyncio.apply()
import asyncio
from playwright.async_api import async_playwright
import pandas as pd
import numpy as np
import re

### Create a mapping from team names to their owners

In [11]:
dfs = []
seasons = ['2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024']
for season in seasons:
    name_df = await get_team_names('3390992', season)
    dfs.append(name_df)
names_master = pd.concat(dfs, ignore_index=True)
mapping_dict = dict(zip(names_master['teamName'], names_master['owner']))

### pull standings and season-level stats for all seasons

In [88]:
dfs = []
seasons = ['2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024']
for season in seasons:
    season_df = await pull_season_standings('3390992', season)
    final_standings = await pull_final_standings('3390992', season)
    
    merged = pd.merge(season_df, final_standings, on="teamName", how="inner")
    
    dfs.append(merged)
standings_master = pd.concat(dfs, ignore_index=True)
standings_master['owner'] = standings_master['teamName'].map(mapping_dict)


In [94]:
# Write the combined standings table to standings_master.csv (relative path).
# NOTE(review): `index` is left at the pandas default, so the row index is presumably written
# as an extra leading column — confirm that is intended.
standings_master.to_csv('standings_master.csv')

### pull week-level stats for all seasons

In [471]:
dfs = []
seasons = ['2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024']
for season in seasons:
    team_df = await get_team_schedules('3390992', season)
    dfs.append(team_df)
weekly_master = pd.concat(dfs, ignore_index=True)
weekly_master['owner'] = weekly_master['teamName'].map(mapping_dict)
weekly_master['opponent_owner'] = weekly_master['opponent'].map(mapping_dict)

Future exception was never retrieved
future: <Future finished exception=TargetClosedError('Target page, context or browser has been closed')>
playwright._impl._errors.TargetClosedError: Target page, context or browser has been closed


In [472]:
# Preview the first rows of the weekly table; as the cell's last expression it is displayed by the notebook.
weekly_master.head()

Unnamed: 0,season,teamName,week,team,opponent,result,owner,opponent_owner
0,2015,Cream Team,1,1,Quiggle On Your Face,150.20 - 171.90 Loss,Owen,Kevin
1,2015,Cream Team,2,1,Baby Get In Mah Belly,158.57 - 146.90 Win,Owen,nickolai
2,2015,Cream Team,3,1,Chub Chub,225.87 - 195.07 Win,Owen,John
3,2015,Cream Team,4,1,Mini Me I Love you Mini Me,156.87 - 138.93 Win,Owen,Peter
4,2015,Cream Team,5,1,Tushek,188.23 - 156.13 Win,Owen,zeyad


In [475]:
# Write the combined weekly table to weekly_master.csv (relative path).
# NOTE(review): `index` is left at the pandas default, so the row index is presumably written
# as an extra leading column — confirm that is intended.
weekly_master.to_csv('weekly_master.csv')

In [13]:

async def pull_season_standings(leagueID, season):
    """Scrape the regular-season standings table for one league season.

    Opens the league's history standings page (``historyStandingsType=regular``)
    in headless Chromium, waits for a win-percentage cell to appear, and reads
    one record per ``table tbody tr`` row.

    Args:
        leagueID: League identifier interpolated into the URL.
        season: Season interpolated into the URL; also stored in every record.

    Returns:
        pandas.DataFrame with the columns ``season``, ``regSeasonRank``,
        ``teamName``, ``teamRecord``, ``teamWinPct``, ``teamPts`` and
        ``teamPtsLast``. Each scraped value is the cell's inner text, or None
        when the row has no cell matching that column's selector.
    """
    # Output column -> CSS selector, listed in the order the cells are queried.
    cell_selectors = {
        "regSeasonRank": "td.teamRank.first",
        "teamName": "td.teamImageAndName",
        "teamRecord": "td.teamRecord.numeric",
        "teamWinPct": "td.teamWinPct.numeric",
        # Points cell that does NOT carry the 'last' class.
        "teamPts": "td.teamPts.stat.numeric:not(.last)",
        # Points cell that DOES carry the 'last' class.
        "teamPtsLast": "td.teamPts.stat.numeric.last",
    }
    standings_url = f'https://fantasy.nfl.com/league/{leagueID}/history/{season}/standings?historyStandingsType=regular'

    async with async_playwright() as pw:
        chromium = await pw.chromium.launch(headless=True)
        tab = await chromium.new_page()
        await tab.goto(standings_url)

        # Block until at least one win-percentage cell exists (20 s limit).
        await tab.wait_for_selector("td.teamWinPct.numeric", timeout=20000)

        records = []
        for table_row in await tab.query_selector_all("table tbody tr"):
            record = {"season": season}
            for column, selector in cell_selectors.items():
                cell = await table_row.query_selector(selector)
                if cell:
                    record[column] = await cell.inner_text()
                else:
                    record[column] = None
            records.append(record)

        await chromium.close()
        return pd.DataFrame(records)


In [86]:
async def pull_final_standings(leagueID, season):
    """Scrape the final-standings list for one league season.

    Opens the league's history standings page in headless Chromium and reads
    every ``<li>`` (direct child of a ``<ul>``) whose class attribute starts
    with ``place-``.

    Args:
        leagueID: League identifier interpolated into the URL.
        season: Season interpolated into the URL.

    Returns:
        pandas.DataFrame with the columns ``rank`` and ``teamName``.
        ``rank`` is the integer that directly follows ``place-`` in the class
        attribute (None when no digits follow it); ``teamName`` is the inner
        text of the ``div.value a`` link (None when the link is absent).
        Both columns exist even when no list item matches, so callers can
        still merge on ``teamName``.
    """
    url = f'https://fantasy.nfl.com/league/{leagueID}/history/{season}/standings'

    async with async_playwright() as p:
        # Launch the browser (headless mode)
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()
        # NOTE(review): no explicit wait_for_selector here; the list is
        # assumed to be present once goto() returns — confirm.
        await page.goto(url)

        # Select all `li` elements with class starting with "place-"
        rows = await page.query_selector_all("ul > li[class^='place-']")
        data = []

        for row in rows:
            rank_class = await row.get_attribute("class")
            # Take the digits right after the leading "place-". Splitting on
            # the last "-" instead would pick up the wrong token whenever a
            # later class name also contains a hyphen (e.g. "place-4 foo-bar").
            rank_match = re.match(r"place-(\d+)", rank_class) if rank_class else None
            rank = int(rank_match.group(1)) if rank_match else None

            # Team name is the text of the <a> inside <div class="value">.
            value_link = await row.query_selector("div.value a")
            team_name = await value_link.inner_text() if value_link else None

            data.append({"rank": rank, "teamName": team_name})

        await browser.close()
        # Explicit columns keep the schema stable for an empty result.
        return pd.DataFrame(data, columns=["rank", "teamName"])


In [59]:


async def get_team_schedules(leagueID, season, team_ids=None):
    """Scrape the week-by-week schedule of each team for one league season.

    For every team number, opens that team's schedule page in headless
    Chromium, waits for a week-name cell, reads the team name from the page
    header and then one record per ``table tbody tr`` row.

    Args:
        leagueID: League identifier interpolated into the URL.
        season: Season interpolated into the URL; also stored in every record.
        team_ids: Iterable of team numbers to scrape. Defaults to 1 through 8,
            which is what this function always used before the parameter
            existed.

    Returns:
        pandas.DataFrame with the columns ``season``, ``teamName``, ``week``,
        ``team``, ``opponent`` and ``result``. ``teamName`` is the header text
        for the scraped team, ``team`` is the team number, and ``week``,
        ``opponent`` and ``result`` are the inner text of the row's cells
        (None when a cell or the header is not found). The columns exist even
        when nothing was scraped.
    """
    if team_ids is None:
        team_ids = range(1, 9)

    results = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        for team_num in team_ids:
            url = (f"https://fantasy.nfl.com/league/{leagueID}/history/{season}/schedule?"
                   f"standingsTab=schedule&scheduleType=team&leagueId={leagueID}&scheduleDetail={team_num}")
            page = await browser.new_page()
            await page.goto(url)

            # Wait until the schedule table loads
            await page.wait_for_selector("td.weekName.first", timeout=20000)

            # The scraped team's own name lives in the page header, not in the rows.
            t_name_el = await page.query_selector(f"h2.teamName.teamId-{team_num}")
            t_name = await t_name_el.inner_text() if t_name_el else None

            # One record per row of the schedule table.
            rows = await page.query_selector_all("table tbody tr")
            for row in rows:
                week_el = await row.query_selector("td.weekName.first")
                week_text = await week_el.inner_text() if week_el else None

                # In a team schedule the team cell of a row is stored as the opponent.
                team_el = await row.query_selector("td.teamImageAndName")
                team_text = await team_el.inner_text() if team_el else None

                result_el = await row.query_selector("td.weekTeamMatchupResult")
                result_text = await result_el.inner_text() if result_el else None

                results.append({
                    "season": season,
                    "teamName": t_name,
                    "week": week_text,
                    "team": team_num,
                    "opponent": team_text,
                    "result": result_text
                })
            await page.close()
        await browser.close()
        # Explicit columns keep the schema stable for an empty result.
        return pd.DataFrame(
            results,
            columns=["season", "teamName", "week", "team", "opponent", "result"],
        )




In [9]:
async def get_team_names(leagueID, season):
    """Scrape the owner and team name of every team for one league season.

    Opens the league's history owners page in headless Chromium, waits for an
    owner-name cell to appear, and reads one record per ``table tbody tr`` row.

    Args:
        leagueID: League identifier interpolated into the URL.
        season: Season interpolated into the URL; also stored in every record.

    Returns:
        pandas.DataFrame with the columns ``season``, ``owner`` and
        ``teamName``. ``owner`` and ``teamName`` are the inner text of the
        row's cells, or None when the row has no matching cell.
    """
    # Only the owners page is scraped. (An earlier assignment of the standings
    # URL to the same variable was overwritten before use and has been removed.)
    url = f'https://fantasy.nfl.com/league/{leagueID}/history/{season}/owners'
    async with async_playwright() as p:
        # Launch the browser (headless mode)
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()
        await page.goto(url)

        # Wait until at least one owner-name cell loads (20 s limit).
        await page.wait_for_selector("td.teamOwnerName", timeout=20000)

        rows = await page.query_selector_all("table tbody tr")

        data = []
        for row in rows:
            # A cell that is not found in the row yields None for that field.
            owner_el = await row.query_selector("td.teamOwnerName")
            owner = await owner_el.inner_text() if owner_el else None

            name_el = await row.query_selector("td.teamImageAndName")
            name = await name_el.inner_text() if name_el else None

            data.append({
                "season": season,
                "owner": owner,
                "teamName": name
            })

        await browser.close()
        df = pd.DataFrame(data)
        return df
