In [1]:
import pandas as pd
import numpy as np
import requests
import re
import json
from bs4 import BeautifulSoup

In [2]:
position_enumerate = {
    'QB': 1,
    'RB': 2,
    'WR': 3,
    'TE': 4,
    'DL': 11,
    'LB': 12,
    'DB': 13,
    'K': 7
}
offense_stat_columns = [
    'Passing_Comp',
    'Passing_Yds',
    'Passing_TD',
    'Passing_Int',
    'Rushing_Yds',
    'Rushing_TD',
    'Receiving_Rec',
    'Receiving_Yds',
    'Receiving_TD',
    'Return_Yds',
    'Return_TD',
    'Misc_FumTD',
    'Misc_2PT',
    'Fumble_Lost',
    'Fumble_Fum',
    'Fantasy_Points'
]
defense_stat_columns = [
    'Tackles_Tot',
    'Tackles_Ast',
    'Tackles_Sck',
    'Tackles_TFL',
    'Turnover_Int',
    'Turnover_Frc Fum',
    'Turnover_Fum Rec',
    'Score_Int TD',
    'Score_FumTD',
    'Score_Blk TD',
    'Score_Saf',
    'Score_Def 2pt Ret',
    'Other_Blk',
    'Other_PDef',
    'Other_QB Hit',
    'Return_Int Yds',
    'Fantasy_Points'
]
kicker_stat_columns = [
    'PAT_Made',
    'PAT_Miss',
    'FG Made_0-19',
    'FG Made_20-29',
    'FG Made_30-39',
    'FG Made_40-49',
    'FG Made_50+',
    'FG Miss_0-19',
    'FG Miss_20-29',
    'FG Miss_30-39',
    'FG Miss_40-49',
    'Fantasy_Points'
]
stat_columns = list(set(offense_stat_columns + defense_stat_columns + kicker_stat_columns))

In [5]:
def get_soup(offset=1, position=0, stat_type='seasonStats', stat_season=2023, stat_week=1):
    """_summary_

    Args:
        offset (int, optional): the pagination of the tables, entries per page by default 25. Defaults to 1.
        position (int, optional): The enumerate of the position. Defaults to 0 = all offense players
            'QB': 1,
            'RB': 2,
            'WR': 3,
            'TE': 4,
            'DL': 11,
            'LB': 12,
            'DB': 13,
            'K': 7
        stat_type (str, optional): distinction between weekly stats [weekStats] and season [seasonStats]. Defaults to 'seasonStats'.
        stat_season (int, optional): The season to scrape. Defaults to 2023.
        stat_week (int, optional): The week of the week. Has only effect if stat_type set to [weekStats]. Defaults to 1.

    Returns:
        Beautifulsoup: the Beautifulsoup scraped object (html)  
    """
    url = 'https://fantasy.nfl.com/league/3967921/players'
    params = {
        'offset': offset,
        'playerStatus': 'all',
        'position': position,
        'statCategory': 'stats',
        'statType': stat_type,
        'statSeason': stat_season,
        'statWeek': stat_week,
        'sort': 'pts',
    }
    header = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest"
    }
    r = requests.get(url, headers=header, params=params)
    print(f"{r} - Scraped {url}")
    soup = BeautifulSoup(r.text, 'html.parser')
    return soup

In [7]:
def extract_player_stats_table(soup):
    # search for <div class="tableWrap">:
    div_table = soup.find('div', {'class': 'tableWrap'})
    if div_table:
        scrape_res = pd.read_html(str(div_table))
        df = scrape_res[0]
        df = clean_player_stats_table(df)
        return df
    else:
        return None

def clean_player_stats_table(df):
    rename_columns = {
        'Unnamed: 0_level_0_Action': 'Action',
        'Unnamed: 1_level_0_Player': 'Player',
        'Unnamed: 2_level_0_Opp': 'Opp',
        'Unnamed: 3_level_0_Manager': 'Manager'
    }
    df.columns = ['_'.join(col) for col in df.columns.values]
    df.rename(columns=rename_columns, inplace=True)
    df.drop(['Action'], axis=1, inplace=True)
    player_col_split = df['Player'].str.split('-', expand=True)    
    player_name_split = player_col_split[0].str.strip().str.rsplit(' ', n=1, expand=True)
    player_team_split = player_col_split[1].str.strip().str.split(' ', n=1, expand=True)
    player_col_id = df.columns.get_loc('Player')
    df.insert(loc = player_col_id+1, column = 'Name', value=player_name_split[0])
    df.insert(loc = player_col_id+2, column = 'Position', value=player_name_split[1])
    df.insert(loc = player_col_id+3, column = 'Team', value=player_team_split[0])
    for c in [col for col in stat_columns if col in df.columns]:
        df[c] = pd.to_numeric(df[c].replace('-', np.nan))
    return df
    
soup = get_soup(offset=1, position=position_enumerate['QB'], stat_type='weekStats', stat_season=2023, stat_week=1)  
data = extract_player_stats_table(soup)
data.head()

<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players


Unnamed: 0,Player,Name,Position,Team,Opp,Manager,Passing_Comp,Passing_Yds,Passing_TD,Passing_Int,...,Receiving_Rec,Receiving_Yds,Receiving_TD,Return_Yds,Return_TD,Misc_FumTD,Misc_2PT,Fumble_Lost,Fumble_Fum,Fantasy_Points
0,Tua Tagovailoa QB - MIA,Tua Tagovailoa,QB,MIA,@LAC,Alta Villa Cardinals,28,466,3.0,1.0,...,,,,,,,,1.0,2.0,98.6
1,Mac Jones QB - NE,Mac Jones,QB,NE,PHI,FA,35,316,3.0,1.0,...,,,,,,,,,,91.6
2,Matthew Stafford QB - LA,Matthew Stafford,QB,LA,@SEA,Magic Eulers,24,334,,,...,,,,,,,,,,69.6
3,Kirk Cousins QB - MIN,Kirk Cousins,QB,MIN,TB,FlyRamsFly,33,344,2.0,1.0,...,,,,,,,,2.0,2.0,66.8
4,Derek Carr QB - NO Q,Derek Carr,QB,NO,TEN,FA,23,305,1.0,1.0,...,,,,,,,,,,64.3


In [8]:
stat_season=2023
stat_week=4
stat_type='weekStats'
for position, enum in position_enumerate.items():
    print(f"Scrape {position}")
    df_list = list()
    offset = 1
    while offset > 0:
        soup = get_soup(offset=offset, position=position_enumerate[position], stat_type=stat_type, stat_season=stat_season, stat_week=stat_week)
        df = extract_player_stats_table(soup)
        if df is None:
            offset = 0
        else:
            offset += 26
            df_list.append(df)
    data = pd.concat(df_list)
    data.to_csv(f'./scraped_season_{stat_season}/season_{stat_season}_week_{str(stat_week).zfill(2)}_position_{position}.csv', index=False)
        
    

Scrape QB
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
Scrape RB
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> -

In [37]:
stat_season=2022
stat_type='weekStats'
for stat_week in range(19):
    for position, enum in position_enumerate.items():
        print(f"Scrape week {stat_week} - position {position}")
        df_list = list()
        offset = 1
        while offset > 0:
            soup = get_soup(offset=offset, position=position_enumerate[position], stat_type=stat_type, stat_season=stat_season, stat_week=stat_week)
            df = extract_player_stats_table(soup)
            if df is None:
                offset = 0
            else:
                offset += 26
                df_list.append(df)
        data = pd.concat(df_list)
        data.to_csv(f'./scraped_season_{stat_season}/season_{stat_season}_week_{str(stat_week).zfill(2)}_position_{position}.csv', index=False)

Scrape week 0 - position QB
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
Scrape week 0 - position RB
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/league/3967921/players
<Response [200]> - Scraped https://fantasy.nfl.com/leagu