In [None]:
import os
from io import StringIO
from pathlib import Path

import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

from draft_optimizer.src.utils import DATA_DIR

# Season being processed and the directory that holds its NFL data
year = 2022
NFL_DIR = os.path.join(DATA_DIR, "nfl", f"{year}")

In [None]:
# Specify URL templates
# All three projection pages share one URL and differ only in the `position`
# query value ("O" for offense, "7" for kickers, "8" for defenses). The season is
# filled in from `year` here so the URLs always match the season used for NFL_DIR.
# The doubled braces survive this first .format() call as {offset} and {week},
# which are filled in later, once per page request.
_PROJECTIONS_URL = (
    "https://fantasy.nfl.com/research/projections"
    "?offset={{offset}}&position={position}&sort=projectedPts"
    "&statCategory=projectedStats&statSeason={season}"
    "&statType=weekProjectedStats&statWeek={{week}}"
)
TEMPLATE_OFFENSE = _PROJECTIONS_URL.format(position="O", season=year)
TEMPLATE_KICKER = _PROJECTIONS_URL.format(position="7", season=year)
TEMPLATE_DEFENSE = _PROJECTIONS_URL.format(position="8", season=year)

In [None]:
# Get driver
chrome_options = Options()
# Raw string so the backslashes in the Windows path are never read as escapes.
# NOTE(review): this is a machine-specific absolute path; adjust for other setups.
chrome_options.binary_location = r"C:\Program Files\Google\Chrome Beta\Application\chrome.exe"
# Ask for headless mode with a command-line switch; the `Options.headless`
# attribute was deprecated and later removed in Selenium 4, where assigning it
# has no effect.
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(options=chrome_options)

# Loop over position groups and weeks, collecting one DataFrame per (position, week)
weekly_frames = []
try:
    for template in [TEMPLATE_OFFENSE, TEMPLATE_KICKER, TEMPLATE_DEFENSE]:
        # The character right after "position=" is the site's position code
        position = template.split("position=")[1][0]
        print(f"Position: {position}")
        for week in range(1, 19):  # 18 weeks
            print(f"   Week: {week}")

            # Pages scraped for this position/week
            pages = []

            # Loop until no players remain
            offset = 1
            while True:
                print(f"      Offset: {offset}")

                # Get HTML
                url = template.format(week=week, offset=offset)
                driver.get(url)
                page_html = driver.page_source

                # Parse the first table on the page; wrap the markup in StringIO
                # because passing literal HTML to read_html is deprecated.
                try:
                    df = pd.read_html(StringIO(page_html))[0]
                except ValueError:
                    break  # no more tables
                if df.empty:
                    break  # a table without rows would never advance the offset

                # Flatten the two-level header: keep "<group> <stat>" for named
                # groups and just "<stat>" where the top level is an "Unnamed" filler.
                df.columns = [f"{c[0]} {c[1]}" if "Unnamed" not in c[0] else c[1] for c in df.columns]
                df = df.rename({"Team": "Player"}, axis=1)  # for DEF
                pages.append(df)

                # Increment offset by the number of rows just read
                offset += df.shape[0]

            # Nothing was returned for this position/week: skip it rather than
            # letting pd.concat fail on an empty list.
            if not pages:
                continue

            # Concat and append
            week_df = pd.concat(pages, axis=0, ignore_index=True)
            week_df["Week"] = week
            weekly_frames.append(week_df)
finally:
    # Clean up driver even when scraping fails part-way through
    driver.quit()

# Concat (raises ValueError if nothing at all was scraped)
raw = pd.concat(weekly_frames, axis=0, ignore_index=True)

In [None]:
# Copy data so the scraped frame stays untouched and this cell can be re-run
data = raw.copy()

# Get player, position, and team
# Split once from the right on " - ": the left part holds "<name> <position>",
# the right part (when present) holds the team plus optional status text.
# `n` must be passed by keyword; it is keyword-only in pandas 2.x.
player_pos_team_extra = data["Player"].str.rsplit(" - ", n=1)
player_pos = player_pos_team_extra.str[0].str.extract(r"([\w\. \'\-]+)((?:QB)|(?:RB)|(?:WR)|(?:TE)|(?:DEF)|(?:K))")
player, pos = player_pos[0], player_pos[1]
team = player_pos_team_extra.str[1]
# Strip status markers and link text that trail the team; these are literal
# substrings, so regex matching is switched off explicitly.
for to_replace in [" Q", " SUS", " IR", " PUP", " View News"]:
    team = team.str.replace(to_replace, "", regex=False)
# A bare "R" left in the team slot is treated as "no team"
team = team.mask(team == "R", np.nan)

# Update data
data["Player"] = player.str.strip()
data["Position"] = pos.str.strip()
data["Team"] = team.str.strip()

# Fix dtypes
# Move the identifying columns into the index so only the remaining columns are
# converted: "-" placeholders become 0 and everything is cast to float.
data = data.set_index(["Player", "Position", "Week", "Team", "Opp"])
data = data.replace("-", 0).astype(float)
data = data.reset_index()

print(data.shape)
data.head()

In [None]:
# Save
# Create the season directory if needed (a no-op when it already exists),
# then write the cleaned projections without the row index.
out_dir = Path(NFL_DIR)
out_dir.mkdir(parents=True, exist_ok=True)
data.to_csv(out_dir / "weekly_proj.csv", index=False)