# In [None]:
import pandas as pd
import requests
import os
from io import StringIO
from datetime import datetime

# The CSV files written at the end of this script use relative names, so
# switch into the folder that should receive them before doing anything else.
DATA_DIR = r"C:\Users\ryanb\OneDrive\Desktop\School\Projects\fantasy point predictor\data"
os.chdir(DATA_DIR)

# Running table that collects the rows of every position together
merged_df = pd.DataFrame()

# One running table per position, keyed by the lowercase position code.
# Each key gets its own, initially empty, DataFrame.
position_dfs = {
    code: pd.DataFrame()
    for code in ('qb', 'rb', 'wr', 'te', 'k', 'dst')
}

# Maps a full NFL team name to its short abbreviation. The DST rows carry a
# team name instead of a player, and this lookup turns that name into the
# value stored in the `Team` column; a name missing from here becomes NaN.
team_abbreviations = {
    "Arizona Cardinals": "ARI",
    "Atlanta Falcons": "ATL",
    "Baltimore Ravens": "BAL",
    "Buffalo Bills": "BUF",
    "Carolina Panthers": "CAR",
    "Chicago Bears": "CHI",
    "Cincinnati Bengals": "CIN",
    "Cleveland Browns": "CLE",
    "Dallas Cowboys": "DAL",
    "Denver Broncos": "DEN",
    "Detroit Lions": "DET",
    "Green Bay Packers": "GB",
    "Houston Texans": "HOU",
    "Indianapolis Colts": "IND",
    "Jacksonville Jaguars": "JAX",
    "Kansas City Chiefs": "KC",
    "Las Vegas Raiders": "LV",
    "Los Angeles Chargers": "LAC",
    "Los Angeles Rams": "LAR",
    "Miami Dolphins": "MIA",
    "Minnesota Vikings": "MIN",
    "New England Patriots": "NE",
    "New Orleans Saints": "NO",
    "New York Giants": "NYG",
    "New York Jets": "NYJ",
    "Philadelphia Eagles": "PHI",
    "Pittsburgh Steelers": "PIT",
    "San Francisco 49ers": "SF",
    "Seattle Seahawks": "SEA",
    "Tampa Bay Buccaneers": "TB",
    "Tennessee Titans": "TEN",
    "Washington Commanders": "WAS",
    # Franchise name used during the 2021 season, the first year scraped.
    # NOTE(review): whether the source site still shows this name on its
    # historical pages is unverified; the alias is harmless if it does not.
    "Washington Football Team": "WAS",
}

# Seconds to wait for the server on each request. Without a timeout a
# stalled connection would block the scrape indefinitely.
REQUEST_TIMEOUT = 30

# Positions whose projection URL carries the "scoring=PPR" query parameter.
PPR_POSITIONS = ('rb', 'wr', 'te')

# Scraped tables are collected in plain lists and concatenated once after the
# loop. Concatenating onto a growing DataFrame on every iteration re-copies
# all previously collected rows each time.
scraped_by_position = {code: [] for code in ('qb', 'rb', 'wr', 'te', 'k', 'dst')}
scraped_all = []

# One Session is shared by every request so connections can be reused.
with requests.Session() as session:
    # range() excludes its upper bound, so the current calendar year is
    # never requested.
    for year in range(2021, datetime.now().year):
        for week in range(1, 19):
            for position in scraped_by_position:
                scoring = "&scoring=PPR" if position in PPR_POSITIONS else ""
                url = (
                    "https://www.fantasypros.com/nfl/projections/"
                    f"{position}.php?week={week}{scoring}&year={year}"
                )

                # A single GET both checks that the page exists and fetches
                # it, instead of a HEAD probe followed by a second request.
                response = session.get(url, timeout=REQUEST_TIMEOUT)
                if not response.ok:
                    print(f"Skipping {url} as it does not exist.")
                    continue

                try:
                    tables = pd.read_html(StringIO(response.text))
                except ValueError:
                    # read_html raises ValueError when the page holds no table.
                    print(f"Skipping {url} as it contains no table.")
                    continue

                df = tables[0].copy()
                if df.empty:
                    # Nothing to record, and the Player/Team split below
                    # cannot produce two columns from zero rows.
                    print(f"Skipping {url} as its table is empty.")
                    continue

                # Flatten the header: the first column is always named
                # 'Player'; two-level headers become "<top>_<bottom>".
                if isinstance(df.columns, pd.MultiIndex):
                    df.columns = [
                        'Player' if i == 0 else f"{col[0]}_{col[1]}"
                        for i, col in enumerate(df.columns)
                    ]
                    df = df.rename(columns={'MISC_FPTS': 'FPTS'})
                else:
                    df.columns = [
                        'Player' if i == 0 else col
                        for i, col in enumerate(df.columns)
                    ]

                # Add metadata columns
                if position == 'dst':
                    # DST rows hold a team name; look up its abbreviation.
                    df['Team'] = df['Player'].map(team_abbreviations)
                else:
                    # Everything after the last space of the first column is
                    # taken as the team; the rest stays as the player name.
                    df[['Player', 'Team']] = df['Player'].str.rsplit(' ', n=1, expand=True)

                df['Week'] = week
                df['Year'] = year
                df['Position'] = position

                scraped_by_position[position].append(df)
                scraped_all.append(df)

# Fold the collected tables into the cumulative per-position DataFrames.
# Rows already present in a cumulative frame are kept in front of the new ones.
for position, frames in scraped_by_position.items():
    if frames:
        existing = position_dfs[position]
        parts = frames if existing.empty else [existing, *frames]
        position_dfs[position] = pd.concat(parts, ignore_index=True)

# Same for the global merged DataFrame, in the order the pages were scraped.
if scraped_all:
    parts = scraped_all if merged_df.empty else [merged_df, *scraped_all]
    merged_df = pd.concat(parts, ignore_index=True)

# Write one CSV per position, named "<position>_projections.csv",
# without the DataFrame index column
for position in position_dfs:
    df = position_dfs[position]
    csv_name = f"{position}_projections.csv"
    df.to_csv(csv_name, index=False)

# Write the table that combines every position
merged_df.to_csv("full_projections.csv", index=False)

