In [112]:
import pandas as pd
import itertools
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

# Load environment variables and open the PostgreSQL connection.
load_dotenv()
DATABASE_URL = f"postgresql://{os.getenv('DB_USER')}:{os.getenv('DB_PASSWORD')}@{os.getenv('DB_HOST')}:{os.getenv('DB_PORT')}/{os.getenv('DB_NAME')}"
engine = create_engine(DATABASE_URL)

# Example 32 teams (abbreviations)
teams = ['ARI', 'ATL', 'BAL', 'BUF', 'CAR', 'CHI', 'CIN', 'CLE', 'DAL', 'DEN', 'DET', 'GB', 
         'HOU', 'IND', 'JAX', 'KC', 'LV', 'LAC', 'LAR', 'MIA', 'MIN', 'NE', 'NO', 'NYG', 
         'NYJ', 'PHI', 'PIT', 'SEA', 'SF', 'TB', 'TEN', 'WAS']

# Build a deterministic (not random) mock schedule for 18 weeks.
weeks = list(range(1, 19))
schedule = []

# Mock pairing: the first 16 teams always host and the last 16 always visit.
# Rotating the visitor list by the week number gives each host a different
# opponent from week to week while guaranteeing that every one of the 32 teams
# appears in exactly one game per week. (Indexing into the full 32-team list
# instead would book some teams twice in a week and leave others idle.)
# The rotation has period 16, so weeks 17-18 repeat the pairings of weeks 1-2.
home_teams = teams[:16]
away_teams = teams[16:]

for week in weeks:
    for i, home_team in enumerate(home_teams):
        away_team = away_teams[(i + week) % len(away_teams)]
        schedule.append({
            'season': 2024,
            'week': week,
            'home_team': home_team,
            'away_team': away_team
        })

df = pd.DataFrame(schedule)

# Load the abbreviation -> team_id lookup in this cell so it runs on a fresh
# kernel instead of relying on `teams_df` left behind by another cell.
teams_df = pd.read_sql('SELECT team_id, abbreviation FROM teams', engine)

# Attach the home team's id. Teams missing from the lookup get NaN (left join).
df = (
    df.merge(teams_df, how='left', left_on='home_team', right_on='abbreviation')
      .rename(columns={'team_id': 'home_team_id'})
      .drop(columns=['abbreviation'])
)

# Attach the away team's id the same way.
df = (
    df.merge(teams_df, how='left', left_on='away_team', right_on='abbreviation')
      .rename(columns={'team_id': 'away_team_id'})
      .drop(columns=['abbreviation'])
)

In [130]:
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

# Read DB credentials from the environment (.env) and open the connection.
load_dotenv()
DATABASE_URL = f"postgresql://{os.getenv('DB_USER')}:{os.getenv('DB_PASSWORD')}@{os.getenv('DB_HOST')}:{os.getenv('DB_PORT')}/{os.getenv('DB_NAME')}"
engine = create_engine(DATABASE_URL)

# Raw NFLFastR player-week stats (2023 parquet file).
df = pd.read_parquet("data/raw/nflfastr/player_stats_2023.parquet")

# Lookup used to translate team abbreviations into team_id values.
teams_df = pd.read_sql('SELECT team_id, abbreviation FROM teams', engine)

# Build the ingestion frame in one pass:
#   1. rename the source columns whose names differ from the target names,
#   2. left-join the team lookup on the abbreviation,
#   3. keep only the columns that are ingested,
#   4. drop rows lacking a player_id or team_id (data integrity),
#   5. strip non-alphanumeric characters from player_id and cast both id
#      columns to int.
weekly_stats_df = (
    df.rename(columns={'recent_team': 'team_abbreviation', 'offense_snaps': 'snaps'})
      .merge(teams_df, how='left', left_on='team_abbreviation', right_on='abbreviation')
      .loc[:, ['season', 'week', 'player_id', 'team_id', 'fantasy_points', 'targets', 'carries']]
      .dropna(subset=['player_id', 'team_id'])
      .assign(
          player_id=lambda stats: stats['player_id']
              .str.replace(r'[^a-zA-Z0-9]', '', regex=True)
              .astype(int),
          team_id=lambda stats: stats['team_id'].astype(int),
      )
)

# Write to PostgreSQL; if_exists='replace' drops and recreates the table.
weekly_stats_df.to_sql('weekly_stats', engine, if_exists='replace', index=False)

print("✅ Weekly stats successfully ingested into PostgreSQL.")

✅ Weekly stats successfully ingested into PostgreSQL.


In [128]:
# Re-read the raw 2023 player stats parquet into `df` (rebinding that name)
# so its columns can be inspected.
df = pd.read_parquet("data/raw/nflfastr/player_stats_2023.parquet")

In [129]:
# Display the column labels of `df` as the cell output.
df.columns

Index(['player_id', 'player_name', 'player_display_name', 'position',
       'position_group', 'headshot_url', 'recent_team', 'season', 'week',
       'season_type', 'opponent_team', 'completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
       'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles', 'rushing_fumbles_lost',
       'rushing_first_downs', 'rushing_epa', 'rushing_2pt_conversions',
       'receptions', 'targets', 'receiving_yards', 'receiving_tds',
       'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
       'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share',
       'wopr', 'special_teams_tds', 'fantasy_points

In [None]:
# SQL query to assemble one row per player-week, enriched with player, team,
# opponent, depth-chart and injury information.
#
# The games join is restricted to the game that the player's team for that
# week (ws.team_id) actually took part in. Joining on season/week alone pairs
# every stat row with every game played that week and reports unrelated teams
# as the opponent.
#
# NOTE(review): depth_chart is joined on player_id only; if that table holds
# more than one row per player this join still fans out -- confirm its grain.
query = """
SELECT
    ws.season,
    ws.week,
    p.player_id,
    p.name,
    t.abbreviation AS team,
    p.team_id,
    p.position,
    CASE 
        WHEN ws.team_id = g.home_team_id THEN g.away_team
        ELSE g.home_team
    END AS opponent,
    ws.fantasy_points,
    ws.targets,
    ws.carries,
    d.depth_position,
    i.injury_status
FROM weekly_stats ws
LEFT JOIN players p ON ws.player_id = p.player_id
LEFT JOIN teams t ON p.team_id = t.team_id
LEFT JOIN games g
    ON ws.season = g.season
    AND ws.week = g.week
    AND ws.team_id IN (g.home_team_id, g.away_team_id)
LEFT JOIN depth_chart d ON p.player_id = d.player_id
LEFT JOIN injuries i 
    ON p.player_id = i.player_id 
    AND ws.week = i.week 
    AND ws.season = i.season
ORDER BY ws.season, ws.week, p.name;
"""

# Execute query and export. drop_duplicates() removes fully identical rows
# produced by the joins.
df = pd.read_sql(query, engine).drop_duplicates()

# Make sure the target directory exists so to_csv does not fail on a fresh checkout.
os.makedirs("data/processed", exist_ok=True)
df.to_csv("data/processed/player_weekly_stats.csv", index=False)

print("✅ Exported to data/processed/player_weekly_stats.csv")