In [2]:
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

In [5]:
import pandas as pd
from src.config import RAW_DIR, PROCESSED_DIR

pbp = pd.read_parquet(RAW_DIR / "pbp_2018_2024.parquet")
ftn = pd.read_parquet(RAW_DIR / "ftn_2022_2024.parquet")
player_stats = pd.read_parquet(PROCESSED_DIR / "player_stats_2018_2024.parquet")
depth_charts = pd.read_parquet(RAW_DIR / "depth_charts_2018_2024.parquet")
injuries = pd.read_parquet(RAW_DIR / "injuries_2018_2024.parquet")
rosters = pd.read_parquet(RAW_DIR / "rosters_2018_2024.parquet")
schedules = pd.read_parquet(RAW_DIR / "schedules.parquet")
snaps = pd.read_parquet(RAW_DIR / "snaps_2018_2024.parquet")
trades = pd.read_parquet(RAW_DIR / "trades_2018_2024.parquet")
player_profiles = pd.read_parquet(PROCESSED_DIR / "player_profiles.parquet")

In [6]:
player_stats.columns.to_list

<bound method IndexOpsMixin.tolist of Index(['gsis_id', 'player_name', 'player_display_name', 'position',
       'position_group', 'headshot_url', 'recent_team', 'season', 'week',
       'season_type', 'opponent_team', 'completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
       'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles', 'rushing_fumbles_lost',
       'rushing_first_downs', 'rushing_epa', 'rushing_2pt_conversions',
       'receptions', 'targets', 'receiving_yards', 'receiving_tds',
       'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
       'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share',
       'wopr', 

In [7]:
def generate_combined_weekly_snippets(player_stats, depth_charts, snaps):

    stats_snippets = []

    depth_dict = {(row["gsis_id"], row["season"], row["week"]): row for _, row in depth_charts.iterrows()}
    snap_dict = {(row["pfr_player_id"], row["season"], row["week"]): row for _, row in snaps.iterrows()}
    for _, row in player_stats.iterrows():
        player_name = row.get("player_display_name", "Unknown")
        position = row.get("position", "Unknown")
        team = row.get("recent_team", "Unknown")
        week = row.get("week")
        season = row.get("season")
        opponent = row.get("opponent_team", "Unknown")
        player_id = row.get("gsis_id", None)
        pfr_id = row.get("pfr_id", None)

        parts = [f"{player_name} ({position}, {team}), Week {week}, {season} vs. {opponent}:"]

        if row.get("attempts", 0) > 0:
            parts.append(f"Passing: {row["completions"]}/{row["attempts"]}, {row["passing_yards"]} yards, {row["passing_tds"]} TDs, {row["interceptions"]} INTs, {row["passing_epa"]} EPA)")
        if row.get("carries", 0) > 0:
            parts.append(f"Rushing: {row["carries"]} carries, {row["rushing_yards"]} yards, {row["rushing_fumbles"]} fumbles, {row["rushing_tds"]} TDs, {row["rushing_epa"]} EPA")
        if row.get("targets", 0) > 0:
            parts.append(f"Receiving: {row["targets"]} targets, {row["receptions"]} receptions, {row["receiving_yards"]} yards, {row["receiving_fumbles"]} fumbles, {row["receiving_tds"]} TDs, {row["receiving_yards_after_catch"]} yards after catch, {row["receiving_epa"]} EPA, {row["target_share"]} target share)")
        if "fantasy_points_ppr" in row:
            parts.append(f"Fantasy (PPR): {row["fantasy_points_ppr"]:.1f} pts")

        depth_row = depth_dict.get((player_id, season, week))
        if depth_row is not None:
            position_depth = depth_row.get("depth_team", "N/A")
            parts.append(f"Depth Chart: {position_depth}")

        snap_row = snap_dict.get((pfr_id, season, week))
        if snap_row is not None:
            off_snaps = snap_row.get("offense_snaps", 0)
            off_snap_pct = snap_row.get("offense_pct", 0)
            def_snaps = snap_row.get("defense_snaps", 0)
            def_snap_pct = snap_row.get("defense_pct", 0)
            st_snaps = snap_row.get("st_snaps", 0)
            st_snap_pct = snap_row.get("st_pct", 0)
            parts.append(f"{off_snaps} offensive snaps, {off_snap_pct} offensive snap %, {def_snaps} defensive snaps, {def_snap_pct} defensive snap %, {st_snaps} special teams snaps, {st_snap_pct} special teams snap %")
        
        snippet = " | ".join(parts)
        stats_snippets.append(snippet)

    return stats_snippets

In [8]:
combined_snippets = generate_combined_weekly_snippets(player_stats=player_stats, depth_charts=depth_charts, snaps=snaps)

In [10]:
from src.config import KNOWLEDGE_DIR

with open(KNOWLEDGE_DIR / "player_weekly_comnined_snippets.txt", "w") as f:
    for snippet in combined_snippets:
        f.write(snippet + "\n")