In [31]:
import pandas as pd
import ast
import os
from src.data_cleaning import flatten_nested_column
from src.expand_scores import expand_score_list_column

# Season to process -- change as needed. Every Raw/Processed path in the
# cells below is built from this value.
year = 2025

# **RATINGS**

#### **FPI** | **PPA TEAM** | **PREGAME WP** | **SP+**

In [32]:
folder = "ratings"

# Build the raw directory once, matching the raw_path/processed_path layout
# used by the GAMES, RECRUITING and STATS cells, instead of repeating the
# full path in every read_csv call.
raw_path = f"../Data/Raw/{year}/{folder}"

df_fpi = pd.read_csv(f"{raw_path}/fpi.csv")
df_ppa = pd.read_csv(f"{raw_path}/ppa_team.csv")
df_pgw = pd.read_csv(f"{raw_path}/pregame_wp.csv")
df_sp = pd.read_csv(f"{raw_path}/sp.csv")

# Output directory for this section; created only after every raw file has
# been read successfully. exist_ok=True makes re-running the cell safe.
processed_path = f"../Data/Processed/{year}/{folder}"
os.makedirs(processed_path, exist_ok=True)

def flatten_multiple(df, columns):
    """Run ``flatten_nested_column`` over several columns in sequence.

    Args:
        df: The object to flatten; it is handed to ``flatten_nested_column``
            as its first argument.
        columns: Iterable of ``(column, prefix)`` pairs. Each pair triggers
            one ``flatten_nested_column(df, column, prefix=prefix)`` call, in
            the order given.

    Returns:
        The result of the last ``flatten_nested_column`` call, or ``df``
        itself when ``columns`` is empty.
    """
    flattened = df
    for source_col, new_prefix in columns:
        # Each call's output is the next call's input.
        flattened = flatten_nested_column(flattened, source_col, prefix=new_prefix)
    return flattened

# Expand the nested columns of each ratings table. Every pair is
# (source column, prefix forwarded to flatten_nested_column).
df_fpi = flatten_multiple(
    df_fpi,
    [("resumeRanks", "resume"), ("efficiencies", "eff")],
)
df_ppa = flatten_multiple(
    df_ppa,
    [("offense", "off"), ("defense", "def")],
)
df_sp = flatten_multiple(
    df_sp,
    [("offense", "off"), ("defense", "def"), ("specialTeams", "spec")],
)

# Write each table to the processed folder. pregame_wp is not flattened in
# this cell and is saved exactly as it was read.
for frame, stem in (
    (df_fpi, "fpi"),
    (df_ppa, "ppa_team"),
    (df_sp, "sp"),
    (df_pgw, "pregame_wp"),
):
    frame.to_csv(f"{processed_path}/{stem}_cleaned.csv", index=False)

print("✅ All datasets processed and saved to:", processed_path)

✅ All datasets processed and saved to: ../Data/Processed/2025/ratings


# **GAMES**

#### **RECORDS** | **GAMES**

In [33]:
folder = "games"
raw_path = f"../Data/Raw/{year}/{folder}"
processed_path = f"../Data/Processed/{year}/{folder}"
os.makedirs(processed_path, exist_ok=True)

# ---- Games: keep rows where the home or the away side is classified "fbs" ----
df_games = pd.read_csv(f"{raw_path}/games.csv")
home_is_fbs = df_games["homeClassification"] == "fbs"
away_is_fbs = df_games["awayClassification"] == "fbs"
df_games = df_games.loc[home_is_fbs | away_is_fbs].copy()

# Expand the line-score columns, home first and then away.
for side in ("home", "away"):
    df_games = expand_score_list_column(df_games, f"{side}LineScores", prefix=side)

df_games.to_csv(f"{processed_path}/games_cleaned.csv", index=False)

# ---- Records: keep rows classified "fbs", then flatten the nested columns ----
df_records = pd.read_csv(f"{raw_path}/records.csv")
records_is_fbs = df_records["classification"] == "fbs"
df_records = df_records.loc[records_is_fbs].copy()

# (source column, prefix) pairs handed to flatten_multiple, which is defined
# in the RATINGS cell above.
records_columns = [
    ("total", "total"),
    ("conferenceGames", "conference"),
    ("homeGames", "home"),
    ("awayGames", "away"),
    ("neutralSiteGames", "neutralSite"),
    ("regularSeason", "regularSeason"),
    ("postseason", "postseason"),
]
df_records = flatten_multiple(df_records, records_columns)

df_records.to_csv(f"{processed_path}/records_cleaned.csv", index=False)
print("✅ All datasets processed and saved to:", processed_path)

✅ All datasets processed and saved to: ../Data/Processed/2025/games


# **RECRUITING**

#### **PLAYER USAGE** | **RECRUITS** | **RANKINGS** | **RETURNING PRODUCTION** | **TRANSFER PORTAL**

In [34]:
folder = "recruiting"
raw_path = f"../Data/Raw/{year}/{folder}"
# processed_path is built and its directory created once here; the repeated
# assignment/makedirs that used to follow the reads was a copy-paste leftover.
processed_path = f"../Data/Processed/{year}/{folder}"
os.makedirs(processed_path, exist_ok=True)

df_pu = pd.read_csv(f"{raw_path}/player_usage.csv")
df_rec = pd.read_csv(f"{raw_path}/recruits.csv")
df_rr = pd.read_csv(f"{raw_path}/recruiting_rankings.csv")
df_rp = pd.read_csv(f"{raw_path}/returning_production.csv")
df_tp = pd.read_csv(f"{raw_path}/transfer_portal.csv")

# flatten_multiple is defined in the RATINGS cell above (the GAMES cell
# already relies on that definition), so it is not redefined here.
# Each pair is (source column, prefix forwarded to flatten_nested_column).
df_pu = flatten_multiple(df_pu, [
    ("usage", "usage"),
])

df_rec = flatten_multiple(df_rec, [
    ("hometownInfo", "hometownInfo"),
])

df_pu.to_csv(f"{processed_path}/player_usage_cleaned.csv", index=False)
df_rec.to_csv(f"{processed_path}/recruits_cleaned.csv", index=False)

# These three tables are not flattened in this cell; they are written out
# exactly as read, under the *_cleaned naming used for the other outputs.
df_rr.to_csv(f"{processed_path}/recruiting_rankings_cleaned.csv", index=False)
df_rp.to_csv(f"{processed_path}/returning_production_cleaned.csv", index=False)
df_tp.to_csv(f"{processed_path}/transfer_portal_cleaned.csv", index=False)

print("✅ All datasets processed and saved to:", processed_path)

✅ All datasets processed and saved to: ../Data/Processed/2025/recruiting


# **STATS**

#### **ADVANCED SEASON STATS** | **TEAM STATS**

In [35]:
folder = "stats"
raw_path = f"../Data/Raw/{year}/{folder}"
# processed_path is built and its directory created once here; the repeated
# assignment/makedirs that used to follow the reads was a copy-paste leftover.
processed_path = f"../Data/Processed/{year}/{folder}"
os.makedirs(processed_path, exist_ok=True)

df_adv = pd.read_csv(f"{raw_path}/advanced_season_stats.csv")
df_ts = pd.read_csv(f"{raw_path}/team_stats.csv")

# flatten_multiple is defined in the RATINGS cell above (the GAMES cell
# already relies on that definition), so it is not redefined here.
# Each pair is (source column, prefix forwarded to flatten_nested_column).
df_adv = flatten_multiple(df_adv, [
    ("offense", "offense"),
    ("defense", "defense"),
])

df_adv.to_csv(f"{processed_path}/advanced_season_stats_cleaned.csv", index=False)
# team_stats is not flattened in this cell; it is written out exactly as read.
df_ts.to_csv(f"{processed_path}/team_stats_cleaned.csv", index=False)

print("✅ All datasets processed and saved to:", processed_path)

✅ All datasets processed and saved to: ../Data/Processed/2025/stats
